From 0d05000e97d95b9f73d983ae1e20acecf60205bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Louis=20V=C3=A9zina?= <5130500+morpheus65535@users.noreply.github.com> Date: Sat, 6 Apr 2019 08:25:27 -0400 Subject: [PATCH] Initial anticaptcha commit --- bazarr/init.py | 3 + libs/subliminal_patch/providers/addic7ed.py | 91 +++-- .../providers/greeksubtitles.py | 184 ---------- libs/subliminal_patch/providers/subs4free.py | 283 ---------------- .../subliminal_patch/providers/subs4series.py | 272 --------------- libs/subliminal_patch/providers/subssabbz.py | 159 --------- libs/subliminal_patch/providers/subsunacs.py | 161 --------- libs/subliminal_patch/providers/subz.py | 318 ------------------ libs/subliminal_patch/providers/xsubs.py | 302 ----------------- libs/subliminal_patch/refiners/omdb.py | 10 +- 10 files changed, 82 insertions(+), 1701 deletions(-) delete mode 100644 libs/subliminal_patch/providers/greeksubtitles.py delete mode 100644 libs/subliminal_patch/providers/subs4free.py delete mode 100644 libs/subliminal_patch/providers/subs4series.py delete mode 100644 libs/subliminal_patch/providers/subssabbz.py delete mode 100644 libs/subliminal_patch/providers/subsunacs.py delete mode 100644 libs/subliminal_patch/providers/subz.py delete mode 100644 libs/subliminal_patch/providers/xsubs.py diff --git a/bazarr/init.py b/bazarr/init.py index eb3af0ce3..7c13cc24d 100644 --- a/bazarr/init.py +++ b/bazarr/init.py @@ -17,6 +17,9 @@ from get_args import args # set subliminal_patch user agent os.environ["SZ_USER_AGENT"] = "Bazarr/1" +# set anticaptcha account key +os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.general.anticaptcha_key + # Check if args.config_dir exist if not os.path.exists(args.config_dir): # Create config_dir directory tree diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py index 51913d887..086343e98 100644 --- a/libs/subliminal_patch/providers/addic7ed.py +++ b/libs/subliminal_patch/providers/addic7ed.py @@ -1,13 +1,16 @@ # coding=utf-8 import logging import re +import os import datetime import subliminal import time +import requests + from random import randint from dogpile.cache.api import NO_VALUE from requests import Session - +from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \ Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re @@ -15,7 +18,7 @@ from subliminal.cache import region from subliminal.subtitle import fix_line_ending from subliminal_patch.utils import sanitize from subliminal_patch.exceptions import TooManyRequests - +from subliminal_patch.pitcher import pitchers from subzero.language import Language logger = logging.getLogger(__name__) @@ -64,6 +67,7 @@ class Addic7edProvider(_Addic7edProvider): USE_ADDICTED_RANDOM_AGENTS = False hearing_impaired_verifiable = True subtitle_class = Addic7edSubtitle + server_url = 'https://www.addic7ed.com/' sanitize_characters = {'-', ':', '(', ')', '.', '/'} @@ -75,45 +79,90 @@ class Addic7edProvider(_Addic7edProvider): self.session = Session() self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__ - if self.USE_ADDICTED_RANDOM_AGENTS: - from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST - logger.debug("Addic7ed: using random user agents") - self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] - self.session.headers['Referer'] = self.server_url + from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST + logger.debug("Addic7ed: using random user agents") + self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] + self.session.headers['Referer'] = self.server_url # login if self.username and self.password: - ccks = region.get("addic7ed_cookies", expiration_time=86400) + ccks = region.get("addic7ed_data", expiration_time=15552000) # 6m if ccks != NO_VALUE: + cookies, user_agent = ccks + logger.debug("Addic7ed: Re-using previous user agent") + self.session.headers["User-Agent"] = user_agent try: - self.session.cookies._cookies.update(ccks) - r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10) + self.session.cookies._cookies.update(cookies) + r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10, + headers={"Referer": self.server_url}) if r.status_code == 302: logger.info('Addic7ed: Login expired') - region.delete("addic7ed_cookies") + region.delete("addic7ed_data") else: - logger.info('Addic7ed: Reusing old login') + logger.info('Addic7ed: Re-using old login') self.logged_in = True return except: pass logger.info('Addic7ed: Logging in') - data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} - r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10, - headers={"Referer": self.server_url + "login.php"}) + data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '', + 'remember': 'true'} - if "relax, slow down" in r.content: - raise TooManyRequests(self.username) + tries = 0 + while tries < 3: + r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url}) + if "grecaptcha" in r.content: + logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only ' + 'happen once every so often') + anticaptcha_key = os.environ.get("ANTICAPTCHA_ACCOUNT_KEY") + if not anticaptcha_key: + logger.error("AntiCaptcha key not given, exiting") + return - if r.status_code != 302: - raise AuthenticationError(self.username) + anticaptcha_proxy = os.environ.get("ANTICAPTCHA_PROXY") - region.set("addic7ed_cookies", self.session.cookies._cookies) + site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1) + if not site_key: + logger.error("Addic7ed: Captcha site-key not found!") + return + + #pitcher_cls = pitchers.get_pitcher("AntiCaptchaProxyLess") + #pitcher = pitcher_cls("Addic7ed", anticaptcha_key, self.server_url + 'login.php', site_key) + pitcher_cls = pitchers.get_pitcher("AntiCaptchaProxyLess") + pitcher = pitcher_cls("Addic7ed", anticaptcha_key, self.server_url + 'login.php', site_key, + user_agent=self.session.headers["User-Agent"], + cookies=self.session.cookies.get_dict(), + is_invisible=True) + + result = pitcher.throw() + if not result: + raise Exception("Addic7ed: Couldn't solve captcha!") + + data["recaptcha_response"] = result + + r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10, + headers={"Referer": self.server_url + "login.php"}) + + if "relax, slow down" in r.content: + raise TooManyRequests(self.username) + + if r.status_code != 302: + if "User doesn't exist" in r.content and tries <= 2: + logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3) + tries += 1 + continue + + raise AuthenticationError(self.username) + break + + region.set("addic7ed_data", (self.session.cookies._cookies, self.session.headers["User-Agent"])) logger.debug('Addic7ed: Logged in') self.logged_in = True + def terminate(self): + pass @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME) def _get_show_ids(self): @@ -140,7 +189,7 @@ class Addic7edProvider(_Addic7edProvider): # populate the show ids show_ids = {} - for show in soup.select('td.version > h3 > a[href^="/show/"]'): + for show in soup.select('td > h3 > a[href^="/show/"]'): show_clean = sanitize(show.text, default_characters=self.sanitize_characters) try: show_id = int(show['href'][6:]) diff --git a/libs/subliminal_patch/providers/greeksubtitles.py b/libs/subliminal_patch/providers/greeksubtitles.py deleted file mode 100644 index 98dfc289e..000000000 --- a/libs/subliminal_patch/providers/greeksubtitles.py +++ /dev/null @@ -1,184 +0,0 @@ -# -*- coding: utf-8 -*- -import io -import logging -import os -import zipfile - -import rarfile -from subzero.language import Language -from guessit import guessit -from requests import Session -from six import text_type - -from subliminal import __short_version__ -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches -from subliminal.video import Episode, Movie - -logger = logging.getLogger(__name__) - - -class GreekSubtitlesSubtitle(Subtitle): - """GreekSubtitles Subtitle.""" - provider_name = 'greeksubtitles' - - def __init__(self, language, page_link, version, download_link): - super(GreekSubtitlesSubtitle, self).__init__(language, page_link=page_link) - self.version = version - self.download_link = download_link - self.hearing_impaired = None - self.encoding = 'windows-1253' - - @property - def id(self): - return self.download_link - - def get_matches(self, video): - matches = set() - - # episode - if isinstance(video, Episode): - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True) - # movie - elif isinstance(video, Movie): - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True) - - return matches - - -class GreekSubtitlesProvider(Provider): - """GreekSubtitles Provider.""" - languages = {Language(l) for l in ['ell', 'eng']} - server_url = 'http://gr.greek-subtitles.com/' - search_url = 'search.php?name={}' - download_url = 'http://www.greeksubtitles.info/getp.php?id={:d}' - subtitle_class = GreekSubtitlesSubtitle - - def __init__(self): - self.session = None - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__) - - def terminate(self): - self.session.close() - - def query(self, keyword, season=None, episode=None, year=None): - params = keyword - if season and episode: - params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode) - elif year: - params += ' {:4d}'.format(year) - - logger.debug('Searching subtitles %r', params) - subtitles = [] - search_link = self.server_url + text_type(self.search_url).format(params) - while True: - r = self.session.get(search_link, timeout=30) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']) - - # loop over subtitles cells - for cell in soup.select('td.latest_name > a:nth-of-type(1)'): - # read the item - subtitle_id = int(cell['href'].rsplit('/', 2)[1]) - page_link = cell['href'] - language = Language.fromalpha2(cell.parent.find('img')['src'].split('/')[-1].split('.')[0]) - version = cell.text.strip() or None - if version is None: - version = "" - - subtitle = self.subtitle_class(language, page_link, version, self.download_url.format(subtitle_id)) - - logger.debug('Found subtitle %r', subtitle) - subtitles.append(subtitle) - - anchors = soup.select('td a') - next_page_available = False - for anchor in anchors: - if 'Next' in anchor.text and 'search.php' in anchor['href']: - search_link = self.server_url + anchor['href'] - next_page_available = True - break - if not next_page_available: - break - - return subtitles - - def list_subtitles(self, video, languages): - if isinstance(video, Episode): - titles = [video.series] + video.alternative_series - elif isinstance(video, Movie): - titles = [video.title] + video.alternative_titles - else: - titles = [] - - subtitles = [] - # query for subtitles with the show_id - for title in titles: - if isinstance(video, Episode): - subtitles += [s for s in self.query(title, season=video.season, episode=video.episode, - year=video.year) - if s.language in languages] - elif isinstance(video, Movie): - subtitles += [s for s in self.query(title, year=video.year) - if s.language in languages] - - return subtitles - - def download_subtitle(self, subtitle): - if isinstance(subtitle, GreekSubtitlesSubtitle): - # download the subtitle - logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, - timeout=30) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - archive = _get_archive(r.content) - - subtitle_content = _get_subtitle_from_archive(archive) - if subtitle_content: - subtitle.content = fix_line_ending(subtitle_content) - else: - logger.debug('Could not extract subtitle from %r', archive) - - -def _get_archive(content): - # open the archive - archive_stream = io.BytesIO(content) - archive = None - if rarfile.is_rarfile(archive_stream): - logger.debug('Identified rar archive') - archive = rarfile.RarFile(archive_stream) - elif zipfile.is_zipfile(archive_stream): - logger.debug('Identified zip archive') - archive = zipfile.ZipFile(archive_stream) - - return archive - - -def _get_subtitle_from_archive(archive): - for name in archive.namelist(): - # discard hidden files - if os.path.split(name)[-1].startswith('.'): - continue - - # discard non-subtitle files - if not name.lower().endswith(SUBTITLE_EXTENSIONS): - continue - - return archive.read(name) - - return None diff --git a/libs/subliminal_patch/providers/subs4free.py b/libs/subliminal_patch/providers/subs4free.py deleted file mode 100644 index 181b99351..000000000 --- a/libs/subliminal_patch/providers/subs4free.py +++ /dev/null @@ -1,283 +0,0 @@ -# -*- coding: utf-8 -*- -# encoding=utf8 -import io -import logging -import os -import random - -import rarfile -import re -import zipfile - -from subzero.language import Language -from guessit import guessit -from requests import Session -from six import text_type - -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal import __short_version__ -from subliminal.cache import SHOW_EXPIRATION_TIME, region -from subliminal.score import get_equivalent_release_groups -from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches -from subliminal.utils import sanitize, sanitize_release_group -from subliminal.video import Movie - -logger = logging.getLogger(__name__) - -year_re = re.compile(r'^\((\d{4})\)$') - - -class Subs4FreeSubtitle(Subtitle): - """Subs4Free Subtitle.""" - provider_name = 'subs4free' - - def __init__(self, language, page_link, title, year, version, download_link): - super(Subs4FreeSubtitle, self).__init__(language, page_link=page_link) - self.title = title - self.year = year - self.version = version - self.download_link = download_link - self.hearing_impaired = None - self.encoding = 'utf8' - - @property - def id(self): - return self.download_link - - def get_matches(self, video): - matches = set() - - # movie - if isinstance(video, Movie): - # title - if video.title and (sanitize(self.title) in ( - sanitize(name) for name in [video.title] + video.alternative_titles)): - matches.add('title') - # year - if video.year and self.year == video.year: - matches.add('year') - - # release_group - if (video.release_group and self.version and - any(r in sanitize_release_group(self.version) - for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))): - matches.add('release_group') - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True) - - return matches - - -class Subs4FreeProvider(Provider): - """Subs4Free Provider.""" - languages = {Language(l) for l in ['ell', 'eng']} - video_types = (Movie,) - server_url = 'https://www.sf4-industry.com' - download_url = '/getSub.html' - search_url = '/search_report.php?search={}&searchType=1' - subtitle_class = Subs4FreeSubtitle - - def __init__(self): - self.session = None - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__) - - def terminate(self): - self.session.close() - - def get_show_ids(self, title, year=None): - """Get the best matching show id for `series` and `year``. - - First search in the result of :meth:`_get_show_suggestions`. - - :param title: show title. - :param year: year of the show, if any. - :type year: int - :return: the show id, if found. - :rtype: str - - """ - title_sanitized = sanitize(title).lower() - show_ids = self._get_suggestions(title) - - matched_show_ids = [] - for show in show_ids: - show_id = None - show_title = sanitize(show['title']) - # attempt with year - if not show_id and year: - logger.debug('Getting show id with year') - show_id = show['link'].split('?p=')[-1] if show_title == '{title} {year:d}'.format( - title=title_sanitized, year=year) else None - - # attempt clean - if not show_id: - logger.debug('Getting show id') - show_id = show['link'].split('?p=')[-1] if show_title == title_sanitized else None - - if show_id: - matched_show_ids.append(show_id) - - return matched_show_ids - - @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type, - should_cache_fn=lambda value: value) - def _get_suggestions(self, title): - """Search the show or movie id from the `title` and `year`. - - :param str title: title of the show. - :return: the show suggestions found. - :rtype: dict - - """ - # make the search - logger.info('Searching show ids with %r', title) - r = self.session.get(self.server_url + text_type(self.search_url).format(title), - headers={'Referer': self.server_url}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return {} - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - suggestions = [{'link': l.attrs['value'], 'title': l.text} - for l in soup.select('select[name="Mov_sel"] > option[value]')] - logger.debug('Found suggestions: %r', suggestions) - - return suggestions - - def query(self, movie_id, title, year): - # get the season list of the show - logger.info('Getting the subtitle list of show id %s', movie_id) - if movie_id: - page_link = self.server_url + '/' + movie_id - else: - page_link = self.server_url + text_type(self.search_url).format(' '.join([title, str(year)])) - - r = self.session.get(page_link, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['html.parser']) - - year_num = None - year_element = soup.select_one('td#dates_header > table div') - matches = False - if year_element: - matches = year_re.match(str(year_element.contents[2]).strip()) - if matches: - year_num = int(matches.group(1)) - - title_element = soup.select_one('td#dates_header > table u') - show_title = str(title_element.contents[0]).strip() if title_element else None - - subtitles = [] - # loop over episode rows - for subtitle in soup.select('table.table_border div[align="center"] > div'): - # read common info - version = subtitle.find('b').text - download_link = self.server_url + subtitle.find('a')['href'] - language = Language.fromalpha2(subtitle.find('img')['src'].split('/')[-1].split('.')[0]) - - subtitle = self.subtitle_class(language, page_link, show_title, year_num, version, download_link) - - logger.debug('Found subtitle {!r}'.format(subtitle)) - subtitles.append(subtitle) - - return subtitles - - def list_subtitles(self, video, languages): - # lookup show_id - titles = [video.title] + video.alternative_titles if isinstance(video, Movie) else [] - - show_ids = None - for title in titles: - show_ids = self.get_show_ids(title, video.year) - if show_ids and len(show_ids) > 0: - break - - subtitles = [] - # query for subtitles with the show_id - if show_ids and len(show_ids) > 0: - for show_id in show_ids: - subtitles += [s for s in self.query(show_id, video.title, video.year) if s.language in languages] - else: - subtitles += [s for s in self.query(None, video.title, video.year) if s.language in languages] - - return subtitles - - def download_subtitle(self, subtitle): - if isinstance(subtitle, Subs4FreeSubtitle): - # download the subtitle - logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - download_element = soup.select_one('input[name="id"]') - image_element = soup.select_one('input[type="image"]') - subtitle_id = download_element['value'] if download_element else None - width = int(str(image_element['width']).strip('px')) if image_element else 0 - height = int(str(image_element['height']).strip('px')) if image_element else 0 - - if not subtitle_id: - logger.debug('Unable to download subtitle. No download link found') - return - - download_url = self.server_url + self.download_url - r = self.session.post(download_url, data={'utf8': 1, 'id': subtitle_id, 'x': random.randint(0, width), - 'y': random.randint(0, height)}, - headers={'Referer': subtitle.download_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - archive = _get_archive(r.content) - - subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content - - if subtitle_content: - subtitle.content = fix_line_ending(subtitle_content) - else: - logger.debug('Could not extract subtitle from %r', archive) - - -def _get_archive(content): - # open the archive - archive_stream = io.BytesIO(content) - archive = None - if rarfile.is_rarfile(archive_stream): - logger.debug('Identified rar archive') - archive = rarfile.RarFile(archive_stream) - elif zipfile.is_zipfile(archive_stream): - logger.debug('Identified zip archive') - archive = zipfile.ZipFile(archive_stream) - - return archive - - -def _get_subtitle_from_archive(archive): - for name in archive.namelist(): - # discard hidden files - if os.path.split(name)[-1].startswith('.'): - continue - - # discard non-subtitle files - if not name.lower().endswith(SUBTITLE_EXTENSIONS): - continue - - return archive.read(name) - - return None diff --git a/libs/subliminal_patch/providers/subs4series.py b/libs/subliminal_patch/providers/subs4series.py deleted file mode 100644 index 5f381feeb..000000000 --- a/libs/subliminal_patch/providers/subs4series.py +++ /dev/null @@ -1,272 +0,0 @@ -# -*- coding: utf-8 -*- -import io -import logging -import os - -import rarfile -import re -import zipfile - -from subzero.language import Language -from guessit import guessit -from requests import Session -from six import text_type - -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal import __short_version__ -from subliminal.cache import SHOW_EXPIRATION_TIME, region -from subliminal.score import get_equivalent_release_groups -from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches -from subliminal.utils import sanitize, sanitize_release_group -from subliminal.video import Episode - -logger = logging.getLogger(__name__) - -year_re = re.compile(r'^\((\d{4})\)$') - - -class Subs4SeriesSubtitle(Subtitle): - """Subs4Series Subtitle.""" - provider_name = 'subs4series' - - def __init__(self, language, page_link, series, year, version, download_link): - super(Subs4SeriesSubtitle, self).__init__(language, page_link=page_link) - self.series = series - self.year = year - self.version = version - self.download_link = download_link - self.hearing_impaired = None - self.encoding = 'windows-1253' - - @property - def id(self): - return self.download_link - - def get_matches(self, video): - matches = set() - - # episode - if isinstance(video, Episode): - # series name - if video.series and sanitize(self.series) in ( - sanitize(name) for name in [video.series] + video.alternative_series): - matches.add('series') - # year - if video.original_series and self.year is None or video.year and video.year == self.year: - matches.add('year') - - # release_group - if (video.release_group and self.version and - any(r in sanitize_release_group(self.version) - for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))): - matches.add('release_group') - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True) - - return matches - - -class Subs4SeriesProvider(Provider): - """Subs4Series Provider.""" - languages = {Language(l) for l in ['ell', 'eng']} - video_types = (Episode,) - server_url = 'https://www.subs4series.com' - search_url = '/search_report.php?search={}&searchType=1' - episode_link = '/tv-series/{show_id}/season-{season:d}/episode-{episode:d}' - subtitle_class = Subs4SeriesSubtitle - - def __init__(self): - self.session = None - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__) - - def terminate(self): - self.session.close() - - def get_show_ids(self, title, year=None): - """Get the best matching show id for `series` and `year`. - - First search in the result of :meth:`_get_show_suggestions`. - - :param title: show title. - :param year: year of the show, if any. - :type year: int - :return: the show id, if found. - :rtype: str - - """ - title_sanitized = sanitize(title).lower() - show_ids = self._get_suggestions(title) - - matched_show_ids = [] - for show in show_ids: - show_id = None - show_title = sanitize(show['title']) - # attempt with year - if not show_id and year: - logger.debug('Getting show id with year') - show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == '{title} {year:d}'.format( - title=title_sanitized, year=year) else None - - # attempt clean - if not show_id: - logger.debug('Getting show id') - show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == title_sanitized else None - - if show_id: - matched_show_ids.append(show_id) - - return matched_show_ids - - @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type, - should_cache_fn=lambda value: value) - def _get_suggestions(self, title): - """Search the show or movie id from the `title` and `year`. - - :param str title: title of the show. - :return: the show suggestions found. - :rtype: dict - - """ - # make the search - logger.info('Searching show ids with %r', title) - r = self.session.get(self.server_url + text_type(self.search_url).format(title), - headers={'Referer': self.server_url}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return {} - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - series = [{'link': l.attrs['value'], 'title': l.text} - for l in soup.select('select[name="Mov_sel"] > option[value]')] - logger.debug('Found suggestions: %r', series) - - return series - - def query(self, show_id, series, season, episode, title): - # get the season list of the show - logger.info('Getting the subtitle list of show id %s', show_id) - if all((show_id, season, episode)): - page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode) - else: - return [] - - r = self.session.get(page_link, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - - year_num = None - matches = year_re.match(str(soup.select_one('#dates_header_br > table div').contents[2]).strip()) - if matches: - year_num = int(matches.group(1)) - show_title = str(soup.select_one('#dates_header_br > table u').contents[0]).strip() - - subtitles = [] - # loop over episode rows - for subtitle in soup.select('table.table_border div[align="center"] > div'): - # read common info - version = subtitle.find('b').text - download_link = self.server_url + subtitle.find('a')['href'] - language = Language.fromalpha2(subtitle.find('img')['src'].split('/')[-1].split('.')[0]) - - subtitle = self.subtitle_class(language, page_link, show_title, year_num, version, download_link) - - logger.debug('Found subtitle %r', subtitle) - subtitles.append(subtitle) - - return subtitles - - def list_subtitles(self, video, languages): - # lookup show_id - titles = [video.series] + video.alternative_series if isinstance(video, Episode) else [] - - show_ids = None - for title in titles: - show_ids = self.get_show_ids(title, video.year) - if show_ids and len(show_ids) > 0: - break - - subtitles = [] - # query for subtitles with the show_id - for show_id in show_ids: - subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title) - if s.language in languages] - - return subtitles - - def download_subtitle(self, subtitle): - if isinstance(subtitle, Subs4SeriesSubtitle): - # download the subtitle - logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - download_element = soup.select_one('a.style55ws') - if not download_element: - download_element = soup.select_one('form[method="post"]') - target = download_element['action'] if download_element else None - else: - target = download_element['href'] - - if not target: - logger.debug('Unable to download subtitle. No download link found') - return - - download_url = self.server_url + target - r = self.session.get(download_url, headers={'Referer': subtitle.download_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - archive = _get_archive(r.content) - subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content - - if subtitle_content: - subtitle.content = fix_line_ending(subtitle_content) - else: - logger.debug('Could not extract subtitle from %r', archive) - - -def _get_archive(content): - # open the archive - archive_stream = io.BytesIO(content) - archive = None - if rarfile.is_rarfile(archive_stream): - logger.debug('Identified rar archive') - archive = rarfile.RarFile(archive_stream) - elif zipfile.is_zipfile(archive_stream): - logger.debug('Identified zip archive') - archive = zipfile.ZipFile(archive_stream) - - return archive - - -def _get_subtitle_from_archive(archive): - for name in archive.namelist(): - # discard hidden files - if os.path.split(name)[-1].startswith('.'): - continue - - # discard non-subtitle files - if not name.lower().endswith(SUBTITLE_EXTENSIONS): - continue - - return archive.read(name) - - return None diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py deleted file mode 100644 index d3d138884..000000000 --- a/libs/subliminal_patch/providers/subssabbz.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -import re -import io -import os -from random import randint -from bs4 import BeautifulSoup -from zipfile import ZipFile, is_zipfile -from rarfile import RarFile, is_rarfile -from requests import Session -from guessit import guessit -from subliminal_patch.providers import Provider -from subliminal_patch.subtitle import Subtitle -from subliminal_patch.utils import sanitize -from subliminal.exceptions import ProviderError -from subliminal.utils import sanitize_release_group -from subliminal.subtitle import guess_matches -from subliminal.video import Episode, Movie -from subliminal.subtitle import fix_line_ending -from subzero.language import Language -from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST - -logger = logging.getLogger(__name__) - -class SubsSabBzSubtitle(Subtitle): - """SubsSabBz Subtitle.""" - provider_name = 'subssabbz' - - def __init__(self, langauge, filename, type): - super(SubsSabBzSubtitle, self).__init__(langauge) - self.langauge = langauge - self.filename = filename - self.type = type - - @property - def id(self): - return self.filename - - def get_matches(self, video): - matches = set() - - video_filename = video.name - video_filename = os.path.basename(video_filename) - video_filename, _ = os.path.splitext(video_filename) - video_filename = sanitize_release_group(video_filename) - - subtitle_filename = self.filename - subtitle_filename = os.path.basename(subtitle_filename) - subtitle_filename, _ = os.path.splitext(subtitle_filename) - subtitle_filename = sanitize_release_group(subtitle_filename) - - if video_filename == subtitle_filename: - matches.add('hash') - - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) - return matches - - -class SubsSabBzProvider(Provider): - """SubsSabBz Provider.""" - languages = {Language('por', 'BR')} | {Language(l) for l in [ - 'bul', 'eng' - ]} - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] - self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - self.session.headers["Accept-Language"] = "en-US,en;q=0.5" - self.session.headers["Accept-Encoding"] = "gzip, deflate, br" - self.session.headers["DNT"] = "1" - self.session.headers["Connection"] = "keep-alive" - self.session.headers["Upgrade-Insecure-Requests"] = "1" - self.session.headers["Cache-Control"] = "max-age=0" - - def terminate(self): - self.session.close() - - def query(self, language, video): - subtitles = [] - isEpisode = isinstance(video, Episode) - - params = { - 'act': 'search', - 'movie': '', - 'select-language': '2', - 'upldr': '', - 'yr': '', - 'release': '' - } - - if isEpisode: - params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) - else: - params['yr'] = video.year - params['movie'] = (video.title) - - if language == 'en' or language == 'eng': - params['select-language'] = 1 - - logger.info('Searching subtitle %r', params) - response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False, timeout=10, headers={ - 'Referer': 'http://subs.sab.bz/', - }) - - response.raise_for_status() - - if response.status_code != 200: - logger.debug('No subtitles found') - return subtitles - - soup = BeautifulSoup(response.content, 'html.parser') - rows = soup.findAll('tr', {'class': 'subs-row'}) - - # Search on first 10 rows only - for row in rows[:10]: - a_element_wrapper = row.find('td', { 'class': 'c2field' }) - if a_element_wrapper: - element = a_element_wrapper.find('a') - if element: - link = element.get('href') - logger.info('Found subtitle link %r', link) - subtitles = subtitles + self.download_archive_and_add_subtitle_files(link, language, video) - - return subtitles - - def list_subtitles(self, video, languages): - return [s for l in languages for s in self.query(l, video)] - - def download_subtitle(self, subtitle): - pass - - def process_archive_subtitle_files(self, archiveStream, language, video): - subtitles = [] - type = 'episode' if isinstance(video, Episode) else 'movie' - for file_name in archiveStream.namelist(): - if file_name.lower().endswith(('.srt', '.sub')): - logger.info('Found subtitle file %r', file_name) - subtitle = SubsSabBzSubtitle(language, file_name, type) - subtitle.content = archiveStream.read(file_name) - subtitles.append(subtitle) - return subtitles - - def download_archive_and_add_subtitle_files(self, link, language, video ): - logger.info('Downloading subtitle %r', link) - request = self.session.get(link, headers={ - 'Referer': 'http://subs.sab.bz/index.php?' - }) - request.raise_for_status() - - archive_stream = io.BytesIO(request.content) - if is_rarfile(archive_stream): - return self.process_archive_subtitle_files( RarFile(archive_stream), language, video ) - elif is_zipfile(archive_stream): - return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video ) - else: - raise ValueError('Not a valid archive') diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py deleted file mode 100644 index bbc41f520..000000000 --- a/libs/subliminal_patch/providers/subsunacs.py +++ /dev/null @@ -1,161 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -import re -import io -import os -from random import randint -from bs4 import BeautifulSoup -from zipfile import ZipFile, is_zipfile -from rarfile import RarFile, is_rarfile -from requests import Session -from guessit import guessit -from subliminal_patch.providers import Provider -from subliminal_patch.subtitle import Subtitle -from subliminal_patch.utils import sanitize -from subliminal.exceptions import ProviderError -from subliminal.utils import sanitize_release_group -from subliminal.subtitle import guess_matches -from subliminal.video import Episode, Movie -from subliminal.subtitle import fix_line_ending -from subzero.language import Language -from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST - -logger = logging.getLogger(__name__) - -class SubsUnacsSubtitle(Subtitle): - """SubsUnacs Subtitle.""" - provider_name = 'subsunacs' - - def __init__(self, langauge, filename, type): - super(SubsUnacsSubtitle, self).__init__(langauge) - self.langauge = langauge - self.filename = filename - self.type = type - - @property - def id(self): - return self.filename - - def get_matches(self, video): - matches = set() - - video_filename = video.name - video_filename = os.path.basename(video_filename) - video_filename, _ = os.path.splitext(video_filename) - video_filename = sanitize_release_group(video_filename) - - subtitle_filename = self.filename - subtitle_filename = os.path.basename(subtitle_filename) - subtitle_filename, _ = os.path.splitext(subtitle_filename) - subtitle_filename = sanitize_release_group(subtitle_filename) - - if video_filename == subtitle_filename: - matches.add('hash') - - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) - return matches - - -class SubsUnacsProvider(Provider): - """SubsUnacs Provider.""" - languages = {Language('por', 'BR')} | {Language(l) for l in [ - 'bul', 'eng' - ]} - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] - self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - self.session.headers["Accept-Language"] = "en-US,en;q=0.5" - self.session.headers["Accept-Encoding"] = "gzip, deflate, br" - self.session.headers["DNT"] = "1" - self.session.headers["Connection"] = "keep-alive" - self.session.headers["Upgrade-Insecure-Requests"] = "1" - self.session.headers["Cache-Control"] = "max-age=0" - - def terminate(self): - self.session.close() - - def query(self, language, video): - subtitles = [] - isEpisode = isinstance(video, Episode) - - params = { - 'm': '', - 'l': 0, - 'c': '', - 'y': '', - 'action': " Търси ", - 'a': '', - 'd': '', - 'u': '', - 'g': '', - 't': '', - 'imdbcheck': 1} - - if isEpisode: - params['m'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) - else: - params['y'] = video.year - params['m'] = (video.title) - - if language == 'en' or language == 'eng': - params['l'] = 1 - - logger.info('Searching subtitle %r', params) - response = self.session.post('https://subsunacs.net/search.php', params=params, allow_redirects=False, timeout=10, headers={ - 'Referer': 'https://subsunacs.net/index.php', - }) - - response.raise_for_status() - - if response.status_code != 200: - logger.debug('No subtitles found') - return subtitles - - soup = BeautifulSoup(response.content, 'html.parser') - rows = soup.findAll('td', {'class': 'tdMovie'}) - - # Search on first 10 rows only - for row in rows[:10]: - element = row.find('a', {'class': 'tooltip'}) - if element: - link = element.get('href') - logger.info('Found subtitle link %r', link) - subtitles = subtitles + self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video) - - return subtitles - - def list_subtitles(self, video, languages): - return [s for l in languages for s in self.query(l, video)] - - def download_subtitle(self, subtitle): - pass - - def process_archive_subtitle_files(self, archiveStream, language, video): - subtitles = [] - type = 'episode' if isinstance(video, Episode) else 'movie' - for file_name in archiveStream.namelist(): - if file_name.lower().endswith(('.srt', '.sub')): - logger.info('Found subtitle file %r', file_name) - subtitle = SubsUnacsSubtitle(language, file_name, type) - subtitle.content = archiveStream.read(file_name) - subtitles.append(subtitle) - return subtitles - - def download_archive_and_add_subtitle_files(self, link, language, video ): - logger.info('Downloading subtitle %r', link) - request = self.session.get(link, headers={ - 'Referer': 'https://subsunacs.net/search.php' - }) - request.raise_for_status() - - archive_stream = io.BytesIO(request.content) - if is_rarfile(archive_stream): - return self.process_archive_subtitle_files( RarFile(archive_stream), language, video ) - elif is_zipfile(archive_stream): - return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video ) - else: - raise ValueError('Not a valid archive') diff --git a/libs/subliminal_patch/providers/subz.py b/libs/subliminal_patch/providers/subz.py deleted file mode 100644 index dc95cb8d7..000000000 --- a/libs/subliminal_patch/providers/subz.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8 -*- -import io -import json -import logging -import os - -import rarfile -import re -import zipfile - -from subzero.language import Language -from guessit import guessit -from requests import Session -from six import text_type - -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal import __short_version__ -from subliminal.cache import SHOW_EXPIRATION_TIME, region -from subliminal.score import get_equivalent_release_groups -from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches -from subliminal.utils import sanitize, sanitize_release_group -from subliminal.video import Episode, Movie - -logger = logging.getLogger(__name__) - -episode_re = re.compile(r'^S(\d{2})E(\d{2})$') - - -class SubzSubtitle(Subtitle): - """Subz Subtitle.""" - provider_name = 'subz' - - def __init__(self, language, page_link, series, season, episode, title, year, version, download_link): - super(SubzSubtitle, self).__init__(language, page_link=page_link) - self.series = series - self.season = season - self.episode = episode - self.title = title - self.year = year - self.version = version - self.download_link = download_link - self.hearing_impaired = None - self.encoding = 'windows-1253' - - @property - def id(self): - return self.download_link - - def get_matches(self, video): - matches = set() - video_type = None - - # episode - if isinstance(video, Episode): - video_type = 'episode' - # series name - if video.series and sanitize(self.series) in ( - sanitize(name) for name in [video.series] + video.alternative_series): - matches.add('series') - # season - if video.season and self.season == video.season: - matches.add('season') - # episode - if video.episode and self.episode == video.episode: - matches.add('episode') - # title of the episode - if video.title and sanitize(self.title) == sanitize(video.title): - matches.add('title') - # year - if video.original_series and self.year is None or video.year and video.year == self.year: - matches.add('year') - # movie - elif isinstance(video, Movie): - video_type = 'movie' - # title - if video.title and (sanitize(self.title) in ( - sanitize(name) for name in [video.title] + video.alternative_titles)): - matches.add('title') - # year - if video.year and self.year == video.year: - matches.add('year') - - # release_group - if (video.release_group and self.version and - any(r in sanitize_release_group(self.version) - for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))): - matches.add('release_group') - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': video_type}), partial=True) - - return matches - - -class SubzProvider(Provider): - """Subz Provider.""" - languages = {Language(l) for l in ['ell']} - server_url = 'https://subz.xyz' - sign_in_url = '/sessions' - sign_out_url = '/logout' - search_url = '/typeahead/{}' - episode_link = '/series/{show_id}/seasons/{season:d}/episodes/{episode:d}' - movie_link = '/movies/{}' - subtitle_class = SubzSubtitle - - def __init__(self): - self.logged_in = False - self.session = None - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__) - - def terminate(self): - self.session.close() - - def get_show_ids(self, title, year=None, is_episode=True, country_code=None): - """Get the best matching show id for `series`, `year` and `country_code`. - - First search in the result of :meth:`_get_show_suggestions`. - - :param title: show title. - :param year: year of the show, if any. - :type year: int - :param is_episode: if the search is for episode. - :type is_episode: bool - :param country_code: country code of the show, if any. - :type country_code: str - :return: the show id, if found. - :rtype: str - - """ - title_sanitized = sanitize(title).lower() - show_ids = self._get_suggestions(title, is_episode) - - matched_show_ids = [] - for show in show_ids: - show_id = None - # attempt with country - if not show_id and country_code: - logger.debug('Getting show id with country') - if sanitize(show['title']) == text_type('{title} {country}').format(title=title_sanitized, - country=country_code.lower()): - show_id = show['link'].split('/')[-1] - - # attempt with year - if not show_id and year: - logger.debug('Getting show id with year') - if sanitize(show['title']) == text_type('{title} {year}').format(title=title_sanitized, year=year): - show_id = show['link'].split('/')[-1] - - # attempt clean - if not show_id: - logger.debug('Getting show id') - show_id = show['link'].split('/')[-1] if sanitize(show['title']) == title_sanitized else None - - if show_id: - matched_show_ids.append(show_id) - - return matched_show_ids - - @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type, - should_cache_fn=lambda value: value) - def _get_suggestions(self, title, is_episode=True): - """Search the show or movie id from the `title` and `year`. - - :param str title: title of the show. - :param is_episode: if the search is for episode. - :type is_episode: bool - :return: the show suggestions found. - :rtype: dict - - """ - # make the search - logger.info('Searching show ids with %r', title) - r = self.session.get(self.server_url + text_type(self.search_url).format(title), timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return {} - - show_type = 'series' if is_episode else 'movie' - parsed_suggestions = [s for s in json.loads(r.text) if 'type' in s and s['type'] == show_type] - logger.debug('Found suggestions: %r', parsed_suggestions) - - return parsed_suggestions - - def query(self, show_id, series, season, episode, title): - # get the season list of the show - logger.info('Getting the subtitle list of show id %s', show_id) - is_episode = False - if all((show_id, season, episode)): - is_episode = True - page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode) - elif all((show_id, title)): - page_link = self.server_url + self.movie_link.format(show_id) - else: - return [] - - r = self.session.get(page_link, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - - year_num = None - if not is_episode: - year_num = int(soup.select_one('span.year').text) - show_title = str(soup.select_one('#summary-wrapper > div.summary h1').contents[0]).strip() - - subtitles = [] - # loop over episode rows - for subtitle in soup.select('div[id="subtitles"] tr[data-id]'): - # read common info - version = subtitle.find('td', {'class': 'name'}).text - download_link = subtitle.find('a', {'class': 'btn-success'})['href'].strip('\'') - - # read the episode info - if is_episode: - episode_numbers = soup.select_one('#summary-wrapper > div.container.summary span.main-title-sxe').text - season_num = None - episode_num = None - matches = episode_re.match(episode_numbers.strip()) - if matches: - season_num = int(matches.group(1)) - episode_num = int(matches.group(2)) - - episode_title = soup.select_one('#summary-wrapper > div.container.summary span.main-title').text - - subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, show_title, season_num, - episode_num, episode_title, year_num, version, download_link) - # read the movie info - else: - subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, None, None, None, show_title, - year_num, version, download_link) - - logger.debug('Found subtitle %r', subtitle) - subtitles.append(subtitle) - - return subtitles - - def list_subtitles(self, video, languages): - # lookup show_id - if isinstance(video, Episode): - titles = [video.series] + video.alternative_series - elif isinstance(video, Movie): - titles = [video.title] + video.alternative_titles - else: - titles = [] - - show_ids = None - for title in titles: - show_ids = self.get_show_ids(title, video.year, isinstance(video, Episode)) - if show_ids is not None and len(show_ids) > 0: - break - - subtitles = [] - # query for subtitles with the show_id - for show_id in show_ids: - if isinstance(video, Episode): - subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title) - if s.language in languages and s.season == video.season and s.episode == video.episode] - elif isinstance(video, Movie): - subtitles += [s for s in self.query(show_id, None, None, None, video.title) - if s.language in languages and s.year == video.year] - - return subtitles - - def download_subtitle(self, subtitle): - if isinstance(subtitle, SubzSubtitle): - # download the subtitle - logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - archive = _get_archive(r.content) - - subtitle_content = _get_subtitle_from_archive(archive) - if subtitle_content: - subtitle.content = fix_line_ending(subtitle_content) - else: - logger.debug('Could not extract subtitle from %r', archive) - - -def _get_archive(content): - # open the archive - archive_stream = io.BytesIO(content) - archive = None - if rarfile.is_rarfile(archive_stream): - logger.debug('Identified rar archive') - archive = rarfile.RarFile(archive_stream) - elif zipfile.is_zipfile(archive_stream): - logger.debug('Identified zip archive') - archive = zipfile.ZipFile(archive_stream) - - return archive - - -def _get_subtitle_from_archive(archive): - for name in archive.namelist(): - # discard hidden files - if os.path.split(name)[-1].startswith('.'): - continue - - # discard non-subtitle files - if not name.lower().endswith(SUBTITLE_EXTENSIONS): - continue - - return archive.read(name) - - return None diff --git a/libs/subliminal_patch/providers/xsubs.py b/libs/subliminal_patch/providers/xsubs.py deleted file mode 100644 index 102571dd9..000000000 --- a/libs/subliminal_patch/providers/xsubs.py +++ /dev/null @@ -1,302 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -import re - -from subzero.language import Language -from guessit import guessit -from requests import Session - -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal import __short_version__ -from subliminal.cache import SHOW_EXPIRATION_TIME, region -from subliminal.exceptions import AuthenticationError, ConfigurationError -from subliminal.score import get_equivalent_release_groups -from subliminal.subtitle import Subtitle, fix_line_ending, guess_matches -from subliminal.utils import sanitize, sanitize_release_group -from subliminal.video import Episode - -logger = logging.getLogger(__name__) -article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$') - - -class XSubsSubtitle(Subtitle): - """XSubs Subtitle.""" - provider_name = 'xsubs' - - def __init__(self, language, page_link, series, season, episode, year, title, version, download_link): - super(XSubsSubtitle, self).__init__(language, page_link=page_link) - self.series = series - self.season = season - self.episode = episode - self.year = year - self.title = title - self.version = version - self.download_link = download_link - self.hearing_impaired = None - self.encoding = 'windows-1253' - - @property - def id(self): - return self.download_link - - def get_matches(self, video): - matches = set() - - if isinstance(video, Episode): - # series name - if video.series and sanitize(self.series) in ( - sanitize(name) for name in [video.series] + video.alternative_series): - matches.add('series') - # season - if video.season and self.season == video.season: - matches.add('season') - # episode - if video.episode and self.episode == video.episode: - matches.add('episode') - # title of the episode - if video.title and sanitize(self.title) == sanitize(video.title): - matches.add('title') - # year - if video.original_series and self.year is None or video.year and video.year == self.year: - matches.add('year') - # release_group - if (video.release_group and self.version and - any(r in sanitize_release_group(self.version) - for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))): - matches.add('release_group') - # other properties - matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True) - - return matches - - -class XSubsProvider(Provider): - """XSubs Provider.""" - languages = {Language(l) for l in ['ell']} - video_types = (Episode,) - server_url = 'http://xsubs.tv' - sign_in_url = '/xforum/account/signin/' - sign_out_url = '/xforum/account/signout/' - all_series_url = '/series/all.xml' - series_url = '/series/{:d}/main.xml' - season_url = '/series/{show_id:d}/{season:d}.xml' - page_link = '/ice/xsw.xml?srsid={show_id:d}#{season_id:d};{season:d}' - download_link = '/xthru/getsub/{:d}' - subtitle_class = XSubsSubtitle - - def __init__(self, username=None, password=None): - if any((username, password)) and not all((username, password)): - raise ConfigurationError('Username and password must be specified') - - self.username = username - self.password = password - self.logged_in = False - self.session = None - - def initialize(self): - self.session = Session() - self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__) - - # login - if self.username and self.password: - logger.info('Logging in') - self.session.get(self.server_url + self.sign_in_url) - data = {'username': self.username, - 'password': self.password, - 'csrfmiddlewaretoken': self.session.cookies['csrftoken']} - r = self.session.post(self.server_url + self.sign_in_url, data, allow_redirects=False, timeout=10) - - if r.status_code != 302: - raise AuthenticationError(self.username) - - logger.debug('Logged in') - self.logged_in = True - - def terminate(self): - # logout - if self.logged_in: - logger.info('Logging out') - r = self.session.get(self.server_url + self.sign_out_url, timeout=10) - r.raise_for_status() - logger.debug('Logged out') - self.logged_in = False - - self.session.close() - - @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value) - def _get_show_ids(self): - # get the shows page - logger.info('Getting show ids') - r = self.session.get(self.server_url + self.all_series_url, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - - # populate the show ids - show_ids = {} - for show_category in soup.findAll('seriesl'): - if show_category.attrs['category'] == u'Σειρές': - for show in show_category.findAll('series'): - show_ids[sanitize(show.text)] = int(show['srsid']) - break - logger.debug('Found %d show ids', len(show_ids)) - - return show_ids - - def get_show_id(self, series_names, year=None, country_code=None): - series_sanitized_names = [] - for name in series_names: - sanitized_name = sanitize(name) - series_sanitized_names.append(sanitized_name) - alternative_name = _get_alternative_name(sanitized_name) - if alternative_name: - series_sanitized_names.append(alternative_name) - - show_ids = self._get_show_ids() - show_id = None - - for series_sanitized in series_sanitized_names: - # attempt with country - if not show_id and country_code: - logger.debug('Getting show id with country') - show_id = show_ids.get('{series} {country}'.format(series=series_sanitized, - country=country_code.lower())) - - # attempt with year - if not show_id and year: - logger.debug('Getting show id with year') - show_id = show_ids.get('{series} {year:d}'.format(series=series_sanitized, year=year)) - - # attempt with article at the end - if not show_id and year: - logger.debug('Getting show id with year in brackets') - show_id = show_ids.get('{series} [{year:d}]'.format(series=series_sanitized, year=year)) - - # attempt clean - if not show_id: - logger.debug('Getting show id') - show_id = show_ids.get(series_sanitized) - - if show_id: - break - - return int(show_id) if show_id else None - - def query(self, show_id, series, season, year=None, country=None): - # get the season list of the show - logger.info('Getting the season list of show id %d', show_id) - r = self.session.get(self.server_url + self.series_url.format(show_id), timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - - series_title = soup.find('name').text - - # loop over season rows - seasons = soup.findAll('series_group') - season_id = None - - for season_row in seasons: - try: - parsed_season = int(season_row['ssnnum']) - if parsed_season == season: - season_id = int(season_row['ssnid']) - break - except (ValueError, TypeError): - continue - - if season_id is None: - logger.debug('Season not found in provider') - return [] - - # get the subtitle list of the season - logger.info('Getting the subtitle list of season %d', season) - r = self.session.get(self.server_url + self.season_url.format(show_id=show_id, season=season_id), timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('No data returned from provider') - return [] - - soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) - - subtitles = [] - # loop over episode rows - for episode in soup.findAll('subg'): - # read the episode info - etitle = episode.find('etitle') - if etitle is None: - continue - - episode_num = int(etitle['number'].split('-')[0]) - - sgt = episode.find('sgt') - if sgt is None: - continue - - season_num = int(sgt['ssnnum']) - - # filter out unreleased subtitles - for subtitle in episode.findAll('sr'): - if subtitle['published_on'] == '': - continue - - page_link = self.server_url + self.page_link.format(show_id=show_id, season_id=season_id, - season=season_num) - episode_title = etitle['title'] - version = subtitle.fmt.text + ' ' + subtitle.team.text - download_link = self.server_url + self.download_link.format(int(subtitle['rlsid'])) - - subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, series_title, season_num, - episode_num, year, episode_title, version, download_link) - logger.debug('Found subtitle %r', subtitle) - subtitles.append(subtitle) - - return subtitles - - def list_subtitles(self, video, languages): - if isinstance(video, Episode): - # lookup show_id - titles = [video.series] + video.alternative_series - show_id = self.get_show_id(titles, video.year) - - # query for subtitles with the show_id - if show_id: - subtitles = [s for s in self.query(show_id, video.series, video.season, video.year) - if s.language in languages and s.season == video.season and s.episode == video.episode] - if subtitles: - return subtitles - else: - logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) - - return [] - - def download_subtitle(self, subtitle): - if isinstance(subtitle, XSubsSubtitle): - # download the subtitle - logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, - timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return - - subtitle.content = fix_line_ending(r.content) - - -def _get_alternative_name(series): - article_match = article_re.match(series) - if article_match: - return '{series} {article}'.format(series=article_match.group(2), article=article_match.group(1)) - - return None diff --git a/libs/subliminal_patch/refiners/omdb.py b/libs/subliminal_patch/refiners/omdb.py index 9ecb5155b..bef212f75 100644 --- a/libs/subliminal_patch/refiners/omdb.py +++ b/libs/subliminal_patch/refiners/omdb.py @@ -4,7 +4,7 @@ import subliminal import base64 import zlib from subliminal import __short_version__ -from subliminal.refiners.omdb import OMDBClient, refine +from subliminal.refiners.omdb import OMDBClient, refine as refine_orig, Episode, Movie class SZOMDBClient(OMDBClient): @@ -63,5 +63,13 @@ class SZOMDBClient(OMDBClient): return j +def refine(video, **kwargs): + refine_orig(video, **kwargs) + if isinstance(video, Episode) and video.series_imdb_id: + video.series_imdb_id = video.series_imdb_id.strip() + elif isinstance(video, Movie) and video.imdb_id: + video.imdb_id = video.imdb_id.strip() + + omdb_client = SZOMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__}) subliminal.refiners.omdb.omdb_client = omdb_client