From 76d5be0e77baab67b20c1692ec141e7748f239da Mon Sep 17 00:00:00 2001
From: e01
Date: Tue, 29 Jan 2019 15:25:24 +0200
Subject: [PATCH] Add subsunacs.net and subs.sab.bz providers

---
Review notes (fixes folded into this revision; the blob ids on the "index"
lines below were not regenerated):
 - get_matches(): stop adding the builtin id(self) (a memory address) to the
   returned matches.
 - subssabbz query(): read the link from the c2field cell instead of the
   first anchor of the row.
 - Stop advertising Brotli in Accept-Encoding.
 - Apply the 10 second timeout to the archive download as well.

 libs/subliminal/providers/subssabbz.py | 209 ++++++++++++++++++++++++
 libs/subliminal/providers/subsunacs.py | 210 +++++++++++++++++++++++++
 2 files changed, 419 insertions(+)
 create mode 100644 libs/subliminal/providers/subssabbz.py
 create mode 100644 libs/subliminal/providers/subsunacs.py

diff --git a/libs/subliminal/providers/subssabbz.py b/libs/subliminal/providers/subssabbz.py
new file mode 100644
index 000000000..23e5c3cc1
--- /dev/null
+++ b/libs/subliminal/providers/subssabbz.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+import logging
+import re
+import io
+import os
+
+from rarfile import RarFile, is_rarfile
+from zipfile import ZipFile, is_zipfile
+
+from babelfish import Language
+from requests import Session
+from guessit import guessit
+
+from . import ParserBeautifulSoup, Provider
+from ..utils import sanitize, sanitize_release_group
+from ..subtitle import Subtitle, fix_line_ending, guess_matches
+from ..video import Episode
+from ..video import Movie
+
+logger = logging.getLogger(__name__)
+
+
+def _release_name(path):
+    """Return the sanitized base name of *path* without its extension."""
+    stem, _ = os.path.splitext(os.path.basename(path))
+    return sanitize_release_group(stem)
+
+
+class SubsSabBzSubtitle(Subtitle):
+    """Subtitle file taken from an archive downloaded from subs.sab.bz.
+
+    The archive member name is the only metadata kept: it is used as the
+    subtitle id and as the input for guessit-based matching.
+    """
+    provider_name = 'subssabbz'
+
+    def __init__(self, langauge, filename, type):
+        """Store the language, the archive member name and the guessit type.
+
+        The misspelled ``langauge`` parameter/attribute and the ``type``
+        parameter name are kept so that existing callers keep working.
+        """
+        super(SubsSabBzSubtitle, self).__init__(langauge)
+        self.langauge = langauge
+        self.filename = filename
+        self.type = type
+
+    @property
+    def id(self):
+        """Archive member name, used as the subtitle identifier."""
+        return self.filename
+
+    def get_matches(self, video):
+        """Return the set of match names between this subtitle and *video*.
+
+        :param video: video to compare against; its ``name`` is read.
+        :return: the matches reported by ``guess_matches``, plus ``'hash'``
+            when both sanitized release names are identical.
+        :rtype: set
+        """
+        matches = set()
+
+        # An identical release name is reported as a 'hash' match.
+        if _release_name(video.name) == _release_name(self.filename):
+            matches.add('hash')
+
+        matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
+        return matches
+
+
+class SubsSabBzProvider(Provider):
+    """Provider that scrapes subtitle archives from subs.sab.bz."""
+    # NOTE(review): query() only switches the site language code for English,
+    # so a pt-BR request is searched with the default code -- confirm that
+    # pt-BR really belongs in this set.
+    languages = {Language('por', 'BR')} | {Language(l) for l in [
+        'bul', 'eng'
+    ]}
+
+    def initialize(self):
+        """Create the HTTP session and set browser-like default headers."""
+        self.session = Session()
+        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
+        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
+        # 'br' is not advertised: a Brotli body is only decoded when an
+        # optional brotli package is installed, so the page could be unreadable.
+        self.session.headers["Accept-Encoding"] = "gzip, deflate"
+        self.session.headers["DNT"] = "1"
+        self.session.headers["Connection"] = "keep-alive"
+        self.session.headers["Upgrade-Insecure-Requests"] = "1"
+        self.session.headers["Cache-Control"] = "max-age=0"
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self.session.close()
+
+    def query(self, language, video):
+        """Search subs.sab.bz for *video* and return the subtitles found.
+
+        The archive linked from each of the first 10 result rows is
+        downloaded and its subtitle files are returned with content loaded.
+
+        :param language: language to search for and assign to the results.
+        :param video: an ``Episode``; any other video is searched as a movie.
+        :return: subtitles found, empty when the search does not answer 200.
+        :rtype: list
+        """
+        subtitles = []
+
+        params = {
+            'act': 'search',
+            'movie': '',
+            'select-language': '2',
+            'upldr': '',
+            'yr': '',
+            'release': ''
+        }
+
+        if isinstance(video, Episode):
+            params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode)
+        else:
+            params['yr'] = video.year
+            params['movie'] = video.title
+
+        if language == 'en' or language == 'eng':
+            params['select-language'] = 1
+
+        logger.info('Searching subtitle %r', params)
+        response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False, timeout=10, headers={
+            'Referer': 'http://subs.sab.bz/',
+        })
+
+        response.raise_for_status()
+
+        # Redirects are not followed, so a non-200 answer is treated as no result.
+        if response.status_code != 200:
+            logger.debug('No subtitles found')
+            return subtitles
+
+        soup = ParserBeautifulSoup(response.content, ['html.parser'])
+        rows = soup.findAll('tr', {'class': 'subs-row'})
+
+        # Search on first 10 rows only
+        for row in rows[:10]:
+            a_element_wrapper = row.find('td', {'class': 'c2field'})
+            if not a_element_wrapper:
+                continue
+            # Look for the link inside the c2field cell, not in the whole row.
+            element = a_element_wrapper.find('a')
+            if not element:
+                continue
+            link = element.get('href')
+            logger.info('Found subtitle link %r', link)
+            subtitles.extend(self.download_archive_and_add_subtitle_files(link, language, video))
+
+        return subtitles
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles found by ``query`` for each of *languages*."""
+        return [s for l in languages for s in self.query(l, video)]
+
+    def download_subtitle(self, subtitle):
+        """Do nothing: ``content`` is already set when the archive is read."""
+        pass
+
+    def process_archive_subtitle_files(self, archiveStream, language, video):
+        """Build a subtitle for every ``.srt``/``.sub`` member of an archive.
+
+        :param archiveStream: an open ``RarFile`` or ``ZipFile``.
+        :param language: language assigned to each subtitle.
+        :param video: searched video; selects the 'episode'/'movie' type.
+        :return: subtitles with ``content`` set to the member's bytes.
+        :rtype: list
+        """
+        subtitles = []
+        video_type = 'episode' if isinstance(video, Episode) else 'movie'
+        for f in archiveStream.infolist():
+            if f.filename.lower().endswith(('.srt', '.sub')):
+                logger.info('Found subtitle file %r', f.filename)
+                subtitle = SubsSabBzSubtitle(language, f.filename, video_type)
+                subtitle.content = archiveStream.read(f)
+                subtitles.append(subtitle)
+        return subtitles
+
+    def download_archive_and_add_subtitle_files(self, link, language, video):
+        """Download the archive at *link* and return the subtitles it holds.
+
+        :param link: URL of a RAR or ZIP archive.
+        :param language: language assigned to each subtitle.
+        :param video: video the subtitles were searched for.
+        :return: subtitles found in the archive.
+        :rtype: list
+        :raises ValueError: if the download is neither a RAR nor a ZIP file.
+        """
+        logger.info('Downloading subtitle %r', link)
+        # Same timeout as the search request, so a stalled download cannot hang.
+        request = self.session.get(link, timeout=10, headers={
+            'Referer': 'http://subs.sab.bz/index.php?'
+        })
+        request.raise_for_status()
+
+        archive_stream = io.BytesIO(request.content)
+        if is_rarfile(archive_stream):
+            return self.process_archive_subtitle_files(RarFile(archive_stream), language, video)
+        elif is_zipfile(archive_stream):
+            return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video)
+        else:
+            raise ValueError('Not a valid archive')
diff --git a/libs/subliminal/providers/subsunacs.py b/libs/subliminal/providers/subsunacs.py
new file mode 100644
index 000000000..e101fe23d
--- /dev/null
+++ b/libs/subliminal/providers/subsunacs.py
@@ -0,0 +1,210 @@
+# -*- coding: utf-8 -*-
+import logging
+import re
+import io
+import os
+
+from rarfile import RarFile, is_rarfile
+from zipfile import ZipFile, is_zipfile
+
+from babelfish import Language
+from requests import Session
+from guessit import guessit
+
+from . import ParserBeautifulSoup, Provider
+from ..utils import sanitize, sanitize_release_group
+from ..subtitle import Subtitle, fix_line_ending, guess_matches
+from ..video import Episode
+from ..video import Movie
+
+logger = logging.getLogger(__name__)
+
+
+def _release_name(path):
+    """Return the sanitized base name of *path* without its extension."""
+    stem, _ = os.path.splitext(os.path.basename(path))
+    return sanitize_release_group(stem)
+
+
+class SubsUnacsSubtitle(Subtitle):
+    """Subtitle file taken from an archive downloaded from subsunacs.net.
+
+    The archive member name is the only metadata kept: it is used as the
+    subtitle id and as the input for guessit-based matching.
+    """
+    provider_name = 'subsunacs'
+
+    def __init__(self, langauge, filename, type):
+        """Store the language, the archive member name and the guessit type.
+
+        The misspelled ``langauge`` parameter/attribute and the ``type``
+        parameter name are kept so that existing callers keep working.
+        """
+        super(SubsUnacsSubtitle, self).__init__(langauge)
+        self.langauge = langauge
+        self.filename = filename
+        self.type = type
+
+    @property
+    def id(self):
+        """Archive member name, used as the subtitle identifier."""
+        return self.filename
+
+    def get_matches(self, video):
+        """Return the set of match names between this subtitle and *video*.
+
+        :param video: video to compare against; its ``name`` is read.
+        :return: the matches reported by ``guess_matches``, plus ``'hash'``
+            when both sanitized release names are identical.
+        :rtype: set
+        """
+        matches = set()
+
+        # An identical release name is reported as a 'hash' match.
+        if _release_name(video.name) == _release_name(self.filename):
+            matches.add('hash')
+
+        matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
+        return matches
+
+
+class SubsUnacsProvider(Provider):
+    """Provider that scrapes subtitle archives from subsunacs.net."""
+    # NOTE(review): query() only switches the site language code for English,
+    # so a pt-BR request is searched with the default code -- confirm that
+    # pt-BR really belongs in this set.
+    languages = {Language('por', 'BR')} | {Language(l) for l in [
+        'bul', 'eng'
+    ]}
+
+    def initialize(self):
+        """Create the HTTP session and set browser-like default headers."""
+        self.session = Session()
+        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
+        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
+        # 'br' is not advertised: a Brotli body is only decoded when an
+        # optional brotli package is installed, so the page could be unreadable.
+        self.session.headers["Accept-Encoding"] = "gzip, deflate"
+        self.session.headers["DNT"] = "1"
+        self.session.headers["Connection"] = "keep-alive"
+        self.session.headers["Upgrade-Insecure-Requests"] = "1"
+        self.session.headers["Cache-Control"] = "max-age=0"
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self.session.close()
+
+    def query(self, language, video):
+        """Search subsunacs.net for *video* and return the subtitles found.
+
+        The archive linked from each of the first 10 result cells is
+        downloaded and its subtitle files are returned with content loaded.
+
+        :param language: language to search for and assign to the results.
+        :param video: an ``Episode``; any other video is searched as a movie.
+        :return: subtitles found, empty when the search does not answer 200.
+        :rtype: list
+        """
+        subtitles = []
+
+        params = {
+            'm': '',
+            'l': 0,
+            'c': '',
+            'y': '',
+            'action': " Търси ",
+            'a': '',
+            'd': '',
+            'u': '',
+            'g': '',
+            't': '',
+            'imdbcheck': 1
+        }
+
+        if isinstance(video, Episode):
+            params['m'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode)
+        else:
+            params['y'] = video.year
+            params['m'] = video.title
+
+        if language == 'en' or language == 'eng':
+            params['l'] = 1
+
+        logger.info('Searching subtitle %r', params)
+        response = self.session.post('https://subsunacs.net/search.php', params=params, allow_redirects=False, timeout=10, headers={
+            'Referer': 'https://subsunacs.net/index.php',
+        })
+
+        response.raise_for_status()
+
+        # Redirects are not followed, so a non-200 answer is treated as no result.
+        if response.status_code != 200:
+            logger.debug('No subtitles found')
+            return subtitles
+
+        soup = ParserBeautifulSoup(response.content, ['html.parser'])
+        rows = soup.findAll('td', {'class': 'tdMovie'})
+
+        # Search on first 10 rows only
+        for row in rows[:10]:
+            element = row.find('a', {'class': 'tooltip'})
+            if not element:
+                continue
+            link = element.get('href')
+            logger.info('Found subtitle link %r', link)
+            subtitles.extend(self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video))
+
+        return subtitles
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles found by ``query`` for each of *languages*."""
+        return [s for l in languages for s in self.query(l, video)]
+
+    def download_subtitle(self, subtitle):
+        """Do nothing: ``content`` is already set when the archive is read."""
+        pass
+
+    def process_archive_subtitle_files(self, archiveStream, language, video):
+        """Build a subtitle for every ``.srt``/``.sub`` member of an archive.
+
+        :param archiveStream: an open ``RarFile`` or ``ZipFile``.
+        :param language: language assigned to each subtitle.
+        :param video: searched video; selects the 'episode'/'movie' type.
+        :return: subtitles with ``content`` set to the member's bytes.
+        :rtype: list
+        """
+        subtitles = []
+        video_type = 'episode' if isinstance(video, Episode) else 'movie'
+        for f in archiveStream.infolist():
+            if f.filename.lower().endswith(('.srt', '.sub')):
+                logger.info('Found subtitle file %r', f.filename)
+                subtitle = SubsUnacsSubtitle(language, f.filename, video_type)
+                subtitle.content = archiveStream.read(f)
+                subtitles.append(subtitle)
+        return subtitles
+
+    def download_archive_and_add_subtitle_files(self, link, language, video):
+        """Download the archive at *link* and return the subtitles it holds.
+
+        :param link: URL of a RAR or ZIP archive.
+        :param language: language assigned to each subtitle.
+        :param video: video the subtitles were searched for.
+        :return: subtitles found in the archive.
+        :rtype: list
+        :raises ValueError: if the download is neither a RAR nor a ZIP file.
+        """
+        logger.info('Downloading subtitle %r', link)
+        # Same timeout as the search request, so a stalled download cannot hang.
+        request = self.session.get(link, timeout=10, headers={
+            'Referer': 'https://subsunacs.net/search.php'
+        })
+        request.raise_for_status()
+
+        archive_stream = io.BytesIO(request.content)
+        if is_rarfile(archive_stream):
+            return self.process_archive_subtitle_files(RarFile(archive_stream), language, video)
+        elif is_zipfile(archive_stream):
+            return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video)
+        else:
+            raise ValueError('Not a valid archive')