From c6355be1bd1c3cfd7ad6e3f0b1a6a69c171adbf1 Mon Sep 17 00:00:00 2001 From: josdion Date: Thu, 19 Mar 2020 07:42:08 +0200 Subject: [PATCH 1/7] Add yavka.net subtitles provider Bulgarian (mostly) Subtitles Provider. Also provide subtitles in English, Russian, Spanish, and Italian. --- libs/subliminal_patch/providers/yavkanet.py | 177 ++++++++++++++++++++ views/providers.tpl | 22 +++ 2 files changed, 199 insertions(+) create mode 100644 libs/subliminal_patch/providers/yavkanet.py diff --git a/libs/subliminal_patch/providers/yavkanet.py b/libs/subliminal_patch/providers/yavkanet.py new file mode 100644 index 000000000..7763fefb4 --- /dev/null +++ b/libs/subliminal_patch/providers/yavkanet.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +import logging +import re +import io +import os +from random import randint +from bs4 import BeautifulSoup +from zipfile import ZipFile, is_zipfile +from rarfile import RarFile, is_rarfile +from requests import Session +from guessit import guessit +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import Subtitle +from subliminal_patch.utils import sanitize +from subliminal.exceptions import ProviderError +from subliminal.utils import sanitize_release_group +from subliminal.subtitle import guess_matches +from subliminal.video import Episode, Movie +from subliminal.subtitle import fix_line_ending +from subzero.language import Language +from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST + +logger = logging.getLogger(__name__) + +class YavkaNetSubtitle(Subtitle): + """YavkaNet Subtitle.""" + provider_name = 'yavkanet' + + def __init__(self, langauge, filename, type, video, link): + super(YavkaNetSubtitle, self).__init__(langauge) + self.langauge = langauge + self.filename = filename + self.page_link = link + self.type = type + self.video = video + self.release_info = os.path.splitext(filename)[0] + + @property + def id(self): + return self.filename + + def 
make_picklable(self): + self.content = None + return self + + def get_matches(self, video): + matches = set() + + video_filename = video.name + video_filename = os.path.basename(video_filename) + video_filename, _ = os.path.splitext(video_filename) + video_filename = sanitize_release_group(video_filename) + + subtitle_filename = self.filename + subtitle_filename = os.path.basename(subtitle_filename) + subtitle_filename, _ = os.path.splitext(subtitle_filename) + subtitle_filename = sanitize_release_group(subtitle_filename) + + if video_filename == subtitle_filename: + matches.add('hash') + + matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + + matches.add(id(self)) + return matches + + +class YavkaNetProvider(Provider): + """YavkaNet Provider.""" + languages = {Language(l) for l in [ + 'bul', 'eng', 'rus', 'spa', 'ita' + ]} + + def initialize(self): + self.session = Session() + self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] + self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + self.session.headers["Accept-Language"] = "en-US,en;q=0.5" + self.session.headers["Accept-Encoding"] = "gzip, deflate, br" + self.session.headers["DNT"] = "1" + self.session.headers["Connection"] = "keep-alive" + self.session.headers["Upgrade-Insecure-Requests"] = "1" + self.session.headers["Cache-Control"] = "max-age=0" + + def terminate(self): + self.session.close() + + def query(self, language, video): + subtitles = [] + isEpisode = isinstance(video, Episode) + params = { + 's': '', + 'y': '', + 'u': '', + 'l': 'BG', + 'i': '' + } + + if isEpisode: + params['s'] = "%s s%02de%02d" % (sanitize(video.series), video.season, video.episode) + else: + params['y'] = video.year + params['s'] = video.title + + if language == 'en' or language == 'eng': + params['l'] = 'EN' + elif language == 'ru' or language == 'rus': + params['l'] = 'RU' + elif language == 'es' or language == 'spa': + 
params['l'] = 'ES' + elif language == 'it' or language == 'ita': + params['l'] = 'IT' + + logger.info('Searching subtitle %r', params) + response = self.session.get('http://yavka.net/subtitles.php', params=params, allow_redirects=False, timeout=10, headers={ + 'Referer': 'http://yavka.net/', + }) + + response.raise_for_status() + + if response.status_code != 200: + logger.debug('No subtitles found') + return subtitles + + soup = BeautifulSoup(response.content, 'html.parser') + rows = soup.findAll('tr', {'class': 'info'}) + + # Search on first 20 rows only + for row in rows[:20]: + element = row.find('a', {'class': 'selector'}) + if element: + link = element.get('href') + logger.info('Found subtitle link %r', link) + subtitles = subtitles + self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video) + + return subtitles + + def list_subtitles(self, video, languages): + return [s for l in languages for s in self.query(l, video)] + + def download_subtitle(self, subtitle): + if subtitle.content: + pass + else: + seeking_subtitle_file = subtitle.filename + arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) + for s in arch: + if s.filename == seeking_subtitle_file: + subtitle.content = s.content + + def process_archive_subtitle_files(self, archiveStream, language, video, link): + subtitles = [] + type = 'episode' if isinstance(video, Episode) else 'movie' + for file_name in archiveStream.namelist(): + if file_name.lower().endswith(('.srt', '.sub')): + logger.info('Found subtitle file %r', file_name) + subtitle = YavkaNetSubtitle(language, file_name, type, video, link) + subtitle.content = archiveStream.read(file_name) + subtitles.append(subtitle) + return subtitles + + def download_archive_and_add_subtitle_files(self, link, language, video ): + logger.info('Downloading subtitle %r', link) + request = self.session.get(link, headers={ + 'Referer': 'http://yavka.net/subtitles.php' + }) + 
request.raise_for_status() + + archive_stream = io.BytesIO(request.content) + if is_rarfile(archive_stream): + return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) + elif is_zipfile(archive_stream): + return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) + else: + raise ValueError('Not a valid archive') + diff --git a/views/providers.tpl b/views/providers.tpl index ec41fb04a..665e3674d 100644 --- a/views/providers.tpl +++ b/views/providers.tpl @@ -790,6 +790,28 @@ +
+
+ +
+
+
+ + +
+
+ +
+
+ +
+
From 13bf1b7a7639a1de7758452ebbe3ee0e02ade1f6 Mon Sep 17 00:00:00 2001 From: josdion Date: Thu, 19 Mar 2020 12:03:07 +0200 Subject: [PATCH 2/7] remove matches.add(id(self)) from get_matches remove matches.add(id(self)) from get_matches as this is an obvious mistake, that I copied from another provider :) --- libs/subliminal_patch/providers/yavkanet.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/subliminal_patch/providers/yavkanet.py b/libs/subliminal_patch/providers/yavkanet.py index 7763fefb4..8c2a51aa0 100644 --- a/libs/subliminal_patch/providers/yavkanet.py +++ b/libs/subliminal_patch/providers/yavkanet.py @@ -61,8 +61,6 @@ class YavkaNetSubtitle(Subtitle): matches.add('hash') matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) return matches From 43fc2f093bd5401a86369ed4ff3ee0d6ea3f54ef Mon Sep 17 00:00:00 2001 From: josdion Date: Thu, 19 Mar 2020 13:54:27 +0200 Subject: [PATCH 3/7] do not remove apostrophe when calling sanitize() Removing apostrophe from movie and tv names will lead to missing matches. 
--- libs/subliminal_patch/providers/yavkanet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/subliminal_patch/providers/yavkanet.py b/libs/subliminal_patch/providers/yavkanet.py index 8c2a51aa0..a74c28038 100644 --- a/libs/subliminal_patch/providers/yavkanet.py +++ b/libs/subliminal_patch/providers/yavkanet.py @@ -96,10 +96,10 @@ class YavkaNetProvider(Provider): } if isEpisode: - params['s'] = "%s s%02de%02d" % (sanitize(video.series), video.season, video.episode) + params['s'] = "%s s%02de%02d" % (sanitize(video.series, {'\''}), video.season, video.episode) else: params['y'] = video.year - params['s'] = video.title + params['s'] = sanitize(video.title, {'\''}) if language == 'en' or language == 'eng': params['l'] = 'EN' From d3d7991db74fa0468d3d945f3b0d8ac7878136ae Mon Sep 17 00:00:00 2001 From: josdion Date: Fri, 20 Mar 2020 09:56:18 +0200 Subject: [PATCH 4/7] subssabbz, subsunacs - various fixes to improve match-finding - fix inconsistent names of some TV shows - sanitize movie names - do not remove apostrophe when sanitizing TV and movie names --- libs/subliminal_patch/providers/subssabbz.py | 28 +++++++++++++++----- libs/subliminal_patch/providers/subsunacs.py | 27 ++++++++++++++----- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py index 709029169..2f9321c12 100644 --- a/libs/subliminal_patch/providers/subssabbz.py +++ b/libs/subliminal_patch/providers/subssabbz.py @@ -12,7 +12,7 @@ from requests import Session from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle -from subliminal_patch.utils import sanitize +from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches @@ -23,6 +23,21 @@ from .utils import 
FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) +def fix_tv_naming(title): + """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. + + :param str title: original title. + :return: new title. + :rtype: str + + """ + return fix_inconsistent_naming(title, {"Marvel's Daredevil": "Daredevil", + "Marvel's Luke Cage": "Luke Cage", + "Marvel's Iron Fist": "Iron Fist", + "Marvel's Jessica Jones": "Jessica Jones", + "DC's Legends of Tomorrow": "Legends of Tomorrow" + }, True) + class SubsSabBzSubtitle(Subtitle): """SubsSabBz Subtitle.""" provider_name = 'subssabbz' @@ -34,6 +49,7 @@ class SubsSabBzSubtitle(Subtitle): self.page_link = link self.type = type self.video = video + self.release_info = os.path.splitext(filename)[0] @property def id(self): @@ -60,8 +76,6 @@ class SubsSabBzSubtitle(Subtitle): matches.add('hash') matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) return matches @@ -99,10 +113,10 @@ class SubsSabBzProvider(Provider): } if isEpisode: - params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) + params['movie'] = "%s %02d %02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode) else: params['yr'] = video.year - params['movie'] = (video.title) + params['movie'] = sanitize(video.title, {'\''}) if language == 'en' or language == 'eng': params['select-language'] = 1 @@ -121,8 +135,8 @@ class SubsSabBzProvider(Provider): soup = BeautifulSoup(response.content, 'html.parser') rows = soup.findAll('tr', {'class': 'subs-row'}) - # Search on first 10 rows only - for row in rows[:10]: + # Search on first 20 rows only + for row in rows[:20]: a_element_wrapper = row.find('td', { 'class': 'c2field' }) if a_element_wrapper: element = a_element_wrapper.find('a') diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py index 
5af116d99..72e0febc7 100644 --- a/libs/subliminal_patch/providers/subsunacs.py +++ b/libs/subliminal_patch/providers/subsunacs.py @@ -12,7 +12,7 @@ from requests import Session from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle -from subliminal_patch.utils import sanitize +from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches @@ -23,6 +23,20 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) +def fix_tv_naming(title): + """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. + + :param str title: original title. + :return: new title. + :rtype: str + + """ + return fix_inconsistent_naming(title, {"Marvel's Daredevil": "Daredevil", + "Marvel's Luke Cage": "Luke Cage", + "Marvel's Iron Fist": "Iron Fist", + "DC's Legends of Tomorrow": "Legends of Tomorrow" + }, True) + class SubsUnacsSubtitle(Subtitle): """SubsUnacs Subtitle.""" provider_name = 'subsunacs' @@ -34,6 +48,7 @@ class SubsUnacsSubtitle(Subtitle): self.page_link = link self.type = type self.video = video + self.release_info = os.path.splitext(filename)[0] @property def id(self): @@ -60,8 +75,6 @@ class SubsUnacsSubtitle(Subtitle): matches.add('hash') matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) return matches @@ -103,10 +116,10 @@ class SubsUnacsProvider(Provider): 'imdbcheck': 1} if isEpisode: - params['m'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) + params['m'] = "%s %02d %02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode) else: params['y'] = video.year - params['m'] = (video.title) + params['m'] = sanitize(video.title, {'\''}) if language == 'en' or language == 'eng': 
params['l'] = 1 @@ -125,8 +138,8 @@ class SubsUnacsProvider(Provider): soup = BeautifulSoup(response.content, 'html.parser') rows = soup.findAll('td', {'class': 'tdMovie'}) - # Search on first 10 rows only - for row in rows[:10]: + # Search on first 20 rows only + for row in rows[:20]: element = row.find('a', {'class': 'tooltip'}) if element: link = element.get('href') From ec7cf0734e60b0a9c57e50d6377da0f1537a4240 Mon Sep 17 00:00:00 2001 From: josdion Date: Fri, 20 Mar 2020 16:49:16 +0200 Subject: [PATCH 5/7] release info in manual search results In manual search dialog, show dropdown icon only when there is more than one element in release_info array. Otherwise just show the release info without dropdown button. --- views/episodes.tpl | 1 + views/movie.tpl | 1 + 2 files changed, 2 insertions(+) diff --git a/views/episodes.tpl b/views/episodes.tpl index 7a0c829d8..82bb6f739 100644 --- a/views/episodes.tpl +++ b/views/episodes.tpl @@ -734,6 +734,7 @@ const array_release_info = data.release_info; let i; let text = '
...
'; + if (array_release_info.length <= 1) text = '
'; for (i = 0; i < array_release_info.length; i++) { text += '
' + array_release_info[i] + '
'; } diff --git a/views/movie.tpl b/views/movie.tpl index 7b917b7ac..c53849d77 100644 --- a/views/movie.tpl +++ b/views/movie.tpl @@ -674,6 +674,7 @@ const array_release_info = data.release_info; let i; let text = '
...
'; + if (array_release_info.length <= 1) text = '
'; for (i = 0; i < array_release_info.length; i++) { text += '
' + array_release_info[i] + '
'; } From da2c5e521298995bf54b7187fe07e69a698ef263 Mon Sep 17 00:00:00 2001 From: German Gutierrez Date: Fri, 20 Mar 2020 20:22:53 +0100 Subject: [PATCH 6/7] New Provider: RegieLive --- libs/subliminal_patch/providers/regielive.py | 182 +++++++++++++++++++ views/providers.tpl | 21 +++ 2 files changed, 203 insertions(+) create mode 100644 libs/subliminal_patch/providers/regielive.py diff --git a/libs/subliminal_patch/providers/regielive.py b/libs/subliminal_patch/providers/regielive.py new file mode 100644 index 000000000..65cbfc93d --- /dev/null +++ b/libs/subliminal_patch/providers/regielive.py @@ -0,0 +1,182 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +import logging +import io +import os + +from requests import Session +from guessit import guessit +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import Subtitle +from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending +from subliminal.subtitle import guess_matches +from subliminal.video import Episode, Movie +from subzero.language import Language + +import zipfile + +logger = logging.getLogger(__name__) + + +class RegieLiveSubtitle(Subtitle): + """RegieLive Subtitle.""" + provider_name = 'regielive' + hash_verifiable = False + + def __init__(self, filename, video, link, rating, language): + super(RegieLiveSubtitle, self).__init__(language) + self.filename = filename + self.page_link = link + self.video = video + self.rating = rating + self.language = language + + @property + def id(self): + return self.page_link + + @property + def release_info(self): + return self.filename + + def get_matches(self, video): + matches = set() + matches |= guess_matches(video, guessit(self.filename)) + + subtitle_filename = self.filename + + # episode + if isinstance(video, Episode): + # already matched in search query + matches.update(['title', 'series', 'season', 'episode', 'year']) + + # movie + elif isinstance(video, Movie): + # already matched in search query 
+ matches.update(['title', 'year']) + + # release_group + if video.release_group and video.release_group.lower() in subtitle_filename: + matches.add('release_group') + + # resolution + if video.resolution and video.resolution.lower() in subtitle_filename: + matches.add('resolution') + + # format + formats = [] + if video.format: + formats = [video.format.lower()] + if formats[0] == "web-dl": + formats.append("webdl") + formats.append("webrip") + formats.append("web ") + for frmt in formats: + if frmt.lower() in subtitle_filename: + matches.add('format') + break + + # video_codec + if video.video_codec: + video_codecs = [video.video_codec.lower()] + if video_codecs[0] == "h264": + formats.append("x264") + elif video_codecs[0] == "h265": + formats.append("x265") + for vc in formats: + if vc.lower() in subtitle_filename: + matches.add('video_codec') + break + + return matches + + +class RegieLiveProvider(Provider): + """RegieLive Provider.""" + languages = {Language(l) for l in ['ron']} + language = list(languages)[0] + SEARCH_THROTTLE = 8 + + def __init__(self): + self.initialize() + + def initialize(self): + self.session = Session() + self.url = 'http://api.regielive.ro/kodi/cauta.php' + self.api = 'API-KODI-KINGUL' + self.headers = {'RL-API': self.api} + + def terminate(self): + self.session.close() + + def query(self, video, language): + payload = {} + if isinstance (video, Episode): + payload['nume'] = video.series + payload['sezon'] = video.season + payload['episod'] = video.episode + elif isinstance(video, Movie): + payload['nume'] = video.title + payload['an'] = video.year + response = self.session.post(self.url, data=payload, headers=self.headers) + logger.info(response.json()) + response_json = response.json()['rezultate'] + subtitles = [] + if not 'eroare' in response_json: + for film in response_json: + for sub in response_json[film]['subtitrari']: + logger.debug(sub) + subtitles.append( + RegieLiveSubtitle(sub['titlu'], video, sub['url'], sub['rating'], 
language) + ) + + # {'titlu': 'Chernobyl.S01E04.The.Happiness.of.All.Mankind.720p.AMZN.WEB-DL.DDP5.1.H.264-NTb', 'url': 'https://subtitrari.regielive.ro/descarca-33336-418567.zip', 'rating': {'nota': 4.89, 'voturi': 48}} + # subtitle def __init__(self, language, filename, subtype, video, link): + return subtitles + + def list_subtitles(self, video, languages): + return self.query(video, self.language) + + def download_subtitle(self, subtitle): + session = Session() + _addheaders = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Origin': 'https://subtitrari.regielive.ro', + 'Accept-Language' : 'en-US,en;q=0.5', + 'Referer': 'https://subtitrari.regielive.ro', + 'Pragma': 'no-cache', + 'Cache-Control': 'no-cache' + } + session.headers.update(_addheaders) + res = session.get('https://subtitrari.regielive.ro') + cookies = res.cookies + _zipped = session.get(subtitle.page_link, cookies=cookies) + if _zipped: + if _zipped.text == '500': + raise ValueError('Error 500 on server') + archive = zipfile.ZipFile(io.BytesIO(_zipped.content)) + subtitle_content = self._get_subtitle_from_archive(archive) + subtitle.content = fix_line_ending(subtitle_content) + + return subtitle + raise ValueError('Problems conecting to the server') + + def _get_subtitle_from_archive(self, archive): + # some files have a non subtitle with .txt extension + _tmp = list(SUBTITLE_EXTENSIONS) + _tmp.remove('.txt') + _subtitle_extensions = tuple(_tmp) + + for name in archive.namelist(): + # discard hidden files + if os.path.split(name)[-1].startswith('.'): + continue + + # discard non-subtitle files + if not name.lower().endswith(_subtitle_extensions): + continue + + return archive.read(name) + + raise APIThrottled('Can not find the subtitle in the compressed file') diff --git a/views/providers.tpl b/views/providers.tpl index 665e3674d..9b3b83093 100644 --- 
a/views/providers.tpl +++ b/views/providers.tpl @@ -481,6 +481,27 @@
+
+
+ +
+
+
+ + +
+
+ +
+
+ +
From 03b181701e0f52d22e2576d33da5b17be93caa62 Mon Sep 17 00:00:00 2001 From: josdion Date: Sat, 21 Mar 2020 10:08:50 +0200 Subject: [PATCH 7/7] do not show release info if it contains only whitespace characters If a release_info entry contains only whitespace characters, it appears as an empty label in the manual search result dialog. Such entries are now left out of the releases list and are not shown. --- bazarr/get_subtitle.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py index 93aa0f6e7..7d0081716 100644 --- a/bazarr/get_subtitle.py +++ b/bazarr/get_subtitle.py @@ -346,10 +346,15 @@ def manual_search(path, language, hi, forced, providers, providers_auth, sceneNa not_matched = scores - matches s.score = score - releases = ['n/a'] + releases = [] if hasattr(s, 'release_info'): if s.release_info is not None: - releases = s.release_info.split(',') + for s_item in s.release_info.split(','): + if s_item.strip(): + releases.append(s_item) + + if len(releases) == 0: + releases = ['n/a'] subtitles_list.append( dict(score=round((score / max_score * 100), 2),