From d910db796531f0e7169e9ff41c2f316db8d1d4a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C4=A2irts=20Kokars?=
Date: Thu, 2 May 2019 21:32:52 +0300
Subject: [PATCH] add subtitri.nekur.net and subtitri.id.lv subtitle providers

---
 libs/subliminal_patch/providers/nekur.py      | 217 ++++++++++++++++++
 libs/subliminal_patch/providers/subtitriid.py | 204 ++++++++++++++++
 views/settings.tpl                            |  44 +++++
 3 files changed, 465 insertions(+)
 create mode 100644 libs/subliminal_patch/providers/nekur.py
 create mode 100644 libs/subliminal_patch/providers/subtitriid.py

diff --git a/libs/subliminal_patch/providers/nekur.py b/libs/subliminal_patch/providers/nekur.py
new file mode 100644
index 000000000..859025865
--- /dev/null
+++ b/libs/subliminal_patch/providers/nekur.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+import io
+import logging
+from random import randint
+
+from zipfile import ZipFile, is_zipfile
+from rarfile import RarFile, is_rarfile
+
+from guessit import guessit
+from requests import Session
+from bs4 import NavigableString
+from ftfy import fix_text
+from subzero.language import Language
+
+from subliminal_patch.providers import Provider
+from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
+from subliminal_patch.subtitle import Subtitle
+from subliminal_patch.score import framerate_equal
+from subliminal.exceptions import ProviderError
+from subliminal.providers import ParserBeautifulSoup
+from subliminal.subtitle import sanitize, guess_matches
+from subliminal.video import Movie
+from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_year(text):
+    """Return ``text`` as an int year, or ``None`` when it is not a number."""
+    try:
+        return int(text.strip().strip('()'))
+    except ValueError:
+        return None
+
+
+class NekurSubtitle(Subtitle):
+    """Subtitle listed on subtitri.nekur.net.
+
+    :param language: language of the subtitle.
+    :param page_link: link reported as the subtitle's page.
+    :param download_link: link to the subtitle archive; also used as the id.
+    :param title: movie title as listed by the provider.
+    :param year: movie year as listed by the provider.
+    :param imdb_id: IMDb id taken from the provider's IMDb link.
+    :param fps: frame rate text as listed by the provider.
+    :param notes: free-form notes, passed to guessit for additional matches.
+    """
+    provider_name = 'nekur'
+
+    def __init__(self, language, page_link, download_link, title, year, imdb_id, fps, notes):
+        super(NekurSubtitle, self).__init__(language, page_link=page_link)
+        self.download_link = download_link
+        self.title = title
+        self.year = year
+        self.imdb_id = imdb_id
+        self.fps = fps
+        self.notes = notes
+        self.matches = None
+
+    @property
+    def id(self):
+        """Identify the subtitle by its download link."""
+        return self.download_link
+
+    def get_matches(self, video):
+        """Return the set of properties of ``video`` this subtitle matches.
+
+        Only :class:`Movie` videos are compared; anything else yields an empty set.
+        The result is also stored on ``self.matches``.
+        """
+        matches = set()
+
+        if isinstance(video, Movie):
+            # title
+            if video.title and sanitize(self.title) == sanitize(video.title):
+                matches.add('title')
+            # year
+            if video.year and self.year == video.year:
+                matches.add('year')
+            # imdb id
+            if video.imdb_id and self.imdb_id == video.imdb_id:
+                matches.add('imdb_id')
+            # fps: a mismatch is only logged, it does not change the matches
+            if video.fps and self.fps and not framerate_equal(video.fps, self.fps):
+                logger.warning("nekur: Wrong FPS (expected: %s, got: %s)", video.fps, self.fps)
+            # guess additional info from notes
+            matches |= guess_matches(video, guessit(self.notes, {'type': 'movie'}), partial=True)
+
+        self.matches = matches
+        return matches
+
+
+class NekurProvider(Provider, ProviderSubtitleArchiveMixin):
+    """Provider for movie subtitles from subtitri.nekur.net."""
+    subtitle_class = NekurSubtitle
+    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
+    server_url = 'http://subtitri.nekur.net/'
+    search_url = server_url + 'modules/Subtitles.php'
+
+    def __init__(self):
+        self.session = None
+
+    def initialize(self):
+        """Open the HTTP session with a randomly picked User-Agent."""
+        self.session = Session()
+        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+        self.session.headers['Referer'] = self.server_url
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self.session.close()
+
+    def query(self, title):
+        """Search the provider for ``title`` and return the subtitles found.
+
+        :param title: movie title to search for.
+        :return: list of :class:`NekurSubtitle`, empty when the response has no body.
+        :raise: :class:`requests.HTTPError` when the search request fails.
+        """
+        subtitles = []
+
+        data = {
+            'ajax': '1',
+            'sSearch': title,
+        }
+
+        r = self.session.post(self.search_url, data=data, timeout=10)
+        r.raise_for_status()
+
+        if not r.content:
+            logger.debug('No data returned from provider')
+            return []
+
+        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+
+        # loop over subtitle cells
+        rows = soup.select('tbody > tr')
+        for row in rows:
+            # title: first direct text node of the anchor (nested elements are skipped)
+            title_anchor_el = row.select_one('.title > a')
+            title_inner_text = [element for element in title_anchor_el if isinstance(element, NavigableString)]
+            movie_title = title_inner_text[0].strip()
+
+            # year: parsed to an int, get_matches compares it to video.year with ==
+            year = _parse_year(row.select_one('.year').text)
+
+            # download link
+            href = title_anchor_el.get('href')
+            download_link = self.server_url + href
+
+            # imdb id
+            imdb_td = row.select_one('td:nth-of-type(4)')
+            imdb_link = imdb_td.select_one('a').get('href')
+            imdb_id = imdb_link.split('/')[-2]
+
+            # fps
+            fps = row.select_one('.fps').text.strip()
+
+            # additional notes
+            notes = row.select_one('.notes').text.strip()
+
+            # page link = archive link (there is no separate subtitle page link)
+            page_link = 'http://subtitri.nekur.net/filmu-subtitri/'
+
+            # create/add the subtitle
+            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, movie_title, year, imdb_id, fps, notes)
+            logger.debug('nekur: Found subtitle %r', subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles for ``video`` whose language is in ``languages``.
+
+        The title and every alternative title of a :class:`Movie` are searched;
+        other video types yield an empty list.
+        """
+        if not isinstance(video, Movie):
+            return []
+
+        subtitles = []
+        # query for subtitles
+        for title in [video.title] + video.alternative_titles:
+            subtitles += [s for s in self.query(title) if s.language in languages]
+
+        return subtitles
+
+    def download_subtitle(self, subtitle):
+        """Download ``subtitle`` and store its content on ``subtitle.content``.
+
+        :raise: :class:`ProviderError` when the download is neither a rar/zip
+            archive nor a valid plain subtitle.
+        """
+        if isinstance(subtitle, NekurSubtitle):
+            # download the subtitle
+            r = self.session.get(subtitle.download_link, timeout=10)
+            r.raise_for_status()
+
+            # open the archive
+            archive_stream = io.BytesIO(r.content)
+            if is_rarfile(archive_stream):
+                archive = RarFile(archive_stream)
+            elif is_zipfile(archive_stream):
+                archive = ZipFile(archive_stream)
+            else:
+                # not an archive: keep the raw payload if it is a valid subtitle
+                subtitle.content = r.content
+                if subtitle.is_valid():
+                    return
+                subtitle.content = None
+
+                raise ProviderError('Unidentified archive type')
+
+            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+            # fix content encoding (utf-16 encoded by default)
+            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), {'uncurl_quotes': False, 'fix_character_width': False}).encode(encoding='utf-8')
+            subtitle.content = fixed_subtitle_content
diff --git a/libs/subliminal_patch/providers/subtitriid.py b/libs/subliminal_patch/providers/subtitriid.py
new file mode 100644
index 000000000..4a3259fa3
--- /dev/null
+++ b/libs/subliminal_patch/providers/subtitriid.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+import io
+import logging
+from random import randint
+
+from zipfile import ZipFile, is_zipfile
+from rarfile import RarFile, is_rarfile
+
+from requests import Session
+from ftfy import fix_text
+from subzero.language import Language
+
+from subliminal_patch.providers import Provider
+from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
+from subliminal_patch.subtitle import Subtitle
+from subliminal.exceptions import ProviderError
+from subliminal.providers import ParserBeautifulSoup
+from subliminal.subtitle import sanitize
+from subliminal.video import Movie
+from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_year(text):
+    """Return ``text`` as an int year, or ``None`` when it is not a number."""
+    try:
+        return int(text.strip().strip('()'))
+    except ValueError:
+        return None
+
+
+class SubtitriIdSubtitle(Subtitle):
+    """Subtitle listed on subtitri.id.lv.
+
+    :param language: language of the subtitle.
+    :param page_link: link to the subtitle's page on the provider.
+    :param download_link: link to the subtitle archive; also used as the id.
+    :param title: movie title as listed by the provider.
+    :param year: movie year as listed by the provider.
+    :param imdb_id: IMDb id taken from the provider's IMDb link.
+    """
+    provider_name = 'subtitriid'
+
+    def __init__(self, language, page_link, download_link, title, year, imdb_id):
+        super(SubtitriIdSubtitle, self).__init__(language, page_link=page_link)
+        self.download_link = download_link
+        self.title = title
+        self.year = year
+        self.imdb_id = imdb_id
+        self.matches = None
+
+    @property
+    def id(self):
+        """Identify the subtitle by its download link."""
+        return self.download_link
+
+    def get_matches(self, video):
+        """Return the set of properties of ``video`` this subtitle matches.
+
+        Only :class:`Movie` videos are compared; anything else yields an empty set.
+        The result is also stored on ``self.matches``.
+        """
+        matches = set()
+        if isinstance(video, Movie):
+            # title
+            if video.title and sanitize(self.title) == sanitize(video.title):
+                matches.add('title')
+            # year
+            if video.year and self.year == video.year:
+                matches.add('year')
+            # imdb id
+            if video.imdb_id and self.imdb_id == video.imdb_id:
+                matches.add('imdb_id')
+
+        self.matches = matches
+        return matches
+
+
+class SubtitriIdProvider(Provider, ProviderSubtitleArchiveMixin):
+    """Provider for movie subtitles from subtitri.id.lv."""
+    subtitle_class = SubtitriIdSubtitle
+    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
+    server_url = 'http://subtitri.id.lv'
+    search_url = server_url + '/search/'
+
+    def __init__(self):
+        self.session = None
+
+    def initialize(self):
+        """Open the HTTP session with a randomly picked User-Agent."""
+        self.session = Session()
+        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+        self.session.headers['Referer'] = self.server_url
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self.session.close()
+
+    def query(self, title):
+        """Search the provider for ``title`` and return the subtitles found.
+
+        The page of every search hit is fetched to read the title, year,
+        IMDb id and download link.
+
+        :param title: movie title to search for.
+        :return: list of :class:`SubtitriIdSubtitle`, empty when the response has no body.
+        :raise: :class:`requests.HTTPError` when a request fails.
+        """
+        subtitles = []
+
+        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
+        r.raise_for_status()
+
+        if not r.content:
+            logger.debug('No data returned from provider')
+            return []
+
+        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+
+        # loop over subtitle cells
+        rows = soup.select('.eBlock')
+        for row in rows:
+            result_anchor_el = row.select_one('.eTitle > a')
+
+            # page link
+            page_link = result_anchor_el.get('href')
+
+            # fetch/parse additional info
+            page_response = self.session.get(page_link, timeout=10)
+            # fail on HTTP errors instead of parsing an error page as a subtitle page
+            page_response.raise_for_status()
+            page_soup = ParserBeautifulSoup(page_response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+
+            # title: the last of the ' / '-separated titles in the page header
+            movie_titles_string = page_soup.select_one('.main-header').text.strip()
+            movie_titles_list = movie_titles_string.split(' / ')
+            movie_title = movie_titles_list[-1]
+            # TODO: use the remaining header titles as alternate titles(?)
+
+            # year: parsed to an int, get_matches compares it to video.year with ==
+            year = _parse_year(page_soup.select_one('#film-page-year').text)
+
+            # imdb id
+            imdb_link = page_soup.select_one('#actors-page > a').get('href')
+            imdb_id = imdb_link.split('/')[-2]
+
+            # download link
+            href = page_soup.select_one('.hvr').get('href')
+            download_link = self.server_url + href
+
+            # create/add the subtitle
+            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, movie_title, year, imdb_id)
+            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles for ``video`` whose language is in ``languages``.
+
+        The title and every alternative title of a :class:`Movie` are searched;
+        other video types yield an empty list.
+        """
+        if not isinstance(video, Movie):
+            return []
+
+        subtitles = []
+        # query for subtitles
+        for title in [video.title] + video.alternative_titles:
+            subtitles += [s for s in self.query(title) if s.language in languages]
+
+        return subtitles
+
+    def download_subtitle(self, subtitle):
+        """Download ``subtitle`` and store its content on ``subtitle.content``.
+
+        :raise: :class:`ProviderError` when the download is neither a rar/zip
+            archive nor a valid plain subtitle.
+        """
+        if isinstance(subtitle, SubtitriIdSubtitle):
+            # download the subtitle
+            r = self.session.get(subtitle.download_link, timeout=10)
+            r.raise_for_status()
+
+            # open the archive
+            archive_stream = io.BytesIO(r.content)
+            if is_rarfile(archive_stream):
+                archive = RarFile(archive_stream)
+            elif is_zipfile(archive_stream):
+                archive = ZipFile(archive_stream)
+            else:
+                # not an archive: keep the raw payload if it is a valid subtitle
+                subtitle.content = r.content
+                if subtitle.is_valid():
+                    return
+                subtitle.content = None
+
+                raise ProviderError('Unidentified archive type')
+
+            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+            # fix content encoding (utf-16 encoded by default)
+            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), {'uncurl_quotes': False, 'fix_character_width': False}).encode(encoding='utf-8')
+            subtitle.content = fixed_subtitle_content
diff --git a/views/settings.tpl b/views/settings.tpl
index 3111f8d4d..bf0f49616 100644
--- a/views/settings.tpl
+++ b/views/settings.tpl
@@ -1472,6 +1472,28 @@
+
+
+ +
+
+
+ + +
+
+ +
+
+ +
+
@@ -1756,6 +1778,28 @@
+
+
+ +
+
+
+ + +
+
+ +
+
+ +
+