bazarr/libs/subliminal_patch/providers/podnapisi.py
2020-09-10 14:26:37 -04:00

258 lines
9.8 KiB
Python

# coding=utf-8
from __future__ import absolute_import
import logging
import re
import io
from zipfile import ZipFile
from guessit import guessit
from subliminal.subtitle import guess_matches
from subliminal.utils import sanitize
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
from babelfish import language_converters
from subliminal import Episode
from subliminal import Movie
from subliminal.providers.podnapisi import PodnapisiProvider as _PodnapisiProvider, \
PodnapisiSubtitle as _PodnapisiSubtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming
from subzero.language import Language
logger = logging.getLogger(__name__)
def fix_inconsistent_naming(title):
    """Normalise a title through the shared naming helper, dropping "Marvels"/"Marvel's" first.

    When removing those markers changes the title, the pair ``{title: stripped title}`` is handed to
    ``_fix_inconsistent_naming`` as its replacement dictionary; otherwise an empty dictionary is passed.

    :param str title: original title.
    :return: whatever ``_fix_inconsistent_naming`` returns for the title.
    :rtype: str
    """
    stripped = title
    # same replacement order as before: "Marvels" first, then "Marvel's"
    for marker in ("Marvels", "Marvel's"):
        stripped = stripped.replace(marker, "")

    overrides = {title: stripped} if stripped != title else {}
    return _fix_inconsistent_naming(title, overrides)
class PodnapisiSubtitle(_PodnapisiSubtitle):
    """Podnapisi subtitle that also stores the requested release group/episode and its last matches."""
    provider_name = 'podnapisi'
    hearing_impaired_verifiable = True

    def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
                 year=None, asked_for_release_group=None, asked_for_episode=None):
        """Initialise the base subtitle and record the extra request information.

        :param releases: release names of the subtitle; also joined with ", " into ``release_info``.
        :param asked_for_release_group: release group that was requested, stored as given.
        :param asked_for_episode: episode that was requested, stored as given.
        """
        super(PodnapisiSubtitle, self).__init__(language, hearing_impaired, page_link, pid, releases, title,
                                                season=season, episode=episode, year=year)
        self.release_info = u", ".join(releases)
        self.asked_for_release_group = asked_for_release_group
        self.asked_for_episode = asked_for_episode
        # filled in by get_matches()
        self.matches = None

    def get_matches(self, video):
        """Compute the set of match names between this subtitle and ``video``.

        Release names are guessed with guessit using the ``single_value`` option. The resulting set is
        stored on ``self.matches`` as well as returned.

        :param video: the video to compare against (an ``Episode`` or a ``Movie``; anything else yields
            an empty set).
        :return: the matches.
        :rtype: set
        """
        found = set()
        guess_type = None  # stays None when the video is neither an Episode nor a Movie

        if isinstance(video, Episode):
            guess_type = 'episode'

            # series: compare normalised names, stopping at the first equal one
            if video.series:
                own_series = fix_inconsistent_naming(self.title)
                series_names = [video.series] + video.alternative_series
                if any(own_series == fix_inconsistent_naming(name) for name in series_names):
                    found.add('series')

            # year
            if (video.original_series and self.year is None) or (video.year and video.year == self.year):
                found.add('year')

            # season
            if video.season and self.season == video.season:
                found.add('season')

            # episode
            if video.episode and self.episode == video.episode:
                found.add('episode')

        elif isinstance(video, Movie):
            guess_type = 'movie'

            # title: compare sanitized names, stopping at the first equal one
            if video.title:
                own_title = sanitize(self.title)
                movie_titles = [video.title] + video.alternative_titles
                if any(own_title == sanitize(name) for name in movie_titles):
                    found.add('title')

            # year
            if video.year and self.year == video.year:
                found.add('year')

        # guess: one pass over the release names, typed according to the kind of video
        if guess_type is not None:
            for release_name in self.releases:
                found |= guess_matches(video, guessit(release_name, {'type': guess_type, "single_value": True}))

        self.matches = found
        return found
class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin):
    """Podnapisi provider whose language set also contains forced and hearing-impaired variants."""
    languages = ({Language('por', 'BR'), Language('srp', script='Latn'), Language('srp', script='Cyrl')} |
                 {Language.fromalpha2(code) for code in language_converters['alpha2'].codes})
    # add a forced variant of every language, then a hearing-impaired variant of everything so far
    languages.update({Language.rebuild(lang, forced=True) for lang in languages})
    languages.update({Language.rebuild(lang, hi=True) for lang in languages})

    server_url = 'https://podnapisi.net/subtitles/'
    only_foreign = False
    also_foreign = False
    subtitle_class = PodnapisiSubtitle
    hearing_impaired_verifiable = True

    def __init__(self, only_foreign=False, also_foreign=False):
        """Store the foreign/forced search options and initialise the base provider.

        :param only_foreign: keep only results flagged as foreign.
        :param also_foreign: accept results flagged as foreign in addition to regular ones.
        """
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

        super(PodnapisiProvider, self).__init__()

    def list_subtitles(self, video, languages):
        """Search each candidate title in turn and return the results of the first title that has any.

        :param video: the video to search subtitles for; videos with ``is_special`` set are skipped.
        :param languages: the languages to query, one request series per language.
        :return: the found subtitles, or an empty list.
        :rtype: list
        """
        if video.is_special:
            logger.info("%s can't search for specials right now, skipping", self)
            return []

        if isinstance(video, Episode):
            search_titles = [fix_inconsistent_naming(name) for name in [video.series] + video.alternative_series]
            season, episode = video.season, video.episode
        else:
            search_titles = [video.title] + video.alternative_titles
            season = episode = None

        for search_title in search_titles:
            found = []
            for wanted_language in languages:
                found.extend(self.query(wanted_language, search_title, video, season=season, episode=episode,
                                        year=video.year, only_foreign=self.only_foreign,
                                        also_foreign=self.also_foreign))
            if found:
                return found

        return []

    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        """Query the search endpoint page by page and build subtitles for the matching entries.

        :param language: requested language; entries whose (possibly forced/hi-rebuilt) language differs
            from it are dropped.
        :param keyword: the title to search for.
        :param video: the video searched for; only its ``release_group`` is read here.
        :param season: season number, sent only together with ``episode``.
        :param episode: episode number, sent only together with ``season``.
        :param year: year, sent when truthy.
        :param only_foreign: drop entries that are not flagged foreign.
        :param also_foreign: keep foreign entries and mark their language as forced.
        :return: the subtitles found.
        :rtype: list
        """
        # sr-Cyrl specialcase
        lowered = str(language).lower()
        search_language = "sr" if lowered == "sr-cyrl" else lowered

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}
        is_episode = bool(season and episode)
        if is_episode:
            params['sTS'] = season
            params['sTE'] = episode
        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        seen_pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            result_count = int(xml.find('pagination/results').text)
            if not result_count:
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for entry in xml.findall('subtitle'):
                # read xml elements
                pid = entry.find('pid').text

                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in seen_pids:
                    continue

                _language = Language.fromietf(entry.find('language').text)
                flags = entry.find('flags').text or ''
                hearing_impaired = 'n' in flags
                foreign = 'f' in flags

                if foreign:
                    # foreign entries are only wanted when one of the foreign options is set
                    if not (only_foreign or also_foreign):
                        continue
                    if also_foreign:
                        _language = Language.rebuild(_language, forced=True)
                elif only_foreign:
                    continue

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                if language != _language:
                    continue

                page_link = entry.find('url').text

                release_text = entry.find('release').text
                if release_text:
                    # remove trailing dots
                    releases = [re.sub(r'\.+$', '', name) for name in release_text.split()]
                else:
                    releases = []

                title = entry.find('title').text
                r_season = int(entry.find('tvSeason').text)
                r_episode = int(entry.find('tvEpisode').text)
                r_year = int(entry.find('year').text)

                # season/episode details are only passed on for episode searches
                extra = {'year': r_year, 'asked_for_release_group': video.release_group}
                if is_episode:
                    extra.update(season=r_season, episode=r_episode, asked_for_episode=episode)
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               **extra)

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                seen_pids.add(pid)

            # stop on last page
            current_page = int(xml.find('pagination/current').text)
            if current_page >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = current_page + 1
            logger.debug('Getting page %d', params['page'])

            # drop the reference to the parsed page before fetching the next one
            xml = None

        return subtitles

    def download_subtitle(self, subtitle):
        """Download the subtitle's zip archive and store the extracted content on ``subtitle.content``.

        :param subtitle: the subtitle to download; its ``pid`` is used to build the download URL.
        """
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        download_url = self.server_url + subtitle.pid + '/download'
        response = self.session.get(download_url, params={'container': 'zip'}, timeout=10)
        response.raise_for_status()

        # open the zip
        archive_bytes = io.BytesIO(response.content)
        with ZipFile(archive_bytes) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)