mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-02-21 21:34:48 +08:00
core: update subliminal_patch to 2.6.4.2917-dev; fix addic7ed, subscene, titlovi; fix SSAStyle parsing in SRT
This commit is contained in:
parent
d896599417
commit
8879f5a82e
12 changed files with 3900 additions and 58 deletions
3801
libs/inflect.py
Normal file
3801
libs/inflect.py
Normal file
File diff suppressed because it is too large
Load diff
|
@ -56,7 +56,7 @@ class SSAStyle(object):
|
|||
self.encoding = 1 #: Charset
|
||||
|
||||
for k, v in fields.items():
|
||||
if k in self.FIELDS:
|
||||
if k in self.FIELDS and v is not None:
|
||||
setattr(self, k, v)
|
||||
else:
|
||||
raise ValueError("SSAStyle has no field named %r" % k)
|
||||
|
|
|
@ -150,7 +150,14 @@ class SubstationFormat(FormatBase):
|
|||
if format_ == "ass":
|
||||
return ass_rgba_to_color(v)
|
||||
else:
|
||||
return ssa_rgb_to_color(v)
|
||||
try:
|
||||
return ssa_rgb_to_color(v)
|
||||
except ValueError:
|
||||
try:
|
||||
return ass_rgba_to_color(v)
|
||||
except:
|
||||
return Color(255, 255, 255, 0)
|
||||
|
||||
elif f in {"bold", "underline", "italic", "strikeout"}:
|
||||
return v == "-1"
|
||||
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
|
||||
|
|
|
@ -493,7 +493,7 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
|
|||
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
|
||||
|
||||
dirpath, filename = os.path.split(path)
|
||||
logger.info('Scanning video %r in %r', filename, dirpath)
|
||||
logger.info('Determining basic video properties for %r in %r', filename, dirpath)
|
||||
|
||||
# give guessit the filename itself plus its 2 parent directories as hints if we're an episode (most likely
|
||||
# Series name/Season/filename), else only one parent directory
|
||||
|
|
|
@ -84,32 +84,35 @@ class Addic7edProvider(_Addic7edProvider):
|
|||
# login
|
||||
if self.username and self.password:
|
||||
ccks = region.get("addic7ed_cookies", expiration_time=86400)
|
||||
do_login = False
|
||||
if ccks != NO_VALUE:
|
||||
self.session.cookies.update(ccks)
|
||||
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
|
||||
if r.status_code == 302:
|
||||
logger.info('Addic7ed: Login expired')
|
||||
do_login = True
|
||||
else:
|
||||
logger.info('Addic7ed: Reusing old login')
|
||||
self.logged_in = True
|
||||
try:
|
||||
self.session.cookies._cookies.update(ccks)
|
||||
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
|
||||
if r.status_code == 302:
|
||||
logger.info('Addic7ed: Login expired')
|
||||
region.delete("addic7ed_cookies")
|
||||
else:
|
||||
logger.info('Addic7ed: Reusing old login')
|
||||
self.logged_in = True
|
||||
return
|
||||
except:
|
||||
pass
|
||||
|
||||
if do_login:
|
||||
logger.info('Addic7ed: Logging in')
|
||||
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
|
||||
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
|
||||
logger.info('Addic7ed: Logging in')
|
||||
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
|
||||
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
|
||||
headers={"Referer": self.server_url + "login.php"})
|
||||
|
||||
if "relax, slow down" in r.content:
|
||||
raise TooManyRequests(self.username)
|
||||
if "relax, slow down" in r.content:
|
||||
raise TooManyRequests(self.username)
|
||||
|
||||
if r.status_code != 302:
|
||||
raise AuthenticationError(self.username)
|
||||
if r.status_code != 302:
|
||||
raise AuthenticationError(self.username)
|
||||
|
||||
region.set("addic7ed_cookies", r.cookies)
|
||||
region.set("addic7ed_cookies", self.session.cookies._cookies)
|
||||
|
||||
logger.debug('Addic7ed: Logged in')
|
||||
self.logged_in = True
|
||||
logger.debug('Addic7ed: Logged in')
|
||||
self.logged_in = True
|
||||
|
||||
|
||||
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
|
||||
|
|
|
@ -18,7 +18,6 @@ except ImportError:
|
|||
import xml.etree.cElementTree as etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from babelfish import language_converters
|
||||
from subliminal import Episode
|
||||
from subliminal import Movie
|
||||
|
|
|
@ -4,6 +4,7 @@ import io
|
|||
import logging
|
||||
import os
|
||||
import time
|
||||
import inflect
|
||||
|
||||
from random import randint
|
||||
from zipfile import ZipFile
|
||||
|
@ -20,6 +21,8 @@ from subliminal_patch.converters.subscene import language_ids, supported_languag
|
|||
from subscene_api.subscene import search, Subtitle as APISubtitle
|
||||
from subzero.language import Language
|
||||
|
||||
p = inflect.engine()
|
||||
|
||||
|
||||
language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -192,21 +195,27 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
|
||||
def query(self, video):
|
||||
vfn = get_video_filename(video)
|
||||
subtitles = []
|
||||
logger.debug(u"Searching for: %s", vfn)
|
||||
film = search(vfn, session=self.session)
|
||||
|
||||
subtitles = []
|
||||
if film and film.subtitles:
|
||||
logger.debug('Release results found: %s', len(film.subtitles))
|
||||
subtitles = self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No release results found')
|
||||
|
||||
# re-search for episodes without explicit release name
|
||||
if isinstance(video, Episode):
|
||||
term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
|
||||
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
|
||||
term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
|
||||
time.sleep(self.search_throttle)
|
||||
logger.debug('Searching for alternative results: %s', term)
|
||||
film = search(term, session=self.session)
|
||||
film = search(term, session=self.session, release=False)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Alternative results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No alternative results found')
|
||||
|
||||
# packs
|
||||
if video.season_fully_aired:
|
||||
|
@ -215,9 +224,17 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
time.sleep(self.search_throttle)
|
||||
film = search(term, session=self.session)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Pack results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No pack results found')
|
||||
else:
|
||||
logger.debug("Not searching for packs, because the season hasn't fully aired")
|
||||
else:
|
||||
logger.debug('Searching for movie results: %s', video.title)
|
||||
film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
|
||||
if film and film.subtitles:
|
||||
subtitles += self.parse_results(video, film)
|
||||
|
||||
logger.info("%s subtitles found" % len(subtitles))
|
||||
return subtitles
|
||||
|
|
|
@ -134,8 +134,8 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
|
||||
def initialize(self):
|
||||
self.session = Session()
|
||||
self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \
|
||||
'(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
|
||||
self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3)' \
|
||||
'Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)'
|
||||
logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
|
||||
self.session.headers['Referer'] = self.server_url
|
||||
logger.debug('Referer set to %s', self.session.headers['Referer'])
|
||||
|
@ -202,7 +202,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
current_page = int(params['pg'])
|
||||
|
||||
try:
|
||||
sublist = soup.select('section.titlovi > ul.titlovi > li')
|
||||
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
|
||||
for sub in sublist:
|
||||
# subtitle id
|
||||
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
|
||||
|
|
|
@ -25,6 +25,7 @@ this script that does the job by parsing the website's pages.
|
|||
|
||||
# imports
|
||||
import re
|
||||
|
||||
import enum
|
||||
import sys
|
||||
|
||||
|
@ -36,7 +37,7 @@ else:
|
|||
from contextlib import suppress
|
||||
from urllib2.request import Request, urlopen
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
# constants
|
||||
HEADERS = {
|
||||
|
@ -207,7 +208,7 @@ def section_exists(soup, section):
|
|||
return False
|
||||
|
||||
|
||||
def get_first_film(soup, section, session=None):
|
||||
def get_first_film(soup, section, year=None, session=None):
|
||||
tag_part = SectionsParts[section]
|
||||
tag = None
|
||||
|
||||
|
@ -220,12 +221,26 @@ def get_first_film(soup, section, session=None):
|
|||
if not tag:
|
||||
return
|
||||
|
||||
url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
|
||||
url = None
|
||||
|
||||
if not year:
|
||||
url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
|
||||
else:
|
||||
for t in tag.findNext("ul").findAll("li"):
|
||||
if isinstance(t, NavigableString) or not t.div:
|
||||
continue
|
||||
|
||||
if str(year) in t.div.a.string:
|
||||
url = SITE_DOMAIN + t.div.a.get("href")
|
||||
break
|
||||
if not url:
|
||||
return
|
||||
|
||||
return Film.from_url(url, session=session)
|
||||
|
||||
|
||||
def search(term, session=None, limit_to=SearchTypes.Exact):
|
||||
soup = soup_for("%s/subtitles/title?q=%s" % (SITE_DOMAIN, term), session=session)
|
||||
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
|
||||
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session)
|
||||
|
||||
if "Subtitle search by" in str(soup):
|
||||
rows = soup.find("table").tbody.find_all("tr")
|
||||
|
@ -234,7 +249,7 @@ def search(term, session=None, limit_to=SearchTypes.Exact):
|
|||
|
||||
for junk, search_type in SearchTypes.__members__.items():
|
||||
if section_exists(soup, search_type):
|
||||
return get_first_film(soup, search_type)
|
||||
return get_first_film(soup, search_type, year=year, session=session)
|
||||
|
||||
if limit_to == search_type:
|
||||
return
|
||||
|
|
|
@ -4,7 +4,6 @@ import types
|
|||
from babelfish.exceptions import LanguageError
|
||||
from babelfish import Language as Language_, basestr
|
||||
|
||||
|
||||
repl_map = {
|
||||
"dk": "da",
|
||||
"nld": "nl",
|
||||
|
|
|
@ -28,13 +28,16 @@ class CommonFixes(SubtitleTextModification):
|
|||
NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"),
|
||||
|
||||
# line consisting only of non-word characters (e.g. _, -, ., : or whitespace)
|
||||
NReProcessor(re.compile(r'(?u)(^\W*[-_.:]+\W*$)'), "", name="CM_non_word_only"),
|
||||
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
|
||||
|
||||
# remove >>
|
||||
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
|
||||
|
||||
# line = : text
|
||||
NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
|
||||
|
||||
# fix music symbols
|
||||
NReProcessor(re.compile(ur'(?u)(^[-\s]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
|
||||
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
|
||||
lambda x: u"♪ " if x.group(1) else u" ♪",
|
||||
name="CM_music_symbols"),
|
||||
|
||||
|
@ -85,9 +88,6 @@ class CommonFixes(SubtitleTextModification):
|
|||
|
||||
# space before ending doublequote?
|
||||
|
||||
# remove >>
|
||||
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
|
||||
|
||||
# replace uppercase I with lowercase L in words
|
||||
NReProcessor(re.compile(ur'(?u)([a-zà-ž]+)(I+)'),
|
||||
lambda match: ur'%s%s' % (match.group(1), "l" * len(match.group(2))),
|
||||
|
|
|
@ -29,6 +29,22 @@ class HearingImpaired(SubtitleTextModification):
|
|||
FullBracketEntryProcessor(re.compile(ur'(?sux)^-?%(t)s[([].+(?=[^)\]]{3,}).+[)\]]%(t)s$' % {"t": TAG}),
|
||||
"", name="HI_brackets_full"),
|
||||
|
||||
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
|
||||
# possibly with a dash in front; ignore anything ending with a quote
|
||||
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
|
||||
ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
|
||||
name="HI_before_colon_caps"),
|
||||
|
||||
# any text before colon (at least 3 chars); at start or after a sentence,
|
||||
# possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
|
||||
# a space is inside the text; ignore anything ending with a quote
|
||||
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s\->~]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
|
||||
ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\]]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
|
||||
lambda match:
|
||||
match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
|
||||
else "" if not match.group(1).startswith(" ") else " ",
|
||||
name="HI_before_colon_noncaps"),
|
||||
|
||||
# brackets (only remove if at least 3 chars in brackets)
|
||||
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
|
||||
{"t": TAG}), "", name="HI_brackets"),
|
||||
|
@ -46,21 +62,6 @@ class HearingImpaired(SubtitleTextModification):
|
|||
#NReProcessor(re.compile(ur'(?u)(\b|^)([\s-]*(?=[A-zÀ-ž-_0-9"\']{3,})[A-zÀ-ž-_0-9"\']+:\s*)'), "",
|
||||
# name="HI_before_colon"),
|
||||
|
||||
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
|
||||
# possibly with a dash in front; ignore anything ending with a quote
|
||||
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s-]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
|
||||
ur'[A-ZÀ-Ž-_0-9\s\"\'&+]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
|
||||
name="HI_before_colon_caps"),
|
||||
|
||||
# any text before colon (at least 3 chars); at start or after a sentence,
|
||||
# possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
|
||||
# a space is inside the text; ignore anything ending with a quote
|
||||
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s-]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
|
||||
ur'[A-zÀ-ž-_0-9\s\"\'&+]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
|
||||
lambda match:
|
||||
match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
|
||||
else "" if not match.group(1).startswith(" ") else " ",
|
||||
name="HI_before_colon_noncaps"),
|
||||
|
||||
# text in brackets at start, after optional dash, before colon or at end of line
|
||||
# fixme: may be too aggressive
|
||||
|
|
Loading…
Reference in a new issue