core: update to subliminal_patch:head; support file hashes even when scenename is used

This commit is contained in:
panni 2019-06-21 15:06:27 +02:00
parent 05aaf8094d
commit b100d4ed5c
7 changed files with 75 additions and 29 deletions

View file

@ -52,17 +52,20 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type=
:return: `Video` instance
"""
hints = {"title": title, "type": "movie" if media_type == "movie" else "episode"}
dont_use_actual_file = False
used_scene_name = False
original_path = path
original_name = os.path.basename(path)
hash_from = None
if sceneName != "None" and use_scenename:
# use the sceneName but keep the folder structure for better guessing
path = os.path.join(os.path.dirname(path), sceneName + os.path.splitext(path)[1])
dont_use_actual_file = True
used_scene_name = True
hash_from = original_path
if providers:
try:
video = parse_video(path, hints=hints, providers=providers, dry_run=dont_use_actual_file)
video.used_scene_name = dont_use_actual_file
video = parse_video(path, hints=hints, providers=providers, dry_run=used_scene_name,
hash_from=hash_from)
video.used_scene_name = used_scene_name
video.original_name = original_name
video.original_path = original_path
refine_from_db(original_path, video)

View file

@ -473,7 +473,7 @@ if is_windows_special_path:
SZAsyncProviderPool = SZProviderPool
def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False):
def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None):
"""Scan a video from a `path`.
patch:
@ -538,32 +538,34 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
video.alternative_titles.append(alt_guess["title"])
logger.debug("Adding alternative title: %s", alt_guess["title"])
if dont_use_actual_file:
if dont_use_actual_file and not hash_from:
return video
# size and hashes
if not skip_hashing:
video.size = os.path.getsize(path)
hash_path = hash_from or path
video.size = os.path.getsize(hash_path)
if video.size > 10485760:
logger.debug('Size is %d', video.size)
osub_hash = None
if "opensubtitles" in providers:
video.hashes['opensubtitles'] = hash_opensubtitles(path)
video.hashes['opensubtitles'] = osub_hash = hash_opensubtitles(hash_path)
if "shooter" in providers:
video.hashes['shooter'] = hash_shooter(path)
video.hashes['shooter'] = hash_shooter(hash_path)
if "thesubdb" in providers:
video.hashes['thesubdb'] = hash_thesubdb(path)
video.hashes['thesubdb'] = hash_thesubdb(hash_path)
if "napiprojekt" in providers:
try:
video.hashes['napiprojekt'] = hash_napiprojekt(path)
video.hashes['napiprojekt'] = hash_napiprojekt(hash_path)
except MemoryError:
logger.warning(u"Couldn't compute napiprojekt hash for %s", path)
logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path)
if "napisy24" in providers:
# Napisy24 uses the same hash as opensubtitles
video.hashes['napisy24'] = hash_opensubtitles(path)
video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path)
logger.debug('Computed hashes %r', video.hashes)
else:

View file

@ -293,6 +293,9 @@ class SubtitleModifications(object):
end_tag = line[-5:]
line = line[:-5]
last_procs_mods = []
# fixme: this double loop is ugly
for order, identifier, args in mods:
mod = self.initialized_mods[identifier]
@ -312,6 +315,33 @@ class SubtitleModifications(object):
break
applied_mods.append(identifier)
if mod.last_processors:
last_procs_mods.append([identifier, args])
if skip_entry:
lines = []
break
if skip_line:
continue
for identifier, args in last_procs_mods:
mod = self.initialized_mods[identifier]
try:
line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
procs=["last_process"], **args)
except EmptyEntryError:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
skip_entry = True
break
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
if skip_entry:
lines = []

View file

@ -21,6 +21,7 @@ class SubtitleModification(object):
pre_processors = []
processors = []
post_processors = []
last_processors = []
languages = []
def __init__(self, parent):
@ -67,15 +68,16 @@ class SubtitleModification(object):
def post_process(self, content, debug=False, parent=None, **kwargs):
return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)
def modify(self, content, debug=False, parent=None, **kwargs):
def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
if not content:
return
new_content = content
for method in ("pre_process", "process", "post_process"):
for method in procs or ("pre_process", "process", "post_process"):
if not new_content:
return
new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)
new_content = self._process(new_content, getattr(self, "%sors" % method),
debug=debug, parent=parent, **kwargs)
return new_content
@ -107,3 +109,7 @@ empty_line_post_processors = [
class EmptyEntryError(Exception):
pass
class EmptyLineError(Exception):
pass

View file

@ -28,7 +28,7 @@ class CommonFixes(SubtitleTextModification):
NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1", name="CM_multidash"),
# line consists only of non-word characters, including at least one of - _ . : > ~
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="<CM_non_word_only"),
# remove >>
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
@ -37,7 +37,7 @@ class CommonFixes(SubtitleTextModification):
NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
# fix music symbols
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
lambda x: u"" if x.group(1) else u"",
name="CM_music_symbols"),

View file

@ -49,11 +49,11 @@ class HearingImpaired(SubtitleTextModification):
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
{"t": TAG}), "", name="HI_brackets"),
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
"", name="HI_bracket_open_start"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
# "", name="HI_bracket_open_start"),
NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
name="HI_bracket_open_end"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
# name="HI_bracket_open_end"),
# text before colon (and possible dash in front), max 11 chars after the first whitespace (if any)
# NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"),
@ -73,7 +73,7 @@ class HearingImpaired(SubtitleTextModification):
supported=lambda p: not p.only_uppercase),
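# remove MAN: / WOMAN: speaker labels
# NOTE(review): in the replacement pattern the `(?:WO)` group is not optional, so it
# matches only "WOMAN:"; `(?:WO)?MAN:` was probably intended - confirm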
NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"),
NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)MAN:\s*)'), "", name="HI_remove_man"),
# dash in front
# NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
@ -81,13 +81,18 @@ class HearingImpaired(SubtitleTextModification):
# all caps at start before new sentence
NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1",
name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase),
# remove music symbols
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
"", name="HI_music_symbols_only"),
]
post_processors = empty_line_post_processors
last_processors = [
# remove music symbols
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
"", name="HI_music_symbols_only"),
# remove music entries
NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[♫♪]+\s*.+|.+\s*[♫♪]+\s*$)'),
"", name="HI_music"),
]
registry.register(HearingImpaired)

View file

@ -52,10 +52,10 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded
video.subtitle_languages.add(language)
def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None):
def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None, hash_from=None):
logger.debug("Parsing video: %s, hints: %s", os.path.basename(fn), hints)
return scan_video(fn, hints=hints, dont_use_actual_file=dry_run, providers=providers,
skip_hashing=skip_hashing)
skip_hashing=skip_hashing, hash_from=hash_from)
def refine_video(video, no_refining=False, refiner_settings=None):