From b100d4ed5cd15a391b554585a4b7fd789d0d2405 Mon Sep 17 00:00:00 2001 From: panni Date: Fri, 21 Jun 2019 15:06:27 +0200 Subject: [PATCH] core: update to subliminal_patch:head; support file hashes even when scenename is used --- bazarr/get_subtitle.py | 11 ++++--- libs/subliminal_patch/core.py | 20 +++++++------ libs/subzero/modification/main.py | 30 +++++++++++++++++++ libs/subzero/modification/mods/__init__.py | 12 ++++++-- libs/subzero/modification/mods/common.py | 4 +-- .../modification/mods/hearing_impaired.py | 23 ++++++++------ libs/subzero/video.py | 4 +-- 7 files changed, 75 insertions(+), 29 deletions(-) diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py index 6aa73f0d1..d0db3a752 100644 --- a/bazarr/get_subtitle.py +++ b/bazarr/get_subtitle.py @@ -52,17 +52,20 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type= :return: `Video` instance """ hints = {"title": title, "type": "movie" if media_type == "movie" else "episode"} - dont_use_actual_file = False + used_scene_name = False original_path = path original_name = os.path.basename(path) + hash_from = None if sceneName != "None" and use_scenename: # use the sceneName but keep the folder structure for better guessing path = os.path.join(os.path.dirname(path), sceneName + os.path.splitext(path)[1]) - dont_use_actual_file = True + used_scene_name = True + hash_from = original_path if providers: try: - video = parse_video(path, hints=hints, providers=providers, dry_run=dont_use_actual_file) - video.used_scene_name = dont_use_actual_file + video = parse_video(path, hints=hints, providers=providers, dry_run=used_scene_name, + hash_from=hash_from) + video.used_scene_name = used_scene_name video.original_name = original_name video.original_path = original_path refine_from_db(original_path, video) diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index 9dd6881ed..2963988e9 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -473,7 +473,7 @@ if is_windows_special_path: SZAsyncProviderPool = SZProviderPool -def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False): +def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None): """Scan a video from a `path`. patch: @@ -538,32 +538,34 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski video.alternative_titles.append(alt_guess["title"]) logger.debug("Adding alternative title: %s", alt_guess["title"]) - if dont_use_actual_file: + if dont_use_actual_file and not hash_from: return video # size and hashes if not skip_hashing: - video.size = os.path.getsize(path) + hash_path = hash_from or path + video.size = os.path.getsize(hash_path) if video.size > 10485760: logger.debug('Size is %d', video.size) + osub_hash = None if "opensubtitles" in providers: - video.hashes['opensubtitles'] = hash_opensubtitles(path) + video.hashes['opensubtitles'] = osub_hash = hash_opensubtitles(hash_path) if "shooter" in providers: - video.hashes['shooter'] = hash_shooter(path) + video.hashes['shooter'] = hash_shooter(hash_path) if "thesubdb" in providers: - video.hashes['thesubdb'] = hash_thesubdb(path) + video.hashes['thesubdb'] = hash_thesubdb(hash_path) if "napiprojekt" in providers: try: - video.hashes['napiprojekt'] = hash_napiprojekt(path) + video.hashes['napiprojekt'] = hash_napiprojekt(hash_path) except MemoryError: - logger.warning(u"Couldn't compute napiprojekt hash for %s", path) + logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path) if "napisy24" in providers: # Napisy24 uses the same hash as opensubtitles - video.hashes['napisy24'] = hash_opensubtitles(path) + video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path) logger.debug('Computed hashes %r', video.hashes) else: diff --git a/libs/subzero/modification/main.py b/libs/subzero/modification/main.py index 05c882d9e..7d35c2e27 100644 --- a/libs/subzero/modification/main.py +++ b/libs/subzero/modification/main.py @@ -293,6 +293,9 @@ class SubtitleModifications(object): end_tag = line[-5:] line = line[:-5] + last_procs_mods = [] + + # fixme: this double loop is ugly for order, identifier, args in mods: mod = self.initialized_mods[identifier] @@ -312,6 +315,33 @@ class SubtitleModifications(object): break applied_mods.append(identifier) + if mod.last_processors: + last_procs_mods.append([identifier, args]) + + if skip_entry: + lines = [] + break + + if skip_line: + continue + + for identifier, args in last_procs_mods: + mod = self.initialized_mods[identifier] + + try: + line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index, + procs=["last_process"], **args) + except EmptyEntryError: + if self.debug: + logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text) + skip_entry = True + break + + if not line: + if self.debug: + logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line) + skip_line = True + break if skip_entry: lines = [] diff --git a/libs/subzero/modification/mods/__init__.py b/libs/subzero/modification/mods/__init__.py index 10d0553da..aaf4c37e4 100644 --- a/libs/subzero/modification/mods/__init__.py +++ b/libs/subzero/modification/mods/__init__.py @@ -21,6 +21,7 @@ class SubtitleModification(object): pre_processors = [] processors = [] post_processors = [] + last_processors = [] languages = [] def __init__(self, parent): @@ -67,15 +68,16 @@ class SubtitleModification(object): def post_process(self, content, debug=False, parent=None, **kwargs): return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs) - def modify(self, content, debug=False, parent=None, **kwargs): + def modify(self, content, debug=False, parent=None, procs=None, **kwargs): if not content: return new_content = content - for method in ("pre_process", "process", "post_process"): + for method in procs or ("pre_process", "process", "post_process"): if not new_content: return - new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs) + new_content = self._process(new_content, getattr(self, "%sors" % method), + debug=debug, parent=parent, **kwargs) return new_content @@ -107,3 +109,7 @@ empty_line_post_processors = [ class EmptyEntryError(Exception): pass + + +class EmptyLineError(Exception): + pass diff --git a/libs/subzero/modification/mods/common.py b/libs/subzero/modification/mods/common.py index eba386b1d..14c360937 100644 --- a/libs/subzero/modification/mods/common.py +++ b/libs/subzero/modification/mods/common.py @@ -28,7 +28,7 @@ class CommonFixes(SubtitleTextModification): NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"), # line = _/-/\s - NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"), + NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="> NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"), @@ -37,7 +37,7 @@ class CommonFixes(SubtitleTextModification): NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"), # fix music symbols - NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'), + NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'), lambda x: u"♪ " if x.group(1) else u" ♪", name="CM_music_symbols"), diff --git a/libs/subzero/modification/mods/hearing_impaired.py b/libs/subzero/modification/mods/hearing_impaired.py index 8912834d7..cb72d898c 100644 --- a/libs/subzero/modification/mods/hearing_impaired.py +++ b/libs/subzero/modification/mods/hearing_impaired.py @@ -49,11 +49,11 @@ class HearingImpaired(SubtitleTextModification): NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "", name="HI_brackets"), - NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}), - "", name="HI_bracket_open_start"), + #NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}), + # "", name="HI_bracket_open_start"), - NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "", - name="HI_bracket_open_end"), + #NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "", + # name="HI_bracket_open_end"), # text before colon (and possible dash in front), max 11 chars after the first whitespace (if any) # NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"), @@ -73,7 +73,7 @@ class HearingImpaired(SubtitleTextModification): supported=lambda p: not p.only_uppercase), # remove MAN: - NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"), + NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)MAN:\s*)'), "", name="HI_remove_man"), # dash in front # NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"), @@ -81,13 +81,18 @@ class HearingImpaired(SubtitleTextModification): # all caps at start before new sentence NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1", name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase), - - # remove music symbols - NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}), - "", name="HI_music_symbols_only"), ] post_processors = empty_line_post_processors + last_processors = [ + # remove music symbols + NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}), + "", name="HI_music_symbols_only"), + + # remove music entries + NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[♫♪]+\s*.+|.+\s*[♫♪]+\s*$)'), + "", name="HI_music"), + ] registry.register(HearingImpaired) diff --git a/libs/subzero/video.py b/libs/subzero/video.py index fc6dc99de..13db33ddf 100644 --- a/libs/subzero/video.py +++ b/libs/subzero/video.py @@ -52,10 +52,10 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded video.subtitle_languages.add(language) -def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None): +def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None, hash_from=None): logger.debug("Parsing video: %s, hints: %s", os.path.basename(fn), hints) return scan_video(fn, hints=hints, dont_use_actual_file=dry_run, providers=providers, - skip_hashing=skip_hashing) + skip_hashing=skip_hashing, hash_from=hash_from) def refine_video(video, no_refining=False, refiner_settings=None):