core: update to subliminal_patch:head; support file hashes even when scenename is used

2025-02-20 12:53:05 +08:00 · 2019-06-21 15:06:27 +02:00 · 2019-06-21 15:06:27 +02:00 · b100d4ed5c
commit b100d4ed5c
parent 05aaf8094d
7 changed files with 75 additions and 29 deletions
--- a/bazarr/get_subtitle.py
+++ b/bazarr/get_subtitle.py
@@ -52,17 +52,20 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type=
    :return: `Video` instance
    """
    hints = {"title": title, "type": "movie" if media_type == "movie" else "episode"}
-    dont_use_actual_file = False
+    used_scene_name = False
    original_path = path
    original_name = os.path.basename(path)
+    hash_from = None
    if sceneName != "None" and use_scenename:
        # use the sceneName but keep the folder structure for better guessing
        path = os.path.join(os.path.dirname(path), sceneName + os.path.splitext(path)[1])
-        dont_use_actual_file = True
+        used_scene_name = True
+        hash_from = original_path
    if providers:
        try:
-            video = parse_video(path, hints=hints, providers=providers, dry_run=dont_use_actual_file)
-            video.used_scene_name = dont_use_actual_file
+            video = parse_video(path, hints=hints, providers=providers, dry_run=used_scene_name,
+                                hash_from=hash_from)
+            video.used_scene_name = used_scene_name
            video.original_name = original_name
            video.original_path = original_path
            refine_from_db(original_path, video)
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -473,7 +473,7 @@ if is_windows_special_path:
    SZAsyncProviderPool = SZProviderPool


-def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False):
+def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None):
    """Scan a video from a `path`.

    patch:
@@ -538,32 +538,34 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
            video.alternative_titles.append(alt_guess["title"])
        logger.debug("Adding alternative title: %s", alt_guess["title"])

-    if dont_use_actual_file:
+    if dont_use_actual_file and not hash_from:
        return video

    # size and hashes
    if not skip_hashing:
-        video.size = os.path.getsize(path)
+        hash_path = hash_from or path
+        video.size = os.path.getsize(hash_path)
        if video.size > 10485760:
            logger.debug('Size is %d', video.size)
+            osub_hash = None
            if "opensubtitles" in providers:
-                video.hashes['opensubtitles'] = hash_opensubtitles(path)
+                video.hashes['opensubtitles'] = osub_hash = hash_opensubtitles(hash_path)

            if "shooter" in providers:
-                video.hashes['shooter'] = hash_shooter(path)
+                video.hashes['shooter'] = hash_shooter(hash_path)

            if "thesubdb" in providers:
-                video.hashes['thesubdb'] = hash_thesubdb(path)
+                video.hashes['thesubdb'] = hash_thesubdb(hash_path)

            if "napiprojekt" in providers:
                try:
-                    video.hashes['napiprojekt'] = hash_napiprojekt(path)
+                    video.hashes['napiprojekt'] = hash_napiprojekt(hash_path)
                except MemoryError:
-                    logger.warning(u"Couldn't compute napiprojekt hash for %s", path)
+                    logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path)

            if "napisy24" in providers:
                # Napisy24 uses the same hash as opensubtitles
-                video.hashes['napisy24'] = hash_opensubtitles(path)
+                video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path)

            logger.debug('Computed hashes %r', video.hashes)
        else:
--- a/libs/subzero/modification/main.py
+++ b/libs/subzero/modification/main.py
@@ -293,6 +293,9 @@ class SubtitleModifications(object):
                    end_tag = line[-5:]
                    line = line[:-5]

+                last_procs_mods = []
+
+                # fixme: this double loop is ugly
                for order, identifier, args in mods:
                    mod = self.initialized_mods[identifier]

@@ -312,6 +315,33 @@ class SubtitleModifications(object):
                        break

                    applied_mods.append(identifier)
+                    if mod.last_processors:
+                        last_procs_mods.append([identifier, args])
+
+                if skip_entry:
+                    lines = []
+                    break
+
+                if skip_line:
+                    continue
+
+                for identifier, args in last_procs_mods:
+                    mod = self.initialized_mods[identifier]
+
+                    try:
+                        line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
+                                          procs=["last_process"], **args)
+                    except EmptyEntryError:
+                        if self.debug:
+                            logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
+                        skip_entry = True
+                        break
+
+                    if not line:
+                        if self.debug:
+                            logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
+                        skip_line = True
+                        break

                if skip_entry:
                    lines = []
--- a/libs/subzero/modification/mods/__init__.py
+++ b/libs/subzero/modification/mods/__init__.py
@@ -21,6 +21,7 @@ class SubtitleModification(object):
    pre_processors = []
    processors = []
    post_processors = []
+    last_processors = []
    languages = []

    def __init__(self, parent):
@@ -67,15 +68,16 @@ class SubtitleModification(object):
    def post_process(self, content, debug=False, parent=None, **kwargs):
        return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)

-    def modify(self, content, debug=False, parent=None, **kwargs):
+    def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
        if not content:
            return

        new_content = content
-        for method in ("pre_process", "process", "post_process"):
+        for method in procs or ("pre_process", "process", "post_process"):
            if not new_content:
                return
-            new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)
+            new_content = self._process(new_content, getattr(self, "%sors" % method),
+                                        debug=debug, parent=parent, **kwargs)

        return new_content

@@ -107,3 +109,7 @@ empty_line_post_processors = [

 class EmptyEntryError(Exception):
    pass
+
+
+class EmptyLineError(Exception):
+    pass
--- a/libs/subzero/modification/mods/common.py
+++ b/libs/subzero/modification/mods/common.py
@@ -28,7 +28,7 @@ class CommonFixes(SubtitleTextModification):
        NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"),

        # line = _/-/\s
-        NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
+        NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="<CM_non_word_only"),

        # remove >>
        NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
@@ -37,7 +37,7 @@ class CommonFixes(SubtitleTextModification):
        NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),

        # fix music symbols
-        NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
+        NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
                     lambda x: u"♪ " if x.group(1) else u" ♪",
                     name="CM_music_symbols"),

--- a/libs/subzero/modification/mods/hearing_impaired.py
+++ b/libs/subzero/modification/mods/hearing_impaired.py
@@ -49,11 +49,11 @@ class HearingImpaired(SubtitleTextModification):
        NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
                                {"t": TAG}), "", name="HI_brackets"),

-        NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
-                     "", name="HI_bracket_open_start"),
+        #NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
+        #             "", name="HI_bracket_open_start"),

-        NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
-                     name="HI_bracket_open_end"),
+        #NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
+        #             name="HI_bracket_open_end"),

        # text before colon (and possible dash in front), max 11 chars after the first whitespace (if any)
        # NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"),
@@ -73,7 +73,7 @@ class HearingImpaired(SubtitleTextModification):
                     supported=lambda p: not p.only_uppercase),

        # remove MAN:
-        NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"),
+        NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)MAN:\s*)'), "", name="HI_remove_man"),

        # dash in front
        # NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
@@ -81,13 +81,18 @@ class HearingImpaired(SubtitleTextModification):
        # all caps at start before new sentence
        NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1",
                     name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase),
-
-        # remove music symbols
-        NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
-                     "", name="HI_music_symbols_only"),
    ]

    post_processors = empty_line_post_processors
+    last_processors = [
+        # remove music symbols
+        NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
+                     "", name="HI_music_symbols_only"),
+
+        # remove music entries
+        NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[♫♪]+\s*.+|.+\s*[♫♪]+\s*$)'),
+                     "", name="HI_music"),
+    ]


 registry.register(HearingImpaired)
--- a/libs/subzero/video.py
+++ b/libs/subzero/video.py
@@ -52,10 +52,10 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded
            video.subtitle_languages.add(language)


-def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None):
+def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None, hash_from=None):
    logger.debug("Parsing video: %s, hints: %s", os.path.basename(fn), hints)
    return scan_video(fn, hints=hints, dont_use_actual_file=dry_run, providers=providers,
-                      skip_hashing=skip_hashing)
+                      skip_hashing=skip_hashing, hash_from=hash_from)


 def refine_video(video, no_refining=False, refiner_settings=None):