From b100d4ed5cd15a391b554585a4b7fd789d0d2405 Mon Sep 17 00:00:00 2001
From: panni <panni@fragstore.net>
Date: Fri, 21 Jun 2019 15:06:27 +0200
Subject: [PATCH] core: update to subliminal_patch:head; support file hashes
 even when scenename is used

---
 bazarr/get_subtitle.py                        | 11 ++++---
 libs/subliminal_patch/core.py                 | 20 +++++++------
 libs/subzero/modification/main.py             | 30 +++++++++++++++++++
 libs/subzero/modification/mods/__init__.py    | 12 ++++++--
 libs/subzero/modification/mods/common.py      |  4 +--
 .../modification/mods/hearing_impaired.py     | 23 ++++++++------
 libs/subzero/video.py                         |  4 +--
 7 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py
index 6aa73f0d1..d0db3a752 100644
--- a/bazarr/get_subtitle.py
+++ b/bazarr/get_subtitle.py
@@ -52,17 +52,20 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type=
     :return: `Video` instance
     """
     hints = {"title": title, "type": "movie" if media_type == "movie" else "episode"}
-    dont_use_actual_file = False
+    used_scene_name = False
     original_path = path
     original_name = os.path.basename(path)
+    hash_from = None
     if sceneName != "None" and use_scenename:
         # use the sceneName but keep the folder structure for better guessing
         path = os.path.join(os.path.dirname(path), sceneName + os.path.splitext(path)[1])
-        dont_use_actual_file = True
+        used_scene_name = True
+        hash_from = original_path
     if providers:
         try:
-            video = parse_video(path, hints=hints, providers=providers, dry_run=dont_use_actual_file)
-            video.used_scene_name = dont_use_actual_file
+            video = parse_video(path, hints=hints, providers=providers, dry_run=used_scene_name,
+                                hash_from=hash_from)
+            video.used_scene_name = used_scene_name
             video.original_name = original_name
             video.original_path = original_path
             refine_from_db(original_path, video)
diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py
index 9dd6881ed..2963988e9 100644
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -473,7 +473,7 @@ if is_windows_special_path:
     SZAsyncProviderPool = SZProviderPool
 
 
-def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False):
+def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None):
     """Scan a video from a `path`.
 
     patch:
@@ -538,32 +538,34 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
             video.alternative_titles.append(alt_guess["title"])
         logger.debug("Adding alternative title: %s", alt_guess["title"])
 
-    if dont_use_actual_file:
+    if dont_use_actual_file and not hash_from:
         return video
 
     # size and hashes
     if not skip_hashing:
-        video.size = os.path.getsize(path)
+        hash_path = hash_from or path
+        video.size = os.path.getsize(hash_path)
         if video.size > 10485760:
             logger.debug('Size is %d', video.size)
+            osub_hash = None
             if "opensubtitles" in providers:
-                video.hashes['opensubtitles'] = hash_opensubtitles(path)
+                video.hashes['opensubtitles'] = osub_hash = hash_opensubtitles(hash_path)
 
             if "shooter" in providers:
-                video.hashes['shooter'] = hash_shooter(path)
+                video.hashes['shooter'] = hash_shooter(hash_path)
 
             if "thesubdb" in providers:
-                video.hashes['thesubdb'] = hash_thesubdb(path)
+                video.hashes['thesubdb'] = hash_thesubdb(hash_path)
 
             if "napiprojekt" in providers:
                 try:
-                    video.hashes['napiprojekt'] = hash_napiprojekt(path)
+                    video.hashes['napiprojekt'] = hash_napiprojekt(hash_path)
                 except MemoryError:
-                    logger.warning(u"Couldn't compute napiprojekt hash for %s", path)
+                    logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path)
 
             if "napisy24" in providers:
                 # Napisy24 uses the same hash as opensubtitles
-                video.hashes['napisy24'] = hash_opensubtitles(path)
+                video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path)
 
             logger.debug('Computed hashes %r', video.hashes)
         else:
diff --git a/libs/subzero/modification/main.py b/libs/subzero/modification/main.py
index 05c882d9e..7d35c2e27 100644
--- a/libs/subzero/modification/main.py
+++ b/libs/subzero/modification/main.py
@@ -293,6 +293,9 @@ class SubtitleModifications(object):
                     end_tag = line[-5:]
                     line = line[:-5]
 
+                last_procs_mods = []
+
+                # fixme: this double loop is ugly
                 for order, identifier, args in mods:
                     mod = self.initialized_mods[identifier]
 
@@ -312,6 +315,33 @@ class SubtitleModifications(object):
                         break
 
                     applied_mods.append(identifier)
+                    if mod.last_processors:
+                        last_procs_mods.append([identifier, args])
+
+                if skip_entry:
+                    lines = []
+                    break
+
+                if skip_line:
+                    continue
+
+                for identifier, args in last_procs_mods:
+                    mod = self.initialized_mods[identifier]
+
+                    try:
+                        line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
+                                          procs=["last_process"], **args)
+                    except EmptyEntryError:
+                        if self.debug:
+                            logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
+                        skip_entry = True
+                        break
+
+                    if not line:
+                        if self.debug:
+                            logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
+                        skip_line = True
+                        break
 
                 if skip_entry:
                     lines = []
diff --git a/libs/subzero/modification/mods/__init__.py b/libs/subzero/modification/mods/__init__.py
index 10d0553da..aaf4c37e4 100644
--- a/libs/subzero/modification/mods/__init__.py
+++ b/libs/subzero/modification/mods/__init__.py
@@ -21,6 +21,7 @@ class SubtitleModification(object):
     pre_processors = []
     processors = []
     post_processors = []
+    last_processors = []
     languages = []
 
     def __init__(self, parent):
@@ -67,15 +68,16 @@ class SubtitleModification(object):
     def post_process(self, content, debug=False, parent=None, **kwargs):
         return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)
 
-    def modify(self, content, debug=False, parent=None, **kwargs):
+    def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
         if not content:
             return
 
         new_content = content
-        for method in ("pre_process", "process", "post_process"):
+        for method in procs or ("pre_process", "process", "post_process"):
             if not new_content:
                 return
-            new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)
+            new_content = self._process(new_content, getattr(self, "%sors" % method),
+                                        debug=debug, parent=parent, **kwargs)
 
         return new_content
 
@@ -107,3 +109,7 @@ empty_line_post_processors = [
 
 class EmptyEntryError(Exception):
     pass
+
+
+class EmptyLineError(Exception):
+    pass
diff --git a/libs/subzero/modification/mods/common.py b/libs/subzero/modification/mods/common.py
index eba386b1d..14c360937 100644
--- a/libs/subzero/modification/mods/common.py
+++ b/libs/subzero/modification/mods/common.py
@@ -28,7 +28,7 @@ class CommonFixes(SubtitleTextModification):
         NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"),
 
         # line = _/-/\s
-        NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
+        NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
 
         # remove >>
         NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
@@ -37,7 +37,7 @@ class CommonFixes(SubtitleTextModification):
         NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
 
         # fix music symbols
-        NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
+        NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
                      lambda x: u"♪ " if x.group(1) else u" ♪",
                      name="CM_music_symbols"),
 
diff --git a/libs/subzero/modification/mods/hearing_impaired.py b/libs/subzero/modification/mods/hearing_impaired.py
index 8912834d7..cb72d898c 100644
--- a/libs/subzero/modification/mods/hearing_impaired.py
+++ b/libs/subzero/modification/mods/hearing_impaired.py
@@ -49,11 +49,11 @@ class HearingImpaired(SubtitleTextModification):
         NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
                                 {"t": TAG}), "", name="HI_brackets"),
 
-        NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
-                     "", name="HI_bracket_open_start"),
+        #NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
+        #             "", name="HI_bracket_open_start"),
 
-        NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
-                     name="HI_bracket_open_end"),
+        #NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
+        #             name="HI_bracket_open_end"),
 
         # text before colon (and possible dash in front), max 11 chars after the first whitespace (if any)
         # NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"),
@@ -73,7 +73,7 @@ class HearingImpaired(SubtitleTextModification):
                      supported=lambda p: not p.only_uppercase),
 
         # remove MAN:
-        NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"),
+        NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)?MAN:\s*)'), "", name="HI_remove_man"),
 
         # dash in front
         # NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
@@ -81,13 +81,18 @@ class HearingImpaired(SubtitleTextModification):
         # all caps at start before new sentence
         NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1",
                      name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase),
-
-        # remove music symbols
-        NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
-                     "", name="HI_music_symbols_only"),
     ]
 
     post_processors = empty_line_post_processors
+    last_processors = [
+        # remove music symbols
+        NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
+                     "", name="HI_music_symbols_only"),
+
+        # remove music entries
+        NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[♫♪]+\s*.+|.+\s*[♫♪]+\s*$)'),
+                     "", name="HI_music"),
+    ]
 
 
 registry.register(HearingImpaired)
diff --git a/libs/subzero/video.py b/libs/subzero/video.py
index fc6dc99de..13db33ddf 100644
--- a/libs/subzero/video.py
+++ b/libs/subzero/video.py
@@ -52,10 +52,10 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded
             video.subtitle_languages.add(language)
 
 
-def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None):
+def parse_video(fn, hints, skip_hashing=False, dry_run=False, providers=None, hash_from=None):
     logger.debug("Parsing video: %s, hints: %s", os.path.basename(fn), hints)
     return scan_video(fn, hints=hints, dont_use_actual_file=dry_run, providers=providers,
-                      skip_hashing=skip_hashing)
+                      skip_hashing=skip_hashing, hash_from=hash_from)
 
 
 def refine_video(video, no_refining=False, refiner_settings=None):