Improvement and simplification of the subtitles synchronisation mechanism. We now leave the reference track selection to ffsubsync, which already includes an algorithm to choose the best possible track.

2024-11-10 09:02:44 +08:00 · 2020-08-04 22:42:27 -04:00 · 2020-08-04 22:42:27 -04:00 · ea097d6ec4
commit ea097d6ec4
parent 00efd4888c
9 changed files with 340 additions and 327 deletions
--- a/bazarr/subsyncer.py
+++ b/bazarr/subsyncer.py
@ -1,8 +1,6 @@
 import logging
 import os
-from ffsubsync.ffsubsync import run
-from ffsubsync.constants import *
-from knowit import api
+from ffsubsync.ffsubsync import run, make_parser
 from utils import get_binary
 from utils import history_log, history_log_movie
 from get_languages import alpha2_from_alpha3, language_from_alpha3
@ -13,31 +11,17 @@ class SubSyncer:
    def __init__(self):
        self.reference = None
        self.srtin = None
-        self.reference_stream = None
-        self.overwrite_input = True
-        self.ffmpeg_path = None
-
-        # unused attributes
-        self.encoding = DEFAULT_ENCODING
-        self.vlc_mode = None
-        self.make_test_case = None
-        self.gui_mode = None
        self.srtout = None
+        self.ffmpeg_path = None
+        self.args = None
        self.vad = 'subs_then_auditok'
-        self.reference_encoding = None
-        self.frame_rate = DEFAULT_FRAME_RATE
-        self.start_seconds = DEFAULT_START_SECONDS
-        self.no_fix_framerate = None
-        self.serialize_speech = None
-        self.max_offset_seconds = DEFAULT_MAX_OFFSET_SECONDS
-        self.merge_with_reference = None
-        self.output_encoding = 'same'

    def sync(self, video_path, srt_path, srt_lang, media_type, sonarr_series_id=None, sonarr_episode_id=None,
             radarr_id=None):
        self.reference = video_path
        self.srtin = srt_path
        self.srtout = None
+        self.args = None

        ffprobe_exe = get_binary('ffprobe')
        if not ffprobe_exe:
@ -46,69 +30,6 @@ class SubSyncer:
        else:
            logging.debug('BAZARR FFprobe used is %s', ffprobe_exe)

-        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_exe})
-        data = api.know(self.reference)
-
-        using_what = None
-        first_embedded_subs = None
-
-        if 'subtitle' in data:
-            for i, embedded_subs in enumerate(data['subtitle']):
-                if i == 0:
-                    first_embedded_subs = embedded_subs
-                if 'language' in embedded_subs:
-                    language = embedded_subs['language'].alpha3
-                    forced = False
-                    if 'forced' in embedded_subs:
-                        if embedded_subs['forced']:
-                            forced = True
-                    str_format = embedded_subs['format'].lower()
-                    if forced or str_format in ['pgs', 'vobsub']:
-                        pass
-                    if language == "eng":
-                        using_what = "English embedded subtitle track"
-                        self.reference_stream = "s:{}".format(i)
-                        break
-            if not self.reference_stream:
-                language = None
-                if 'language' in first_embedded_subs:
-                    language = language_from_alpha3(first_embedded_subs['language'].alpha3)
-                    forced = False
-                    if 'forced' in first_embedded_subs:
-                        if first_embedded_subs['forced']:
-                            forced = True
-                    str_format = first_embedded_subs['format'].lower()
-                    if forced or str_format in ['pgs', 'vobsub']:
-                        pass
-
-                using_what = "{0} embedded subtitle track".format(language or
-                                                                  'unknown language embedded subtitles track')
-                self.reference_stream = "s:0"
-        elif 'audio' in data:
-            audio_tracks = data['audio']
-            for i, audio_track in enumerate(audio_tracks):
-                if 'language' in audio_track:
-                    language = audio_track['language'].alpha3
-                    if language == srt_lang:
-                        using_what = "{0} audio track".format(language_from_alpha3(audio_track['language'].alpha3) or
-                                                              'unknown language audio track')
-                        self.reference_stream = "a:{}".format(i)
-                        break
-            if not self.reference_stream:
-                audio_tracks = data['audio']
-                for i, audio_track in enumerate(audio_tracks):
-                    if 'language' in audio_track:
-                        language = audio_track['language'].alpha3
-                        if language == "eng":
-                            using_what = "English audio track"
-                            self.reference_stream = "a:{}".format(i)
-                            break
-                if not self.reference_stream:
-                    using_what = "first audio track"
-                    self.reference_stream = "a:0"
-        else:
-            raise NoAudioTrack
-
        ffmpeg_exe = get_binary('ffmpeg')
        if not ffmpeg_exe:
            logging.debug('BAZARR FFmpeg not found!')
@ -118,18 +39,19 @@ class SubSyncer:

        self.ffmpeg_path = os.path.dirname(ffmpeg_exe)
        try:
-            result = run(self)
+            unparsed_args = [self.reference, '-i', self.srtin, '--overwrite-input', '--ffmpegpath', self.ffmpeg_path,
+                             '--vad', self.vad]
+            parser = make_parser()
+            self.args = parser.parse_args(args=unparsed_args)
+            result = run(self.args)
        except Exception as e:
-            logging.error('BAZARR an exception occurs during the synchronization process for this subtitles: ' +
-                          self.srtin)
+            logging.exception('BAZARR an exception occurs during the synchronization process for this subtitles: '
+                              '{0}'.format(self.srtin))
        else:
            if result['sync_was_successful']:
                message = "{0} subtitles synchronization ended with an offset of {1} seconds and a framerate scale " \
-                          "factor of {2} using {3} (0:{4}).".format(language_from_alpha3(srt_lang),
-                                                                    result['offset_seconds'],
-                                                                    result['framerate_scale_factor'],
-                                                                    using_what,
-                                                                    self.reference_stream)
+                          "factor of {2}.".format(language_from_alpha3(srt_lang), result['offset_seconds'],
+                                                  "{:.2f}".format(result['framerate_scale_factor']))

                if media_type == 'series':
                    history_log(action=5, sonarr_series_id=sonarr_series_id, sonarr_episode_id=sonarr_episode_id,
@ -140,16 +62,9 @@ class SubSyncer:
                                      video_path=path_mappings.path_replace_reverse_movie(self.reference),
                                      language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
            else:
-                logging.error('BAZARR unable to sync subtitles using {0}({1}): {2}'.format(using_what,
-                                                                                           self.reference_stream,
-                                                                                           self.srtin))
+                logging.error('BAZARR unable to sync subtitles: {0}'.format(self.srtin))

            return result


-class NoAudioTrack(Exception):
-    """Exception raised if no audio track can be found in video file."""
-    pass
-
-
 subsync = SubSyncer()
--- a/libs/ffsubsync/_version.py
+++ b/libs/ffsubsync/_version.py
@ -23,9 +23,9 @@ def get_keywords():
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
-    git_refnames = "$Format:%d$"
-    git_full = "$Format:%H$"
-    git_date = "$Format:%ci$"
+    git_refnames = " (HEAD -> master)"
+    git_full = "558bc6dc1d5342d4a5910166cf12ebb5890e86b7"
+    git_date = "2020-07-11 17:02:56 -0700"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords

--- a/libs/ffsubsync/ffmpeg_utils.py
+++ b/libs/ffsubsync/ffmpeg_utils.py
@ -0,0 +1,76 @@
+import logging
+import os
+import platform
+import subprocess
+
+from .constants import SUBSYNC_RESOURCES_ENV_MAGIC
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# ref: https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
+# Create a set of arguments which make a ``subprocess.Popen`` (and
+# variants) call work with or without Pyinstaller, ``--noconsole`` or
+# not, on Windows and Linux. Typical use::
+#
+#   subprocess.call(['program_to_run', 'arg_1'], **subprocess_args())
+#
+# When calling ``check_output``::
+#
+#   subprocess.check_output(['program_to_run', 'arg_1'],
+#                           **subprocess_args(False))
+def subprocess_args(include_stdout=True):
+    # The following is true only on Windows.
+    if hasattr(subprocess, 'STARTUPINFO'):
+        # On Windows, subprocess calls will pop up a command window by default
+        # when run from Pyinstaller with the ``--noconsole`` option. Avoid this
+        # distraction.
+        si = subprocess.STARTUPINFO()
+        si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        # Windows doesn't search the path by default. Pass it an environment so
+        # it will.
+        env = os.environ
+    else:
+        si = None
+        env = None
+
+    # ``subprocess.check_output`` doesn't allow specifying ``stdout``::
+    #
+    #   Traceback (most recent call last):
+    #     File "test_subprocess.py", line 58, in <module>
+    #       **subprocess_args(stdout=None))
+    #     File "C:\Python27\lib\subprocess.py", line 567, in check_output
+    #       raise ValueError('stdout argument not allowed, it will be overridden.')
+    #   ValueError: stdout argument not allowed, it will be overridden.
+    #
+    # So, add it only if it's needed.
+    if include_stdout:
+        ret = {'stdout': subprocess.PIPE}
+    else:
+        ret = {}
+
+    # On Windows, running this from the binary produced by Pyinstaller
+    # with the ``--noconsole`` option requires redirecting everything
+    # (stdin, stdout, stderr) to avoid an OSError exception
+    # "[Error 6] the handle is invalid."
+    ret.update({'stdin': subprocess.PIPE,
+                'stderr': subprocess.PIPE,
+                'startupinfo': si,
+                'env': env})
+    return ret
+
+
+def ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
+    if platform.system() == 'Windows':
+        bin_name = '{}.exe'.format(bin_name)
+    if ffmpeg_resources_path is not None:
+        return os.path.join(ffmpeg_resources_path, bin_name)
+    try:
+        resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
+        if len(resource_path) > 0:
+            return os.path.join(resource_path, 'ffmpeg-bin', bin_name)
+    except KeyError:
+        if gui_mode:
+            logger.info("Couldn't find resource path; falling back to searching system path")
+    return bin_name
--- a/libs/ffsubsync/ffsubsync.py
+++ b/libs/ffsubsync/ffsubsync.py
@ -5,12 +5,14 @@ from datetime import datetime
 import logging
 import os
 import shutil
+import subprocess
 import sys

 import numpy as np

 from .aligners import FFTAligner, MaxScoreAligner, FailedToFindAlignmentException
 from .constants import *
+from .ffmpeg_utils import ffmpeg_bin_path
 from .sklearn_shim import Pipeline
 from .speech_transformers import (
    VideoSpeechTransformer,
@ -19,7 +21,7 @@ from .speech_transformers import (
 )
 from .subtitle_parser import make_subtitle_parser
 from .subtitle_transformers import SubtitleMerger, SubtitleShifter
-from .version import __version__
+from .version import get_version

 logger = logging.getLogger(__name__)

@ -30,104 +32,56 @@ def override(args, **kwargs):
    return args_dict


-def run(args):
-    result = {'retval': 0,
-              'offset_seconds': None,
-              'framerate_scale_factor': None,
-              'sync_was_successful': None}
-    if args.vlc_mode:
-        logger.setLevel(logging.CRITICAL)
-    if args.make_test_case and not args.gui_mode:  # this validation not necessary for gui mode
-        if args.srtin is None or args.srtout is None:
-            logger.error('need to specify input and output srt files for test cases')
-            result['retval'] = 1
-            return result
-    if args.overwrite_input:
-        if args.srtin is None:
-            logger.error('need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin')
-            result['retval'] = 1
-            return result
-        if args.srtout is not None:
-            logger.error('overwrite input set but output file specified; refusing to run in case this was not intended')
-            result['retval'] = 1
-            return result
-        args.srtout = args.srtin
-    if args.gui_mode and args.srtout is None:
-        args.srtout = '{}.synced.srt'.format(os.path.splitext(args.srtin)[0])
-    ref_format = args.reference[-3:]
-    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
-        logger.error('merging synced output with reference only valid '
-                     'when reference composed of subtitles')
-        return 1
-    if args.make_test_case:
-        handler = logging.FileHandler('ffsubsync.log')
-        logger.addHandler(handler)
-    if ref_format in SUBTITLE_EXTENSIONS:
-        if args.vad is not None:
-            logger.warning('Vad specified, but reference was not a movie')
-        reference_pipe = make_subtitle_speech_pipeline(
-            fmt=ref_format,
-            **override(
-                args,
-                encoding=args.reference_encoding or DEFAULT_ENCODING
-            )
-        )
-    elif ref_format in ('npy', 'npz'):
-        if args.vad is not None:
-            logger.warning('Vad specified, but reference was not a movie')
-        reference_pipe = Pipeline([
-            ('deserialize', DeserializeSpeechTransformer())
-        ])
-    else:
-        vad = args.vad or DEFAULT_VAD
-        if args.reference_encoding is not None:
-            logger.warning('Reference srt encoding specified, but reference was a video file')
-        ref_stream = args.reference_stream
-        if ref_stream is not None and not ref_stream.startswith('0:'):
-            ref_stream = '0:' + ref_stream
-        reference_pipe = Pipeline([
-            ('speech_extract', VideoSpeechTransformer(vad=vad,
-                                                      sample_rate=SAMPLE_RATE,
-                                                      frame_rate=args.frame_rate,
-                                                      start_seconds=args.start_seconds,
-                                                      ffmpeg_path=args.ffmpeg_path,
-                                                      ref_stream=ref_stream,
-                                                      vlc_mode=args.vlc_mode,
-                                                      gui_mode=args.gui_mode))
-        ])
-    if args.no_fix_framerate:
-        framerate_ratios = [1.]
-    else:
-        framerate_ratios = np.concatenate([
-            [1.], np.array(FRAMERATE_RATIOS), 1./np.array(FRAMERATE_RATIOS)
-        ])
-    logger.info("extracting speech segments from reference '%s'...", args.reference)
-    reference_pipe.fit(args.reference)
-    logger.info('...done')
-    npy_savename = None
-    if args.make_test_case or args.serialize_speech:
-        logger.info('serializing speech...')
-        npy_savename = os.path.splitext(args.reference)[0] + '.npz'
-        np.savez_compressed(npy_savename, speech=reference_pipe.transform(args.reference))
-        logger.info('...done')
-        if args.srtin is None:
-            logger.info('unsynchronized subtitle file not specified; skipping synchronization')
-            return result
-    parser = make_subtitle_parser(fmt=os.path.splitext(args.srtin)[-1][1:], caching=True, **args.__dict__)
-    logger.info("extracting speech segments from subtitles '%s'...", args.srtin)
-    srt_pipes = [
-        make_subtitle_speech_pipeline(
-            **override(args, scale_factor=scale_factor, parser=parser)
-        ).fit(args.srtin)
-        for scale_factor in framerate_ratios
-    ]
-    logger.info('...done')
-    logger.info('computing alignments...')
-    max_offset_seconds = args.max_offset_seconds
+def _ref_format(ref_fname):
+    return ref_fname[-3:]
+
+
+def make_test_case(args, npy_savename, sync_was_successful):
+    if npy_savename is None:
+        raise ValueError('need non-null npy_savename')
+    tar_dir = '{}.{}'.format(
+        args.reference,
+        datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+    )
+    logger.info('creating test archive {}.tar.gz...'.format(tar_dir))
+    os.mkdir(tar_dir)
    try:
-        sync_was_successful = True
+        shutil.move('ffsubsync.log', tar_dir)
+        shutil.copy(args.srtin, tar_dir)
+        if sync_was_successful:
+            shutil.move(args.srtout, tar_dir)
+        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
+            shutil.copy(args.reference, tar_dir)
+        elif args.serialize_speech or args.reference == npy_savename:
+            shutil.copy(npy_savename, tar_dir)
+        else:
+            shutil.move(npy_savename, tar_dir)
+        supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])
+        preferred_formats = ['gztar', 'bztar', 'xztar', 'zip', 'tar']
+        for archive_format in preferred_formats:
+            if archive_format in supported_formats:
+                shutil.make_archive(tar_dir, 'gztar', os.curdir, tar_dir)
+                break
+        else:
+            logger.error('failed to create test archive; no formats supported '
+                         '(this should not happen)')
+            return 1
+        logger.info('...done')
+    finally:
+        shutil.rmtree(tar_dir)
+    return 0
+
+
+def try_sync(args, reference_pipe, srt_pipes, result):
+    sync_was_successful = True
+    try:
+        logger.info('extracting speech segments from subtitles file %s...', args.srtin)
+        for srt_pipe in srt_pipes:
+            srt_pipe.fit(args.srtin)
+        logger.info('...done')
+        logger.info('computing alignments...')
        offset_samples, best_srt_pipe = MaxScoreAligner(
-            FFTAligner, SAMPLE_RATE, max_offset_seconds
+            FFTAligner, SAMPLE_RATE, args.max_offset_seconds
        ).fit_transform(
            reference_pipe.transform(args.reference),
            srt_pipes,
@ -157,39 +111,163 @@ def run(args):
        result['framerate_scale_factor'] = scale_step.scale_factor
    finally:
        result['sync_was_successful'] = sync_was_successful
-    if args.make_test_case:
-        if npy_savename is None:
-            raise ValueError('need non-null npy_savename')
-        tar_dir = '{}.{}'.format(
-            args.reference,
-            datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+        return sync_was_successful
+
+
+def make_reference_pipe(args):
+    ref_format = _ref_format(args.reference)
+    if ref_format in SUBTITLE_EXTENSIONS:
+        if args.vad is not None:
+            logger.warning('Vad specified, but reference was not a movie')
+        return make_subtitle_speech_pipeline(
+            fmt=ref_format,
+            **override(
+                args,
+                encoding=args.reference_encoding or DEFAULT_ENCODING
+            )
        )
-        logger.info('creating test archive {}.tar.gz...'.format(tar_dir))
-        os.mkdir(tar_dir)
-        try:
-            shutil.move('ffsubsync.log', tar_dir)
-            shutil.copy(args.srtin, tar_dir)
-            if sync_was_successful:
-                shutil.move(args.srtout, tar_dir)
-            if ref_format in SUBTITLE_EXTENSIONS:
-                shutil.copy(args.reference, tar_dir)
-            elif args.serialize_speech or args.reference == npy_savename:
-                shutil.copy(npy_savename, tar_dir)
-            else:
-                shutil.move(npy_savename, tar_dir)
-            supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])
-            preferred_formats = ['gztar', 'bztar', 'xztar', 'zip', 'tar']
-            for archive_format in preferred_formats:
-                if archive_format in supported_formats:
-                    shutil.make_archive(tar_dir, 'gztar', os.curdir, tar_dir)
-                    break
-            else:
-                logger.error('failed to create test archive; no formats supported '
-                             '(this should not happen)')
-                result['retval'] = 1
-            logger.info('...done')
-        finally:
-            shutil.rmtree(tar_dir)
+    elif ref_format in ('npy', 'npz'):
+        if args.vad is not None:
+            logger.warning('Vad specified, but reference was not a movie')
+        return Pipeline([
+            ('deserialize', DeserializeSpeechTransformer())
+        ])
+    else:
+        vad = args.vad or DEFAULT_VAD
+        if args.reference_encoding is not None:
+            logger.warning('Reference srt encoding specified, but reference was a video file')
+        ref_stream = args.reference_stream
+        if ref_stream is not None and not ref_stream.startswith('0:'):
+            ref_stream = '0:' + ref_stream
+        return Pipeline([
+            ('speech_extract', VideoSpeechTransformer(vad=vad,
+                                                      sample_rate=SAMPLE_RATE,
+                                                      frame_rate=args.frame_rate,
+                                                      start_seconds=args.start_seconds,
+                                                      ffmpeg_path=args.ffmpeg_path,
+                                                      ref_stream=ref_stream,
+                                                      vlc_mode=args.vlc_mode,
+                                                      gui_mode=args.gui_mode))
+        ])
+
+
+def make_srt_pipes(args):
+    if args.no_fix_framerate:
+        framerate_ratios = [1.]
+    else:
+        framerate_ratios = np.concatenate([
+            [1.], np.array(FRAMERATE_RATIOS), 1./np.array(FRAMERATE_RATIOS)
+        ])
+    parser = make_subtitle_parser(fmt=os.path.splitext(args.srtin)[-1][1:], caching=True, **args.__dict__)
+    srt_pipes = [
+        make_subtitle_speech_pipeline(
+            **override(args, scale_factor=scale_factor, parser=parser)
+        )
+        for scale_factor in framerate_ratios
+    ]
+    return srt_pipes
+
+
+def extract_subtitles_from_reference(args):
+    stream = args.extract_subs_from_stream
+    if not stream.startswith('0:s:'):
+        stream = '0:s:{}'.format(stream)
+    elif not stream.startswith('0:') and stream.startswith('s:'):
+        stream = '0:{}'.format(stream)
+    if not stream.startswith('0:s:'):
+        logger.error('invalid stream for subtitle extraction: %s', args.extract_subs_from_stream)
+    ffmpeg_args = [ffmpeg_bin_path('ffmpeg', args.gui_mode, ffmpeg_resources_path=args.ffmpeg_path)]
+    ffmpeg_args.extend([
+        '-y',
+        '-nostdin',
+        '-loglevel', 'fatal',
+        '-i', args.reference,
+        '-map', '{}'.format(stream),
+        '-f', 'srt',
+    ])
+    if args.srtout is None:
+        ffmpeg_args.append('-')
+    else:
+        ffmpeg_args.append(args.srtout)
+    logger.info('attempting to extract subtitles to {} ...'.format('stdout' if args.srtout is None else args.srtout))
+    retcode = subprocess.call(ffmpeg_args)
+    if retcode == 0:
+        logger.info('...done')
+    else:
+        logger.error('ffmpeg unable to extract subtitles from reference; return code %d', retcode)
+    return retcode
+
+
+def validate_args(args):
+    if args.vlc_mode:
+        logger.setLevel(logging.CRITICAL)
+    if args.make_test_case and not args.gui_mode:  # this validation not necessary for gui mode
+        if args.srtin is None or args.srtout is None:
+            raise ValueError('need to specify input and output srt files for test cases')
+    if args.overwrite_input:
+        if args.extract_subs_from_stream is not None:
+            raise ValueError('input overwriting not allowed for extracting subtitles from referece')
+        if args.srtin is None:
+            raise ValueError(
+                'need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin'
+            )
+        if args.srtout is not None:
+            raise ValueError(
+                'overwrite input set but output file specified; refusing to run in case this was not intended'
+            )
+    if args.extract_subs_from_stream is not None:
+        if args.make_test_case:
+            raise ValueError('test case is for sync and not subtitle extraction')
+        if args.srtin is not None:
+            raise ValueError('stream specified for reference subtitle extraction; -i flag for sync input not allowed')
+
+
+def run(args):
+    result = {
+        'retval': 0,
+        'offset_seconds': None,
+        'framerate_scale_factor': None,
+        'sync_was_successful': None
+    }
+    try:
+        validate_args(args)
+    except ValueError as e:
+        logger.error(e)
+        result['retval'] = 1
+        return result
+    if args.overwrite_input:
+        args.srtout = args.srtin
+    if args.gui_mode and args.srtout is None:
+        args.srtout = '{}.synced.srt'.format(os.path.splitext(args.srtin)[0])
+    ref_format = _ref_format(args.reference)
+    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
+        logger.error('merging synced output with reference only valid '
+                     'when reference composed of subtitles')
+        result['retval'] = 1
+        return result
+    if args.make_test_case:
+        handler = logging.FileHandler('ffsubsync.log')
+        logger.addHandler(handler)
+    if args.extract_subs_from_stream is not None:
+        result['retval'] = extract_subtitles_from_reference(args)
+        return result
+    reference_pipe = make_reference_pipe(args)
+    logger.info("extracting speech segments from reference '%s'...", args.reference)
+    reference_pipe.fit(args.reference)
+    logger.info('...done')
+    npy_savename = None
+    if args.make_test_case or args.serialize_speech:
+        logger.info('serializing speech...')
+        npy_savename = os.path.splitext(args.reference)[0] + '.npz'
+        np.savez_compressed(npy_savename, speech=reference_pipe.transform(args.reference))
+        logger.info('...done')
+        if args.srtin is None:
+            logger.info('unsynchronized subtitle file not specified; skipping synchronization')
+            return result
+    srt_pipes = make_srt_pipes(args)
+    sync_was_successful = try_sync(args, reference_pipe, srt_pipes, result)
+    if args.make_test_case:
+        result['retval'] += make_test_case(args, npy_savename, sync_was_successful)
    return result


@ -206,11 +284,18 @@ def add_main_args_for_cli(parser):
                        help='If specified, serialize reference speech to a numpy array, '
                             'and create an archive with input/output subtitles '
                             'and serialized speech.')
+    parser.add_argument(
+        '--reference-stream', '--refstream', '--reference-track', '--reftrack',
+        default=None,
+        help='Which stream/track in the video file to use as reference, '
+             'formatted according to ffmpeg conventions. For example, s:0 '
+             'uses the first subtitle track; a:3 would use the third audio track.'
+    )


 def add_cli_only_args(parser):
-    parser.add_argument('-v', '--version', action='version',
-                        version='{package} {version}'.format(package=__package__, version=__version__))
+    # parser.add_argument('-v', '--version', action='version',
+    #                     version='{package} {version}'.format(package=__package__, version=get_version()))
    parser.add_argument('--overwrite-input', action='store_true',
                        help='If specified, will overwrite the input srt instead of writing the output to a new file.')
    parser.add_argument('--encoding', default=DEFAULT_ENCODING,
@ -243,13 +328,9 @@ def add_cli_only_args(parser):
                             'mismatch between reference and subtitles.')
    parser.add_argument('--serialize-speech', action='store_true',
                        help='If specified, serialize reference speech to a numpy array.')
-    parser.add_argument(
-        '--reference-stream', '--refstream', '--reference-track', '--reftrack',
-        default=None,
-        help='Which stream/track in the video file to use as reference, '
-             'formatted according to ffmpeg conventions. For example, s:0 '
-             'uses the first subtitle track; a:3 would use the third audio track.'
-    )
+    parser.add_argument('--extract-subs-from-stream', default=None,
+                        help='If specified, do not attempt sync; instead, just extract subtitles'
+                             ' from the specified stream using the reference.')
    parser.add_argument(
        '--ffmpeg-path', '--ffmpegpath', default=None,
        help='Where to look for ffmpeg and ffprobe. Uses the system PATH by default.'
@ -268,7 +349,7 @@ def make_parser():
 def main():
    parser = make_parser()
    args = parser.parse_args()
-    return run(args)
+    return run(args)['retval']


 if __name__ == "__main__":
--- a/libs/ffsubsync/ffsubsync_gui.py
+++ b/libs/ffsubsync/ffsubsync_gui.py
@ -17,8 +17,11 @@ from .constants import (
    COPYRIGHT_YEAR,
    SUBSYNC_RESOURCES_ENV_MAGIC,
 )
+# set the env magic so that we look for resources in the right place
+if SUBSYNC_RESOURCES_ENV_MAGIC not in os.environ:
+    os.environ[SUBSYNC_RESOURCES_ENV_MAGIC] = getattr(sys, '_MEIPASS', '')
 from .ffsubsync import run, add_cli_only_args
-from .version import __version__, update_available
+from .version import get_version, update_available

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@ -33,7 +36,7 @@ _menu = [
                'menuTitle': 'About',
                'name': PROJECT_NAME,
                'description': LONG_DESCRIPTION,
-                'version': __version__,
+                'version': get_version(),
                'copyright': COPYRIGHT_YEAR,
                'website': WEBSITE,
                'developer': DEV_WEBSITE,
@ -49,11 +52,6 @@ _menu = [
 ]


-# set the env magic so that we look for resources in the right place
-if SUBSYNC_RESOURCES_ENV_MAGIC not in os.environ:
-    os.environ[SUBSYNC_RESOURCES_ENV_MAGIC] = getattr(sys, '_MEIPASS', '')
-
-
@Gooey(
    program_name=PROJECT_NAME,
    image_dir=os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], 'img'),
--- a/libs/ffsubsync/speech_transformers.py
+++ b/libs/ffsubsync/speech_transformers.py
@ -2,8 +2,6 @@
 from contextlib import contextmanager
 import logging
 import io
-import os
-import platform
 import subprocess
 import sys
 from datetime import timedelta
@ -15,6 +13,7 @@ from .sklearn_shim import Pipeline
 import tqdm

 from .constants import *
+from .ffmpeg_utils import ffmpeg_bin_path, subprocess_args
 from .subtitle_parser import make_subtitle_parser
 from .subtitle_transformers import SubtitleScaler

@ -22,73 +21,6 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


-# ref: https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
-# Create a set of arguments which make a ``subprocess.Popen`` (and
-# variants) call work with or without Pyinstaller, ``--noconsole`` or
-# not, on Windows and Linux. Typical use::
-#
-#   subprocess.call(['program_to_run', 'arg_1'], **subprocess_args())
-#
-# When calling ``check_output``::
-#
-#   subprocess.check_output(['program_to_run', 'arg_1'],
-#                           **subprocess_args(False))
-def _subprocess_args(include_stdout=True):
-    # The following is true only on Windows.
-    if hasattr(subprocess, 'STARTUPINFO'):
-        # On Windows, subprocess calls will pop up a command window by default
-        # when run from Pyinstaller with the ``--noconsole`` option. Avoid this
-        # distraction.
-        si = subprocess.STARTUPINFO()
-        si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
-        # Windows doesn't search the path by default. Pass it an environment so
-        # it will.
-        env = os.environ
-    else:
-        si = None
-        env = None
-
-    # ``subprocess.check_output`` doesn't allow specifying ``stdout``::
-    #
-    #   Traceback (most recent call last):
-    #     File "test_subprocess.py", line 58, in <module>
-    #       **subprocess_args(stdout=None))
-    #     File "C:\Python27\lib\subprocess.py", line 567, in check_output
-    #       raise ValueError('stdout argument not allowed, it will be overridden.')
-    #   ValueError: stdout argument not allowed, it will be overridden.
-    #
-    # So, add it only if it's needed.
-    if include_stdout:
-        ret = {'stdout': subprocess.PIPE}
-    else:
-        ret = {}
-
-    # On Windows, running this from the binary produced by Pyinstaller
-    # with the ``--noconsole`` option requires redirecting everything
-    # (stdin, stdout, stderr) to avoid an OSError exception
-    # "[Error 6] the handle is invalid."
-    ret.update({'stdin': subprocess.PIPE,
-                'stderr': subprocess.PIPE,
-                'startupinfo': si,
-                'env': env})
-    return ret
-
-
-def _ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
-    if platform.system() == 'Windows':
-        bin_name = '{}.exe'.format(bin_name)
-    if ffmpeg_resources_path is not None:
-        return os.path.join(ffmpeg_resources_path, bin_name)
-    try:
-        resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
-        if len(resource_path) > 0:
-            return os.path.join(resource_path, 'ffmpeg-bin', bin_name)
-    except KeyError as e:
-        if gui_mode:
-            logger.info("Couldn't find resource path; falling back to searching system path")
-    return bin_name
-
-
 def make_subtitle_speech_pipeline(
        fmt='srt',
        encoding=DEFAULT_ENCODING,
@ -212,7 +144,7 @@ class VideoSpeechTransformer(TransformerMixin):
        else:
            streams_to_try = [self.ref_stream]
        for stream in streams_to_try:
-            ffmpeg_args = [_ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
+            ffmpeg_args = [ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
            ffmpeg_args.extend([
                '-loglevel', 'fatal',
                '-nostdin',
@ -221,7 +153,7 @@ class VideoSpeechTransformer(TransformerMixin):
                '-f', 'srt',
                '-'
            ])
-            process = subprocess.Popen(ffmpeg_args, **_subprocess_args(include_stdout=True))
+            process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
            output = io.BytesIO(process.communicate()[0])
            if process.returncode != 0:
                break
@ -245,7 +177,7 @@ class VideoSpeechTransformer(TransformerMixin):
                logger.info(e)
        try:
            total_duration = float(ffmpeg.probe(
-                fname, cmd=_ffmpeg_bin_path('ffprobe', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)
+                fname, cmd=ffmpeg_bin_path('ffprobe', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)
            )['format']['duration']) - self.start_seconds
        except Exception as e:
            logger.warning(e)
@ -257,7 +189,7 @@ class VideoSpeechTransformer(TransformerMixin):
        else:
            raise ValueError('unknown vad: %s' % self.vad)
        media_bstring = []
-        ffmpeg_args = [_ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
+        ffmpeg_args = [ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
        if self.start_seconds > 0:
            ffmpeg_args.extend([
                '-ss', str(timedelta(seconds=self.start_seconds)),
@ -276,7 +208,7 @@ class VideoSpeechTransformer(TransformerMixin):
            '-ar', str(self.frame_rate),
            '-'
        ])
-        process = subprocess.Popen(ffmpeg_args, **_subprocess_args(include_stdout=True))
+        process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
        bytes_per_frame = 2
        frames_per_window = bytes_per_frame * self.frame_rate // self.sample_rate
        windows_per_buffer = 10000
--- a/libs/ffsubsync/subtitle_parser.py
+++ b/libs/ffsubsync/subtitle_parser.py
@ -3,9 +3,9 @@ from datetime import timedelta
 import logging

 try:
-    import cchardet
+    import cchardet as chardet
 except ImportError:
-    import chardet as cchardet
+    import chardet
 import pysubs2
 from .sklearn_shim import TransformerMixin
 import srt
@ -82,7 +82,7 @@ class GenericSubtitleParser(SubsMixin, TransformerMixin):
        with open_file(fname, 'rb') as f:
            subs = f.read()
        if self.encoding == 'infer':
-            encodings_to_try = (cchardet.detect(subs)['encoding'],)
+            encodings_to_try = (chardet.detect(subs)['encoding'],)
            self.detected_encoding_ = encodings_to_try[0]
            logger.info('detected encoding: %s' % self.detected_encoding_)
        exc = None
--- a/libs/ffsubsync/version.py
+++ b/libs/ffsubsync/version.py
@ -1,9 +1,19 @@
-# -*- coding: utf-8 -*- 
+# -*- coding: utf-8 -*-
+import os
+from .constants import SUBSYNC_RESOURCES_ENV_MAGIC
 from ._version import get_versions
 __version__ = get_versions()['version']
 del get_versions


+def get_version():
+    """Return the best available version string for this package.
+
+    If ``__version__`` does not contain 'unknown' it is returned unchanged.
+    Otherwise the version is read from the ``__version__`` file located in
+    the directory named by the ``SUBSYNC_RESOURCES_ENV_MAGIC`` environment
+    variable. When that variable is unset, or the file cannot be opened,
+    ``__version__`` is returned as-is instead of raising.
+    """
+    if 'unknown' not in __version__.lower():
+        return __version__
+    try:
+        resources_dir = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
+        with open(os.path.join(resources_dir, '__version__')) as f:
+            return f.read().strip()
+    except (KeyError, IOError, OSError):
+        # No resource directory configured (e.g. when imported as a library
+        # rather than run from a bundled build), or no version file in it:
+        # fall back to the computed version rather than crash the caller.
+        return __version__
+
+
 def make_version_tuple(vstr=None):
    if vstr is None:
        vstr = __version__
@ -31,4 +41,4 @@ def update_available():
        return False
    if not resp.ok:
        return False
-    return make_version_tuple(__version__) < make_version_tuple(latest_vstr)
+    return make_version_tuple(get_version()) < make_version_tuple(latest_vstr)
--- a/libs/version.txt
+++ b/libs/version.txt
@ -8,6 +8,7 @@ bottle-fdsend=0.1.1
 chardet=3.0.4
 dogpile.cache=0.6.5
 enzyme=0.4.1
+ffsubsync=2020-08-04
 Flask=1.1.1
 gevent-websocker=0.10.1
 gitpython=2.1.9