Improve and simplify the subtitle synchronisation mechanism. Reference track selection is now left to ffsubsync, which already includes an algorithm to choose the best possible track.

This commit is contained in:
Louis Vézina 2020-08-04 22:42:27 -04:00
parent 00efd4888c
commit ea097d6ec4
9 changed files with 340 additions and 327 deletions

View file

@ -1,8 +1,6 @@
import logging
import os
from ffsubsync.ffsubsync import run
from ffsubsync.constants import *
from knowit import api
from ffsubsync.ffsubsync import run, make_parser
from utils import get_binary
from utils import history_log, history_log_movie
from get_languages import alpha2_from_alpha3, language_from_alpha3
@ -13,31 +11,17 @@ class SubSyncer:
def __init__(self):
self.reference = None
self.srtin = None
self.reference_stream = None
self.overwrite_input = True
self.ffmpeg_path = None
# unused attributes
self.encoding = DEFAULT_ENCODING
self.vlc_mode = None
self.make_test_case = None
self.gui_mode = None
self.srtout = None
self.ffmpeg_path = None
self.args = None
self.vad = 'subs_then_auditok'
self.reference_encoding = None
self.frame_rate = DEFAULT_FRAME_RATE
self.start_seconds = DEFAULT_START_SECONDS
self.no_fix_framerate = None
self.serialize_speech = None
self.max_offset_seconds = DEFAULT_MAX_OFFSET_SECONDS
self.merge_with_reference = None
self.output_encoding = 'same'
def sync(self, video_path, srt_path, srt_lang, media_type, sonarr_series_id=None, sonarr_episode_id=None,
radarr_id=None):
self.reference = video_path
self.srtin = srt_path
self.srtout = None
self.args = None
ffprobe_exe = get_binary('ffprobe')
if not ffprobe_exe:
@ -46,69 +30,6 @@ class SubSyncer:
else:
logging.debug('BAZARR FFprobe used is %s', ffprobe_exe)
api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_exe})
data = api.know(self.reference)
using_what = None
first_embedded_subs = None
if 'subtitle' in data:
for i, embedded_subs in enumerate(data['subtitle']):
if i == 0:
first_embedded_subs = embedded_subs
if 'language' in embedded_subs:
language = embedded_subs['language'].alpha3
forced = False
if 'forced' in embedded_subs:
if embedded_subs['forced']:
forced = True
str_format = embedded_subs['format'].lower()
if forced or str_format in ['pgs', 'vobsub']:
pass
if language == "eng":
using_what = "English embedded subtitle track"
self.reference_stream = "s:{}".format(i)
break
if not self.reference_stream:
language = None
if 'language' in first_embedded_subs:
language = language_from_alpha3(first_embedded_subs['language'].alpha3)
forced = False
if 'forced' in first_embedded_subs:
if first_embedded_subs['forced']:
forced = True
str_format = first_embedded_subs['format'].lower()
if forced or str_format in ['pgs', 'vobsub']:
pass
using_what = "{0} embedded subtitle track".format(language or
'unknown language embedded subtitles track')
self.reference_stream = "s:0"
elif 'audio' in data:
audio_tracks = data['audio']
for i, audio_track in enumerate(audio_tracks):
if 'language' in audio_track:
language = audio_track['language'].alpha3
if language == srt_lang:
using_what = "{0} audio track".format(language_from_alpha3(audio_track['language'].alpha3) or
'unknown language audio track')
self.reference_stream = "a:{}".format(i)
break
if not self.reference_stream:
audio_tracks = data['audio']
for i, audio_track in enumerate(audio_tracks):
if 'language' in audio_track:
language = audio_track['language'].alpha3
if language == "eng":
using_what = "English audio track"
self.reference_stream = "a:{}".format(i)
break
if not self.reference_stream:
using_what = "first audio track"
self.reference_stream = "a:0"
else:
raise NoAudioTrack
ffmpeg_exe = get_binary('ffmpeg')
if not ffmpeg_exe:
logging.debug('BAZARR FFmpeg not found!')
@ -118,18 +39,19 @@ class SubSyncer:
self.ffmpeg_path = os.path.dirname(ffmpeg_exe)
try:
result = run(self)
unparsed_args = [self.reference, '-i', self.srtin, '--overwrite-input', '--ffmpegpath', self.ffmpeg_path,
'--vad', self.vad]
parser = make_parser()
self.args = parser.parse_args(args=unparsed_args)
result = run(self.args)
except Exception as e:
logging.error('BAZARR an exception occurs during the synchronization process for this subtitles: ' +
self.srtin)
logging.exception('BAZARR an exception occurs during the synchronization process for this subtitles: '
'{0}'.format(self.srtin))
else:
if result['sync_was_successful']:
message = "{0} subtitles synchronization ended with an offset of {1} seconds and a framerate scale " \
"factor of {2} using {3} (0:{4}).".format(language_from_alpha3(srt_lang),
result['offset_seconds'],
result['framerate_scale_factor'],
using_what,
self.reference_stream)
"factor of {2}.".format(language_from_alpha3(srt_lang), result['offset_seconds'],
"{:.2f}".format(result['framerate_scale_factor']))
if media_type == 'series':
history_log(action=5, sonarr_series_id=sonarr_series_id, sonarr_episode_id=sonarr_episode_id,
@ -140,16 +62,9 @@ class SubSyncer:
video_path=path_mappings.path_replace_reverse_movie(self.reference),
language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
else:
logging.error('BAZARR unable to sync subtitles using {0}({1}): {2}'.format(using_what,
self.reference_stream,
self.srtin))
logging.error('BAZARR unable to sync subtitles: {0}'.format(self.srtin))
return result
class NoAudioTrack(Exception):
"""Exception raised if no audio track can be found in video file."""
pass
subsync = SubSyncer()

View file

@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
git_refnames = " (HEAD -> master)"
git_full = "558bc6dc1d5342d4a5910166cf12ebb5890e86b7"
git_date = "2020-07-11 17:02:56 -0700"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords

View file

@ -0,0 +1,76 @@
import logging
import os
import platform
import subprocess
from .constants import SUBSYNC_RESOURCES_ENV_MAGIC
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ref: https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
# Create a set of arguments which make a ``subprocess.Popen`` (and
# variants) call work with or without Pyinstaller, ``--noconsole`` or
# not, on Windows and Linux. Typical use::
#
# subprocess.call(['program_to_run', 'arg_1'], **subprocess_args())
#
# When calling ``check_output``::
#
# subprocess.check_output(['program_to_run', 'arg_1'],
# **subprocess_args(False))
def subprocess_args(include_stdout=True):
    """Build keyword arguments for ``subprocess.Popen`` and its variants.

    The returned dict redirects stdin and stderr (and, optionally, stdout)
    to pipes and carries ``startupinfo`` / ``env`` entries that are only
    populated when ``subprocess.STARTUPINFO`` exists, i.e. on Windows.

    :param include_stdout: when true, also redirect ``stdout`` to a pipe.
        Pass ``False`` for ``subprocess.check_output``, which raises
        ``ValueError`` if a ``stdout`` argument is supplied.
    :return: dict suitable for ``**``-expansion into a subprocess call.
    """
    startup_info = None
    environment = None
    if hasattr(subprocess, 'STARTUPINFO'):
        # Windows only: keep a console window from popping up when running
        # from a Pyinstaller ``--noconsole`` build.
        startup_info = subprocess.STARTUPINFO()
        startup_info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        # Windows doesn't search the path by default; hand it the
        # environment so that it will.
        environment = os.environ

    # ``stdout`` is only added on request because ``check_output`` refuses it.
    popen_kwargs = {'stdout': subprocess.PIPE} if include_stdout else {}

    # A Pyinstaller ``--noconsole`` binary on Windows needs every standard
    # handle redirected, otherwise the call fails with
    # "[Error 6] the handle is invalid."
    popen_kwargs['stdin'] = subprocess.PIPE
    popen_kwargs['stderr'] = subprocess.PIPE
    popen_kwargs['startupinfo'] = startup_info
    popen_kwargs['env'] = environment
    return popen_kwargs
def ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
    """Resolve the path (or bare name) of an ffmpeg-family executable.

    Lookup order:

    1. ``ffmpeg_resources_path`` if given: the binary is joined onto it.
    2. The directory named by the ``SUBSYNC_RESOURCES_ENV_MAGIC`` environment
       variable, if set and non-empty: ``<dir>/ffmpeg-bin/<binary>``.
    3. Otherwise the bare binary name, leaving resolution to the system path.

    :param bin_name: executable name without extension, e.g. ``'ffmpeg'``.
    :param gui_mode: when true, log a note if the resources variable is unset.
    :param ffmpeg_resources_path: optional directory containing the binary.
    :return: path or name to execute.
    """
    # Windows executables carry an ``.exe`` suffix.
    if platform.system() == 'Windows':
        bin_name = '{}.exe'.format(bin_name)

    if ffmpeg_resources_path is not None:
        return os.path.join(ffmpeg_resources_path, bin_name)

    bundled_root = os.environ.get(SUBSYNC_RESOURCES_ENV_MAGIC)
    if bundled_root is None:
        if gui_mode:
            logger.info("Couldn't find resource path; falling back to searching system path")
        return bin_name
    if bundled_root:
        return os.path.join(bundled_root, 'ffmpeg-bin', bin_name)
    # Variable is set but empty: fall back to the system path silently.
    return bin_name

View file

@ -5,12 +5,14 @@ from datetime import datetime
import logging
import os
import shutil
import subprocess
import sys
import numpy as np
from .aligners import FFTAligner, MaxScoreAligner, FailedToFindAlignmentException
from .constants import *
from .ffmpeg_utils import ffmpeg_bin_path
from .sklearn_shim import Pipeline
from .speech_transformers import (
VideoSpeechTransformer,
@ -19,7 +21,7 @@ from .speech_transformers import (
)
from .subtitle_parser import make_subtitle_parser
from .subtitle_transformers import SubtitleMerger, SubtitleShifter
from .version import __version__
from .version import get_version
logger = logging.getLogger(__name__)
@ -30,104 +32,56 @@ def override(args, **kwargs):
return args_dict
def run(args):
result = {'retval': 0,
'offset_seconds': None,
'framerate_scale_factor': None,
'sync_was_successful': None}
if args.vlc_mode:
logger.setLevel(logging.CRITICAL)
if args.make_test_case and not args.gui_mode: # this validation not necessary for gui mode
if args.srtin is None or args.srtout is None:
logger.error('need to specify input and output srt files for test cases')
result['retval'] = 1
return result
if args.overwrite_input:
if args.srtin is None:
logger.error('need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin')
result['retval'] = 1
return result
if args.srtout is not None:
logger.error('overwrite input set but output file specified; refusing to run in case this was not intended')
result['retval'] = 1
return result
args.srtout = args.srtin
if args.gui_mode and args.srtout is None:
args.srtout = '{}.synced.srt'.format(os.path.splitext(args.srtin)[0])
ref_format = args.reference[-3:]
if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
logger.error('merging synced output with reference only valid '
'when reference composed of subtitles')
return 1
if args.make_test_case:
handler = logging.FileHandler('ffsubsync.log')
logger.addHandler(handler)
if ref_format in SUBTITLE_EXTENSIONS:
if args.vad is not None:
logger.warning('Vad specified, but reference was not a movie')
reference_pipe = make_subtitle_speech_pipeline(
fmt=ref_format,
**override(
args,
encoding=args.reference_encoding or DEFAULT_ENCODING
)
)
elif ref_format in ('npy', 'npz'):
if args.vad is not None:
logger.warning('Vad specified, but reference was not a movie')
reference_pipe = Pipeline([
('deserialize', DeserializeSpeechTransformer())
])
else:
vad = args.vad or DEFAULT_VAD
if args.reference_encoding is not None:
logger.warning('Reference srt encoding specified, but reference was a video file')
ref_stream = args.reference_stream
if ref_stream is not None and not ref_stream.startswith('0:'):
ref_stream = '0:' + ref_stream
reference_pipe = Pipeline([
('speech_extract', VideoSpeechTransformer(vad=vad,
sample_rate=SAMPLE_RATE,
frame_rate=args.frame_rate,
start_seconds=args.start_seconds,
ffmpeg_path=args.ffmpeg_path,
ref_stream=ref_stream,
vlc_mode=args.vlc_mode,
gui_mode=args.gui_mode))
])
if args.no_fix_framerate:
framerate_ratios = [1.]
else:
framerate_ratios = np.concatenate([
[1.], np.array(FRAMERATE_RATIOS), 1./np.array(FRAMERATE_RATIOS)
])
logger.info("extracting speech segments from reference '%s'...", args.reference)
reference_pipe.fit(args.reference)
logger.info('...done')
npy_savename = None
if args.make_test_case or args.serialize_speech:
logger.info('serializing speech...')
npy_savename = os.path.splitext(args.reference)[0] + '.npz'
np.savez_compressed(npy_savename, speech=reference_pipe.transform(args.reference))
logger.info('...done')
if args.srtin is None:
logger.info('unsynchronized subtitle file not specified; skipping synchronization')
return result
parser = make_subtitle_parser(fmt=os.path.splitext(args.srtin)[-1][1:], caching=True, **args.__dict__)
logger.info("extracting speech segments from subtitles '%s'...", args.srtin)
srt_pipes = [
make_subtitle_speech_pipeline(
**override(args, scale_factor=scale_factor, parser=parser)
).fit(args.srtin)
for scale_factor in framerate_ratios
]
logger.info('...done')
logger.info('computing alignments...')
max_offset_seconds = args.max_offset_seconds
def _ref_format(ref_fname):
return ref_fname[-3:]
def make_test_case(args, npy_savename, sync_was_successful):
    """Bundle the artifacts of a sync run into a test-case archive.

    A scratch directory named ``<reference>.<timestamp>`` is created, filled
    with the log file, the input subtitles, the synced output (only when the
    sync succeeded) and either the reference subtitles or the serialized
    speech, archived, and then removed again.

    :param args: parsed arguments; reads ``reference``, ``srtin``, ``srtout``
        and ``serialize_speech``.
    :param npy_savename: path of the serialized reference speech.
    :param sync_was_successful: whether ``args.srtout`` was produced.
    :return: ``0`` on success, ``1`` if no archive format is available.
    :raises ValueError: if ``npy_savename`` is ``None``.
    """
    if npy_savename is None:
        raise ValueError('need non-null npy_savename')
    tar_dir = '{}.{}'.format(
        args.reference,
        datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    )
    logger.info('creating test archive {}.tar.gz...'.format(tar_dir))
    os.mkdir(tar_dir)
    try:
        shutil.move('ffsubsync.log', tar_dir)
        shutil.copy(args.srtin, tar_dir)
        if sync_was_successful:
            shutil.move(args.srtout, tar_dir)
        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
            shutil.copy(args.reference, tar_dir)
        elif args.serialize_speech or args.reference == npy_savename:
            # The serialized speech must survive the run, so only copy it.
            shutil.copy(npy_savename, tar_dir)
        else:
            shutil.move(npy_savename, tar_dir)
        # A set comprehension tolerates an empty registry, unlike zip(*...)[0].
        supported_formats = {name for name, _ in shutil.get_archive_formats()}
        preferred_formats = ['gztar', 'bztar', 'xztar', 'zip', 'tar']
        for archive_format in preferred_formats:
            if archive_format in supported_formats:
                # Use the format that was actually found to be supported;
                # hard-coding 'gztar' here defeated the fallback list.
                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)
                break
        else:
            logger.error('failed to create test archive; no formats supported '
                         '(this should not happen)')
            return 1
        logger.info('...done')
    finally:
        # The scratch directory is removed whether or not archiving worked.
        shutil.rmtree(tar_dir)
    return 0
def try_sync(args, reference_pipe, srt_pipes, result):
sync_was_successful = True
try:
logger.info('extracting speech segments from subtitles file %s...', args.srtin)
for srt_pipe in srt_pipes:
srt_pipe.fit(args.srtin)
logger.info('...done')
logger.info('computing alignments...')
offset_samples, best_srt_pipe = MaxScoreAligner(
FFTAligner, SAMPLE_RATE, max_offset_seconds
FFTAligner, SAMPLE_RATE, args.max_offset_seconds
).fit_transform(
reference_pipe.transform(args.reference),
srt_pipes,
@ -157,39 +111,163 @@ def run(args):
result['framerate_scale_factor'] = scale_step.scale_factor
finally:
result['sync_was_successful'] = sync_was_successful
if args.make_test_case:
if npy_savename is None:
raise ValueError('need non-null npy_savename')
tar_dir = '{}.{}'.format(
args.reference,
datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
return sync_was_successful
def make_reference_pipe(args):
ref_format = _ref_format(args.reference)
if ref_format in SUBTITLE_EXTENSIONS:
if args.vad is not None:
logger.warning('Vad specified, but reference was not a movie')
return make_subtitle_speech_pipeline(
fmt=ref_format,
**override(
args,
encoding=args.reference_encoding or DEFAULT_ENCODING
)
)
logger.info('creating test archive {}.tar.gz...'.format(tar_dir))
os.mkdir(tar_dir)
try:
shutil.move('ffsubsync.log', tar_dir)
shutil.copy(args.srtin, tar_dir)
if sync_was_successful:
shutil.move(args.srtout, tar_dir)
if ref_format in SUBTITLE_EXTENSIONS:
shutil.copy(args.reference, tar_dir)
elif args.serialize_speech or args.reference == npy_savename:
shutil.copy(npy_savename, tar_dir)
else:
shutil.move(npy_savename, tar_dir)
supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])
preferred_formats = ['gztar', 'bztar', 'xztar', 'zip', 'tar']
for archive_format in preferred_formats:
if archive_format in supported_formats:
shutil.make_archive(tar_dir, 'gztar', os.curdir, tar_dir)
break
else:
logger.error('failed to create test archive; no formats supported '
'(this should not happen)')
result['retval'] = 1
logger.info('...done')
finally:
shutil.rmtree(tar_dir)
elif ref_format in ('npy', 'npz'):
if args.vad is not None:
logger.warning('Vad specified, but reference was not a movie')
return Pipeline([
('deserialize', DeserializeSpeechTransformer())
])
else:
vad = args.vad or DEFAULT_VAD
if args.reference_encoding is not None:
logger.warning('Reference srt encoding specified, but reference was a video file')
ref_stream = args.reference_stream
if ref_stream is not None and not ref_stream.startswith('0:'):
ref_stream = '0:' + ref_stream
return Pipeline([
('speech_extract', VideoSpeechTransformer(vad=vad,
sample_rate=SAMPLE_RATE,
frame_rate=args.frame_rate,
start_seconds=args.start_seconds,
ffmpeg_path=args.ffmpeg_path,
ref_stream=ref_stream,
vlc_mode=args.vlc_mode,
gui_mode=args.gui_mode))
])
def make_srt_pipes(args):
    """Create one subtitle speech pipeline per candidate framerate scale factor.

    With ``args.no_fix_framerate`` set only the identity factor ``1.`` is
    used; otherwise the candidates are ``1.``, every entry of
    ``FRAMERATE_RATIOS`` and the reciprocal of every entry. All pipelines
    share a single caching subtitle parser whose format is taken from the
    extension of ``args.srtin``.

    :param args: parsed arguments (its ``__dict__`` is forwarded as keywords).
    :return: list of pipelines, in candidate order; they are not fitted here.
    """
    if args.no_fix_framerate:
        scale_factors = [1.]
    else:
        known_ratios = np.array(FRAMERATE_RATIOS)
        scale_factors = np.concatenate([[1.], known_ratios, 1. / known_ratios])

    # Extension without the leading dot, e.g. 'srt'.
    subtitle_format = os.path.splitext(args.srtin)[-1][1:]
    parser = make_subtitle_parser(fmt=subtitle_format, caching=True, **args.__dict__)

    pipelines = []
    for factor in scale_factors:
        pipeline_kwargs = override(args, scale_factor=factor, parser=parser)
        pipelines.append(make_subtitle_speech_pipeline(**pipeline_kwargs))
    return pipelines
def _normalize_subtitle_stream(stream):
    """Return *stream* in ``0:s:<index>`` form, or ``None`` if no index remains."""
    prefix = '0:s:'
    if stream.startswith(prefix):
        normalized = stream
    elif stream.startswith('s:'):
        # 's:N' only lacks the input-file index.
        normalized = '0:{}'.format(stream)
    else:
        # Anything else is treated as a bare subtitle stream index.
        normalized = '{}{}'.format(prefix, stream)
    if len(normalized) == len(prefix):
        return None
    return normalized


def extract_subtitles_from_reference(args):
    """Extract a subtitle stream from the reference file as SRT using ffmpeg.

    ``args.extract_subs_from_stream`` may be given as ``N``, ``s:N`` or
    ``0:s:N``; all three are mapped to ``0:s:N``. (Previously ``s:N`` was
    turned into ``0:s:s:N`` because the prefix checks ran in the wrong
    order.) Output goes to ``args.srtout``, or to stdout when it is ``None``.

    :param args: parsed arguments; reads ``extract_subs_from_stream``,
        ``reference``, ``srtout``, ``gui_mode`` and ``ffmpeg_path``.
    :return: ffmpeg's return code, or ``1`` if the stream specifier is empty.
    """
    stream = _normalize_subtitle_stream(args.extract_subs_from_stream)
    if stream is None:
        logger.error('invalid stream for subtitle extraction: %s', args.extract_subs_from_stream)
        return 1
    ffmpeg_args = [ffmpeg_bin_path('ffmpeg', args.gui_mode, ffmpeg_resources_path=args.ffmpeg_path)]
    ffmpeg_args.extend([
        '-y',
        '-nostdin',
        '-loglevel', 'fatal',
        '-i', args.reference,
        '-map', stream,
        '-f', 'srt',
    ])
    # '-' makes ffmpeg write the subtitles to stdout.
    ffmpeg_args.append('-' if args.srtout is None else args.srtout)
    logger.info('attempting to extract subtitles to {} ...'.format('stdout' if args.srtout is None else args.srtout))
    retcode = subprocess.call(ffmpeg_args)
    if retcode == 0:
        logger.info('...done')
    else:
        logger.error('ffmpeg unable to extract subtitles from reference; return code %d', retcode)
    return retcode
def validate_args(args):
    """Reject inconsistent combinations of command-line arguments.

    As a side effect, the module logger is set to ``CRITICAL`` when
    ``args.vlc_mode`` is enabled.

    :param args: parsed arguments; reads ``vlc_mode``, ``make_test_case``,
        ``gui_mode``, ``srtin``, ``srtout``, ``overwrite_input`` and
        ``extract_subs_from_stream``.
    :raises ValueError: with a human-readable message for the first
        conflicting combination found.
    """
    if args.vlc_mode:
        logger.setLevel(logging.CRITICAL)
    if args.make_test_case and not args.gui_mode:  # this validation not necessary for gui mode
        if args.srtin is None or args.srtout is None:
            raise ValueError('need to specify input and output srt files for test cases')
    if args.overwrite_input:
        if args.extract_subs_from_stream is not None:
            raise ValueError('input overwriting not allowed for extracting subtitles from reference')
        if args.srtin is None:
            raise ValueError(
                'need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin'
            )
        if args.srtout is not None:
            raise ValueError(
                'overwrite input set but output file specified; refusing to run in case this was not intended'
            )
    if args.extract_subs_from_stream is not None:
        if args.make_test_case:
            raise ValueError('test case is for sync and not subtitle extraction')
        if args.srtin is not None:
            raise ValueError('stream specified for reference subtitle extraction; -i flag for sync input not allowed')
def run(args):
    """Validate *args*, then extract subtitles or synchronise them.

    :param args: parsed arguments as produced by the module's argument parser.
    :return: dict with the keys ``retval`` (0 on success), ``offset_seconds``,
        ``framerate_scale_factor`` and ``sync_was_successful``; the last
        three stay ``None`` unless ``try_sync`` fills them in.
    """
    result = dict(
        retval=0,
        offset_seconds=None,
        framerate_scale_factor=None,
        sync_was_successful=None,
    )

    try:
        validate_args(args)
    except ValueError as validation_error:
        logger.error(validation_error)
        result['retval'] = 1
        return result

    # Resolve the output path before anything needs it.
    if args.overwrite_input:
        args.srtout = args.srtin
    if args.gui_mode and args.srtout is None:
        input_stem = os.path.splitext(args.srtin)[0]
        args.srtout = '{}.synced.srt'.format(input_stem)

    reference_format = _ref_format(args.reference)
    if args.merge_with_reference and reference_format not in SUBTITLE_EXTENSIONS:
        logger.error('merging synced output with reference only valid '
                     'when reference composed of subtitles')
        result['retval'] = 1
        return result

    if args.make_test_case:
        # Capture the log so it can be bundled into the test archive later.
        logger.addHandler(logging.FileHandler('ffsubsync.log'))

    if args.extract_subs_from_stream is not None:
        # Extraction-only mode: no synchronisation is attempted.
        result['retval'] = extract_subtitles_from_reference(args)
        return result

    reference_pipe = make_reference_pipe(args)
    logger.info("extracting speech segments from reference '%s'...", args.reference)
    reference_pipe.fit(args.reference)
    logger.info('...done')

    npy_savename = None
    if args.make_test_case or args.serialize_speech:
        logger.info('serializing speech...')
        npy_savename = os.path.splitext(args.reference)[0] + '.npz'
        np.savez_compressed(npy_savename, speech=reference_pipe.transform(args.reference))
        logger.info('...done')

    if args.srtin is None:
        logger.info('unsynchronized subtitle file not specified; skipping synchronization')
        return result

    srt_pipes = make_srt_pipes(args)
    sync_was_successful = try_sync(args, reference_pipe, srt_pipes, result)
    if args.make_test_case:
        result['retval'] += make_test_case(args, npy_savename, sync_was_successful)
    return result
@ -206,11 +284,18 @@ def add_main_args_for_cli(parser):
help='If specified, serialize reference speech to a numpy array, '
'and create an archive with input/output subtitles '
'and serialized speech.')
parser.add_argument(
'--reference-stream', '--refstream', '--reference-track', '--reftrack',
default=None,
help='Which stream/track in the video file to use as reference, '
'formatted according to ffmpeg conventions. For example, s:0 '
'uses the first subtitle track; a:3 would use the third audio track.'
)
def add_cli_only_args(parser):
parser.add_argument('-v', '--version', action='version',
version='{package} {version}'.format(package=__package__, version=__version__))
# parser.add_argument('-v', '--version', action='version',
# version='{package} {version}'.format(package=__package__, version=get_version()))
parser.add_argument('--overwrite-input', action='store_true',
help='If specified, will overwrite the input srt instead of writing the output to a new file.')
parser.add_argument('--encoding', default=DEFAULT_ENCODING,
@ -243,13 +328,9 @@ def add_cli_only_args(parser):
'mismatch between reference and subtitles.')
parser.add_argument('--serialize-speech', action='store_true',
help='If specified, serialize reference speech to a numpy array.')
parser.add_argument(
'--reference-stream', '--refstream', '--reference-track', '--reftrack',
default=None,
help='Which stream/track in the video file to use as reference, '
'formatted according to ffmpeg conventions. For example, s:0 '
'uses the first subtitle track; a:3 would use the third audio track.'
)
parser.add_argument('--extract-subs-from-stream', default=None,
help='If specified, do not attempt sync; instead, just extract subtitles'
' from the specified stream using the reference.')
parser.add_argument(
'--ffmpeg-path', '--ffmpegpath', default=None,
help='Where to look for ffmpeg and ffprobe. Uses the system PATH by default.'
@ -268,7 +349,7 @@ def make_parser():
def main():
parser = make_parser()
args = parser.parse_args()
return run(args)
return run(args)['retval']
if __name__ == "__main__":

View file

@ -17,8 +17,11 @@ from .constants import (
COPYRIGHT_YEAR,
SUBSYNC_RESOURCES_ENV_MAGIC,
)
# set the env magic so that we look for resources in the right place
if SUBSYNC_RESOURCES_ENV_MAGIC not in os.environ:
os.environ[SUBSYNC_RESOURCES_ENV_MAGIC] = getattr(sys, '_MEIPASS', '')
from .ffsubsync import run, add_cli_only_args
from .version import __version__, update_available
from .version import get_version, update_available
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@ -33,7 +36,7 @@ _menu = [
'menuTitle': 'About',
'name': PROJECT_NAME,
'description': LONG_DESCRIPTION,
'version': __version__,
'version': get_version(),
'copyright': COPYRIGHT_YEAR,
'website': WEBSITE,
'developer': DEV_WEBSITE,
@ -49,11 +52,6 @@ _menu = [
]
# set the env magic so that we look for resources in the right place
if SUBSYNC_RESOURCES_ENV_MAGIC not in os.environ:
os.environ[SUBSYNC_RESOURCES_ENV_MAGIC] = getattr(sys, '_MEIPASS', '')
@Gooey(
program_name=PROJECT_NAME,
image_dir=os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], 'img'),

View file

@ -2,8 +2,6 @@
from contextlib import contextmanager
import logging
import io
import os
import platform
import subprocess
import sys
from datetime import timedelta
@ -15,6 +13,7 @@ from .sklearn_shim import Pipeline
import tqdm
from .constants import *
from .ffmpeg_utils import ffmpeg_bin_path, subprocess_args
from .subtitle_parser import make_subtitle_parser
from .subtitle_transformers import SubtitleScaler
@ -22,73 +21,6 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ref: https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
# Create a set of arguments which make a ``subprocess.Popen`` (and
# variants) call work with or without Pyinstaller, ``--noconsole`` or
# not, on Windows and Linux. Typical use::
#
# subprocess.call(['program_to_run', 'arg_1'], **subprocess_args())
#
# When calling ``check_output``::
#
# subprocess.check_output(['program_to_run', 'arg_1'],
# **subprocess_args(False))
def _subprocess_args(include_stdout=True):
# The following is true only on Windows.
if hasattr(subprocess, 'STARTUPINFO'):
# On Windows, subprocess calls will pop up a command window by default
# when run from Pyinstaller with the ``--noconsole`` option. Avoid this
# distraction.
si = subprocess.STARTUPINFO()
si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
# Windows doesn't search the path by default. Pass it an environment so
# it will.
env = os.environ
else:
si = None
env = None
# ``subprocess.check_output`` doesn't allow specifying ``stdout``::
#
# Traceback (most recent call last):
# File "test_subprocess.py", line 58, in <module>
# **subprocess_args(stdout=None))
# File "C:\Python27\lib\subprocess.py", line 567, in check_output
# raise ValueError('stdout argument not allowed, it will be overridden.')
# ValueError: stdout argument not allowed, it will be overridden.
#
# So, add it only if it's needed.
if include_stdout:
ret = {'stdout': subprocess.PIPE}
else:
ret = {}
# On Windows, running this from the binary produced by Pyinstaller
# with the ``--noconsole`` option requires redirecting everything
# (stdin, stdout, stderr) to avoid an OSError exception
# "[Error 6] the handle is invalid."
ret.update({'stdin': subprocess.PIPE,
'stderr': subprocess.PIPE,
'startupinfo': si,
'env': env})
return ret
def _ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
if platform.system() == 'Windows':
bin_name = '{}.exe'.format(bin_name)
if ffmpeg_resources_path is not None:
return os.path.join(ffmpeg_resources_path, bin_name)
try:
resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
if len(resource_path) > 0:
return os.path.join(resource_path, 'ffmpeg-bin', bin_name)
except KeyError as e:
if gui_mode:
logger.info("Couldn't find resource path; falling back to searching system path")
return bin_name
def make_subtitle_speech_pipeline(
fmt='srt',
encoding=DEFAULT_ENCODING,
@ -212,7 +144,7 @@ class VideoSpeechTransformer(TransformerMixin):
else:
streams_to_try = [self.ref_stream]
for stream in streams_to_try:
ffmpeg_args = [_ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
ffmpeg_args = [ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
ffmpeg_args.extend([
'-loglevel', 'fatal',
'-nostdin',
@ -221,7 +153,7 @@ class VideoSpeechTransformer(TransformerMixin):
'-f', 'srt',
'-'
])
process = subprocess.Popen(ffmpeg_args, **_subprocess_args(include_stdout=True))
process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
output = io.BytesIO(process.communicate()[0])
if process.returncode != 0:
break
@ -245,7 +177,7 @@ class VideoSpeechTransformer(TransformerMixin):
logger.info(e)
try:
total_duration = float(ffmpeg.probe(
fname, cmd=_ffmpeg_bin_path('ffprobe', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)
fname, cmd=ffmpeg_bin_path('ffprobe', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)
)['format']['duration']) - self.start_seconds
except Exception as e:
logger.warning(e)
@ -257,7 +189,7 @@ class VideoSpeechTransformer(TransformerMixin):
else:
raise ValueError('unknown vad: %s' % self.vad)
media_bstring = []
ffmpeg_args = [_ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
ffmpeg_args = [ffmpeg_bin_path('ffmpeg', self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path)]
if self.start_seconds > 0:
ffmpeg_args.extend([
'-ss', str(timedelta(seconds=self.start_seconds)),
@ -276,7 +208,7 @@ class VideoSpeechTransformer(TransformerMixin):
'-ar', str(self.frame_rate),
'-'
])
process = subprocess.Popen(ffmpeg_args, **_subprocess_args(include_stdout=True))
process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
bytes_per_frame = 2
frames_per_window = bytes_per_frame * self.frame_rate // self.sample_rate
windows_per_buffer = 10000

View file

@ -3,9 +3,9 @@ from datetime import timedelta
import logging
try:
import cchardet
import cchardet as chardet
except ImportError:
import chardet as cchardet
import chardet
import pysubs2
from .sklearn_shim import TransformerMixin
import srt
@ -82,7 +82,7 @@ class GenericSubtitleParser(SubsMixin, TransformerMixin):
with open_file(fname, 'rb') as f:
subs = f.read()
if self.encoding == 'infer':
encodings_to_try = (cchardet.detect(subs)['encoding'],)
encodings_to_try = (chardet.detect(subs)['encoding'],)
self.detected_encoding_ = encodings_to_try[0]
logger.info('detected encoding: %s' % self.detected_encoding_)
exc = None

View file

@ -1,9 +1,19 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import os
from .constants import SUBSYNC_RESOURCES_ENV_MAGIC
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
def get_version():
    """Return the package version string.

    When ``__version__`` contains "unknown" (case-insensitive), the version
    is read instead from the ``__version__`` file inside the directory named
    by the ``SUBSYNC_RESOURCES_ENV_MAGIC`` environment variable.
    """
    if 'unknown' not in __version__.lower():
        return __version__
    version_file = os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], '__version__')
    with open(version_file) as f:
        return f.read().strip()
def make_version_tuple(vstr=None):
if vstr is None:
vstr = __version__
@ -31,4 +41,4 @@ def update_available():
return False
if not resp.ok:
return False
return make_version_tuple(__version__) < make_version_tuple(latest_vstr)
return make_version_tuple(get_version()) < make_version_tuple(latest_vstr)

View file

@ -8,6 +8,7 @@ bottle-fdsend=0.1.1
chardet=3.0.4
dogpile.cache=0.6.5
enzyme=0.4.1
ffsubsync=2020-08-04
Flask=1.1.1
gevent-websocker=0.10.1
gitpython=2.1.9