mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-01-01 12:32:25 +08:00
Updated pysubs2 module to support newer SSA files.
This commit is contained in:
parent
60353c0367
commit
09a8335a03
17 changed files with 548 additions and 299 deletions
|
@ -10,3 +10,6 @@ load = SSAFile.load
|
|||
|
||||
#: Alias for :meth:`pysubs2.time.make_time()`.
|
||||
make_time = time.make_time
|
||||
|
||||
#: Alias for `pysubs2.common.VERSION`.
|
||||
__version__ = VERSION
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
from __future__ import unicode_literals, print_function
|
||||
import argparse
|
||||
import codecs
|
||||
import os
|
||||
|
@ -8,38 +7,39 @@ import io
|
|||
from io import open
|
||||
import sys
|
||||
from textwrap import dedent
|
||||
from .formats import get_file_extension
|
||||
from .formats import get_file_extension, FORMAT_IDENTIFIERS
|
||||
from .time import make_time
|
||||
from .ssafile import SSAFile
|
||||
from .common import PY3, VERSION
|
||||
from .common import VERSION
|
||||
import logging
|
||||
|
||||
|
||||
def positive_float(s):
|
||||
def positive_float(s: str) -> float:
|
||||
x = float(s)
|
||||
if not x > 0:
|
||||
raise argparse.ArgumentTypeError("%r is not a positive number" % s)
|
||||
return x
|
||||
|
||||
def character_encoding(s):
|
||||
def character_encoding(s: str) -> str:
|
||||
try:
|
||||
codecs.lookup(s)
|
||||
return s
|
||||
except LookupError:
|
||||
raise argparse.ArgumentError
|
||||
|
||||
def time(s):
|
||||
def time(s: str):
|
||||
d = {}
|
||||
for v, k in re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s):
|
||||
d[k] = float(v)
|
||||
return make_time(**d)
|
||||
|
||||
|
||||
def change_ext(path, ext):
|
||||
def change_ext(path: str, ext: str) -> str:
|
||||
base, _ = op.splitext(path)
|
||||
return base + ext
|
||||
|
||||
|
||||
class Pysubs2CLI(object):
|
||||
class Pysubs2CLI:
|
||||
def __init__(self):
|
||||
parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
prog="pysubs2",
|
||||
|
@ -50,6 +50,7 @@ class Pysubs2CLI(object):
|
|||
epilog=dedent("""
|
||||
usage examples:
|
||||
python -m pysubs2 --to srt *.ass
|
||||
python -m pysubs2 --to srt --clean *.ass
|
||||
python -m pysubs2 --to microdvd --fps 23.976 *.ass
|
||||
python -m pysubs2 --shift 0.3s *.srt
|
||||
python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt
|
||||
|
@ -57,21 +58,21 @@ class Pysubs2CLI(object):
|
|||
python -m pysubs2 --transform-framerate 25 23.976 *.srt"""))
|
||||
|
||||
parser.add_argument("files", nargs="*", metavar="FILE",
|
||||
help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or "
|
||||
"MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, "
|
||||
"reading from standard input and writing to standard output.")
|
||||
help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt), "
|
||||
"MicroDVD (*.sub) or other supported format. When no files are specified, "
|
||||
"pysubs2 will work as a pipe, reading from standard input and writing to standard output.")
|
||||
|
||||
parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION)
|
||||
|
||||
parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format",
|
||||
parser.add_argument("-f", "--from", choices=FORMAT_IDENTIFIERS, dest="input_format",
|
||||
help="By default, subtitle format is detected from the file. This option can be used to "
|
||||
"skip autodetection and force specific format. Generally, it should never be needed.")
|
||||
parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format",
|
||||
parser.add_argument("-t", "--to", choices=FORMAT_IDENTIFIERS, dest="output_format",
|
||||
help="Convert subtitle files to given format. By default, each file is saved in its "
|
||||
"original format.")
|
||||
parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding,
|
||||
help="Character encoding for input files. By default, ISO-8859-1 is used for both "
|
||||
"input and output, which should generally work (for 8-bit encodings).")
|
||||
parser.add_argument("--input-enc", metavar="ENCODING", default="utf-8", type=character_encoding,
|
||||
help="Character encoding for input files. By default, UTF-8 is used for both "
|
||||
"input and output.")
|
||||
parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding,
|
||||
help="Character encoding for output files. By default, it is the same as input encoding. "
|
||||
"If you wish to convert between encodings, make sure --input-enc is set correctly! "
|
||||
|
@ -85,6 +86,11 @@ class Pysubs2CLI(object):
|
|||
help="Use this to save all files to given directory. By default, every file is saved to its parent directory, "
|
||||
"ie. unless it's being saved in different subtitle format (and thus with different file extension), "
|
||||
"it overwrites the original file.")
|
||||
parser.add_argument("--clean", action="store_true",
|
||||
help="Attempt to remove non-essential subtitles (eg. karaoke, SSA drawing tags), "
|
||||
"strip styling information when saving to non-SSA formats")
|
||||
parser.add_argument("--verbose", action="store_true",
|
||||
help="Print misc logging")
|
||||
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
|
||||
|
@ -105,6 +111,9 @@ class Pysubs2CLI(object):
|
|||
args = self.parser.parse_args(argv)
|
||||
errors = 0
|
||||
|
||||
if args.verbose:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
if args.output_dir and not op.exists(args.output_dir):
|
||||
os.makedirs(args.output_dir)
|
||||
|
||||
|
@ -138,19 +147,15 @@ class Pysubs2CLI(object):
|
|||
outpath = op.join(args.output_dir, filename)
|
||||
|
||||
with open(outpath, "w", encoding=args.output_enc) as outfile:
|
||||
subs.to_file(outfile, output_format, args.fps)
|
||||
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
|
||||
else:
|
||||
if PY3:
|
||||
infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
|
||||
outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
|
||||
else:
|
||||
infile = io.TextIOWrapper(sys.stdin, args.input_enc)
|
||||
outfile = io.TextIOWrapper(sys.stdout, args.output_enc)
|
||||
infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
|
||||
outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
|
||||
|
||||
subs = SSAFile.from_file(infile, args.input_format, args.fps)
|
||||
self.process(subs, args)
|
||||
output_format = args.output_format or subs.format
|
||||
subs.to_file(outfile, output_format, args.fps)
|
||||
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
|
||||
|
||||
return (0 if errors == 0 else 1)
|
||||
|
||||
|
@ -164,6 +169,9 @@ class Pysubs2CLI(object):
|
|||
in_fps, out_fps = args.transform_framerate
|
||||
subs.transform_framerate(in_fps, out_fps)
|
||||
|
||||
if args.clean:
|
||||
subs.remove_miscellaneous_events()
|
||||
|
||||
|
||||
def __main__():
|
||||
cli = Pysubs2CLI()
|
||||
|
|
|
@ -1,30 +1,32 @@
|
|||
from collections import namedtuple
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import Union
|
||||
|
||||
_Color = namedtuple("Color", "r g b a")
|
||||
|
||||
class Color(_Color):
|
||||
@dataclass(init=False)
|
||||
class Color:
|
||||
"""
|
||||
(r, g, b, a) namedtuple for 8-bit RGB color with alpha channel.
|
||||
8-bit RGB color with alpha channel.
|
||||
|
||||
All values are ints from 0 to 255.
|
||||
"""
|
||||
def __new__(cls, r, g, b, a=0):
|
||||
r: int
|
||||
g: int
|
||||
b: int
|
||||
a: int = 0
|
||||
|
||||
def __init__(self, r: int, g: int, b: int, a: int = 0):
|
||||
for value in r, g, b, a:
|
||||
if value not in range(256):
|
||||
raise ValueError("Color channels must have values 0-255")
|
||||
|
||||
return _Color.__new__(cls, r, g, b, a)
|
||||
self.r = r
|
||||
self.g = g
|
||||
self.b = b
|
||||
self.a = a
|
||||
|
||||
|
||||
#: Version of the pysubs2 library.
|
||||
VERSION = "0.2.4"
|
||||
VERSION = "1.2.0"
|
||||
|
||||
|
||||
PY3 = sys.version_info.major == 3
|
||||
|
||||
if PY3:
|
||||
text_type = str
|
||||
binary_string_type = bytes
|
||||
else:
|
||||
text_type = unicode
|
||||
binary_string_type = str
|
||||
IntOrFloat = Union[int, float]
|
||||
|
|
|
@ -1,17 +1,22 @@
|
|||
class Pysubs2Error(Exception):
|
||||
"""Base class for pysubs2 exceptions."""
|
||||
|
||||
|
||||
class UnknownFPSError(Pysubs2Error):
|
||||
"""Framerate was not specified and couldn't be inferred otherwise."""
|
||||
|
||||
|
||||
class UnknownFileExtensionError(Pysubs2Error):
|
||||
"""File extension does not pertain to any known subtitle format."""
|
||||
|
||||
|
||||
class UnknownFormatIdentifierError(Pysubs2Error):
|
||||
"""Unknown subtitle format identifier (ie. string like ``"srt"``)."""
|
||||
|
||||
|
||||
class FormatAutodetectionError(Pysubs2Error):
|
||||
"""Subtitle format is ambiguous or unknown."""
|
||||
|
||||
|
||||
class ContentNotUsable(Pysubs2Error):
|
||||
"""Current content not usable for specified format"""
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
class FormatBase(object):
|
||||
from typing import Optional
|
||||
import io
|
||||
|
||||
|
||||
class FormatBase:
|
||||
"""
|
||||
Base class for subtitle format implementations.
|
||||
|
||||
|
@ -14,7 +18,7 @@ class FormatBase(object):
|
|||
|
||||
"""
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
def from_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
|
||||
"""
|
||||
Load subtitle file into an empty SSAFile.
|
||||
|
||||
|
@ -37,7 +41,7 @@ class FormatBase(object):
|
|||
raise NotImplementedError("Parsing is not supported for this format")
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
def to_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
|
||||
"""
|
||||
Write SSAFile into a file.
|
||||
|
||||
|
@ -62,7 +66,7 @@ class FormatBase(object):
|
|||
raise NotImplementedError("Writing is not supported for this format")
|
||||
|
||||
@classmethod
|
||||
def guess_format(self, text):
|
||||
def guess_format(self, text: str) -> Optional[str]:
|
||||
"""
|
||||
Return format identifier of recognized format, or None.
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from typing import Dict, Type
|
||||
|
||||
from .formatbase import FormatBase
|
||||
from .microdvd import MicroDVDFormat
|
||||
from .subrip import SubripFormat
|
||||
|
@ -5,20 +7,22 @@ from .jsonformat import JSONFormat
|
|||
from .substation import SubstationFormat
|
||||
from .mpl2 import MPL2Format
|
||||
from .tmp import TmpFormat
|
||||
from .webvtt import WebVTTFormat
|
||||
from .exceptions import *
|
||||
|
||||
#: Dict mapping file extensions to format identifiers.
|
||||
FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
|
||||
FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
|
||||
".srt": "srt",
|
||||
".ass": "ass",
|
||||
".ssa": "ssa",
|
||||
".sub": "microdvd",
|
||||
".json": "json",
|
||||
".txt": "tmp",
|
||||
".vtt": "vtt",
|
||||
}
|
||||
|
||||
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
|
||||
FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
|
||||
FORMAT_IDENTIFIER_TO_FORMAT_CLASS: Dict[str, Type[FormatBase]] = {
|
||||
"srt": SubripFormat,
|
||||
"ass": SubstationFormat,
|
||||
"ssa": SubstationFormat,
|
||||
|
@ -26,23 +30,29 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
|
|||
"json": JSONFormat,
|
||||
"mpl2": MPL2Format,
|
||||
"tmp": TmpFormat,
|
||||
"vtt": WebVTTFormat,
|
||||
}
|
||||
|
||||
def get_format_class(format_):
|
||||
FORMAT_IDENTIFIERS = list(FORMAT_IDENTIFIER_TO_FORMAT_CLASS.keys())
|
||||
|
||||
|
||||
def get_format_class(format_: str) -> Type[FormatBase]:
|
||||
"""Format identifier -> format class (ie. subclass of FormatBase)"""
|
||||
try:
|
||||
return FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
|
||||
except KeyError:
|
||||
raise UnknownFormatIdentifierError(format_)
|
||||
|
||||
def get_format_identifier(ext):
|
||||
|
||||
def get_format_identifier(ext: str) -> str:
|
||||
"""File extension -> format identifier"""
|
||||
try:
|
||||
return FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
|
||||
except KeyError:
|
||||
raise UnknownFileExtensionError(ext)
|
||||
|
||||
def get_file_extension(format_):
|
||||
|
||||
def get_file_extension(format_: str) -> str:
|
||||
"""Format identifier -> file extension"""
|
||||
if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
|
||||
raise UnknownFormatIdentifierError(format_)
|
||||
|
@ -53,7 +63,8 @@ def get_file_extension(format_):
|
|||
|
||||
raise RuntimeError("No file extension for format %r" % format_)
|
||||
|
||||
def autodetect_format(content):
|
||||
|
||||
def autodetect_format(content: str) -> str:
|
||||
"""Return format identifier for given fragment or raise FormatAutodetectionError."""
|
||||
formats = set()
|
||||
for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values():
|
||||
|
|
|
@ -1,20 +1,35 @@
|
|||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
from .common import Color, PY3
|
||||
from .common import Color
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
from .formatbase import FormatBase
|
||||
|
||||
|
||||
# We're using Color dataclass
|
||||
# https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses
|
||||
class EnhancedJSONEncoder(json.JSONEncoder):
|
||||
def default(self, o):
|
||||
if dataclasses.is_dataclass(o):
|
||||
return dataclasses.asdict(o)
|
||||
return super().default(o)
|
||||
|
||||
|
||||
class JSONFormat(FormatBase):
|
||||
"""
|
||||
Implementation of JSON subtitle pseudo-format (serialized pysubs2 internal representation)
|
||||
|
||||
This is essentially SubStation Alpha as JSON.
|
||||
"""
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if text.startswith("{\""):
|
||||
return "json"
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
|
||||
data = json.load(fp)
|
||||
|
||||
subs.info.clear()
|
||||
|
@ -25,7 +40,7 @@ class JSONFormat(FormatBase):
|
|||
subs.styles[name] = sty = SSAStyle()
|
||||
for k, v in fields.items():
|
||||
if "color" in k:
|
||||
setattr(sty, k, Color(*v))
|
||||
setattr(sty, k, Color(**v))
|
||||
else:
|
||||
setattr(sty, k, v)
|
||||
|
||||
|
@ -33,14 +48,11 @@ class JSONFormat(FormatBase):
|
|||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
|
||||
data = {
|
||||
"info": dict(**subs.info),
|
||||
"styles": {name: sty.as_dict() for name, sty in subs.styles.items()},
|
||||
"events": [ev.as_dict() for ev in subs.events]
|
||||
}
|
||||
|
||||
if PY3:
|
||||
json.dump(data, fp)
|
||||
else:
|
||||
text = json.dumps(data, fp)
|
||||
fp.write(unicode(text))
|
||||
json.dump(data, fp, cls=EnhancedJSONEncoder)
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
from __future__ import unicode_literals, print_function
|
||||
|
||||
from functools import partial
|
||||
import re
|
||||
from .common import text_type
|
||||
from .exceptions import UnknownFPSError
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
|
@ -15,13 +12,16 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
|
|||
|
||||
|
||||
class MicroDVDFormat(FormatBase):
|
||||
"""MicroDVD subtitle format implementation"""
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if any(map(MICRODVD_LINE.match, text.splitlines())):
|
||||
return "microdvd"
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, fps=None, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
|
||||
for line in fp:
|
||||
match = MICRODVD_LINE.match(line)
|
||||
if not match:
|
||||
|
@ -63,7 +63,18 @@ class MicroDVDFormat(FormatBase):
|
|||
subs.append(ev)
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs):
|
||||
def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs):
|
||||
"""
|
||||
See :meth:`pysubs2.formats.FormatBase.to_file()`
|
||||
|
||||
The only supported styling is marking whole lines italic.
|
||||
|
||||
Keyword args:
|
||||
write_fps_declaration: If True, create a zero-duration first subtitle which will contain
|
||||
the fps.
|
||||
apply_styles: If False, do not write any styling.
|
||||
|
||||
"""
|
||||
if fps is None:
|
||||
fps = subs.fps
|
||||
|
||||
|
@ -83,11 +94,14 @@ class MicroDVDFormat(FormatBase):
|
|||
|
||||
# insert an artificial first line telling the framerate
|
||||
if write_fps_declaration:
|
||||
subs.insert(0, SSAEvent(start=0, end=0, text=text_type(fps)))
|
||||
subs.insert(0, SSAEvent(start=0, end=0, text=str(fps)))
|
||||
|
||||
for line in subs:
|
||||
if line.is_comment or line.is_drawing:
|
||||
continue
|
||||
|
||||
for line in (ev for ev in subs if not ev.is_comment):
|
||||
text = "|".join(line.plaintext.splitlines())
|
||||
if is_entirely_italic(line):
|
||||
if apply_styles and is_entirely_italic(line):
|
||||
text = "{Y:i}" + text
|
||||
|
||||
start, end = map(to_frames, (line.start, line.end))
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
# coding=utf-8
|
||||
|
||||
from __future__ import print_function, division, unicode_literals
|
||||
import re
|
||||
|
||||
from .time import times_to_ms
|
||||
|
@ -13,13 +10,16 @@ MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*)")
|
|||
|
||||
|
||||
class MPL2Format(FormatBase):
|
||||
"""MPL2 subtitle format implementation"""
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if MPL2_FORMAT.search(text):
|
||||
return "mpl2"
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
|
||||
def prepare_text(lines):
|
||||
out = []
|
||||
for s in lines.split("|"):
|
||||
|
@ -37,7 +37,12 @@ class MPL2Format(FormatBase):
|
|||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
"""
|
||||
See :meth:`pysubs2.formats.FormatBase.to_file()`
|
||||
|
||||
No styling is supported at the moment.
|
||||
|
||||
"""
|
||||
# TODO handle italics
|
||||
for line in subs:
|
||||
if line.is_comment:
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
from __future__ import unicode_literals
|
||||
import re
|
||||
import warnings
|
||||
from typing import Optional, Dict, Any, ClassVar
|
||||
import dataclasses
|
||||
|
||||
from .common import IntOrFloat
|
||||
from .time import ms_to_str, make_time
|
||||
from .common import PY3
|
||||
|
||||
|
||||
class SSAEvent(object):
|
||||
@dataclasses.dataclass(repr=False, eq=False, order=False)
|
||||
class SSAEvent:
|
||||
"""
|
||||
A SubStation Event, ie. one subtitle.
|
||||
|
||||
|
@ -21,36 +25,29 @@ class SSAEvent(object):
|
|||
>>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!")
|
||||
|
||||
"""
|
||||
OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}")
|
||||
OVERRIDE_SEQUENCE: ClassVar = re.compile(r"{[^}]*}")
|
||||
|
||||
#: All fields in SSAEvent.
|
||||
FIELDS = frozenset([
|
||||
"start", "end", "text", "marked", "layer", "style",
|
||||
"name", "marginl", "marginr", "marginv", "effect", "type"
|
||||
])
|
||||
|
||||
def __init__(self, **fields):
|
||||
self.start = 0 #: Subtitle start time (in milliseconds)
|
||||
self.end = 10000 #: Subtitle end time (in milliseconds)
|
||||
self.text = "" #: Text of subtitle (with SubStation override tags)
|
||||
self.marked = False #: (SSA only)
|
||||
self.layer = 0 #: Layer number, 0 is the lowest layer (ASS only)
|
||||
self.style = "Default" #: Style name
|
||||
self.name = "" #: Actor name
|
||||
self.marginl = 0 #: Left margin
|
||||
self.marginr = 0 #: Right margin
|
||||
self.marginv = 0 #: Vertical margin
|
||||
self.effect = "" #: Line effect
|
||||
self.type = "Dialogue" #: Line type (Dialogue/Comment)
|
||||
|
||||
for k, v in fields.items():
|
||||
if k in self.FIELDS:
|
||||
setattr(self, k, v)
|
||||
else:
|
||||
raise ValueError("SSAEvent has no field named %r" % k)
|
||||
start: int = 0 #: Subtitle start time (in milliseconds)
|
||||
end: int = 10000 #: Subtitle end time (in milliseconds)
|
||||
text: str = "" #: Text of subtitle (with SubStation override tags)
|
||||
marked: bool = False #: (SSA only)
|
||||
layer: int = 0 #: Layer number, 0 is the lowest layer (ASS only)
|
||||
style: str = "Default" #: Style name
|
||||
name: str = "" #: Actor name
|
||||
marginl: int = 0 #: Left margin
|
||||
marginr: int = 0 #: Right margin
|
||||
marginv: int = 0 #: Vertical margin
|
||||
effect: str = "" #: Line effect
|
||||
type: str = "Dialogue" #: Line type (Dialogue/Comment)
|
||||
|
||||
@property
|
||||
def duration(self):
|
||||
def FIELDS(self):
|
||||
"""All fields in SSAEvent."""
|
||||
warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
|
||||
return frozenset(field.name for field in dataclasses.fields(self))
|
||||
|
||||
@property
|
||||
def duration(self) -> IntOrFloat:
|
||||
"""
|
||||
Subtitle duration in milliseconds (read/write property).
|
||||
|
||||
|
@ -60,14 +57,14 @@ class SSAEvent(object):
|
|||
return self.end - self.start
|
||||
|
||||
@duration.setter
|
||||
def duration(self, ms):
|
||||
def duration(self, ms: int):
|
||||
if ms >= 0:
|
||||
self.end = self.start + ms
|
||||
else:
|
||||
raise ValueError("Subtitle duration cannot be negative")
|
||||
|
||||
@property
|
||||
def is_comment(self):
|
||||
def is_comment(self) -> bool:
|
||||
"""
|
||||
When true, the subtitle is a comment, ie. not visible (read/write property).
|
||||
|
||||
|
@ -77,14 +74,20 @@ class SSAEvent(object):
|
|||
return self.type == "Comment"
|
||||
|
||||
@is_comment.setter
|
||||
def is_comment(self, value):
|
||||
def is_comment(self, value: bool):
|
||||
if value:
|
||||
self.type = "Comment"
|
||||
else:
|
||||
self.type = "Dialogue"
|
||||
|
||||
@property
|
||||
def plaintext(self):
|
||||
def is_drawing(self) -> bool:
|
||||
"""Returns True if line is SSA drawing tag (ie. not text)"""
|
||||
from .substation import parse_tags
|
||||
return any(sty.drawing for _, sty in parse_tags(self.text))
|
||||
|
||||
@property
|
||||
def plaintext(self) -> str:
|
||||
"""
|
||||
Subtitle text as multi-line string with no tags (read/write property).
|
||||
|
||||
|
@ -99,10 +102,11 @@ class SSAEvent(object):
|
|||
return text
|
||||
|
||||
@plaintext.setter
|
||||
def plaintext(self, text):
|
||||
def plaintext(self, text: str):
|
||||
self.text = text.replace("\n", r"\N")
|
||||
|
||||
def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
|
||||
def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
|
||||
frames: Optional[int]=None, fps: Optional[float]=None):
|
||||
"""
|
||||
Shift start and end times.
|
||||
|
||||
|
@ -113,41 +117,39 @@ class SSAEvent(object):
|
|||
self.start += delta
|
||||
self.end += delta
|
||||
|
||||
def copy(self):
|
||||
def copy(self) -> "SSAEvent":
|
||||
"""Return a copy of the SSAEvent."""
|
||||
return SSAEvent(**self.as_dict())
|
||||
|
||||
def as_dict(self):
|
||||
return {field: getattr(self, field) for field in self.FIELDS}
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
# dataclasses.asdict() would recursively dictify Color objects, which we don't want
|
||||
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
|
||||
|
||||
def equals(self, other):
|
||||
def equals(self, other: "SSAEvent") -> bool:
|
||||
"""Field-based equality for SSAEvents."""
|
||||
if isinstance(other, SSAEvent):
|
||||
return self.as_dict() == other.as_dict()
|
||||
else:
|
||||
raise TypeError("Cannot compare to non-SSAEvent object")
|
||||
|
||||
def __eq__(self, other):
|
||||
def __eq__(self, other: "SSAEvent"):
|
||||
# XXX document this
|
||||
return self.start == other.start and self.end == other.end
|
||||
|
||||
def __ne__(self, other):
|
||||
def __ne__(self, other: "SSAEvent"):
|
||||
return self.start != other.start or self.end != other.end
|
||||
|
||||
def __lt__(self, other):
|
||||
def __lt__(self, other: "SSAEvent"):
|
||||
return (self.start, self.end) < (other.start, other.end)
|
||||
|
||||
def __le__(self, other):
|
||||
def __le__(self, other: "SSAEvent"):
|
||||
return (self.start, self.end) <= (other.start, other.end)
|
||||
|
||||
def __gt__(self, other):
|
||||
def __gt__(self, other: "SSAEvent"):
|
||||
return (self.start, self.end) > (other.start, other.end)
|
||||
|
||||
def __ge__(self, other):
|
||||
def __ge__(self, other: "SSAEvent"):
|
||||
return (self.start, self.end) >= (other.start, other.end)
|
||||
|
||||
def __repr__(self):
|
||||
s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format(
|
||||
self=self, start=ms_to_str(self.start), end=ms_to_str(self.end))
|
||||
if not PY3: s = s.encode("utf-8")
|
||||
return s
|
||||
return f"<SSAEvent type={self.type} start={ms_to_str(self.start)} end={ms_to_str(self.end)} text={self.text!r}>"
|
||||
|
|
|
@ -1,16 +1,17 @@
|
|||
from __future__ import print_function, unicode_literals, division
|
||||
from collections import MutableSequence, OrderedDict
|
||||
from collections import MutableSequence
|
||||
import io
|
||||
from io import open
|
||||
from itertools import starmap, chain
|
||||
from itertools import chain
|
||||
import os.path
|
||||
import logging
|
||||
from typing import Optional, List, Dict, Iterable, Any
|
||||
|
||||
from .common import IntOrFloat
|
||||
from .formats import autodetect_format, get_format_class, get_format_identifier
|
||||
from .substation import is_valid_field_content
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
from .time import make_time, ms_to_str
|
||||
from .common import PY3
|
||||
|
||||
|
||||
class SSAFile(MutableSequence):
|
||||
|
@ -31,28 +32,37 @@ class SSAFile(MutableSequence):
|
|||
|
||||
"""
|
||||
|
||||
DEFAULT_INFO = OrderedDict([
|
||||
("WrapStyle", "0"),
|
||||
("ScaledBorderAndShadow", "yes"),
|
||||
("Collisions", "Normal")])
|
||||
DEFAULT_INFO = {
|
||||
"WrapStyle": "0",
|
||||
"ScaledBorderAndShadow": "yes",
|
||||
"Collisions": "Normal"
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
|
||||
self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances.
|
||||
self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
|
||||
self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
|
||||
self.fps = None #: Framerate used when reading the file, if applicable.
|
||||
self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
|
||||
self.events: List[SSAEvent] = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
|
||||
self.styles: Dict[str, SSAStyle] = {"Default": SSAStyle.DEFAULT_STYLE.copy()} #: Dict of :class:`SSAStyle` instances.
|
||||
self.info: Dict[str, str] = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
|
||||
self.aegisub_project: Dict[str, str] = {} #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
|
||||
self.fonts_opaque: Dict[str, Any] = {} #: Dict with embedded fonts, ie. ``[Fonts]``.
|
||||
self.fps: Optional[float] = None #: Framerate used when reading the file, if applicable.
|
||||
self.format: Optional[str] = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# I/O methods
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs):
|
||||
def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
|
||||
"""
|
||||
Load subtitle file from given path.
|
||||
|
||||
This method is implemented in terms of :meth:`SSAFile.from_file()`.
|
||||
|
||||
See also:
|
||||
Specific formats may implement additional loading options,
|
||||
please refer to documentation of the implementation classes
|
||||
(eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`)
|
||||
|
||||
Arguments:
|
||||
path (str): Path to subtitle file.
|
||||
encoding (str): Character encoding of input file.
|
||||
|
@ -66,14 +76,7 @@ class SSAFile(MutableSequence):
|
|||
be detected from the file, in which case you don't need
|
||||
to specify it here (when given, this argument overrides
|
||||
autodetection).
|
||||
keep_unknown_html_tags (bool): This affects SubRip only (SRT),
|
||||
for other formats this argument is ignored.
|
||||
By default, HTML tags are converted to equivalent SubStation tags
|
||||
(eg. ``<i>`` to ``{\\i1}`` and any remaining tags are removed
|
||||
to keep the text clean. Set this parameter to ``True``
|
||||
if you want to pass through these tags (eg. ``<sub>``).
|
||||
This is useful if your output format is SRT and your player
|
||||
supports these tags.
|
||||
kwargs: Extra options for the reader.
|
||||
|
||||
Returns:
|
||||
SSAFile
|
||||
|
@ -100,7 +103,7 @@ class SSAFile(MutableSequence):
|
|||
return cls.from_file(fp, format_, fps=fps, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, string, format_=None, fps=None, **kwargs):
|
||||
def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
|
||||
"""
|
||||
Load subtitle file from string.
|
||||
|
||||
|
@ -126,7 +129,7 @@ class SSAFile(MutableSequence):
|
|||
return cls.from_file(fp, format_, fps=fps, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, fp, format_=None, fps=None, **kwargs):
|
||||
def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
|
||||
"""
|
||||
Read subtitle file from file object.
|
||||
|
||||
|
@ -160,10 +163,17 @@ class SSAFile(MutableSequence):
|
|||
impl.from_file(subs, fp, format_, fps=fps, **kwargs)
|
||||
return subs
|
||||
|
||||
def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs):
|
||||
def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs):
|
||||
"""
|
||||
Save subtitle file to given path.
|
||||
|
||||
This method is implemented in terms of :meth:`SSAFile.to_file()`.
|
||||
|
||||
See also:
|
||||
Specific formats may implement additional saving options,
|
||||
please refer to documentation of the implementation classes
|
||||
(eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`)
|
||||
|
||||
Arguments:
|
||||
path (str): Path to subtitle file.
|
||||
encoding (str): Character encoding of output file.
|
||||
|
@ -197,7 +207,7 @@ class SSAFile(MutableSequence):
|
|||
with open(path, "w", encoding=encoding) as fp:
|
||||
self.to_file(fp, format_, fps=fps, **kwargs)
|
||||
|
||||
def to_string(self, format_, fps=None, **kwargs):
|
||||
def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str:
|
||||
"""
|
||||
Get subtitle file as a string.
|
||||
|
||||
|
@ -211,7 +221,7 @@ class SSAFile(MutableSequence):
|
|||
self.to_file(fp, format_, fps=fps, **kwargs)
|
||||
return fp.getvalue()
|
||||
|
||||
def to_file(self, fp, format_, fps=None, **kwargs):
|
||||
def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs):
|
||||
"""
|
||||
Write subtitle file to file object.
|
||||
|
||||
|
@ -233,7 +243,8 @@ class SSAFile(MutableSequence):
|
|||
# Retiming subtitles
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
|
||||
def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
|
||||
frames: Optional[int]=None, fps: Optional[float]=None):
|
||||
"""
|
||||
Shift all subtitles by constant time amount.
|
||||
|
||||
|
@ -255,7 +266,7 @@ class SSAFile(MutableSequence):
|
|||
line.start += delta
|
||||
line.end += delta
|
||||
|
||||
def transform_framerate(self, in_fps, out_fps):
|
||||
def transform_framerate(self, in_fps: float, out_fps: float):
|
||||
"""
|
||||
Rescale all timestamps by ratio of in_fps/out_fps.
|
||||
|
||||
|
@ -282,7 +293,7 @@ class SSAFile(MutableSequence):
|
|||
# Working with styles
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def rename_style(self, old_name, new_name):
|
||||
def rename_style(self, old_name: str, new_name: str):
|
||||
"""
|
||||
Rename a style, including references to it.
|
||||
|
||||
|
@ -311,7 +322,7 @@ class SSAFile(MutableSequence):
|
|||
if line.style == old_name:
|
||||
line.style = new_name
|
||||
|
||||
def import_styles(self, subs, overwrite=True):
|
||||
def import_styles(self, subs: "SSAFile", overwrite: bool=True):
|
||||
"""
|
||||
Merge in styles from other SSAFile.
|
||||
|
||||
|
@ -332,7 +343,39 @@ class SSAFile(MutableSequence):
|
|||
# Helper methods
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def equals(self, other):
|
||||
def remove_miscellaneous_events(self):
    """
    Remove subtitles which appear to be non-essential (the --clean in CLI)

    Currently, this removes events matching any of these criteria:
    - SSA event type Comment
    - SSA drawing tags
    - Less than two characters of text
    - Duplicated text with identical time interval (only the first event is kept)

    Duplicates are detected only among the events that survive the other
    criteria, so a Comment or drawing event can never cause a visible
    subtitle with the same timing and text to be dropped: exactly one copy
    of every distinct visible (start, end, text) combination is kept.

    The result replaces ``self.events``; relative order of the kept events
    is preserved.
    """
    kept_events = []
    # (start, end, plaintext) of every event already kept; a set gives
    # constant-time duplicate lookup regardless of how many events share
    # the same time interval.
    seen = set()

    for event in self:
        if event.is_drawing or event.is_comment:
            continue

        text = event.plaintext
        if len(text.strip()) < 2:
            continue

        key = (event.start, event.end, text)
        if key in seen:
            continue

        seen.add(key)
        kept_events.append(event)

    self.events = kept_events
|
||||
|
||||
def equals(self, other: "SSAFile"):
|
||||
"""
|
||||
Equality of two SSAFiles.
|
||||
|
||||
|
@ -357,6 +400,18 @@ class SSAFile(MutableSequence):
|
|||
logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov)
|
||||
return False
|
||||
|
||||
for key in set(chain(self.fonts_opaque.keys(), other.fonts_opaque.keys())):
|
||||
sv, ov = self.fonts_opaque.get(key), other.fonts_opaque.get(key)
|
||||
if sv is None:
|
||||
logging.debug("%r missing in self.fonts_opaque", key)
|
||||
return False
|
||||
elif ov is None:
|
||||
logging.debug("%r missing in other.fonts_opaque", key)
|
||||
return False
|
||||
elif sv != ov:
|
||||
logging.debug("fonts_opaque %r differs (self=%r, other=%r)", key, sv, ov)
|
||||
return False
|
||||
|
||||
for key in set(chain(self.styles.keys(), other.styles.keys())):
|
||||
sv, ov = self.styles.get(key), other.styles.get(key)
|
||||
if sv is None:
|
||||
|
@ -389,12 +444,10 @@ class SSAFile(MutableSequence):
|
|||
def __repr__(self):
|
||||
if self.events:
|
||||
max_time = max(ev.end for ev in self)
|
||||
s = "<SSAFile with %d events and %d styles, last timestamp %s>" % \
|
||||
(len(self), len(self.styles), ms_to_str(max_time))
|
||||
s = f"<SSAFile with {len(self)} events and {len(self.styles)} styles, last timestamp {ms_to_str(max_time)}>"
|
||||
else:
|
||||
s = "<SSAFile with 0 events and %d styles>" % len(self.styles)
|
||||
s = f"<SSAFile with 0 events and {len(self.styles)} styles>"
|
||||
|
||||
if not PY3: s = s.encode("utf-8")
|
||||
return s
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
|
@ -405,22 +458,25 @@ class SSAFile(MutableSequence):
|
|||
"""Sort subtitles time-wise, in-place."""
|
||||
self.events.sort()
|
||||
|
||||
def __getitem__(self, item):
|
||||
def __iter__(self) -> Iterable[SSAEvent]:
|
||||
return iter(self.events)
|
||||
|
||||
def __getitem__(self, item: int):
|
||||
return self.events[item]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
def __setitem__(self, key: int, value: SSAEvent):
|
||||
if isinstance(value, SSAEvent):
|
||||
self.events[key] = value
|
||||
else:
|
||||
raise TypeError("SSAFile.events must contain only SSAEvent objects")
|
||||
|
||||
def __delitem__(self, key):
|
||||
def __delitem__(self, key: int):
|
||||
del self.events[key]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.events)
|
||||
|
||||
def insert(self, index, value):
|
||||
def insert(self, index: int, value: SSAEvent):
|
||||
if isinstance(value, SSAEvent):
|
||||
self.events.insert(index, value)
|
||||
else:
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
from __future__ import unicode_literals
|
||||
from .common import Color, PY3
|
||||
import warnings
|
||||
from typing import Dict, Any, ClassVar
|
||||
import dataclasses
|
||||
|
||||
from .common import Color
|
||||
|
||||
class SSAStyle(object):
|
||||
@dataclasses.dataclass(repr=False)
|
||||
class SSAStyle:
|
||||
"""
|
||||
A SubStation Style.
|
||||
|
||||
|
@ -17,71 +20,57 @@ class SSAStyle(object):
|
|||
This class defines equality (equality of all fields).
|
||||
|
||||
"""
|
||||
DEFAULT_STYLE = None
|
||||
DEFAULT_STYLE: ClassVar["SSAStyle"] = None
|
||||
|
||||
#: All fields in SSAStyle.
|
||||
FIELDS = frozenset([
|
||||
"fontname", "fontsize", "primarycolor", "secondarycolor",
|
||||
"tertiarycolor", "outlinecolor", "backcolor",
|
||||
"bold", "italic", "underline", "strikeout",
|
||||
"scalex", "scaley", "spacing", "angle", "borderstyle",
|
||||
"outline", "shadow", "alignment",
|
||||
"marginl", "marginr", "marginv", "alphalevel", "encoding"
|
||||
])
|
||||
@property
|
||||
def FIELDS(self):
|
||||
"""All fields in SSAStyle."""
|
||||
warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
|
||||
return frozenset(field.name for field in dataclasses.fields(self))
|
||||
|
||||
def __init__(self, **fields):
|
||||
self.fontname = "Arial" #: Font name
|
||||
self.fontsize = 20.0 #: Font size (in pixels)
|
||||
self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
|
||||
self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
|
||||
self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
|
||||
self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
|
||||
self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
|
||||
self.bold = False #: Bold
|
||||
self.italic = False #: Italic
|
||||
self.underline = False #: Underline (ASS only)
|
||||
self.strikeout = False #: Strikeout (ASS only)
|
||||
self.drawing = False #: Drawing (ASS only, see http://docs.aegisub.org/3.1/ASS_Tags/#drawing-tags
|
||||
self.scalex = 100.0 #: Horizontal scaling (ASS only)
|
||||
self.scaley = 100.0 #: Vertical scaling (ASS only)
|
||||
self.spacing = 0.0 #: Letter spacing (ASS only)
|
||||
self.angle = 0.0 #: Rotation (ASS only)
|
||||
self.borderstyle = 1 #: Border style
|
||||
self.outline = 2.0 #: Outline width (in pixels)
|
||||
self.shadow = 2.0 #: Shadow depth (in pixels)
|
||||
self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
|
||||
self.marginl = 10 #: Left margin (in pixels)
|
||||
self.marginr = 10 #: Right margin (in pixels)
|
||||
self.marginv = 10 #: Vertical margin (in pixels)
|
||||
self.alphalevel = 0 #: Old, unused SSA-only field
|
||||
self.encoding = 1 #: Charset
|
||||
fontname: str = "Arial" #: Font name
|
||||
fontsize: float = 20.0 #: Font size (in pixels)
|
||||
primarycolor: Color = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
|
||||
secondarycolor: Color = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
|
||||
tertiarycolor: Color = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
|
||||
outlinecolor: Color = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
|
||||
backcolor: Color = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
|
||||
bold: bool = False #: Bold
|
||||
italic: bool = False #: Italic
|
||||
underline: bool = False #: Underline (ASS only)
|
||||
strikeout: bool = False #: Strikeout (ASS only)
|
||||
scalex: float = 100.0 #: Horizontal scaling (ASS only)
|
||||
scaley: float = 100.0 #: Vertical scaling (ASS only)
|
||||
spacing: float = 0.0 #: Letter spacing (ASS only)
|
||||
angle: float = 0.0 #: Rotation (ASS only)
|
||||
borderstyle: int = 1 #: Border style
|
||||
outline: float = 2.0 #: Outline width (in pixels)
|
||||
shadow: float = 2.0 #: Shadow depth (in pixels)
|
||||
alignment: int = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
|
||||
marginl: int = 10 #: Left margin (in pixels)
|
||||
marginr: int = 10 #: Right margin (in pixels)
|
||||
marginv: int = 10 #: Vertical margin (in pixels)
|
||||
alphalevel: int = 0 #: Old, unused SSA-only field
|
||||
encoding: int = 1 #: Charset
|
||||
|
||||
for k, v in fields.items():
|
||||
if k in self.FIELDS:
|
||||
setattr(self, k, v)
|
||||
else:
|
||||
raise ValueError("SSAStyle has no field named %r" % k)
|
||||
# The following attributes cannot be defined for SSA styles themselves,
|
||||
# but can be used in override tags and thus are useful to keep here
|
||||
# for the `pysubs2.substation.parse_tags()` interface which returns
|
||||
# SSAStyles for text fragments.
|
||||
drawing: bool = False #: Indicates that text span is a SSA vector drawing, see `pysubs2.substation.parse_tags()`
|
||||
|
||||
def copy(self):
|
||||
def copy(self) -> "SSAStyle":
|
||||
return SSAStyle(**self.as_dict())
|
||||
|
||||
def as_dict(self):
|
||||
return {field: getattr(self, field) for field in self.FIELDS}
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.as_dict() == other.as_dict()
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
# dataclasses.asdict() would recursively dictify Color objects, which we don't want
|
||||
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
|
||||
|
||||
def __repr__(self):
|
||||
s = "<SSAStyle "
|
||||
s += "%rpx " % self.fontsize
|
||||
if self.bold: s += "bold "
|
||||
if self.italic: s += "italic "
|
||||
s += "{!r}>".format(self.fontname)
|
||||
if not PY3: s = s.encode("utf-8")
|
||||
return s
|
||||
return f"<SSAStyle {self.fontsize!r}px" \
|
||||
f"{' bold' if self.bold else ''}" \
|
||||
f"{' italic' if self.italic else ''}" \
|
||||
f" {self.fontname!r}>"
|
||||
|
||||
|
||||
SSAStyle.DEFAULT_STYLE = SSAStyle()
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import re
|
||||
from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
|
@ -21,25 +19,50 @@ def ms_to_timestamp(ms):
|
|||
|
||||
|
||||
class SubripFormat(FormatBase):
|
||||
"""SubRip Text (SRT) subtitle format implementation"""
|
||||
TIMESTAMP = TIMESTAMP
|
||||
|
||||
@staticmethod
|
||||
def timestamp_to_ms(groups):
|
||||
return timestamp_to_ms(groups)
|
||||
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if "[Script Info]" in text or "[V4+ Styles]" in text:
|
||||
# disambiguation vs. SSA/ASS
|
||||
return None
|
||||
|
||||
if text.lstrip().startswith("WEBVTT"):
|
||||
# disambiguation vs. WebVTT
|
||||
return None
|
||||
|
||||
for line in text.splitlines():
|
||||
if len(TIMESTAMP.findall(line)) == 2:
|
||||
if len(cls.TIMESTAMP.findall(line)) == 2:
|
||||
return "srt"
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
|
||||
"""
|
||||
See :meth:`pysubs2.formats.FormatBase.from_file()`
|
||||
|
||||
Supported tags:
|
||||
|
||||
- ``<i>``
|
||||
- ``<u>``
|
||||
- ``<s>``
|
||||
|
||||
Keyword args:
|
||||
keep_unknown_html_tags: If True, HTML tags other than i/u/s will be kept as-is.
|
||||
Otherwise, they will be stripped from input.
|
||||
"""
|
||||
timestamps = [] # (start, end)
|
||||
following_lines = [] # contains lists of lines following each timestamp
|
||||
|
||||
for line in fp:
|
||||
stamps = TIMESTAMP.findall(line)
|
||||
stamps = cls.TIMESTAMP.findall(line)
|
||||
if len(stamps) == 2: # timestamp line
|
||||
start, end = map(timestamp_to_ms, stamps)
|
||||
start, end = map(cls.timestamp_to_ms, stamps)
|
||||
timestamps.append((start, end))
|
||||
following_lines.append([])
|
||||
else:
|
||||
|
@ -72,16 +95,26 @@ class SubripFormat(FormatBase):
|
|||
for (start, end), lines in zip(timestamps, following_lines)]
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
|
||||
"""
|
||||
See :meth:`pysubs2.formats.FormatBase.to_file()`
|
||||
|
||||
Italic, underline and strikeout styling is supported.
|
||||
|
||||
Keyword args:
|
||||
apply_styles: If False, do not write any styling.
|
||||
|
||||
"""
|
||||
def prepare_text(text, style):
|
||||
body = []
|
||||
for fragment, sty in parse_tags(text, style, subs.styles):
|
||||
fragment = fragment.replace(r"\h", " ")
|
||||
fragment = fragment.replace(r"\n", "\n")
|
||||
fragment = fragment.replace(r"\N", "\n")
|
||||
if sty.italic: fragment = "<i>%s</i>" % fragment
|
||||
if sty.underline: fragment = "<u>%s</u>" % fragment
|
||||
if sty.strikeout: fragment = "<s>%s</s>" % fragment
|
||||
if apply_styles:
|
||||
if sty.italic: fragment = "<i>%s</i>" % fragment
|
||||
if sty.underline: fragment = "<u>%s</u>" % fragment
|
||||
if sty.strikeout: fragment = "<s>%s</s>" % fragment
|
||||
if sty.drawing: raise ContentNotUsable
|
||||
body.append(fragment)
|
||||
|
||||
|
@ -89,7 +122,8 @@ class SubripFormat(FormatBase):
|
|||
|
||||
visible_lines = (line for line in subs if not line.is_comment)
|
||||
|
||||
for i, line in enumerate(visible_lines, 1):
|
||||
lineno = 1
|
||||
for line in visible_lines:
|
||||
start = ms_to_timestamp(line.start)
|
||||
end = ms_to_timestamp(line.end)
|
||||
try:
|
||||
|
@ -97,6 +131,7 @@ class SubripFormat(FormatBase):
|
|||
except ContentNotUsable:
|
||||
continue
|
||||
|
||||
print("%d" % i, file=fp) # Python 2.7 compat
|
||||
print("%d" % lineno, file=fp) # Python 2.7 compat
|
||||
print(start, "-->", end, file=fp)
|
||||
print(text, end="\n\n", file=fp)
|
||||
lineno += 1
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from __future__ import print_function, division, unicode_literals
|
||||
import logging
|
||||
import re
|
||||
from numbers import Number
|
||||
from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
from .common import text_type, Color, PY3, binary_string_type
|
||||
from .common import Color
|
||||
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
|
||||
|
||||
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
|
||||
|
@ -15,7 +15,14 @@ def ass_to_ssa_alignment(i):
|
|||
def ssa_to_ass_alignment(i):
|
||||
return SSA_ALIGNMENT.index(i) + 1
|
||||
|
||||
SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes
|
||||
SECTION_HEADING = re.compile(
|
||||
r"^.{,3}" # allow 3 chars at start of line for BOM
|
||||
r"\[" # open square bracket
|
||||
r"[^]]*[a-z][^]]*" # inside square brackets, at least one lowercase letter (this guards vs. uuencoded font data)
|
||||
r"]" # close square bracket
|
||||
)
|
||||
|
||||
FONT_FILE_HEADING = re.compile(r"fontname:\s+(\S+)")
|
||||
|
||||
STYLE_FORMAT_LINE = {
|
||||
"ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
|
||||
|
@ -46,7 +53,7 @@ EVENT_FIELDS = {
|
|||
#: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
|
||||
MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
|
||||
|
||||
def ms_to_timestamp(ms):
|
||||
def ms_to_timestamp(ms: int) -> str:
|
||||
"""Convert ms to 'H:MM:SS.cc'"""
|
||||
# XXX throw on overflow/underflow?
|
||||
if ms < 0: ms = 0
|
||||
|
@ -54,28 +61,24 @@ def ms_to_timestamp(ms):
|
|||
h, m, s, ms = ms_to_times(ms)
|
||||
return "%01d:%02d:%02d.%02d" % (h, m, s, ms//10)
|
||||
|
||||
def color_to_ass_rgba(c):
|
||||
def color_to_ass_rgba(c: Color) -> str:
|
||||
return "&H%08X" % ((c.a << 24) | (c.b << 16) | (c.g << 8) | c.r)
|
||||
|
||||
def color_to_ssa_rgb(c):
|
||||
def color_to_ssa_rgb(c: Color) -> str:
|
||||
return "%d" % ((c.b << 16) | (c.g << 8) | c.r)
|
||||
|
||||
def ass_rgba_to_color(s):
|
||||
x = int(s[2:], base=16)
|
||||
def rgba_to_color(s: str) -> Color:
    """
    Parse a SubStation color field into a :class:`Color`.

    Two spellings are accepted:

    - a value starting with ``&`` (eg. ``&H00FFFFFF``): the first two
      characters are skipped and the rest is read as a hexadecimal number,
    - anything else is read as a decimal integer.

    The resulting number is unpacked byte by byte, lowest byte first,
    as red, green, blue, alpha.

    Raises:
        ValueError: If the numeric part cannot be parsed (this includes
            an empty string).
    """
    # startswith() instead of s[0] so that an empty field fails in int()
    # with ValueError like any other malformed value, not with IndexError.
    if s.startswith("&"):
        x = int(s[2:], base=16)
    else:
        x = int(s)

    r = x & 0xff
    g = (x >> 8) & 0xff
    b = (x >> 16) & 0xff
    a = (x >> 24) & 0xff
    return Color(r, g, b, a)
|
||||
|
||||
def ssa_rgb_to_color(s):
|
||||
x = int(s)
|
||||
r = x & 0xff
|
||||
g = (x >> 8) & 0xff
|
||||
b = (x >> 16) & 0xff
|
||||
return Color(r, g, b)
|
||||
|
||||
def is_valid_field_content(s):
|
||||
def is_valid_field_content(s: str) -> bool:
|
||||
"""
|
||||
Returns True if string s can be stored in a SubStation field.
|
||||
|
||||
|
@ -140,8 +143,10 @@ def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}):
|
|||
NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
|
||||
|
||||
class SubstationFormat(FormatBase):
|
||||
"""SubStation Alpha (ASS, SSA) subtitle format implementation"""
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if "V4+ Styles" in text:
|
||||
return "ass"
|
||||
elif "V4 Styles" in text:
|
||||
|
@ -149,6 +154,7 @@ class SubstationFormat(FormatBase):
|
|||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
|
||||
|
||||
def string_to_field(f, v):
|
||||
if f in {"start", "end"}:
|
||||
|
@ -159,10 +165,7 @@ class SubstationFormat(FormatBase):
|
|||
else:
|
||||
return timestamp_to_ms(TIMESTAMP.match(v).groups())
|
||||
elif "color" in f:
|
||||
if format_ == "ass":
|
||||
return ass_rgba_to_color(v)
|
||||
else:
|
||||
return ssa_rgb_to_color(v)
|
||||
return rgba_to_color(v)
|
||||
elif f in {"bold", "underline", "italic", "strikeout"}:
|
||||
return v == "-1"
|
||||
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
|
||||
|
@ -183,16 +186,22 @@ class SubstationFormat(FormatBase):
|
|||
subs.info.clear()
|
||||
subs.aegisub_project.clear()
|
||||
subs.styles.clear()
|
||||
subs.fonts_opaque.clear()
|
||||
|
||||
inside_info_section = False
|
||||
inside_aegisub_section = False
|
||||
inside_font_section = False
|
||||
current_font_name = None
|
||||
current_font_lines_buffer = []
|
||||
|
||||
for line in fp:
|
||||
for lineno, line in enumerate(fp, 1):
|
||||
line = line.strip()
|
||||
|
||||
if SECTION_HEADING.match(line):
|
||||
logging.debug("at line %d: section heading %s", lineno, line)
|
||||
inside_info_section = "Info" in line
|
||||
inside_aegisub_section = "Aegisub" in line
|
||||
inside_font_section = "Fonts" in line
|
||||
elif inside_info_section or inside_aegisub_section:
|
||||
if line.startswith(";"): continue # skip comments
|
||||
try:
|
||||
|
@ -203,6 +212,24 @@ class SubstationFormat(FormatBase):
|
|||
subs.aegisub_project[k] = v.strip()
|
||||
except ValueError:
|
||||
pass
|
||||
elif inside_font_section:
|
||||
m = FONT_FILE_HEADING.match(line)
|
||||
|
||||
if current_font_name and (m or not line):
|
||||
# flush last font on newline or new font name
|
||||
font_data = current_font_lines_buffer[:]
|
||||
subs.fonts_opaque[current_font_name] = font_data
|
||||
logging.debug("at line %d: finished font definition %s", lineno, current_font_name)
|
||||
current_font_lines_buffer.clear()
|
||||
current_font_name = None
|
||||
|
||||
if m:
|
||||
# start new font
|
||||
font_name = m.group(1)
|
||||
current_font_name = font_name
|
||||
elif line:
|
||||
# add non-empty line to current buffer
|
||||
current_font_lines_buffer.append(line)
|
||||
elif line.startswith("Style:"):
|
||||
_, rest = line.split(":", 1)
|
||||
buf = rest.strip().split(",")
|
||||
|
@ -218,9 +245,18 @@ class SubstationFormat(FormatBase):
|
|||
ev = SSAEvent(**field_dict)
|
||||
subs.events.append(ev)
|
||||
|
||||
# cleanup fonts
|
||||
if current_font_name:
|
||||
# flush last font on EOF or new section w/o newline
|
||||
font_data = current_font_lines_buffer[:]
|
||||
subs.fonts_opaque[current_font_name] = font_data
|
||||
logging.debug("at EOF: finished font definition %s", current_font_name)
|
||||
current_font_lines_buffer.clear()
|
||||
current_font_name = None
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
|
||||
print("[Script Info]", file=fp)
|
||||
for line in header_notice.splitlines(False):
|
||||
print(";", line, file=fp)
|
||||
|
@ -240,19 +276,11 @@ class SubstationFormat(FormatBase):
|
|||
elif f == "marked":
|
||||
return "Marked=%d" % v
|
||||
elif f == "alignment" and format_ == "ssa":
|
||||
return text_type(ass_to_ssa_alignment(v))
|
||||
return str(ass_to_ssa_alignment(v))
|
||||
elif isinstance(v, bool):
|
||||
return "-1" if v else "0"
|
||||
elif isinstance(v, (text_type, Number)):
|
||||
return text_type(v)
|
||||
elif not PY3 and isinstance(v, binary_string_type):
|
||||
# A convenience feature, see issue #12 - accept non-unicode strings
|
||||
# when they are ASCII; this is useful in Python 2, especially for non-text
|
||||
# fields like style names, where requiring Unicode type seems too stringent
|
||||
if all(ord(c) < 128 for c in v):
|
||||
return text_type(v)
|
||||
else:
|
||||
raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line))
|
||||
elif isinstance(v, (str, Number)):
|
||||
return str(v)
|
||||
elif isinstance(v, Color):
|
||||
if format_ == "ass":
|
||||
return color_to_ass_rgba(v)
|
||||
|
@ -267,6 +295,14 @@ class SubstationFormat(FormatBase):
|
|||
fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
|
||||
print("Style: %s" % name, *fields, sep=",", file=fp)
|
||||
|
||||
if subs.fonts_opaque:
|
||||
print("\n[Fonts]", file=fp)
|
||||
for font_name, font_lines in sorted(subs.fonts_opaque.items()):
|
||||
print("fontname: {}".format(font_name), file=fp)
|
||||
for line in font_lines:
|
||||
print(line, file=fp)
|
||||
print(file=fp)
|
||||
|
||||
print("\n[Events]", file=fp)
|
||||
print(EVENT_FORMAT_LINE[format_], file=fp)
|
||||
for ev in subs.events:
|
||||
|
|
|
@ -1,15 +1,19 @@
|
|||
from __future__ import division
|
||||
|
||||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
|
||||
#: Pattern that matches both SubStation and SubRip timestamps.
|
||||
from typing import Optional, List, Tuple, Sequence
|
||||
|
||||
from pysubs2.common import IntOrFloat
|
||||
|
||||
TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})")
|
||||
|
||||
Times = namedtuple("Times", ["h", "m", "s", "ms"])
|
||||
|
||||
def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
|
||||
|
||||
def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
|
||||
frames: Optional[int]=None, fps: Optional[float]=None):
|
||||
"""
|
||||
Convert time to milliseconds.
|
||||
|
||||
|
@ -33,7 +37,8 @@ def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
|
|||
else:
|
||||
raise ValueError("Both fps and frames must be specified")
|
||||
|
||||
def timestamp_to_ms(groups):
|
||||
|
||||
def timestamp_to_ms(groups: Sequence[str]):
|
||||
"""
|
||||
Convert groups from :data:`pysubs2.time.TIMESTAMP` match to milliseconds.
|
||||
|
||||
|
@ -49,7 +54,8 @@ def timestamp_to_ms(groups):
|
|||
ms += h * 3600000
|
||||
return ms
|
||||
|
||||
def tmptimestamp_to_ms(groups):
|
||||
|
||||
def tmptimestamp_to_ms(groups: Sequence[str]):
|
||||
"""
|
||||
Convert groups from :data:`pysubs2.time.TMPTIMESTAMP` match to milliseconds.
|
||||
|
||||
|
@ -63,7 +69,9 @@ def tmptimestamp_to_ms(groups):
|
|||
ms += m * 60000
|
||||
ms += h * 3600000
|
||||
return ms
|
||||
def times_to_ms(h=0, m=0, s=0, ms=0):
|
||||
|
||||
|
||||
def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int:
|
||||
"""
|
||||
Convert hours, minutes, seconds to milliseconds.
|
||||
|
||||
|
@ -79,7 +87,8 @@ def times_to_ms(h=0, m=0, s=0, ms=0):
|
|||
ms += h * 3600000
|
||||
return int(round(ms))
|
||||
|
||||
def frames_to_ms(frames, fps):
|
||||
|
||||
def frames_to_ms(frames: int, fps: float) -> int:
|
||||
"""
|
||||
Convert frame-based duration to milliseconds.
|
||||
|
||||
|
@ -99,7 +108,8 @@ def frames_to_ms(frames, fps):
|
|||
|
||||
return int(round(frames * (1000 / fps)))
|
||||
|
||||
def ms_to_frames(ms, fps):
|
||||
|
||||
def ms_to_frames(ms: IntOrFloat, fps: float) -> int:
|
||||
"""
|
||||
Convert milliseconds to number of frames.
|
||||
|
||||
|
@ -119,7 +129,8 @@ def ms_to_frames(ms, fps):
|
|||
|
||||
return int(round((ms / 1000) * fps))
|
||||
|
||||
def ms_to_times(ms):
|
||||
|
||||
def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
|
||||
"""
|
||||
Convert milliseconds to normalized tuple (h, m, s, ms).
|
||||
|
||||
|
@ -138,7 +149,8 @@ def ms_to_times(ms):
|
|||
s, ms = divmod(ms, 1000)
|
||||
return Times(h, m, s, ms)
|
||||
|
||||
def ms_to_str(ms, fractions=False):
|
||||
|
||||
def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str:
|
||||
"""
|
||||
Prettyprint milliseconds to [-]H:MM:SS[.mmm]
|
||||
|
||||
|
@ -156,6 +168,6 @@ def ms_to_str(ms, fractions=False):
|
|||
sgn = "-" if ms < 0 else ""
|
||||
h, m, s, ms = ms_to_times(abs(ms))
|
||||
if fractions:
|
||||
return sgn + "{:01d}:{:02d}:{:02d}.{:03d}".format(h, m, s, ms)
|
||||
return f"{sgn}{h:01d}:{m:02d}:{s:02d}.{ms:03d}"
|
||||
else:
|
||||
return sgn + "{:01d}:{:02d}:{:02d}".format(h, m, s)
|
||||
return f"{sgn}{h:01d}:{m:02d}:{s:02d}"
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import re
|
||||
from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
|
@ -15,6 +13,7 @@ TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
|
|||
#: Largest timestamp allowed in Tmp, ie. 99:59:59.
|
||||
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
|
||||
|
||||
|
||||
def ms_to_timestamp(ms):
|
||||
"""Convert ms to 'HH:MM:SS'"""
|
||||
# XXX throw on overflow/underflow?
|
||||
|
@ -25,8 +24,10 @@ def ms_to_timestamp(ms):
|
|||
|
||||
|
||||
class TmpFormat(FormatBase):
|
||||
"""TMP subtitle format implementation"""
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
|
||||
if "[Script Info]" in text or "[V4+ Styles]" in text:
|
||||
# disambiguation vs. SSA/ASS
|
||||
return None
|
||||
|
@ -37,8 +38,14 @@ class TmpFormat(FormatBase):
|
|||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
timestamps = [] # (start)
|
||||
lines = [] # contains lists of lines following each timestamp
|
||||
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
|
||||
events = []
|
||||
|
||||
def prepare_text(text):
|
||||
text = text.replace("|", r"\N") # convert newlines
|
||||
text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape
|
||||
text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags
|
||||
return text
|
||||
|
||||
for line in fp:
|
||||
match = TMP_LINE.match(line)
|
||||
|
@ -47,42 +54,54 @@ class TmpFormat(FormatBase):
|
|||
|
||||
start, text = match.groups()
|
||||
start = tmptimestamp_to_ms(TMPTIMESTAMP.match(start).groups())
|
||||
#calculate endtime from starttime + 500 miliseconds + 67 miliseconds per each character (15 chars per second)
|
||||
end = start + 500 + (len(line) * 67)
|
||||
timestamps.append((start, end))
|
||||
lines.append(text)
|
||||
|
||||
def prepare_text(lines):
|
||||
lines = lines.replace("|", r"\N") # convert newlines
|
||||
lines = re.sub(r"< *u *>", "{\\\\u1}", lines) # not r" for Python 2.7 compat, triggers unicodeescape
|
||||
lines = re.sub(r"< */? *[a-zA-Z][^>]*>", "", lines) # strip other HTML tags
|
||||
return lines
|
||||
# Unfortunately, end timestamp is not given; try to estimate something reasonable:
|
||||
# start + 500 ms + 67 ms/character (15 chars per second)
|
||||
end_guess = start + 500 + (len(line) * 67)
|
||||
|
||||
subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
|
||||
for (start, end), lines in zip(timestamps, lines)]
|
||||
event = SSAEvent(start=start, end=end_guess, text=prepare_text(text))
|
||||
events.append(event)
|
||||
|
||||
# correct any overlapping subtitles created by end_guess
|
||||
for i in range(len(events) - 1):
|
||||
events[i].end = min(events[i].end, events[i+1].start)
|
||||
|
||||
subs.events = events
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
|
||||
"""
|
||||
See :meth:`pysubs2.formats.FormatBase.to_file()`
|
||||
|
||||
Italic, underline and strikeout styling is supported.
|
||||
|
||||
Keyword args:
|
||||
apply_styles: If False, do not write any styling.
|
||||
|
||||
"""
|
||||
def prepare_text(text, style):
|
||||
body = []
|
||||
skip = False
|
||||
for fragment, sty in parse_tags(text, style, subs.styles):
|
||||
fragment = fragment.replace(r"\h", " ")
|
||||
fragment = fragment.replace(r"\n", "\n")
|
||||
fragment = fragment.replace(r"\N", "\n")
|
||||
if sty.italic: fragment = "<i>%s</i>" % fragment
|
||||
if sty.underline: fragment = "<u>%s</u>" % fragment
|
||||
if sty.strikeout: fragment = "<s>%s</s>" % fragment
|
||||
if apply_styles:
|
||||
if sty.italic: fragment = "<i>%s</i>" % fragment
|
||||
if sty.underline: fragment = "<u>%s</u>" % fragment
|
||||
if sty.strikeout: fragment = "<s>%s</s>" % fragment
|
||||
if sty.drawing: skip = True
|
||||
body.append(fragment)
|
||||
|
||||
return re.sub("\n+", "\n", "".join(body).strip())
|
||||
if skip:
|
||||
return ""
|
||||
else:
|
||||
return re.sub("\n+", "\n", "".join(body).strip())
|
||||
|
||||
visible_lines = (line for line in subs if not line.is_comment)
|
||||
|
||||
for i, line in enumerate(visible_lines, 1):
|
||||
for line in visible_lines:
|
||||
start = ms_to_timestamp(line.start)
|
||||
#end = ms_to_timestamp(line.end)
|
||||
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
|
||||
|
||||
#print("%d" % i, file=fp) # Python 2.7 compat
|
||||
print(start + ":" + text, end="\n", file=fp)
|
||||
#print(text, end="\n\n", file=fp)
|
||||
|
|
36
libs/pysubs2/webvtt.py
Normal file
36
libs/pysubs2/webvtt.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
import re
|
||||
from .subrip import SubripFormat
|
||||
from .time import make_time
|
||||
|
||||
|
||||
class WebVTTFormat(SubripFormat):
    """
    Web Video Text Tracks (WebVTT) subtitle format implementation

    Currently, this shares implementation with :class:`pysubs2.subrip.SubripFormat`;
    only the timestamp pattern, the timestamp conversion, format detection
    and the file header differ.
    """
    # Hours group is optional and, when present, includes its trailing colon.
    TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})")

    @staticmethod
    def timestamp_to_ms(groups):
        """Convert groups of a :attr:`TIMESTAMP` match to milliseconds."""
        hours_part, minutes, seconds, millis = groups
        # The hours group is either missing/empty or looks like "12:".
        hours = int(hours_part.strip(":")) if hours_part else 0
        return make_time(h=hours, m=int(minutes), s=int(seconds), ms=int(millis))

    @classmethod
    def guess_format(cls, text):
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
        return "vtt" if text.lstrip().startswith("WEBVTT") else None

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Writes the ``WEBVTT`` header followed by an empty line, then
        delegates the body to :meth:`SubripFormat.to_file()`.
        """
        # NOTE(review): cue timestamps are written by the SubRip writer; the
        # TIMESTAMP pattern above only accepts "." before the fraction, so
        # confirm the SubRip writer's separator round-trips through this class.
        print("WEBVTT", end="\n\n", file=fp)
        return SubripFormat.to_file(subs=subs, fp=fp, format_=format_, **kwargs)
|
Loading…
Reference in a new issue