2020-06-11 00:04:54 +08:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import copy
|
|
|
|
from datetime import timedelta
|
|
|
|
import logging
|
2020-06-13 03:08:44 +08:00
|
|
|
import os
|
2022-01-24 12:07:52 +08:00
|
|
|
from typing import cast, Any, Dict, Iterator, List, Optional
|
2020-06-11 00:04:54 +08:00
|
|
|
|
|
|
|
import pysubs2
|
|
|
|
import srt
|
|
|
|
import six
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
# Install a default root handler at INFO level when this module is imported
# (basicConfig does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)

# Module-scoped logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)
|
2020-06-11 00:04:54 +08:00
|
|
|
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
class GenericSubtitle:
    """Format-agnostic wrapper around one parsed subtitle entry.

    The wrapper keeps its own ``start`` / ``end`` timing next to the original
    library object (``inner``), which is either an ``srt.Subtitle`` or a
    ``pysubs2.SSAEvent``. The inner object is never mutated by this class;
    methods that need a modified inner object work on a deep copy.
    """

    def __init__(self, start, end, inner):
        """Store the timing and the wrapped subtitle object.

        Args:
            start: start time; ``wrap_inner_subtitle`` supplies a ``timedelta``.
            end: end time; ``wrap_inner_subtitle`` supplies a ``timedelta``.
            inner: the wrapped library-specific subtitle object.
        """
        self.start, self.end, self.inner = start, end, inner

    def __eq__(self, other: object) -> bool:
        """Compare by ``start``, ``end`` and ``inner``.

        Anything that is not a ``GenericSubtitle`` compares unequal.
        """
        if isinstance(other, GenericSubtitle):
            return (
                self.start == other.start
                and self.end == other.end
                and self.inner == other.inner
            )
        return False

    @property
    def content(self) -> str:
        """Text of the wrapped subtitle.

        Reads ``content`` from an ``srt.Subtitle`` and ``text`` from a
        ``pysubs2.SSAEvent``.

        Raises:
            NotImplementedError: if ``inner`` is neither of those types.
        """
        wrapped = self.inner
        if isinstance(wrapped, srt.Subtitle):
            return wrapped.content
        if isinstance(wrapped, pysubs2.SSAEvent):
            return wrapped.text
        raise NotImplementedError(
            "unsupported subtitle type: %s" % type(wrapped)
        )

    def resolve_inner_timestamps(self):
        """Return a deep copy of ``inner`` carrying this wrapper's timing.

        For ``srt.Subtitle`` the ``start`` / ``end`` values are assigned
        as-is; for ``pysubs2.SSAEvent`` they are converted from seconds with
        ``pysubs2.make_time``.

        Raises:
            NotImplementedError: if ``inner`` is neither of those types.
        """
        # The copy is taken before the type check, so ``self.inner`` itself
        # is left untouched whichever branch runs.
        resolved = copy.deepcopy(self.inner)
        if isinstance(self.inner, srt.Subtitle):
            resolved.start, resolved.end = self.start, self.end
            return resolved
        if isinstance(self.inner, pysubs2.SSAEvent):
            resolved.start = pysubs2.make_time(s=self.start.total_seconds())
            resolved.end = pysubs2.make_time(s=self.end.total_seconds())
            return resolved
        raise NotImplementedError(
            "unsupported subtitle type: %s" % type(self.inner)
        )

    def merge_with(self, other):
        """Return a new subtitle whose text is this one's followed by ``other``'s.

        The result keeps this subtitle's ``start`` and ``end``; the two
        contents are joined with a newline. Only ``srt.Subtitle`` inners are
        supported.

        Raises:
            AssertionError: if ``self.inner`` is not an instance of
                ``type(other.inner)``.
            NotImplementedError: if ``inner`` is not an ``srt.Subtitle``.
        """
        assert isinstance(self.inner, type(other.inner))
        combined = copy.deepcopy(self.inner)
        if not isinstance(self.inner, srt.Subtitle):
            raise NotImplementedError(
                "unsupported subtitle type: %s" % type(self.inner)
            )
        combined.content = "{}\n{}".format(combined.content, other.inner.content)
        return self.__class__(self.start, self.end, combined)

    @classmethod
    def wrap_inner_subtitle(cls, sub) -> "GenericSubtitle":
        """Build a wrapper from a library-specific subtitle object.

        ``srt.Subtitle`` timing is taken over unchanged; ``pysubs2.SSAEvent``
        ``start`` / ``end`` are interpreted as milliseconds and turned into
        ``timedelta`` objects.

        Raises:
            NotImplementedError: if ``sub`` is neither of those types.
        """
        if isinstance(sub, srt.Subtitle):
            return cls(sub.start, sub.end, sub)
        if isinstance(sub, pysubs2.SSAEvent):
            begin = timedelta(milliseconds=sub.start)
            finish = timedelta(milliseconds=sub.end)
            return cls(begin, finish, sub)
        raise NotImplementedError("unsupported subtitle type: %s" % type(sub))
|
2020-06-11 00:04:54 +08:00
|
|
|
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
class GenericSubtitlesFile:
    """A list of ``GenericSubtitle`` entries plus the metadata needed to write them.

    Besides the subtitles themselves, an instance remembers the source
    format, the text encoding used on output and, optionally, the SSA/ASS
    ``styles``, ``fonts_opaque`` and ``info`` values that are copied onto the
    ``pysubs2.SSAFile`` built by ``write_file``.
    """

    def __init__(self, subs: List[GenericSubtitle], *_, **kwargs: Any):
        """Store the subtitles and their file-level properties.

        Args:
            subs: the wrapped subtitles.
            *_: extra positional arguments are accepted and ignored.
            **kwargs: ``sub_format`` and ``encoding`` are required;
                ``styles``, ``fonts_opaque`` and ``info`` are optional and
                default to ``None``. Other keys are ignored.

        Raises:
            ValueError: if ``sub_format`` or ``encoding`` is missing or None.
        """
        sub_format: str = cast(str, kwargs.pop("sub_format", None))
        if sub_format is None:
            raise ValueError("format must be specified")
        encoding: str = cast(str, kwargs.pop("encoding", None))
        if encoding is None:
            raise ValueError("encoding must be specified")
        self.subs_: List[GenericSubtitle] = subs
        self._sub_format: str = sub_format
        self._encoding: str = encoding
        self._styles: Optional[Dict[str, pysubs2.SSAStyle]] = kwargs.pop("styles", None)
        self._fonts_opaque: Optional[Dict[str, Any]] = kwargs.pop("fonts_opaque", None)
        self._info: Optional[Dict[str, str]] = kwargs.pop("info", None)

    def set_encoding(self, encoding: str) -> "GenericSubtitlesFile":
        """Change the output encoding and return ``self``.

        The special value ``"same"`` leaves the current encoding untouched.
        """
        if encoding != "same":
            self._encoding = encoding
        return self

    def __len__(self) -> int:
        """Number of subtitles held."""
        return len(self.subs_)

    def __getitem__(self, item: int) -> GenericSubtitle:
        """Return the subtitle at position ``item``."""
        return self.subs_[item]

    def __iter__(self) -> Iterator[GenericSubtitle]:
        """Iterate over the held subtitles in order."""
        return iter(self.subs_)

    def clone_props_for_subs(
        self, new_subs: List[GenericSubtitle]
    ) -> "GenericSubtitlesFile":
        """Return a new file object with ``new_subs`` and this object's properties.

        Format, encoding, styles, fonts and info are passed through by
        reference (they are not copied).
        """
        return GenericSubtitlesFile(
            new_subs,
            sub_format=self._sub_format,
            encoding=self._encoding,
            styles=self._styles,
            fonts_opaque=self._fonts_opaque,
            info=self._info,
        )

    def gen_raw_resolved_subs(self):
        """Yield, for each subtitle, its inner object with resolved timestamps."""
        for sub in self.subs_:
            yield sub.resolve_inner_timestamps()

    def offset(self, td: timedelta) -> "GenericSubtitlesFile":
        """Return a copy of this file with every subtitle shifted by ``td``.

        The inner objects are shared with the original subtitles; only the
        wrapper timings differ.
        """
        offset_subs = [
            GenericSubtitle(sub.start + td, sub.end + td, sub.inner)
            for sub in self.subs_
        ]
        return self.clone_props_for_subs(offset_subs)

    def write_file(self, fname: Optional[str]) -> None:
        """Serialize the subtitles and write them to ``fname`` or to stdout.

        The output format is taken from the extension of ``fname``; when
        ``fname`` is ``None`` the source format is used and the bytes go to
        standard output. The text is encoded with the configured encoding.

        Args:
            fname: destination path, or ``None`` to write to stdout.

        Raises:
            NotImplementedError: if the source/output format combination is
                not handled below.
        """
        # TODO: converter to go between self.subs_format and out_format
        if fname is None:
            out_format = self._sub_format
        else:
            out_format = os.path.splitext(fname)[-1][1:]
        subs = list(self.gen_raw_resolved_subs())
        if self._sub_format in ("ssa", "ass"):
            ssaf = pysubs2.SSAFile()
            ssaf.events = subs
            if self._styles is not None:
                ssaf.styles = self._styles
            if self._info is not None:
                ssaf.info = self._info
            if self._fonts_opaque is not None:
                ssaf.fonts_opaque = self._fonts_opaque
            to_write = ssaf.to_string(out_format)
        elif self._sub_format == "srt" and out_format in ("ssa", "ass"):
            # Round-trip through SRT text so pysubs2 can re-parse and convert.
            to_write = pysubs2.SSAFile.from_string(srt.compose(subs)).to_string(
                out_format
            )
        elif out_format == "srt":
            to_write = srt.compose(subs)
        else:
            raise NotImplementedError("unsupported output format: %s" % out_format)

        to_write = to_write.encode(self._encoding)
        # This module uses variable annotations and therefore only runs on
        # Python 3, so no Python 2 fallback is needed here.
        if fname:
            with open(fname, "wb") as f:
                f.write(to_write)
        else:
            # closefd=False: leaving the ``with`` block flushes our wrapper but
            # keeps the process's real stdout descriptor open for later output.
            with open(sys.stdout.fileno(), "wb", closefd=False) as f:
                f.write(to_write)
|
2022-01-24 12:07:52 +08:00
|
|
|
|
|
|
|
|
|
|
|
class SubsMixin:
    """Mixin for objects that carry a ``GenericSubtitlesFile`` in ``subs_``."""

    def __init__(self, subs: Optional[GenericSubtitlesFile] = None) -> None:
        """Remember ``subs`` (possibly ``None``) as ``self.subs_``."""
        self.subs_: Optional[GenericSubtitlesFile] = subs

    def set_encoding(self, encoding: str) -> "SubsMixin":
        """Forward ``encoding`` to the held subtitles file and return ``self``.

        No check is made that ``subs_`` has been set; calling this while it
        is still ``None`` fails with ``AttributeError``.
        """
        held = self.subs_
        held.set_encoding(encoding)
        return self
|