2021-07-15 07:13:28 +08:00
|
|
|
from typing import Dict, Type
|
|
|
|
|
2018-11-01 00:08:29 +08:00
|
|
|
from .formatbase import FormatBase
|
|
|
|
from .microdvd import MicroDVDFormat
|
|
|
|
from .subrip import SubripFormat
|
|
|
|
from .jsonformat import JSONFormat
|
|
|
|
from .substation import SubstationFormat
|
2019-05-12 12:23:46 +08:00
|
|
|
from .mpl2 import MPL2Format
|
2019-09-21 05:56:33 +08:00
|
|
|
from .tmp import TmpFormat
|
2021-07-15 07:13:28 +08:00
|
|
|
from .webvtt import WebVTTFormat
|
2018-11-01 00:08:29 +08:00
|
|
|
from .exceptions import *
|
|
|
|
|
|
|
|
#: Dict mapping file extensions to format identifiers.
#:
#: Keys are file extensions including the leading dot; values are keys of the
#: format-identifier-to-class registry defined in this module.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
    # Insertion order matters: get_file_extension() returns the first
    # extension whose identifier matches.
    ".srt": "srt",
    ".ass": "ass",
    ".ssa": "ssa",
    ".sub": "microdvd",
    ".json": "json",
    ".txt": "tmp",
    ".vtt": "vtt",
}
|
|
|
|
|
|
|
|
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
#:
#: One implementation may be registered under several identifiers
#: ("ass" and "ssa" both resolve to SubstationFormat).
FORMAT_IDENTIFIER_TO_FORMAT_CLASS: Dict[str, Type[FormatBase]] = {
    "srt": SubripFormat,
    "ass": SubstationFormat,
    "ssa": SubstationFormat,
    "microdvd": MicroDVDFormat,
    "json": JSONFormat,
    "mpl2": MPL2Format,
    "tmp": TmpFormat,
    "vtt": WebVTTFormat,
}
|
|
|
|
|
2021-07-15 07:13:28 +08:00
|
|
|
#: List of all known format identifiers, in registration order.
FORMAT_IDENTIFIERS = list(FORMAT_IDENTIFIER_TO_FORMAT_CLASS)
|
|
|
|
|
|
|
|
|
|
|
|
def get_format_class(format_: str) -> Type[FormatBase]:
    """Format identifier -> format class (ie. subclass of FormatBase)

    Looks ``format_`` up in ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.

    Raises:
        UnknownFormatIdentifierError: ``format_`` is not a registered
            format identifier.
    """
    try:
        format_class = FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
    except KeyError:
        # Translate the generic lookup failure into the library's own error.
        raise UnknownFormatIdentifierError(format_)
    return format_class
|
|
|
|
|
2021-07-15 07:13:28 +08:00
|
|
|
|
|
|
|
def get_format_identifier(ext: str) -> str:
    """File extension -> format identifier

    Looks ``ext`` up in ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER``; the lookup
    is an exact match, so ``ext`` must be spelled as the keys are
    (leading dot, lowercase).

    Raises:
        UnknownFileExtensionError: ``ext`` is not a registered extension.
    """
    try:
        identifier = FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
    except KeyError:
        # Translate the generic lookup failure into the library's own error.
        raise UnknownFileExtensionError(ext)
    return identifier
|
|
|
|
|
2021-07-15 07:13:28 +08:00
|
|
|
|
|
|
|
def get_file_extension(format_: str) -> str:
    """Format identifier -> file extension

    Returns the first extension in ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER``
    (in insertion order) that maps to ``format_``.

    Raises:
        UnknownFormatIdentifierError: ``format_`` is not a registered
            format identifier.
        RuntimeError: ``format_`` is registered but no file extension
            maps to it.
    """
    if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
        raise UnknownFormatIdentifierError(format_)

    # Lazily scan the extension table; only the first match is needed.
    candidates = (
        extension
        for extension, identifier in FILE_EXTENSION_TO_FORMAT_IDENTIFIER.items()
        if identifier == format_
    )
    first_match = next(candidates, None)
    if first_match is None:
        raise RuntimeError("No file extension for format %r" % format_)
    return first_match
|
|
|
|
|
2021-07-15 07:13:28 +08:00
|
|
|
|
|
|
|
def autodetect_format(content: str) -> str:
    """Return format identifier for given fragment or raise FormatAutodetectionError.

    Each registered format implementation is asked, via its
    ``guess_format`` method, which format ``content`` is; a ``None`` answer
    counts as "no guess". The distinct non-``None`` answers are collected
    and exactly one must remain.

    Args:
        content: Text fragment whose format should be detected.

    Returns:
        The single format identifier all guessing implementations agree on.

    Raises:
        FormatAutodetectionError: No implementation produced a guess, or
            the implementations produced more than one distinct guess.
    """
    # One implementation can be registered under several identifiers (both
    # "ass" and "ssa" map to SubstationFormat), so iterate over each class
    # only once. dict.fromkeys() deduplicates while keeping registry order.
    implementations = dict.fromkeys(FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values())

    formats = set()
    for impl in implementations:
        guess = impl.guess_format(content)
        if guess is not None:
            formats.add(guess)

    if not formats:
        raise FormatAutodetectionError("No suitable formats")
    if len(formats) > 1:
        raise FormatAutodetectionError("Multiple suitable formats (%r)" % formats)
    return formats.pop()
|