bazarr/libs/pysubs2/formats.py

81 lines
2.4 KiB
Python
Raw Normal View History

from typing import Dict, Type
2018-11-01 00:08:29 +08:00
from .formatbase import FormatBase
from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
from .jsonformat import JSONFormat
from .substation import SubstationFormat
from .mpl2 import MPL2Format
2019-09-21 05:56:33 +08:00
from .tmp import TmpFormat
from .webvtt import WebVTTFormat
2018-11-01 00:08:29 +08:00
from .exceptions import *
#: Dict mapping file extensions to format identifiers.
#:
#: Keys include the leading dot. Entry order is kept as-is because
#: ``get_file_extension`` scans this mapping and returns the first
#: extension whose value equals the requested identifier.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
    ".srt": "srt",
    ".ass": "ass",
    ".ssa": "ssa",
    ".sub": "microdvd",
    ".json": "json",
    ".txt": "tmp",
    ".vtt": "vtt",
}
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
#:
#: "ass" and "ssa" are both served by ``SubstationFormat``, so the same
#: class appears twice among the values.
FORMAT_IDENTIFIER_TO_FORMAT_CLASS: Dict[str, Type[FormatBase]] = {
    "srt": SubripFormat,
    "ass": SubstationFormat,
    "ssa": SubstationFormat,
    "microdvd": MicroDVDFormat,
    "json": JSONFormat,
    "mpl2": MPL2Format,
    "tmp": TmpFormat,
    "vtt": WebVTTFormat,
}
#: List of all format identifiers, in the order they appear in
#: FORMAT_IDENTIFIER_TO_FORMAT_CLASS.
FORMAT_IDENTIFIERS = list(FORMAT_IDENTIFIER_TO_FORMAT_CLASS)
def get_format_class(format_: str) -> Type[FormatBase]:
    """
    Format identifier -> format class (ie. subclass of FormatBase)

    Arguments:
        format_: Format identifier; must be a key of
            ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.

    Returns:
        The class stored under ``format_`` in
        ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.

    Raises:
        UnknownFormatIdentifierError: If ``format_`` is not a known
            identifier.
    """
    try:
        return FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
    except KeyError:
        # The KeyError is only an artifact of the dict lookup; suppress the
        # implicit chaining so the traceback shows just the domain error.
        raise UnknownFormatIdentifierError(format_) from None
def get_format_identifier(ext: str) -> str:
    """
    File extension -> format identifier

    Arguments:
        ext: File extension exactly as it is keyed in
            ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER`` (the keys there start
            with a dot, e.g. ``".srt"``). The lookup is an exact match.

    Returns:
        The format identifier stored for ``ext``.

    Raises:
        UnknownFileExtensionError: If ``ext`` is not a known extension.
    """
    try:
        return FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
    except KeyError:
        # The KeyError is only an artifact of the dict lookup; suppress the
        # implicit chaining so the traceback shows just the domain error.
        raise UnknownFileExtensionError(ext) from None
def get_file_extension(format_: str) -> str:
    """
    Format identifier -> file extension

    Returns the first extension in ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER``
    that maps to ``format_``.

    Raises:
        UnknownFormatIdentifierError: If ``format_`` is not a key of
            ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.
        RuntimeError: If the identifier is known but no extension maps to it.
    """
    if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
        raise UnknownFormatIdentifierError(format_)

    # Lazily scan the extension table; only the first hit is needed.
    matching_extensions = (
        extension
        for extension, identifier in FILE_EXTENSION_TO_FORMAT_IDENTIFIER.items()
        if identifier == format_
    )
    first_match = next(matching_extensions, None)
    if first_match is None:
        raise RuntimeError("No file extension for format %r" % format_)
    return first_match
def autodetect_format(content: str) -> str:
    """
    Return format identifier for given fragment or raise FormatAutodetectionError.

    Every class in ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS`` is asked via its
    ``guess_format(content)``; a ``None`` answer is ignored. Detection
    succeeds only when the remaining answers agree on exactly one identifier.

    Raises:
        FormatAutodetectionError: If no class produced a guess, or if the
            guesses name more than one distinct identifier.
    """
    all_guesses = (
        impl.guess_format(content)
        for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values()
    )
    # A set collapses repeated answers (one class may be registered under
    # several identifiers and is therefore queried more than once).
    candidates = {guess for guess in all_guesses if guess is not None}

    if not candidates:
        raise FormatAutodetectionError("No suitable formats")
    if len(candidates) > 1:
        raise FormatAutodetectionError("Multiple suitable formats (%r)" % candidates)
    return candidates.pop()