mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-01-08 15:57:36 +08:00
core: update to subliminal_patch:head; core: try and fix #660
This commit is contained in:
parent
64fa210688
commit
a612335a44
1 changed file with 17 additions and 0 deletions
|
@ -20,6 +20,15 @@ from subliminal import Subtitle as Subtitle_
|
|||
from subliminal.subtitle import Episode, Movie, sanitize_release_group, get_equivalent_release_groups
|
||||
from subliminal_patch.utils import sanitize
|
||||
from ftfy import fix_text
|
||||
from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
|
||||
|
||||
# Byte-order marks paired with the name of the encoding each one announces.
# The UTF-32 marks are listed ahead of the UTF-16 ones; note that
# BOM_UTF32_LE (FF FE 00 00) begins with the bytes of BOM_UTF16_LE (FF FE).
BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF32_BE, "UTF-32-BE"), (BOM_UTF32_LE, "UTF-32-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"), (BOM_UTF16_LE, "UTF-16-LE"),
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -106,6 +115,9 @@ class Subtitle(Subtitle_):
|
|||
# normalize line endings
|
||||
self.content = self.content.replace(b"\r\n", b"\n").replace(b'\r', b'\n')
|
||||
|
||||
def _check_bom(self, data):
    """Return the encoding announced by a byte-order mark at the start of ``data``.

    Every ``(bom, encoding)`` pair in the module-level ``BOMS`` table is
    compared against the beginning of ``data``. When several marks match,
    only the longest one is reported.

    :param data: raw bytes to inspect.
    :return: a list holding the single matching encoding name, or an empty
        list when ``data`` does not start with any known BOM.
    """
    matches = [(bom, encoding) for bom, encoding in BOMS if data.startswith(bom)]
    if not matches:
        return []

    # BOM_UTF32_LE (FF FE 00 00) starts with BOM_UTF16_LE (FF FE), so UTF-32-LE
    # data matches both marks. UTF-16-LE decodes such data without raising and
    # yields NUL-interleaved text, so reporting it as a candidate lets a caller
    # silently pick the wrong codec. The longest matching mark is the specific one.
    return [max(matches, key=lambda match: len(match[0]))[1]]
|
||||
|
||||
def guess_encoding(self):
|
||||
"""Guess encoding using the language, falling back on chardet.
|
||||
|
||||
|
@ -120,6 +132,11 @@ class Subtitle(Subtitle_):
|
|||
|
||||
encodings = ['utf-8']
|
||||
|
||||
# check UTF BOMs
|
||||
bom_encodings = self._check_bom(self.content)
|
||||
if bom_encodings:
|
||||
encodings = list(set(enc.lower() for enc in bom_encodings + encodings))
|
||||
|
||||
# add language-specific encodings
|
||||
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
|
||||
|
||||
|
|
Loading…
Reference in a new issue