mirror of
https://github.com/morpheus65535/bazarr.git
synced 2026-03-01 03:54:09 +08:00
Fix #886
Use the whole subtitle file to detect the encoding. Detecting the encoding from only a part of the file does not always work. This modification will lead to worse performance, but should be more reliable.
This commit is contained in:
parent
ca0123c036
commit
d06cace2d6
2 changed files with 5 additions and 10 deletions
|
|
@ -364,8 +364,8 @@ def guess_external_subtitles(dest_folder, subtitles):
|
|||
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
||||
detected_language = None
|
||||
|
||||
# to improve performance, skip detection of files larger that 5M
|
||||
if os.path.getsize(subtitle_path) > 5*1024*1024:
|
||||
# to improve performance, skip detection of files larger that 1M
|
||||
if os.path.getsize(subtitle_path) > 1*1024*1024:
|
||||
logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
|
||||
subtitle_path)
|
||||
continue
|
||||
|
|
@ -374,16 +374,11 @@ def guess_external_subtitles(dest_folder, subtitles):
|
|||
text = f.read()
|
||||
|
||||
try:
|
||||
# to improve performance, use only the first 32K to detect encoding
|
||||
guess = chardet.detect(text[:32768])
|
||||
guess = chardet.detect(text)
|
||||
logging.debug('BAZARR detected encoding %r', guess)
|
||||
if guess["confidence"] < 0.6:
|
||||
raise UnicodeError
|
||||
if guess["encoding"] == "ascii":
|
||||
guess["encoding"] = "utf-8"
|
||||
text = text.decode(guess["encoding"])
|
||||
detected_language = guess_language(text)
|
||||
except UnicodeError:
|
||||
except (UnicodeDecodeError, TypeError):
|
||||
logging.exception("BAZARR subtitles file doesn't seems to be text based. Skipping this file: " +
|
||||
subtitle_path)
|
||||
except:
|
||||
|
|
|
|||
|
|
@ -606,7 +606,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
|||
continue
|
||||
|
||||
if p_root.lower() == fn_no_ext_lower:
|
||||
# skip check for language code is the subtitle file name is the same as the video name
|
||||
# skip check for language code if the subtitle file name is the same as the video name
|
||||
subtitles[p] = None
|
||||
continue
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue