diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 20bddc995..1cf1f0494 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -364,8 +364,8 @@ def guess_external_subtitles(dest_folder, subtitles): logging.debug("BAZARR falling back to file content analysis to detect language.") detected_language = None - # to improve performance, skip detection of files larger that 5M - if os.path.getsize(subtitle_path) > 5*1024*1024: + # to improve performance, skip detection of files larger than 1M + if os.path.getsize(subtitle_path) > 1*1024*1024: logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " + subtitle_path) continue @@ -374,16 +374,11 @@ def guess_external_subtitles(dest_folder, subtitles): text = f.read() try: - # to improve performance, use only the first 32K to detect encoding - guess = chardet.detect(text[:32768]) + guess = chardet.detect(text) logging.debug('BAZARR detected encoding %r', guess) - if guess["confidence"] < 0.6: - raise UnicodeError - if guess["encoding"] == "ascii": - guess["encoding"] = "utf-8" text = text.decode(guess["encoding"]) detected_language = guess_language(text) - except UnicodeError: + except (UnicodeDecodeError, TypeError): logging.exception("BAZARR subtitles file doesn't seems to be text based. Skipping this file: " + subtitle_path) except: diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index 397effcfe..f854390cf 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -606,7 +606,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen continue if p_root.lower() == fn_no_ext_lower: - # skip check for language code is the subtitle file name is the same as the video name + # skip check for language code if the subtitle file name is the same as the video name subtitles[p] = None continue