From 98de479389d367248b6376c74afc66093d7642a8 Mon Sep 17 00:00:00 2001 From: morpheus65535 <5130500+morpheus65535@users.noreply.github.com> Date: Sat, 13 Jan 2018 23:20:20 -0500 Subject: [PATCH] Switched to UnicodeDammit instead of chardet, which gives me better results #37 --- list_subtitles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/list_subtitles.py b/list_subtitles.py index 4d0010a53..26dff271e 100644 --- a/list_subtitles.py +++ b/list_subtitles.py @@ -6,7 +6,7 @@ import pycountry import sqlite3 import ast import langdetect -import chardet +from bs4 import UnicodeDammit from itertools import islice from get_general_settings import * @@ -38,9 +38,9 @@ def store_subtitles(file): with open(path_replace(os.path.join(os.path.dirname(file), subtitle)), 'r') as f: text = list(islice(f, 20)) text = ' '.join(text) - encoding = chardet.detect(text)['encoding'] + encoding = UnicodeDammit(text) try: - text = text.decode(encoding) + text = text.decode(encoding.original_encoding) except Exception as e: logging.exception('Error trying to detect character encoding for this subtitles file: ' + path_replace(os.path.join(os.path.dirname(file), subtitle))) else: