From d8aa94e632ceeb476261a9df1d5c1f08808152c1 Mon Sep 17 00:00:00 2001 From: morpheus65535 <5130500+morpheus65535@users.noreply.github.com> Date: Fri, 12 Jan 2018 20:47:48 -0500 Subject: [PATCH] Increased number of lines to check for language detection and fixed conversion #36 --- list_subtitles.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/list_subtitles.py b/list_subtitles.py index 3b6ce5bf6..2ad8e5ec3 100644 --- a/list_subtitles.py +++ b/list_subtitles.py @@ -6,6 +6,7 @@ import pycountry import sqlite3 import ast import langdetect +import chardet from get_general_settings import * @@ -35,7 +36,9 @@ def list_subtitles(file): else: with open(path_replace(os.path.join(os.path.dirname(file), subtitle)), 'r') as f: text = [next(f) for x in xrange(5)] - text = ' '.join(text).decode('iso-8859-1') + text = ' '.join(text) + encoding = chardet.detect(text)['encoding'] + text = text.decode(encoding) detected_language = langdetect.detect(text) if len(detected_language) > 0: actual_subtitles.append([str(detected_language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))]) @@ -67,7 +70,7 @@ def store_subtitles(file): actual_subtitles.append([str(language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))]) else: with open(path_replace(os.path.join(os.path.dirname(file), subtitle)), 'r') as f: - text = [next(f) for x in xrange(5)] + text = [next(f) for x in xrange(20)] text = ' '.join(text).decode('iso-8859-1') detected_language = langdetect.detect(text) if len(detected_language) > 0: