# Post-patch code for the definitions that are fully visible in this change.
# Two files are touched; each section below is labelled with its origin.

import logging

# --- bazarr/embedded_subs_reader.py -----------------------------------------

# Languages whose alpha3 code is refined by looking at the track name: when
# the lower-cased name contains any marker from "list", the more specific
# "alpha3" code is reported instead of the generic one.
_FFPROBE_SPECIAL_LANGS = {
    "zho": {
        "list": ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"],
        "alpha3": "zht",
    },
    "por": {
        "list": ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"],
        "alpha3": "pob",
    },
}


def _track_name(detected_language: dict) -> str:
    """Return the lower-cased track name, or "" when it is missing or None."""
    # `.get("name", "")` alone is not enough: a key that is present but set
    # to None would make `.lower()` raise AttributeError.
    return (detected_language.get("name") or "").lower()


def _handle_alpha3(detected_language: dict):
    """Return the alpha3 code for a track, refined by its name when possible.

    :param detected_language: track dict; must contain a "language" entry
        exposing an ``alpha3`` attribute, and may contain a "name" entry.
    :return: the specific code from ``_FFPROBE_SPECIAL_LANGS`` when the track
        name contains one of its markers, otherwise the original alpha3.
    """
    alpha3 = detected_language["language"].alpha3
    special_lang = _FFPROBE_SPECIAL_LANGS.get(alpha3)
    name = _track_name(detected_language)

    if special_lang is None or not name:
        return alpha3  # Nothing to refine: keep the original alpha3

    if any(marker in name for marker in special_lang["list"]):
        return special_lang["alpha3"]  # Guessed from _FFPROBE_SPECIAL_LANGS

    return alpha3  # No marker matched


def _ffprobe_subtitles(tracks):
    """Build ``[language, forced, hearing_impaired, codec]`` rows from ffprobe tracks.

    Tracks without a "language" entry and tracks whose name contains
    "commentary" are skipped.
    """
    rows = []
    for track in tracks:
        if "language" not in track:
            continue

        # Avoid commentary subtitles
        name = _track_name(track)
        if "commentary" in name:
            logging.debug("Ignoring commentary subtitle: %s", name)
            continue

        rows.append([
            _handle_alpha3(track),
            track.get("forced", False),
            track.get("hearing_impaired", False),
            track.get("format"),  # None when the codec is unknown
        ])
    return rows


def _enzyme_subtitles(tracks):
    """Build ``[language, forced, hearing_impaired, codec_id]`` rows from enzyme tracks."""
    rows = []
    for track in tracks:
        # bool(...) keeps the flag a real boolean: a bare `name and ...`
        # would leak None or "" into the row for unnamed tracks.
        hearing_impaired = bool(track.name) and "sdh" in track.name.lower()
        rows.append([track.language, track.forced, hearing_impaired, track.codec_id])
    return rows


def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
    """List the embedded subtitle tracks reported for a video file.

    The metadata comes from ``parse_video_metadata``; its "ffprobe" entry is
    used when it is non-empty and has a "subtitle" key, otherwise the "enzyme"
    entry is used when non-empty.

    :return: a list of ``[language, forced, hearing_impaired, codec]`` rows
        (empty when neither source provides subtitle information).
    """
    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)

    subtitles_list = []
    if data['ffprobe'] and 'subtitle' in data['ffprobe']:
        subtitles_list = _ffprobe_subtitles(data['ffprobe']['subtitle'])
    elif data['enzyme']:
        subtitles_list = _enzyme_subtitles(data['enzyme'].subtitle_tracks)

    # NOTE(review): the end of this function lies outside the visible hunk;
    # returning the accumulated list is assumed - confirm against the file.
    return subtitles_list


# NOTE(review): the same patch changes parse_video_metadata() so that its
# 'file_id' entry is `episode_file_id or movie_file_id`. Only a fragment of
# that function is visible, so it is not reproduced here.


# --- libs/subliminal_patch/subtitle.py ---------------------------------------

def _has_match(video, guess, key) -> bool:
    """Tell whether ``video.<key>`` matches the value guessed for ``key``.

    :param video: object carrying the attribute named ``key``.
    :param guess: mapping of guessed properties; the value for ``key`` may be
        a single value or a list of candidates.
    :param key: attribute / guess key to compare (e.g. "video_codec").
    :return: False when the video has no value for ``key``; otherwise True if
        the value equals the guess, or any item of a guessed list.
    """
    # Default to None so a video object lacking the attribute is treated
    # like one whose attribute is unset, instead of raising AttributeError.
    value = getattr(video, key, None)
    if value is None:
        return False

    guess_value = guess.get(key)
    if isinstance(guess_value, list):
        return any(value == item for item in guess_value)

    return value == guess_value


# NOTE(review): guess_matches() (only partially visible in this change) now
# adds "video_codec" / "audio_codec" to its matches when
# _has_match(video, guess, <key>) is true for the respective key.