mirror of
https://github.com/morpheus65535/bazarr.git
synced 2024-11-10 09:02:44 +08:00
Refactor Argenteam Provider
* Deprecate text search in favour of IMDB search * Simplify code
This commit is contained in:
parent
2e4480dd5f
commit
52760d8bc7
3 changed files with 146 additions and 203 deletions
|
@ -1,21 +1,20 @@
|
|||
# coding=utf-8
|
||||
from __future__ import absolute_import
|
||||
|
||||
from json import JSONDecodeError
|
||||
import logging
|
||||
import os
|
||||
import io
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from json import JSONDecodeError
|
||||
from zipfile import ZipFile
|
||||
from guessit import guessit
|
||||
from requests import Session
|
||||
from subliminal import Episode, Movie
|
||||
from subliminal.utils import sanitize
|
||||
from subliminal import Episode
|
||||
from subliminal import Movie
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.subtitle import Subtitle, guess_matches
|
||||
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
|
||||
from subliminal_patch.providers.utils import get_archive_from_bytes
|
||||
from subliminal_patch.providers.utils import get_subtitle_from_archive
|
||||
from subliminal_patch.providers.utils import update_matches
|
||||
from subliminal_patch.subtitle import Subtitle
|
||||
from subzero.language import Language
|
||||
|
||||
BASE_URL = "https://argenteam.net"
|
||||
|
@ -30,42 +29,31 @@ class ArgenteamSubtitle(Subtitle):
|
|||
|
||||
def __init__(self, language, page_link, download_link, release_info, matches):
    """Store the data describing one Argenteam subtitle.

    :param language: language passed through to the base ``Subtitle``
    :param page_link: URL of the subtitle's page
    :param download_link: URL used to download the subtitle (also its id)
    :param release_info: release description used when computing matches
    :param matches: matches already established by the caller
        (NOTE(review): callers in this file pass a ``set`` of names)
    """
    super(ArgenteamSubtitle, self).__init__(language, page_link=page_link)

    # Kept private: get_matches() hands this to update_matches() and
    # returns it.
    self._found_matches = matches

    self.page_link = page_link
    self.download_link = download_link
    self.release_info = release_info
|
||||
|
||||
@property
|
||||
def id(self):
|
||||
return self.download_link
|
||||
|
||||
def get_matches(self, video):
    """Return the matches of this subtitle against *video*.

    The matches stored at construction time are passed, together with the
    video and ``self.release_info``, to ``update_matches`` and then
    returned.
    NOTE(review): presumably ``update_matches`` extends the set in place
    with whatever it can derive from the release info - confirm against
    ``subliminal_patch.providers.utils``.
    """
    update_matches(self._found_matches, video, self.release_info)

    return self._found_matches
|
||||
|
||||
|
||||
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
provider_name = "argenteam"
|
||||
# Safe to assume every subtitle from Argenteam is Latam Spanish
|
||||
|
||||
languages = {Language("spa", "MX")}
|
||||
video_types = (Episode, Movie)
|
||||
subtitle_class = ArgenteamSubtitle
|
||||
hearing_impaired_verifiable = False
|
||||
language_list = list(languages)
|
||||
|
||||
multi_result_throttle = 2 # seconds
|
||||
_default_lang = Language("spa", "MX")
|
||||
|
||||
def __init__(self):
|
||||
self.session = Session()
|
||||
|
@ -78,31 +66,36 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
def terminate(self):
|
||||
self.session.close()
|
||||
|
||||
def query(self, video):
    """Search Argenteam for subtitles of *video* using its IMDB ID.

    Episodes are looked up with ``video.series_imdb_id`` plus season and
    episode numbers; movies with ``video.imdb_id``.

    :param video: an ``Episode`` or ``Movie``
    :return: the result of ``self._parse_subtitles`` for the IDs found, or
        an empty list when the video has no IMDB ID or the search returns
        no IDs
    """
    is_episode = isinstance(video, Episode)
    imdb_id = video.series_imdb_id if is_episode else video.imdb_id

    if not imdb_id:
        # The message has a %s placeholder, so the video must be supplied;
        # without it the literal "%s" ends up in the log.
        logger.debug("%s doesn't have IMDB ID. Can't search", video)
        return []

    if is_episode:
        argenteam_ids = self._search_ids(
            imdb_id, season=video.season, episode=video.episode
        )
    else:
        argenteam_ids = self._search_ids(imdb_id)

    if not argenteam_ids:
        logger.debug("No IDs found")
        return []

    return self._parse_subtitles(argenteam_ids, is_episode)
|
||||
|
||||
def _parse_subtitles(self, ids, is_episode=True):
|
||||
movie_kind = "episode" if is_episode else "movie"
|
||||
|
||||
subtitles = []
|
||||
has_multiple_ids = len(argenteam_ids) > 1
|
||||
for aid in argenteam_ids:
|
||||
response = self.session.get(url, params={"id": aid}, timeout=10)
|
||||
|
||||
for aid in ids:
|
||||
response = self.session.get(
|
||||
f"{API_URL}/{movie_kind}", params={"id": aid}, timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
try:
|
||||
|
@ -113,81 +106,55 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
if not content or not content.get("releases"):
|
||||
continue
|
||||
|
||||
imdb_id = year = None
|
||||
returned_title = title
|
||||
if not is_episode and "info" in content:
|
||||
imdb_id = content["info"].get("imdb")
|
||||
year = content["info"].get("year")
|
||||
returned_title = content["info"].get("title", title)
|
||||
|
||||
for r in content["releases"]:
|
||||
for s in r["subtitles"]:
|
||||
movie_kind = "episode" if is_episode else "movie"
|
||||
page_link = f"{BASE_URL}/{movie_kind}/{aid}"
|
||||
release_info = self._combine_release_info(r)
|
||||
|
||||
release_info = self._combine_release_info(r, s)
|
||||
|
||||
logger.debug("Got release info: %s", release_info)
|
||||
|
||||
download_link = s["uri"].replace("http://", "https://")
|
||||
|
||||
matches_ = self._get_query_matches(
|
||||
video,
|
||||
movie_kind=movie_kind,
|
||||
season=season,
|
||||
episode=episode,
|
||||
title=returned_title,
|
||||
year=year,
|
||||
imdb_id=imdb_id,
|
||||
tvdb_id=content.get("tvdb"),
|
||||
)
|
||||
# Already matched within query
|
||||
if is_episode:
|
||||
matches = {"series", "title", "season", "episode", "imdb_id"}
|
||||
else:
|
||||
matches = {"title", "year", "imdb_id"}
|
||||
|
||||
if matches_ is not None:
|
||||
subtitles.append(
|
||||
ArgenteamSubtitle(
|
||||
language,
|
||||
page_link,
|
||||
download_link,
|
||||
release_info,
|
||||
matches_,
|
||||
)
|
||||
subtitles.append(
|
||||
ArgenteamSubtitle(
|
||||
self._default_lang,
|
||||
page_link,
|
||||
download_link,
|
||||
release_info,
|
||||
matches,
|
||||
)
|
||||
|
||||
if has_multiple_ids:
|
||||
time.sleep(self.multi_result_throttle)
|
||||
)
|
||||
|
||||
return subtitles
|
||||
|
||||
def list_subtitles(self, video, languages):
    """Return the subtitles found for *video*.

    Delegates to ``self.query(video)``. ``languages`` is accepted for
    interface compatibility and is not consulted here.
    """
    return self.query(video)
|
||||
|
||||
def download_subtitle(self, subtitle):
    """Download the archive for *subtitle* and store its content.

    Fetches ``subtitle.download_link`` with a 10 second timeout, raises on
    an HTTP error status, then assigns the result of
    ``get_subtitle_from_archive`` to ``subtitle.content``.
    """
    logger.info("Downloading subtitle %r", subtitle)
    r = self.session.get(subtitle.download_link, timeout=10)
    r.raise_for_status()

    # Single extraction path: build the archive object from the raw bytes
    # and let the shared helper pick the subtitle out of it.
    archive = get_archive_from_bytes(r.content)
    subtitle.content = get_subtitle_from_archive(archive)
|
||||
|
||||
def _search_ids(self, title, **kwargs):
|
||||
query = title
|
||||
titles = kwargs.get("titles") or []
|
||||
def _search_ids(self, identifier, **kwargs):
|
||||
"""
|
||||
:param identifier: imdb_id or title (without year)
|
||||
"""
|
||||
identifier = identifier.lstrip("tt")
|
||||
|
||||
is_episode = False
|
||||
query = identifier
|
||||
if kwargs.get("season") and kwargs.get("episode"):
|
||||
is_episode = True
|
||||
query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
|
||||
query = f"{identifier} S{kwargs['season']:02}E{kwargs['episode']:02}"
|
||||
|
||||
logger.debug(f"Searching ID (episode: {is_episode}) for {query}")
|
||||
logger.debug("Searching ID for %s", query)
|
||||
|
||||
r = self.session.get(f"{API_URL}/search", params={"q": query}, timeout=10)
|
||||
r.raise_for_status()
|
||||
|
@ -200,84 +167,27 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
if not results.get("results"):
|
||||
return []
|
||||
|
||||
match_ids = []
|
||||
for result in results["results"]:
|
||||
if result["type"] == "movie" and is_episode:
|
||||
continue
|
||||
|
||||
imdb = f"tt{result.get('imdb', 'n/a')}"
|
||||
if not is_episode and imdb == kwargs.get("imdb_id"):
|
||||
logger.debug("Movie matched by IMDB ID, taking shortcut")
|
||||
match_ids = [result["id"]]
|
||||
break
|
||||
|
||||
# advanced title check in case of multiple movie results
|
||||
title_year = kwargs.get("year") and kwargs.get("title")
|
||||
if results["total"] > 1 and not is_episode and title_year:
|
||||
sanitized = sanitize(result["title"])
|
||||
titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles]
|
||||
if sanitized not in titles:
|
||||
continue
|
||||
|
||||
match_ids.append(result["id"])
|
||||
|
||||
if match_ids:
|
||||
ids = ", ".join(str(id) for id in match_ids)
|
||||
logger.debug("Found matching IDs: %s", ids)
|
||||
else:
|
||||
logger.debug("Nothing found from %s query", query)
|
||||
match_ids = [result["id"] for result in results["results"]]
|
||||
logger.debug("Found matching IDs: %s", match_ids)
|
||||
|
||||
return match_ids
|
||||
|
||||
def _get_query_matches(self, video, **kwargs):
|
||||
matches = set()
|
||||
def _combine_release_info(self, release_dict, subtitle_dict):
|
||||
releases = [
|
||||
urllib.parse.unquote(subtitle_dict.get("uri", "Unknown").split("/")[-1])
|
||||
]
|
||||
|
||||
if isinstance(video, Episode) and kwargs.get("movie_kind") == "episode":
|
||||
if (kwargs.get("tvdb_id") and video.series_tvdb_id) and str(
|
||||
video.series_tvdb_id
|
||||
) != str(kwargs.get("tvdb_id")):
|
||||
logger.debug(
|
||||
"TVDB ID not matched: %s - %s", kwargs, video.series_tvdb_id
|
||||
)
|
||||
return None
|
||||
combine = [
|
||||
release_dict.get(key)
|
||||
for key in ("source", "codec", "tags")
|
||||
if release_dict.get(key)
|
||||
]
|
||||
|
||||
if video.series and (
|
||||
sanitize(kwargs.get("title"))
|
||||
in (
|
||||
sanitize(name) for name in [video.series] + video.alternative_series
|
||||
)
|
||||
):
|
||||
matches.add("series")
|
||||
|
||||
if video.season and kwargs.get("season") == video.season:
|
||||
matches.add("season")
|
||||
|
||||
if video.episode and kwargs.get("episode") == video.episode:
|
||||
matches.add("episode")
|
||||
|
||||
# year (year is not available for series, but we assume it matches)
|
||||
matches.add("year")
|
||||
|
||||
elif isinstance(video, Movie) and kwargs.get("movie_kind") == "movie":
|
||||
if video.title and (
|
||||
sanitize(kwargs.get("title"))
|
||||
in (sanitize(name) for name in [video.title] + video.alternative_titles)
|
||||
):
|
||||
matches.add("title")
|
||||
|
||||
if video.imdb_id and f"tt{kwargs.get('imdb_id')}" == str(video.imdb_id):
|
||||
matches.add("imdb_id")
|
||||
|
||||
if video.year and kwargs.get("year") == video.year:
|
||||
matches.add("year")
|
||||
else:
|
||||
logger.info(f"{kwargs.get('movie_kind')} is not a valid movie_kind")
|
||||
|
||||
return matches
|
||||
|
||||
def _combine_release_info(self, release_dict):
|
||||
keys = ("source", "codec", "tags", "team")
|
||||
combine = [release_dict.get(key) for key in keys if release_dict.get(key)]
|
||||
if combine:
|
||||
return ".".join(combine)
|
||||
return "Unknown"
|
||||
r_info = ".".join(combine)
|
||||
if release_dict.get("team"):
|
||||
r_info += f"-{release_dict['team']}"
|
||||
|
||||
releases.append(r_info)
|
||||
|
||||
return "\n".join(releases)
|
||||
|
|
|
@ -123,6 +123,7 @@ def episodes():
|
|||
1,
|
||||
1,
|
||||
source="Blu-Ray",
|
||||
series_imdb_id="tt0903747",
|
||||
release_group="REWARD",
|
||||
resolution="720p",
|
||||
video_codec="H.264",
|
||||
|
|
|
@ -8,14 +8,39 @@ from subliminal_patch.core import Episode
|
|||
from subzero.language import Language
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "imdb_id,expected_id", [("tt0028950", 62790), ("tt0054407", 102006)]
)
def test_search_ids_movie(imdb_id, expected_id):
    """Searching by a movie IMDB ID yields the expected Argenteam ID first."""
    with ArgenteamProvider() as provider:
        ids = provider._search_ids(imdb_id)
        # Fail with a readable message instead of an IndexError when the
        # search comes back empty.
        assert ids, f"no IDs returned for {imdb_id}"
        assert ids[0] == expected_id
|
||||
|
||||
|
||||
def test_search_ids_tv_show():
    """Searching by series IMDB ID with season/episode yields ID 10075 first."""
    with ArgenteamProvider() as provider:
        ids = provider._search_ids("tt0306414", season=1, episode=1)
        # Fail with a readable message instead of an IndexError when the
        # search comes back empty.
        assert ids, "no IDs returned for tt0306414 S01E01"
        assert ids[0] == 10075
|
||||
|
||||
|
||||
def test_parse_subtitles_episode():
    """Parsing episode ID 10075 yields more than one subtitle."""
    with ArgenteamProvider() as provider:
        parsed = provider._parse_subtitles([10075])
        assert len(parsed) > 1
|
||||
|
||||
|
||||
def test_parse_subtitles_movie():
    """Parsing movie ID 61 yields more than three subtitles."""
    with ArgenteamProvider() as provider:
        parsed = provider._parse_subtitles([61], is_episode=False)
        assert len(parsed) > 3
|
||||
|
||||
|
||||
def test_get_matches_episode(episodes):
|
||||
episode = episodes["breaking_bad_s01e01"]
|
||||
subtitle = ArgenteamSubtitle(
|
||||
Language.fromalpha2("es"),
|
||||
None,
|
||||
"https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD",
|
||||
"BluRay x264 720p",
|
||||
{"title", "season", "episode", "imdb_id"},
|
||||
"Breaking.Bad.(2008).S01E01-Pilot.BluRay.x264.720p-REWARD\nBluRay x264 720p",
|
||||
{"series", "title", "season", "episode", "imdb_id"},
|
||||
)
|
||||
matches = subtitle.get_matches(episode)
|
||||
assert matches == {
|
||||
|
@ -52,10 +77,10 @@ def test_get_matches_movie(movies):
|
|||
"resolution",
|
||||
"edition",
|
||||
"video_codec",
|
||||
"streaming_service",
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.vcr
|
||||
def test_list_subtitles_movie(movies):
|
||||
item = movies["dune"]
|
||||
with ArgenteamProvider() as provider:
|
||||
|
@ -69,7 +94,20 @@ def test_list_subtitles_movie(movies):
|
|||
assert any(expected == sub.download_link for sub in subtitles)
|
||||
|
||||
|
||||
@pytest.mark.vcr
|
||||
def test_list_subtitles_movie_no_imdb(movies):
    """A movie without an IMDB ID produces no subtitles."""
    movie = movies["dune"]
    movie.imdb_id = None
    with ArgenteamProvider() as provider:
        found = provider.list_subtitles(movie, {Language("spa", "MX")})
        assert not found
|
||||
|
||||
|
||||
def test_list_subtitles_movie_not_found(movies):
    """A movie whose IMDB ID is "tt29318321832" produces no subtitles."""
    movie = movies["dune"]
    movie.imdb_id = "tt29318321832"
    with ArgenteamProvider() as provider:
        found = provider.list_subtitles(movie, {Language("spa", "MX")})
        assert not found
|
||||
|
||||
|
||||
def test_list_subtitles_episode(episodes):
|
||||
item = episodes["breaking_bad_s01e01"]
|
||||
with ArgenteamProvider() as provider:
|
||||
|
@ -82,29 +120,23 @@ def test_list_subtitles_episode(episodes):
|
|||
assert any(expected == sub.download_link for sub in subtitles)
|
||||
|
||||
|
||||
@pytest.mark.vcr
|
||||
def test_list_subtitles_episode_no_imdb_id(episodes):
|
||||
item = episodes["breaking_bad_s01e01"]
|
||||
item.series_imdb_id = None
|
||||
with ArgenteamProvider() as provider:
|
||||
assert not provider.list_subtitles(item, {Language("spa", "MX")})
|
||||
|
||||
|
||||
def test_list_subtitles_episode_not_found(episodes):
|
||||
item = episodes["breaking_bad_s01e01"]
|
||||
item.series_imdb_id = "tt29318321832"
|
||||
with ArgenteamProvider() as provider:
|
||||
assert not provider.list_subtitles(item, {Language("spa", "MX")})
|
||||
|
||||
|
||||
def test_download_subtitle(episodes):
|
||||
item = episodes["breaking_bad_s01e01"]
|
||||
with ArgenteamProvider() as provider:
|
||||
subtitles = provider.list_subtitles(item, {Language("spa", "MX")})
|
||||
subtitle = subtitles[0]
|
||||
provider.download_subtitle(subtitle)
|
||||
assert subtitle.content is not None
|
||||
|
||||
|
||||
@pytest.mark.vcr
|
||||
def test_list_subtitles_episode_with_tvdb():
|
||||
video = Episode(
|
||||
"Severance.S01E01.720p.BluRay.X264-REWARD.mkv",
|
||||
"Severance",
|
||||
1,
|
||||
1,
|
||||
source="Blu-Ray",
|
||||
release_group="REWARD",
|
||||
resolution="720p",
|
||||
video_codec="H.264",
|
||||
series_tvdb_id=371980,
|
||||
)
|
||||
with ArgenteamProvider() as provider:
|
||||
subtitles = provider.list_subtitles(video, {Language("spa", "MX")})
|
||||
assert len(subtitles) == 0
|
||||
provider.download_subtitle(subtitles[0])
|
||||
assert subtitles[0].is_valid()
|
||||
|
|
Loading…
Reference in a new issue