bazarr/libs/subliminal_patch/providers/sucha.py

189 lines
5.8 KiB
Python
Raw Normal View History

2020-10-01 01:39:25 +08:00
# -*- coding: utf-8 -*-
2020-10-12 03:40:07 +08:00
import io
2020-10-01 01:39:25 +08:00
import logging
import os
import zipfile
2020-10-12 03:40:07 +08:00
import rarfile
2020-10-01 01:39:25 +08:00
from requests import Session
from guessit import guessit
2020-10-12 03:40:07 +08:00
from subliminal import Episode, Movie
2020-10-01 01:39:25 +08:00
from subliminal.exceptions import ServiceUnavailable
2020-10-12 03:40:07 +08:00
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending
from subliminal_patch.exceptions import APIThrottled
2020-10-01 01:39:25 +08:00
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle, guess_matches
2020-10-12 03:40:07 +08:00
from subzero.language import Language
2020-10-01 01:39:25 +08:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
2021-01-26 05:18:03 +08:00
# Base URL for API requests; the endpoint name ("episode", "movie" or
# "download") is appended directly to it.
SERVER_URL = "http://sapidb.caretas.club/"
# Site URL passed as ``page_link`` for every subtitle created by this module.
PAGE_URL = "https://sucha.caretas.club/"
# Substrings checked against the lower-cased archive member name; a member
# containing any of them is skipped when picking the subtitle file.
UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.")
2020-10-01 01:39:25 +08:00
class SuchaSubtitle(Subtitle):
    """A single subtitle search result from the Sucha provider.

    Stores the identifiers needed to download the subtitle later
    (``download_id`` and ``download_type``) together with the release
    description and filename used for match guessing.
    """

    provider_name = "sucha"
    hash_verifiable = False

    def __init__(
        self, language, release_info, filename, download_id, download_type, matches
    ):
        """Build the subtitle.

        :param language: language of the subtitle.
        :param release_info: release description reported for the subtitle.
        :param filename: name of the subtitle file.
        :param download_id: identifier sent to the download endpoint.
        :param download_type: ``"episode"`` or ``"movie"``.
        :param matches: set of match names already established by the search.
        """
        super().__init__(language, hearing_impaired=False, page_link=PAGE_URL)
        self.download_id = download_id
        self.download_type = download_type
        self.language = language
        self.guessed_release_info = release_info
        self.filename = filename
        # Expose whichever description is longer; on a tie the filename wins.
        if len(release_info) > len(filename):
            self.release_info = release_info
        else:
            self.release_info = filename
        self.found_matches = matches

    @property
    def id(self):
        """Return the download identifier as the subtitle id."""
        return self.download_id

    def get_matches(self, video):
        """Extend and return the matches between this subtitle and *video*.

        Both the filename and the release description are run through
        guessit, and every match guessed from either is merged into
        ``found_matches``.
        """
        video_type = "episode" if isinstance(video, Episode) else "movie"
        for text in (self.filename, self.guessed_release_info):
            # A fresh options dict per call, exactly as many as before.
            guess = guessit(text, {"type": video_type})
            self.found_matches |= guess_matches(video, guess)
        return self.found_matches
class SuchaProvider(Provider):
    """Sucha Provider.

    Searches the Sucha HTTP API for Spanish subtitles of episodes and
    movies, and downloads them as zip or rar archives.
    """

    languages = {Language.fromalpha2(l) for l in ["es"]}
    language_list = list(languages)
    video_types = (Episode, Movie)

    def initialize(self):
        """Create the HTTP session used for every request."""
        self.session = Session()
        self.session.headers = {
            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, languages, video):
        """Search the API for subtitles matching *video*.

        :param languages: requested languages (unused; the first entry of
            ``language_list`` is always used).
        :param video: the ``Episode`` or ``Movie`` to search for.
        :return: list of :class:`SuchaSubtitle`; empty when nothing usable
            is returned.
        :raises requests.HTTPError: on a non-success HTTP status.
        """
        movie_year = video.year if video.year else "0"
        is_episode = isinstance(video, Episode)
        language = self.language_list[0]

        if is_episode:
            q = {"query": f"{video.series} S{video.season:02}E{video.episode:02}"}
        else:
            q = {"query": video.title, "year": movie_year}

        logger.debug("Searching subtitles: %s", q)
        result = self.session.get(
            SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10
        )
        result.raise_for_status()

        results = result.json()
        subtitles = []
        for item in results:
            matches = set()
            try:
                if (
                    video.title.lower() in item["title"].lower()
                    or video.title.lower() in item["alt_title"].lower()
                ):
                    matches.add("title")
            except TypeError:
                # NOTE(review): presumably the API answers with a non-list
                # payload when nothing is found, so items are not mappings.
                logger.debug("No subtitles found")
                return []

            if is_episode:
                if (
                    q["query"].lower() in item["title"].lower()
                    or q["query"].lower() in item["alt_title"].lower()
                ):
                    matches.update(("title", "series", "season", "episode", "year"))

            # Compare both sides as strings: comparing a str with a numeric
            # ``video.year`` is always False, so "year" was never matched here.
            if video.year and str(item["year"]) == str(video.year):
                matches.add("year")

            subtitles.append(
                SuchaSubtitle(
                    language,
                    item["release"],
                    item["filename"],
                    str(item["id"]),
                    "episode" if is_episode else "movie",
                    matches,
                )
            )
        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found for *video* (delegates to ``query``)."""
        return self.query(languages, video)

    def _check_response(self, response):
        """Raise ``ServiceUnavailable`` unless the status code is 200."""
        if response.status_code != 200:
            raise ServiceUnavailable(f"Bad status code: {response.status_code}")

    def _get_archive(self, content):
        """Open *content* (bytes) as a rar or zip archive.

        :raises APIThrottled: when the content is neither rar nor zip.
        """
        archive_stream = io.BytesIO(content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            return rarfile.RarFile(archive_stream)

        if zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            return zipfile.ZipFile(archive_stream)

        raise APIThrottled("Unsupported compressed format")

    def get_file(self, archive):
        """Return the bytes of the first acceptable subtitle in *archive*.

        Hidden files, files without a subtitle extension, and files whose
        name contains an ``UNDESIRED_FILES`` marker are skipped.

        :raises APIThrottled: when no member qualifies.
        """
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue

            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            if any(undesired in name.lower() for undesired in UNDESIRED_FILES):
                continue

            logger.debug("Returning from archive: %s", name)
            return archive.read(name)

        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        """Download the archive for *subtitle* and store its content.

        The extracted file, with line endings fixed, is assigned to
        ``subtitle.content``.
        """
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(
            SERVER_URL + "download",
            params={"id": subtitle.download_id, "type": subtitle.download_type},
            timeout=10,
        )
        response.raise_for_status()
        self._check_response(response)
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)