bazarr/libs/subliminal_patch/providers/karagarga.py
2022-06-29 00:07:18 -04:00

237 lines
6.9 KiB
Python

# -*- coding: utf-8 -*-
import datetime
import logging
from bs4 import BeautifulSoup as bso
from requests import Session
from subliminal.cache import region as cache_region
from subliminal.exceptions import AuthenticationError
from subliminal.exceptions import ConfigurationError
from subliminal_patch.core import Movie
from subliminal_patch.providers import Provider
from subliminal_patch.providers.utils import update_matches
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Provider identifier used as `provider_name` by both classes defined below.
_PROVIDER_NAME = "karagarga"
# Root URL of the main site; request paths such as "/takelogin.php" are
# appended to it.
_BASE_URL = "https://karagarga.in"
class KaragargaSubtitle(Subtitle):
    """Subtitle entry produced by the Karagarga provider.

    Besides what the base ``Subtitle`` stores, each instance keeps the
    release description and a download counter (the provider ranks results
    by that counter).
    """

    provider_name = _PROVIDER_NAME
    hash_verifiable = False

    def __init__(self, language, page_link, release_info, downloads):
        """Store the link, release description and download counter."""
        super().__init__(language, page_link=page_link)

        self.release_info = release_info
        self.downloads = downloads

        # Every instance starts out with title and year already matched;
        # get_matches() may add more on top of these.
        self._matches = {"title", "year"}

    def get_matches(self, video):
        """Update the stored match set against *video* and return it.

        The same set object is updated in place on every call.
        """
        matches = self._matches
        update_matches(matches, video, self.release_info, type="movie")
        return matches

    @property
    def id(self):
        """Identifier of the subtitle: its page link."""
        return self.page_link
# HTTP status code that `KaragargaProvider._cached_get` treats as
# "the session is not logged in".
_NO_LOGGED_IN_REDIRECT = 302
# Lifetime of cached `_cached_get` results: one week, expressed in seconds.
_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
class KaragargaProvider(Provider):
    """Subtitle provider that looks up movie subtitles on Karagarga.

    A search queries ``pots.php`` on the main site, follows the forum links
    of the matching rows and collects the attachments found in the forum
    posts. Both the main site and the forum require a logged-in session.

    Args:
        username: Main-site user name (required).
        password: Main-site password (required).
        f_username: Forum user name; falls back to ``username``.
        f_password: Forum password; falls back to ``password``.

    Raises:
        ConfigurationError: If ``username`` or ``password`` is empty.
    """

    provider_name = _PROVIDER_NAME

    # Only english for now
    languages = {Language.fromietf("en")}
    video_types = (Movie,)
    subtitle_class = KaragargaSubtitle

    # Upper bound on the number of forum threads fetched per search.
    _MAX_FORUM_SCANS = 3

    _session: Session

    def __init__(self, username: str, password: str, f_username=None, f_password=None):
        if not username or not password:
            raise ConfigurationError("Username/password not provided")

        self._username = username
        self._password = password
        # The forum may use different credentials; default to the main ones.
        self._f_username = f_username or username
        self._f_password = f_password or password

    def initialize(self):
        """Create the HTTP session and log in to the main site and the forum.

        Raises:
            AuthenticationError: If either login is rejected. The session is
                closed before the error propagates so it is not leaked.
        """
        self._session = Session()
        self._session.headers.update(
            {"authority": "karagarga.in", "user-agent": "Bazarr"}
        )
        try:
            self._login()
        except Exception:
            # Do not leave an open session behind when login fails.
            self._session.close()
            raise

    def terminate(self):
        """Close the HTTP session."""
        self._session.close()

    def _login(self):
        """Log in to the main site first, then to the forum."""
        self._login_main()
        self._login_forum()

    def _login_main(self):
        """Log in to the main site.

        Raises:
            AuthenticationError: If no ``pass`` cookie is set after the
                login request.
        """
        data = {
            "username": self._username,
            "password": self._password,
        }

        self._session.post(f"{_BASE_URL}/takelogin.php", data=data)

        if "pass" not in self._session.cookies:
            raise AuthenticationError("Invalid username/password")

        logger.debug("Karagarga login: OK")

    def _login_forum(self):
        """Log in to the forum with the forum credentials.

        Raises:
            AuthenticationError: If the ``session_id`` and ``pass_hash``
                cookies are not both set after the login request.
        """
        params = {
            "app": "core",
            "module": "global",
            "section": "login",
            "do": "process",
        }

        data = {
            # What's the origin of this key?
            "auth_key": "880ea6a14ea49e853634fbdc5015a024",
            "referer": "https://forum.karagarga.in/",
            # Use the forum-specific credentials; they equal the main ones
            # unless f_username/f_password were supplied.
            "ips_username": self._f_username,
            "ips_password": self._f_password,
            "rememberMe": "1",
            "anonymous": "1",
        }

        self._session.post(
            "https://forum.karagarga.in/index.php", params=params, data=data
        )

        if not {"session_id", "pass_hash"}.issubset(self._session.cookies.keys()):
            raise AuthenticationError("Invalid forum username/password")

        logger.debug("Karagarga forum login: OK")

    @cache_region.cache_on_arguments(expiration_time=_EXPIRATION_TIME)
    def _cached_get(self, url, params):
        """GET *url* with *params* and return the raw response body.

        Results are cached through ``cache_region`` for ``_EXPIRATION_TIME``
        seconds.

        Raises:
            AuthenticationError: If the response status is
                ``_NO_LOGGED_IN_REDIRECT``.
        """
        response = self._session.get(url, params=params)
        # NOTE(review): requests follows redirects by default, so the final
        # status is normally not 302 and this check may never fire; the
        # redirect hops are in response.history. Confirm how the site
        # signals an expired login before tightening this.
        if response.status_code == _NO_LOGGED_IN_REDIRECT:
            raise AuthenticationError("Not logged in")

        return response.content

    def _search_movie(self, title, year):
        """Search the site for *title* and collect subtitles from the forum.

        Only table rows that link to the forum, have eleven cells, contain
        ``(<year>`` in the title cell, mention English in the language cell
        and are marked as approved are followed. At most
        ``_MAX_FORUM_SCANS`` forum threads are fetched.

        Returns:
            A list of ``KaragargaSubtitle`` objects (possibly empty).
        """
        params = {"search": title, "status": "completed"}
        content = self._cached_get(f"{_BASE_URL}/pots.php", params)

        soup = bso(content, "html.parser")

        table = soup.find("table", {"cellspacing": "5"})
        if table is None:
            logger.debug("Failed to get table. Returning []")
            return []

        english = Language.fromietf("en")
        subtitles = []
        scans = 0

        for tr_ in table.find_all("tr"):  # type: ignore
            if "forum.karagarga" not in str(tr_):
                continue

            found_tds = tr_.find_all("td")
            if len(found_tds) != 11:
                continue

            # Separate name so the `title` argument is not rebound.
            row_title = found_tds[1].text
            if f"({year}" not in row_title:
                logger.debug("Year doesn't match: %s", row_title)
                continue

            logger.debug("Movie matched: %s", row_title)

            requested_language = found_tds[5].text
            if "English" not in requested_language:
                continue

            forum_item = found_tds[9]
            if "approved" not in str(forum_item):
                logger.debug("Non-approved subtitle: %s", row_title)
                continue

            link = forum_item.find("a")
            href = link.get("href") if link is not None else None
            if not href:
                # No usable link: skip instead of requesting the URL "None".
                continue
            forum_url = str(href)

            if scans >= self._MAX_FORUM_SCANS:
                logger.debug("Forum scans limit exceeded")
                break

            subtitles += self._parse_from_forum(forum_url, english)
            scans += 1

        return subtitles

    def _parse_from_forum(self, url, language):
        """Yield the subtitles found in every post of the forum page *url*."""
        logger.debug("Scanning forum for subs: %s", url)

        content = self._cached_get(url, {})

        soup = bso(content, "html.parser")

        for post in soup.find_all("div", {"class": "post entry-content"}):
            yield from _gen_subtitles(post, language)

    def list_subtitles(self, video, languages):
        """Return the single most downloaded subtitle found for *video*.

        ``languages`` is not consulted; every result is English.

        Returns:
            A one-element list, or an empty list when nothing was found.
        """
        subtitles = self._search_movie(video.title, video.year)
        if not subtitles:
            return []

        # Always return the most downloaded subtitle from the forum
        # (the first one found wins on ties).
        return [max(subtitles, key=lambda sub: sub.downloads)]

    def download_subtitle(self, subtitle):
        """Download *subtitle* from its page link and store the raw bytes.

        Raises an HTTP error (via ``raise_for_status``) on a failing status.
        """
        response = self._session.get(subtitle.page_link, allow_redirects=True)
        response.raise_for_status()
        subtitle.content = response.content
def _gen_subtitles(post, language):
    """Yield a ``KaragargaSubtitle`` for each distinct attachment in *post*.

    Args:
        post: Parsed forum post element; it is queried with ``select`` for
            ``p``, ``li.attachment`` and ``div`` descendants.
        language: Language assigned to every subtitle produced.

    Yields:
        One subtitle per distinct link URL. Elements without a download
        counter, with an unparsable counter, or without a link containing a
        ``<strong>`` tag are skipped.
    """
    seen_urls = set()

    for potential in post.select("p,li.attachment,div"):
        downloads = potential.find("span", {"class": "desc lighter"})
        if not downloads:
            continue

        try:
            # The first whitespace-separated token is read as the counter.
            download_count = int(downloads.text.split()[0])
            item = [a_ for a_ in potential.find_all("a") if a_.find("strong")][0]
            release_info = item.find("strong").text
        except (AttributeError, IndexError, KeyError, ValueError) as error:
            # IndexError covers an empty counter text and a missing link;
            # one malformed element must not abort the whole scan.
            logger.debug("Error parsing post: %s", error)
            continue

        url = item.get("href")
        # Nested elements can match the selector more than once, so the same
        # link may show up repeatedly; emit each URL only once.
        if not url or url in seen_urls:
            continue

        seen_urls.add(url)

        subtitle = KaragargaSubtitle(language, url, release_info, download_count)
        logger.debug("Valid subtitle found: %s - %s", release_info, subtitle)
        yield subtitle