mirror of
https://github.com/morpheus65535/bazarr.git
synced 2024-09-21 07:55:59 +08:00
Merge branch 'development' into morpheus
This commit is contained in:
commit
b8df31f4a6
|
@ -41,7 +41,8 @@ defaults = {
|
|||
'subfolder_custom': '',
|
||||
'upgrade_subs': 'True',
|
||||
'days_to_upgrade_subs': '7',
|
||||
'upgrade_manual': 'True'
|
||||
'upgrade_manual': 'True',
|
||||
'anti_captcha_provider': 'None'
|
||||
},
|
||||
'auth': {
|
||||
'type': 'None',
|
||||
|
@ -98,7 +99,15 @@ defaults = {
|
|||
},
|
||||
'assrt': {
|
||||
'token': ''
|
||||
}}
|
||||
},
|
||||
'anticaptcha': {
|
||||
'anti_captcha_key': ''
|
||||
},
|
||||
'deathbycaptcha': {
|
||||
'username': '',
|
||||
'password': ''
|
||||
}
|
||||
}
|
||||
|
||||
settings = simpleconfigparser(defaults=defaults)
|
||||
settings.read(os.path.join(args.config_dir, 'config', 'config.ini'))
|
||||
|
|
|
@ -791,12 +791,14 @@ def upgrade_subtitles():
|
|||
|
||||
providers_list = get_providers()
|
||||
providers_auth = get_providers_auth()
|
||||
|
||||
count_episode_to_upgrade = len(episodes_to_upgrade)
|
||||
count_movie_to_upgrade = len(movies_to_upgrade)
|
||||
|
||||
for episode in episodes_to_upgrade:
|
||||
for i, episode in enumerate(episodes_to_upgrade, 1):
|
||||
if episode[1] in ast.literal_eval(str(episode[9])):
|
||||
notifications.write(
|
||||
msg='Searching to upgrade ' + str(language_from_alpha2(episode[1])) + ' subtitles for this episode: ' +
|
||||
path_replace(episode[0]), queue='get_subtitle')
|
||||
notifications.write(msg='Upgrading series subtitles : ' + str(i) + '/' + str(count_episode_to_upgrade),
|
||||
queue='get_subtitle', duration='long')
|
||||
result = download_subtitle(path_replace(episode[0]), str(alpha3_from_alpha2(episode[1])),
|
||||
episode[3], episode[10], providers_list, providers_auth, str(episode[4]),
|
||||
episode[5], 'series', forced_minimum_score=int(episode[2]), is_upgrade=True)
|
||||
|
@ -810,11 +812,10 @@ def upgrade_subtitles():
|
|||
history_log(3, episode[6], episode[7], message, path, language_code, provider, score)
|
||||
send_notifications(episode[6], episode[7], message)
|
||||
|
||||
for movie in movies_to_upgrade:
|
||||
for i, movie in enumerate(movies_to_upgrade, 1):
|
||||
if movie[1] in ast.literal_eval(str(movie[8])):
|
||||
notifications.write(
|
||||
msg='Searching to upgrade ' + str(language_from_alpha2(movie[1])) + ' subtitles for this movie: ' +
|
||||
path_replace_movie(movie[0]), queue='get_subtitle')
|
||||
notifications.write(msg='Upgrading movie subtitles : ' + str(i) + '/' + str(count_movie_to_upgrade),
|
||||
queue='get_subtitle', duration='long')
|
||||
result = download_subtitle(path_replace_movie(movie[0]), str(alpha3_from_alpha2(movie[1])),
|
||||
movie[3], movie[9], providers_list, providers_auth, str(movie[4]),
|
||||
movie[5], 'movie', forced_minimum_score=int(movie[2]), is_upgrade=True)
|
||||
|
|
|
@ -17,6 +17,16 @@ from get_args import args
|
|||
# set subliminal_patch user agent
|
||||
os.environ["SZ_USER_AGENT"] = "Bazarr/1"
|
||||
|
||||
# set anti-captcha provider and key
|
||||
if settings.general.anti_captcha_provider == 'anti-captcha':
|
||||
os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
|
||||
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
|
||||
elif settings.general.anti_captcha_provider == 'AntiCaptchaProxyLessPitcher':
|
||||
os.environ["ANTICAPTCHA_CLASS"] = 'DBCProxyLess'
|
||||
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join({settings.deathbycaptcha.username, settings.deathbycaptcha.password})
|
||||
else:
|
||||
os.environ["ANTICAPTCHA_CLASS"] = ''
|
||||
|
||||
# Check if args.config_dir exist
|
||||
if not os.path.exists(args.config_dir):
|
||||
# Create config_dir directory tree
|
||||
|
|
|
@ -67,6 +67,8 @@ def configure_logging(debug=False):
|
|||
fh.setFormatter(f)
|
||||
fh.addFilter(BlacklistFilter())
|
||||
fh.addFilter(PublicIPFilter())
|
||||
fh.setLevel(log_level)
|
||||
logger.addHandler(fh)
|
||||
|
||||
if debug:
|
||||
logging.getLogger("apscheduler").setLevel(logging.DEBUG)
|
||||
|
@ -90,8 +92,7 @@ def configure_logging(debug=False):
|
|||
logging.getLogger("rebulk").setLevel(logging.WARNING)
|
||||
logging.getLogger("stevedore.extension").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("geventwebsocket.handler").setLevel(logging.WARNING)
|
||||
fh.setLevel(log_level)
|
||||
logger.addHandler(fh)
|
||||
|
||||
|
||||
|
||||
class MyFilter(logging.Filter):
|
||||
|
|
|
@ -1275,6 +1275,10 @@ def save_settings():
|
|||
settings_upgrade_manual = 'False'
|
||||
else:
|
||||
settings_upgrade_manual = 'True'
|
||||
settings_anti_captcha_provider = request.forms.get('settings_anti_captcha_provider')
|
||||
settings_anti_captcha_key = request.forms.get('settings_anti_captcha_key')
|
||||
settings_death_by_captcha_username = request.forms.get('settings_death_by_captcha_username')
|
||||
settings_death_by_captcha_password = request.forms.get('settings_death_by_captcha_password')
|
||||
|
||||
before = (unicode(settings.general.ip), int(settings.general.port), unicode(settings.general.base_url),
|
||||
unicode(settings.general.path_mappings), unicode(settings.general.getboolean('use_sonarr')),
|
||||
|
@ -1306,6 +1310,22 @@ def save_settings():
|
|||
settings.general.upgrade_subs = text_type(settings_upgrade_subs)
|
||||
settings.general.days_to_upgrade_subs = text_type(settings_days_to_upgrade_subs)
|
||||
settings.general.upgrade_manual = text_type(settings_upgrade_manual)
|
||||
settings.general.anti_captcha_provider = text_type(settings_anti_captcha_provider)
|
||||
settings.anticaptcha.anti_captcha_key = text_type(settings_anti_captcha_key)
|
||||
settings.deathbycaptcha.username = text_type(settings_death_by_captcha_username)
|
||||
settings.deathbycaptcha.password = text_type(settings_death_by_captcha_password)
|
||||
|
||||
# set anti-captcha provider and key
|
||||
if settings.general.anti_captcha_provider == 'anti-captcha':
|
||||
os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
|
||||
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
|
||||
elif settings.general.anti_captcha_provider == 'AntiCaptchaProxyLessPitcher':
|
||||
os.environ["ANTICAPTCHA_CLASS"] = 'DBCProxyLess'
|
||||
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join(
|
||||
{settings.deathbycaptcha.username, settings.deathbycaptcha.password})
|
||||
else:
|
||||
os.environ["ANTICAPTCHA_CLASS"] = ''
|
||||
|
||||
settings.general.minimum_score_movie = text_type(settings_general_minimum_score_movies)
|
||||
settings.general.use_embedded_subs = text_type(settings_general_embedded)
|
||||
settings.general.adaptive_searching = text_type(settings_general_adaptive_searching)
|
||||
|
|
279
libs/cfscrape.py
Normal file
279
libs/cfscrape.py
Normal file
|
@ -0,0 +1,279 @@
|
|||
import logging
|
||||
import random
|
||||
import time
|
||||
import re
|
||||
|
||||
# based off of https://gist.github.com/doko-desuka/58d9212461f62583f8df9bc6387fade2
|
||||
# and https://github.com/Anorov/cloudflare-scrape
|
||||
# and https://github.com/VeNoMouS/cloudflare-scrape-js2py
|
||||
|
||||
'''''''''
|
||||
Disables InsecureRequestWarning: Unverified HTTPS request is being made warnings.
|
||||
'''''''''
|
||||
import requests
|
||||
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||
''''''
|
||||
from requests.sessions import Session
|
||||
from copy import deepcopy
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
DEFAULT_USER_AGENTS = [
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"
|
||||
]
|
||||
|
||||
DEFAULT_USER_AGENT = random.choice(DEFAULT_USER_AGENTS)
|
||||
|
||||
BUG_REPORT = (
|
||||
"Cloudflare may have changed their technique, or there may be a bug in the script.\n\nPlease read " "https://github.com/Anorov/cloudflare-scrape#updates, then file a "
|
||||
"bug report at https://github.com/Anorov/cloudflare-scrape/issues.")
|
||||
|
||||
|
||||
class CloudflareScraper(Session):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CloudflareScraper, self).__init__(*args, **kwargs)
|
||||
|
||||
if "requests" in self.headers["User-Agent"]:
|
||||
# Spoof Firefox on Linux if no custom User-Agent has been set
|
||||
self.headers["User-Agent"] = random.choice(DEFAULT_USER_AGENTS)
|
||||
|
||||
def request(self, method, url, *args, **kwargs):
|
||||
resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
|
||||
|
||||
# Check if Cloudflare anti-bot is on
|
||||
if (resp.status_code in (503, 429)
|
||||
and resp.headers.get("Server", "").startswith("cloudflare")
|
||||
and b"jschl_vc" in resp.content
|
||||
and b"jschl_answer" in resp.content
|
||||
):
|
||||
return self.solve_cf_challenge(resp, **kwargs)
|
||||
|
||||
# Otherwise, no Cloudflare anti-bot detected
|
||||
return resp
|
||||
|
||||
def solve_cf_challenge(self, resp, **original_kwargs):
|
||||
body = resp.text
|
||||
parsed_url = urlparse(resp.url)
|
||||
domain = parsed_url.netloc
|
||||
submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
|
||||
|
||||
cloudflare_kwargs = deepcopy(original_kwargs)
|
||||
params = cloudflare_kwargs.setdefault("params", {})
|
||||
headers = cloudflare_kwargs.setdefault("headers", {})
|
||||
headers["Referer"] = resp.url
|
||||
|
||||
try:
|
||||
cf_delay = float(re.search('submit.*?(\d+)', body, re.DOTALL).group(1)) / 1000.0
|
||||
|
||||
form_index = body.find('id="challenge-form"')
|
||||
if form_index == -1:
|
||||
raise Exception('CF form not found')
|
||||
sub_body = body[form_index:]
|
||||
|
||||
s_match = re.search('name="s" value="(.+?)"', sub_body)
|
||||
if s_match:
|
||||
params["s"] = s_match.group(1) # On older variants this parameter is absent.
|
||||
params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', sub_body).group(1)
|
||||
params["pass"] = re.search(r'name="pass" value="(.+?)"', sub_body).group(1)
|
||||
|
||||
if body.find('id="cf-dn-', form_index) != -1:
|
||||
extra_div_expression = re.search('id="cf-dn-.*?>(.+?)<', sub_body).group(1)
|
||||
|
||||
# Initial value.
|
||||
js_answer = self.cf_parse_expression(
|
||||
re.search('setTimeout\(function\(.*?:(.*?)}', body, re.DOTALL).group(1)
|
||||
)
|
||||
# Extract the arithmetic operations.
|
||||
builder = re.search("challenge-form'\);\s*;(.*);a.value", body, re.DOTALL).group(1)
|
||||
# Remove a function semicolon before splitting on semicolons, else it messes the order.
|
||||
lines = builder.replace(' return +(p)}();', '', 1).split(';')
|
||||
|
||||
for line in lines:
|
||||
if len(line) and '=' in line:
|
||||
heading, expression = line.split('=', 1)
|
||||
if 'eval(eval(atob' in expression:
|
||||
# Uses the expression in an external <div>.
|
||||
expression_value = self.cf_parse_expression(extra_div_expression)
|
||||
elif '(function(p' in expression:
|
||||
# Expression + domain sampling function.
|
||||
expression_value = self.cf_parse_expression(expression, domain)
|
||||
else:
|
||||
expression_value = self.cf_parse_expression(expression)
|
||||
js_answer = self.cf_arithmetic_op(heading[-1], js_answer, expression_value)
|
||||
|
||||
if '+ t.length' in body:
|
||||
js_answer += len(domain) # Only older variants add the domain length.
|
||||
|
||||
params["jschl_answer"] = '%.10f' % js_answer
|
||||
|
||||
except Exception as e:
|
||||
# Something is wrong with the page.
|
||||
# This may indicate Cloudflare has changed their anti-bot
|
||||
# technique. If you see this and are running the latest version,
|
||||
# please open a GitHub issue so I can update the code accordingly.
|
||||
logging.error("[!] %s Unable to parse Cloudflare anti-bots page. "
|
||||
"Try upgrading cloudflare-scrape, or submit a bug report "
|
||||
"if you are running the latest version. Please read "
|
||||
"https://github.com/Anorov/cloudflare-scrape#updates "
|
||||
"before submitting a bug report." % e)
|
||||
raise
|
||||
|
||||
# Cloudflare requires a delay before solving the challenge.
|
||||
# Always wait the full delay + 1s because of 'time.sleep()' imprecision.
|
||||
time.sleep(cf_delay + 1.0)
|
||||
|
||||
# Requests transforms any request into a GET after a redirect,
|
||||
# so the redirect has to be handled manually here to allow for
|
||||
# performing other types of requests even as the first request.
|
||||
method = resp.request.method
|
||||
cloudflare_kwargs["allow_redirects"] = False
|
||||
|
||||
redirect = self.request(method, submit_url, **cloudflare_kwargs)
|
||||
|
||||
if 'Location' in redirect.headers:
|
||||
redirect_location = urlparse(redirect.headers["Location"])
|
||||
if not redirect_location.netloc:
|
||||
redirect_url = "%s://%s%s" % (parsed_url.scheme, domain, redirect_location.path)
|
||||
return self.request(method, redirect_url, **original_kwargs)
|
||||
return self.request(method, redirect.headers["Location"], **original_kwargs)
|
||||
else:
|
||||
return redirect
|
||||
|
||||
def cf_sample_domain_function(self, func_expression, domain):
|
||||
parameter_start_index = func_expression.find('}(') + 2
|
||||
# Send the expression with the "+" char and enclosing parenthesis included, as they are
|
||||
# stripped inside ".cf_parse_expression()'.
|
||||
sample_index = self.cf_parse_expression(
|
||||
func_expression[parameter_start_index: func_expression.rfind(')))')]
|
||||
)
|
||||
return ord(domain[int(sample_index)])
|
||||
|
||||
def cf_arithmetic_op(self, op, a, b):
|
||||
if op == '+':
|
||||
return a + b
|
||||
elif op == '/':
|
||||
return a / float(b)
|
||||
elif op == '*':
|
||||
return a * float(b)
|
||||
elif op == '-':
|
||||
return a - b
|
||||
else:
|
||||
raise Exception('Unknown operation')
|
||||
|
||||
def cf_parse_expression(self, expression, domain=None):
|
||||
|
||||
def _get_jsfuck_number(section):
|
||||
digit_expressions = section.replace('!+[]', '1').replace('+!![]', '1').replace('+[]', '0').split('+')
|
||||
return int(
|
||||
# Form a number string, with each digit as the sum of the values inside each parenthesis block.
|
||||
''.join(
|
||||
str(sum(int(digit_char) for digit_char in digit_expression[1:-1])) # Strip the parenthesis.
|
||||
for digit_expression in digit_expressions
|
||||
)
|
||||
)
|
||||
|
||||
if '/' in expression:
|
||||
dividend, divisor = expression.split('/')
|
||||
dividend = dividend[2:-1] # Strip the leading '+' char and the enclosing parenthesis.
|
||||
|
||||
if domain:
|
||||
# 2019-04-02: At this moment, this extra domain sampling function always appears on the
|
||||
# divisor side, at the end.
|
||||
divisor_a, divisor_b = divisor.split('))+(')
|
||||
divisor_a = _get_jsfuck_number(divisor_a[5:]) # Left-strip the sequence of "(+(+(".
|
||||
divisor_b = self.cf_sample_domain_function(divisor_b, domain)
|
||||
return _get_jsfuck_number(dividend) / float(divisor_a + divisor_b)
|
||||
else:
|
||||
divisor = divisor[2:-1]
|
||||
return _get_jsfuck_number(dividend) / float(_get_jsfuck_number(divisor))
|
||||
else:
|
||||
return _get_jsfuck_number(expression[2:-1])
|
||||
|
||||
@classmethod
|
||||
def create_scraper(cls, sess=None, **kwargs):
|
||||
"""
|
||||
Convenience function for creating a ready-to-go requests.Session (subclass) object.
|
||||
"""
|
||||
scraper = cls()
|
||||
|
||||
if sess:
|
||||
attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
|
||||
for attr in attrs:
|
||||
val = getattr(sess, attr, None)
|
||||
if val:
|
||||
setattr(scraper, attr, val)
|
||||
|
||||
return scraper
|
||||
|
||||
## Functions for integrating cloudflare-scrape with other applications and scripts
|
||||
|
||||
@classmethod
|
||||
def get_tokens(cls, url, user_agent=None, **kwargs):
|
||||
scraper = cls.create_scraper()
|
||||
if user_agent:
|
||||
scraper.headers["User-Agent"] = user_agent
|
||||
|
||||
try:
|
||||
resp = scraper.get(url, **kwargs)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
logging.error("'%s' returned an error. Could not collect tokens." % url)
|
||||
raise
|
||||
|
||||
domain = urlparse(resp.url).netloc
|
||||
cookie_domain = None
|
||||
|
||||
for d in scraper.cookies.list_domains():
|
||||
if d.startswith(".") and d in ("." + domain):
|
||||
cookie_domain = d
|
||||
break
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
|
||||
|
||||
return ({
|
||||
"__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
|
||||
"cf_clearance": scraper.cookies.get("cf_clearance", "", domain=cookie_domain)
|
||||
},
|
||||
scraper.headers["User-Agent"]
|
||||
)
|
||||
|
||||
def get_live_tokens(self, domain):
|
||||
for d in self.cookies.list_domains():
|
||||
if d.startswith(".") and d in ("." + domain):
|
||||
cookie_domain = d
|
||||
break
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
|
||||
|
||||
return ({
|
||||
"__cfduid": self.cookies.get("__cfduid", "", domain=cookie_domain),
|
||||
"cf_clearance": self.cookies.get("cf_clearance", "", domain=cookie_domain)
|
||||
},
|
||||
self.headers["User-Agent"]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_cookie_string(cls, url, user_agent=None, **kwargs):
|
||||
"""
|
||||
Convenience function for building a Cookie HTTP header value.
|
||||
"""
|
||||
tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
|
||||
return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
|
||||
|
||||
|
||||
create_scraper = CloudflareScraper.create_scraper
|
||||
get_tokens = CloudflareScraper.get_tokens
|
||||
get_cookie_string = CloudflareScraper.get_cookie_string
|
516
libs/deathbycaptcha.py
Normal file
516
libs/deathbycaptcha.py
Normal file
|
@ -0,0 +1,516 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
"""Death by Captcha HTTP and socket API clients.
|
||||
|
||||
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
|
||||
socket ones. Both offer the same functionalily, with the socket API
|
||||
sporting faster responses and using way less connections.
|
||||
|
||||
To access the socket API, use SocketClient class; for the HTTP API, use
|
||||
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
|
||||
connection opened and serializes all API requests sent through it, thus
|
||||
it is advised to keep a pool of them if you're script is heavily
|
||||
multithreaded.
|
||||
|
||||
Both SocketClient and HttpClient give you the following methods:
|
||||
|
||||
get_user()
|
||||
Returns your DBC account details as a dict with the following keys:
|
||||
|
||||
"user": your account numeric ID; if login fails, it will be the only
|
||||
item with the value of 0;
|
||||
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
|
||||
solved CAPTCHA in US cents;
|
||||
"balance": your DBC account balance in US cents;
|
||||
"is_banned": flag indicating whether your account is suspended or not.
|
||||
|
||||
get_balance()
|
||||
Returns your DBC account balance in US cents.
|
||||
|
||||
get_captcha(cid)
|
||||
Returns an uploaded CAPTCHA details as a dict with the following keys:
|
||||
|
||||
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
|
||||
be the only item with the value of 0;
|
||||
"text": the CAPTCHA text, if solved, otherwise None;
|
||||
"is_correct": flag indicating whether the CAPTCHA was solved correctly
|
||||
(DBC can detect that in rare cases).
|
||||
|
||||
The only argument `cid` is the CAPTCHA numeric ID.
|
||||
|
||||
get_text(cid)
|
||||
Returns an uploaded CAPTCHA text (None if not solved). The only argument
|
||||
`cid` is the CAPTCHA numeric ID.
|
||||
|
||||
report(cid)
|
||||
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
|
||||
CAPTCHA numeric ID. Returns True on success, False otherwise.
|
||||
|
||||
upload(captcha)
|
||||
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
|
||||
object (any object with `read` method defined, actually, so StringIO
|
||||
will do), or CAPTCHA image file name. On successul upload you'll get
|
||||
the CAPTCHA details dict (see get_captcha() method).
|
||||
|
||||
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
|
||||
to poll for its status periodically using get_captcha() or get_text()
|
||||
method until the CAPTCHA is solved and you get the text.
|
||||
|
||||
decode(captcha, timeout=DEFAULT_TIMEOUT)
|
||||
A convenient method that uploads a CAPTCHA and polls for its status
|
||||
periodically, but no longer than `timeout` (defaults to 60 seconds).
|
||||
If solved, you'll get the CAPTCHA details dict (see get_captcha()
|
||||
method for details). See upload() method for details on `captcha`
|
||||
argument.
|
||||
|
||||
Visit http://www.deathbycaptcha.com/user/api for updates.
|
||||
|
||||
"""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import errno
|
||||
import imghdr
|
||||
import random
|
||||
import os
|
||||
import select
|
||||
import socket
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib
|
||||
import urllib2
|
||||
try:
|
||||
from json import read as json_decode, write as json_encode
|
||||
except ImportError:
|
||||
try:
|
||||
from json import loads as json_decode, dumps as json_encode
|
||||
except ImportError:
|
||||
from simplejson import loads as json_decode, dumps as json_encode
|
||||
|
||||
|
||||
# API version and unique software ID
|
||||
API_VERSION = 'DBC/Python v4.6'
|
||||
|
||||
# Default CAPTCHA timeout and decode() polling interval
|
||||
DEFAULT_TIMEOUT = 60
|
||||
DEFAULT_TOKEN_TIMEOUT = 120
|
||||
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
|
||||
DFLT_POLL_INTERVAL = 3
|
||||
|
||||
# Base HTTP API url
|
||||
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
|
||||
|
||||
# Preferred HTTP API server's response content type, do not change
|
||||
HTTP_RESPONSE_TYPE = 'application/json'
|
||||
|
||||
# Socket API server's host & ports range
|
||||
SOCKET_HOST = 'api.dbcapi.me'
|
||||
SOCKET_PORTS = range(8123, 8131)
|
||||
|
||||
|
||||
def _load_image(captcha):
|
||||
if hasattr(captcha, 'read'):
|
||||
img = captcha.read()
|
||||
elif type(captcha) == bytearray:
|
||||
img = captcha
|
||||
else:
|
||||
img = ''
|
||||
try:
|
||||
captcha_file = open(captcha, 'rb')
|
||||
except Exception:
|
||||
raise
|
||||
else:
|
||||
img = captcha_file.read()
|
||||
captcha_file.close()
|
||||
if not len(img):
|
||||
raise ValueError('CAPTCHA image is empty')
|
||||
elif imghdr.what(None, img) is None:
|
||||
raise TypeError('Unknown CAPTCHA image type')
|
||||
else:
|
||||
return img
|
||||
|
||||
|
||||
class AccessDeniedException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Client(object):
|
||||
|
||||
"""Death by Captcha API Client."""
|
||||
|
||||
def __init__(self, username, password):
|
||||
self.is_verbose = False
|
||||
self.userpwd = {'username': username, 'password': password}
|
||||
|
||||
def _log(self, cmd, msg=''):
|
||||
if self.is_verbose:
|
||||
print '%d %s %s' % (time.time(), cmd, msg.rstrip())
|
||||
return self
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
def connect(self):
|
||||
pass
|
||||
|
||||
def get_user(self):
|
||||
"""Fetch user details -- ID, balance, rate and banned status."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_balance(self):
|
||||
"""Fetch user balance (in US cents)."""
|
||||
return self.get_user().get('balance')
|
||||
|
||||
def get_captcha(self, cid):
|
||||
"""Fetch a CAPTCHA details -- ID, text and correctness flag."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_text(self, cid):
|
||||
"""Fetch a CAPTCHA text."""
|
||||
return self.get_captcha(cid).get('text') or None
|
||||
|
||||
def report(self, cid):
|
||||
"""Report a CAPTCHA as incorrectly solved."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def upload(self, captcha):
|
||||
"""Upload a CAPTCHA.
|
||||
|
||||
Accepts file names and file-like objects. Returns CAPTCHA details
|
||||
dict on success.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def decode(self, captcha=None, timeout=None, **kwargs):
|
||||
"""
|
||||
Try to solve a CAPTCHA.
|
||||
|
||||
See Client.upload() for arguments details.
|
||||
|
||||
Uploads a CAPTCHA, polls for its status periodically with arbitrary
|
||||
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
|
||||
"""
|
||||
if not timeout:
|
||||
if not captcha:
|
||||
timeout = DEFAULT_TOKEN_TIMEOUT
|
||||
else:
|
||||
timeout = DEFAULT_TIMEOUT
|
||||
|
||||
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
|
||||
uploaded_captcha = self.upload(captcha, **kwargs)
|
||||
if uploaded_captcha:
|
||||
intvl_idx = 0 # POLL_INTERVAL index
|
||||
while deadline > time.time() and not uploaded_captcha.get('text'):
|
||||
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
|
||||
time.sleep(intvl)
|
||||
pulled = self.get_captcha(uploaded_captcha['captcha'])
|
||||
if pulled['captcha'] == uploaded_captcha['captcha']:
|
||||
uploaded_captcha = pulled
|
||||
if uploaded_captcha.get('text') and \
|
||||
uploaded_captcha.get('is_correct'):
|
||||
return uploaded_captcha
|
||||
|
||||
def _get_poll_interval(self, idx):
|
||||
"""Returns poll interval and next index depending on index provided"""
|
||||
|
||||
if len(POLLS_INTERVAL) > idx:
|
||||
intvl = POLLS_INTERVAL[idx]
|
||||
else:
|
||||
intvl = DFLT_POLL_INTERVAL
|
||||
idx += 1
|
||||
|
||||
return intvl, idx
|
||||
|
||||
|
||||
class HttpClient(Client):
|
||||
|
||||
"""Death by Captcha HTTP API client."""
|
||||
|
||||
def __init__(self, *args):
|
||||
Client.__init__(self, *args)
|
||||
self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
|
||||
|
||||
def _call(self, cmd, payload=None, headers=None):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
headers['Accept'] = HTTP_RESPONSE_TYPE
|
||||
headers['User-Agent'] = API_VERSION
|
||||
if hasattr(payload, 'items'):
|
||||
payload = urllib.urlencode(payload)
|
||||
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
|
||||
else:
|
||||
self._log('SEND', '%s' % cmd)
|
||||
if payload is not None:
|
||||
headers['Content-Length'] = len(payload)
|
||||
try:
|
||||
response = self.opener.open(urllib2.Request(
|
||||
HTTP_BASE_URL + '/' + cmd.strip('/'),
|
||||
data=payload,
|
||||
headers=headers
|
||||
)).read()
|
||||
except urllib2.HTTPError, err:
|
||||
if 403 == err.code:
|
||||
raise AccessDeniedException('Access denied, please check'
|
||||
' your credentials and/or balance')
|
||||
elif 400 == err.code or 413 == err.code:
|
||||
raise ValueError("CAPTCHA was rejected by the service, check"
|
||||
" if it's a valid image")
|
||||
elif 503 == err.code:
|
||||
raise OverflowError("CAPTCHA was rejected due to service"
|
||||
" overload, try again later")
|
||||
else:
|
||||
raise err
|
||||
else:
|
||||
self._log('RECV', '%d %s' % (len(response), response))
|
||||
try:
|
||||
return json_decode(response)
|
||||
except Exception:
|
||||
raise RuntimeError('Invalid API response')
|
||||
return {}
|
||||
|
||||
def get_user(self):
|
||||
return self._call('user', self.userpwd.copy()) or {'user': 0}
|
||||
|
||||
def get_captcha(self, cid):
|
||||
return self._call('captcha/%d' % cid) or {'captcha': 0}
|
||||
|
||||
def report(self, cid):
|
||||
return not self._call('captcha/%d/report' % cid,
|
||||
self.userpwd.copy()).get('is_correct')
|
||||
|
||||
def upload(self, captcha=None, **kwargs):
|
||||
boundary = binascii.hexlify(os.urandom(16))
|
||||
banner = kwargs.get('banner', '')
|
||||
if banner:
|
||||
kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
|
||||
body = '\r\n'.join(('\r\n'.join((
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="%s"' % k,
|
||||
'Content-Type: text/plain',
|
||||
'Content-Length: %d' % len(str(v)),
|
||||
'',
|
||||
str(v)
|
||||
))) for k, v in self.userpwd.items())
|
||||
|
||||
body += '\r\n'.join(('\r\n'.join((
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="%s"' % k,
|
||||
'Content-Type: text/plain',
|
||||
'Content-Length: %d' % len(str(v)),
|
||||
'',
|
||||
str(v)
|
||||
))) for k, v in kwargs.items())
|
||||
|
||||
if captcha:
|
||||
img = _load_image(captcha)
|
||||
body += '\r\n'.join((
|
||||
'',
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="captchafile"; '
|
||||
'filename="captcha"',
|
||||
'Content-Type: application/octet-stream',
|
||||
'Content-Length: %d' % len(img),
|
||||
'',
|
||||
img,
|
||||
'--%s--' % boundary,
|
||||
''
|
||||
))
|
||||
|
||||
response = self._call('captcha', body, {
|
||||
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
|
||||
}) or {}
|
||||
if response.get('captcha'):
|
||||
return response
|
||||
|
||||
|
||||
class SocketClient(Client):
|
||||
|
||||
"""Death by Captcha socket API client."""
|
||||
|
||||
TERMINATOR = '\r\n'
|
||||
|
||||
def __init__(self, *args):
|
||||
Client.__init__(self, *args)
|
||||
self.socket_lock = threading.Lock()
|
||||
self.socket = None
|
||||
|
||||
def close(self):
|
||||
if self.socket:
|
||||
self._log('CLOSE')
|
||||
try:
|
||||
self.socket.shutdown(socket.SHUT_RDWR)
|
||||
except socket.error:
|
||||
pass
|
||||
finally:
|
||||
self.socket.close()
|
||||
self.socket = None
|
||||
|
||||
def connect(self):
|
||||
if not self.socket:
|
||||
self._log('CONN')
|
||||
host = (socket.gethostbyname(SOCKET_HOST),
|
||||
random.choice(SOCKET_PORTS))
|
||||
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self.socket.settimeout(0)
|
||||
try:
|
||||
self.socket.connect(host)
|
||||
except socket.error, err:
|
||||
if (err.args[0] not in
|
||||
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
|
||||
self.close()
|
||||
raise err
|
||||
return self.socket
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def _sendrecv(self, sock, buf):
|
||||
self._log('SEND', buf)
|
||||
fds = [sock]
|
||||
buf += self.TERMINATOR
|
||||
response = ''
|
||||
intvl_idx = 0
|
||||
while True:
|
||||
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
|
||||
rds, wrs, exs = select.select((not buf and fds) or [],
|
||||
(buf and fds) or [],
|
||||
fds,
|
||||
intvl)
|
||||
if exs:
|
||||
raise IOError('select() failed')
|
||||
try:
|
||||
if wrs:
|
||||
while buf:
|
||||
buf = buf[wrs[0].send(buf):]
|
||||
elif rds:
|
||||
while True:
|
||||
s = rds[0].recv(256)
|
||||
if not s:
|
||||
raise IOError('recv(): connection lost')
|
||||
else:
|
||||
response += s
|
||||
except socket.error, err:
|
||||
if (err.args[0] not in
|
||||
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
|
||||
raise err
|
||||
if response.endswith(self.TERMINATOR):
|
||||
self._log('RECV', response)
|
||||
return response.rstrip(self.TERMINATOR)
|
||||
raise IOError('send/recv timed out')
|
||||
|
||||
def _call(self, cmd, data=None):
    # Issue one JSON command over the persistent socket, logging in first
    # when needed and retrying the exchange once on connection failure.
    if data is None:
        data = {}
    data['cmd'] = cmd
    data['version'] = API_VERSION
    request = json_encode(data)

    response = None
    for _ in range(2):
        # Any command except 'login' itself requires an authenticated
        # connection, so log in first on a fresh socket.
        if not self.socket and cmd != 'login':
            self._call('login', self.userpwd.copy())
        self.socket_lock.acquire()
        try:
            sock = self.connect()
            response = self._sendrecv(sock, request)
        except IOError, err:
            # Transient I/O failure: drop the socket and retry once.
            sys.stderr.write(str(err) + "\n")
            self.close()
        except socket.error, err:
            sys.stderr.write(str(err) + "\n")
            self.close()
            raise IOError('Connection refused')
        else:
            break
        finally:
            self.socket_lock.release()

    if response is None:
        raise IOError('Connection lost or timed out during API request')

    try:
        response = json_decode(response)
    except Exception:
        raise RuntimeError('Invalid API response')

    if not response.get('error'):
        return response

    # Map the server's error string onto a specific exception type.
    error = response['error']
    if error in ('not-logged-in', 'invalid-credentials'):
        raise AccessDeniedException('Access denied, check your credentials')
    elif 'banned' == error:
        raise AccessDeniedException('Access denied, account is suspended')
    elif 'insufficient-funds' == error:
        raise AccessDeniedException(
            'CAPTCHA was rejected due to low balance')
    elif 'invalid-captcha' == error:
        raise ValueError('CAPTCHA is not a valid image')
    elif 'service-overload' == error:
        raise OverflowError(
            'CAPTCHA was rejected due to service overload, try again later')
    else:
        # Unknown error: reset the connection before bailing out.
        self.socket_lock.acquire()
        self.close()
        self.socket_lock.release()
        raise RuntimeError('API server error occured: %s' % error)
|
||||
|
||||
def get_user(self):
    # Fetch account details; {'user': 0} signals "no such user".
    return self._call('user') or {'user': 0}
|
||||
|
||||
def get_captcha(self, cid):
    # Fetch the state of CAPTCHA cid; {'captcha': 0} signals "not found".
    return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
|
||||
|
||||
def upload(self, captcha=None, **kwargs):
    # Submit a CAPTCHA image (and an optional banner image) for solving.
    # Returns a dict with 'captcha', 'text', 'is_correct' on acceptance,
    # or None when the server rejected the upload.
    data = {}
    if captcha:
        data['captcha'] = base64.b64encode(_load_image(captcha))
    if kwargs:
        banner = kwargs.get('banner', '')
        if banner:
            kwargs['banner'] = base64.b64encode(_load_image(banner))
        data.update(kwargs)
    response = self._call('upload', data)
    if response.get('captcha'):
        uploaded_captcha = dict(
            (k, response.get(k))
            for k in ('captcha', 'text', 'is_correct')
        )
        # Normalize an empty answer to None for callers.
        if not uploaded_captcha['text']:
            uploaded_captcha['text'] = None
        return uploaded_captcha
|
||||
|
||||
def report(self, cid):
    # Report CAPTCHA cid as incorrectly solved; True when the report stuck.
    return not self._call('report', {'captcha': cid}).get('is_correct')
|
||||
|
||||
|
||||
if '__main__' == __name__:
    # Demo CLI: deathbycaptcha.py <username> <password> <captcha files...>
    # Put your DBC username & password here:
    # client = HttpClient(sys.argv[1], sys.argv[2])
    client = SocketClient(sys.argv[1], sys.argv[2])
    client.is_verbose = True

    print 'Your balance is %s US cents' % client.get_balance()

    for fn in sys.argv[3:]:
        try:
            # Put your CAPTCHA image file name or file-like object, and optional
            # solving timeout (in seconds) here:
            captcha = client.decode(fn, DEFAULT_TIMEOUT)
        except Exception, e:
            sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
            captcha = None

        if captcha:
            print 'CAPTCHA %d solved: %s' % \
                (captcha['captcha'], captcha['text'])

            # Report as incorrectly solved if needed. Make sure the CAPTCHA was
            # in fact incorrectly solved!
            # try:
            #     client.report(captcha['captcha'])
            # except Exception, e:
            #     sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
|
7
libs/python_anticaptcha/__init__.py
Normal file
7
libs/python_anticaptcha/__init__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
from .base import AnticaptchaClient
|
||||
from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
|
||||
from .proxy import Proxy
|
||||
from .exceptions import AnticaptchaException
|
||||
from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
|
||||
|
||||
AnticatpchaException = AnticaptchaException
|
114
libs/python_anticaptcha/base.py
Normal file
114
libs/python_anticaptcha/base.py
Normal file
|
@ -0,0 +1,114 @@
|
|||
import requests
|
||||
import time
|
||||
|
||||
from six.moves.urllib_parse import urljoin
|
||||
from .exceptions import AnticaptchaException
|
||||
|
||||
SLEEP_EVERY_CHECK_FINISHED = 3
|
||||
MAXIMUM_JOIN_TIME = 60 * 5
|
||||
|
||||
|
||||
class Job(object):
    # Handle for one task submitted to anti-captcha.com; polls the owning
    # client for results and exposes the various solution payloads.
    client = None
    task_id = None
    _last_result = None  # most recent getTaskResult payload

    def __init__(self, client, task_id):
        self.client = client
        self.task_id = task_id

    def _update(self):
        # Refresh the cached result from the remote API.
        self._last_result = self.client.getTaskResult(self.task_id)

    def check_is_ready(self):
        self._update()
        return self._last_result['status'] == 'ready'

    def get_solution_response(self):  # Recaptcha
        return self._last_result['solution']['gRecaptchaResponse']

    def get_token_response(self):  # Funcaptcha
        return self._last_result['solution']['token']

    def get_answers(self):
        return self._last_result['solution']['answers']

    def get_captcha_text(self):  # Image
        return self._last_result['solution']['text']

    def report_incorrect(self):
        return self.client.reportIncorrectImage(self.task_id)

    def join(self, maximum_time=None):
        # Block until the task is ready, polling every
        # SLEEP_EVERY_CHECK_FINISHED seconds; raises with code 250 when
        # maximum_time (default MAXIMUM_JOIN_TIME) is exceeded.
        elapsed_time = 0
        maximum_time = maximum_time or MAXIMUM_JOIN_TIME
        while not self.check_is_ready():
            time.sleep(SLEEP_EVERY_CHECK_FINISHED)
            elapsed_time += SLEEP_EVERY_CHECK_FINISHED
            # NOTE(review): elapsed_time is never None here, so the first
            # clause of this test is redundant.
            if elapsed_time is not None and elapsed_time > maximum_time:
                raise AnticaptchaException(None, 250,
                                           "The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format(
                                               maximum_time, elapsed_time))
|
||||
|
||||
|
||||
class AnticaptchaClient(object):
    # Thin HTTP wrapper over the anti-captcha.com JSON API.
    client_key = None
    CREATE_TASK_URL = "/createTask"
    TASK_RESULT_URL = "/getTaskResult"
    BALANCE_URL = "/getBalance"
    REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
    SOFT_ID = 847  # application identifier reported to the service
    language_pool = "en"

    def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
        self.client_key = client_key
        self.language_pool = language_pool
        self.base_url = "{proto}://{host}/".format(proto="https" if use_ssl else "http",
                                                   host=host)
        self.session = requests.Session()

    @property
    def client_ip(self):
        # Lazily resolved public IP; only used to enrich error 11 messages.
        if not hasattr(self, '_client_ip'):
            self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin']
        return self._client_ip

    def _check_response(self, response):
        # Raise AnticaptchaException for any non-zero errorId; error 11
        # gets our own IP appended to aid debugging.
        if response.get('errorId', False) == 11:
            response['errorDescription'] = "{} Your missing IP address is {}.".format(response['errorDescription'],
                                                                                      self.client_ip)
        if response.get('errorId', False):
            raise AnticaptchaException(response['errorId'],
                                       response['errorCode'],
                                       response['errorDescription'])

    def createTask(self, task):
        # Submit task.serialize() and return a Job handle for polling.
        request = {"clientKey": self.client_key,
                   "task": task.serialize(),
                   "softId": self.SOFT_ID,
                   "languagePool": self.language_pool,
                   }
        response = self.session.post(urljoin(self.base_url, self.CREATE_TASK_URL), json=request).json()
        self._check_response(response)
        return Job(self, response['taskId'])

    def getTaskResult(self, task_id):
        request = {"clientKey": self.client_key,
                   "taskId": task_id}
        response = self.session.post(urljoin(self.base_url, self.TASK_RESULT_URL), json=request).json()
        self._check_response(response)
        return response

    def getBalance(self):
        request = {"clientKey": self.client_key}
        response = self.session.post(urljoin(self.base_url, self.BALANCE_URL), json=request).json()
        self._check_response(response)
        return response['balance']

    def reportIncorrectImage(self, task_id):
        request = {"clientKey": self.client_key,
                   "taskId": task_id
                   }
        response = self.session.post(urljoin(self.base_url, self.REPORT_IMAGE_URL), json=request).json()
        self._check_response(response)
        # True when the service acknowledged the report with any status.
        return response.get('status', False) != False
|
23
libs/python_anticaptcha/exceptions.py
Normal file
23
libs/python_anticaptcha/exceptions.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
class AnticaptchaException(Exception):
    """Base error for the anti-captcha API, rendered as "[code:id]description"."""

    def __init__(self, error_id, error_code, error_description, *args):
        message = "[{}:{}]{}".format(error_code, error_id, error_description)
        super(AnticaptchaException, self).__init__(message)
        self.error_id = error_id
        self.error_code = error_code
        self.error_description = error_description


# Backwards-compatible alias preserving a historical typo of the name.
AnticatpchaException = AnticaptchaException


class InvalidWidthException(AnticaptchaException):
    """Raised when a form-field width is not one of the allowed column sizes."""

    def __init__(self, width):
        self.width = width
        super(InvalidWidthException, self).__init__(
            "AC-1", 1,
            'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (self.width,))


class MissingNameException(AnticaptchaException):
    """Raised when a named form field is serialized without any name."""

    def __init__(self, cls):
        self.cls = cls
        super(MissingNameException, self).__init__(
            "AC-2", 2,
            'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'.format(str(self.cls)))
|
199
libs/python_anticaptcha/fields.py
Normal file
199
libs/python_anticaptcha/fields.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
import six
|
||||
from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
|
||||
|
||||
|
||||
class BaseField(object):
    """Common serialization of the optional label/labelHint pair shared by
    every form field."""

    label = None
    labelHint = None

    def serialize(self, name=None):
        # Only truthy values are emitted; empty labels are dropped entirely.
        payload = {}
        for key in ('label', 'labelHint'):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload
|
||||
|
||||
|
||||
class NameBaseField(BaseField):
    """A field that must carry a form-field name when serialized."""

    name = None

    def serialize(self, name=None):
        data = super(NameBaseField, self).serialize(name)
        # An explicit argument wins over the instance attribute.
        resolved = name or self.name
        if not resolved:
            raise MissingNameException(cls=self.__class__)
        data['name'] = resolved
        return data
|
||||
|
||||
|
||||
class SimpleText(BaseField):
    # Static text content shown to the worker in the form.
    contentType = 'text'

    def __init__(self, content, label=None, labelHint=None, width=None):
        self.label = label
        self.labelHint = labelHint

        self.content = content
        self.width = width

    def serialize(self, name=None):
        data = super(SimpleText, self).serialize(name)
        data['contentType'] = self.contentType
        data['content'] = self.content

        if self.width:
            if self.width not in [100, 50, 33, 25]:
                raise InvalidWidthException(self.width)
            # NOTE(review): 'inputOptions' is left empty and 'width' is set at
            # the top level here, while TextInput/Textarea nest width inside
            # inputOptions — confirm which layout the API actually expects.
            data['inputOptions'] = {}
            data['width'] = self.width
        return data
|
||||
|
||||
|
||||
class Image(BaseField):
    """An image shown to the worker, referenced by URL."""

    contentType = 'image'

    def __init__(self, imageUrl, label=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.imageUrl = imageUrl

    def serialize(self, name=None):
        data = super(Image, self).serialize(name)
        data.update({'contentType': self.contentType,
                     'content': self.imageUrl})
        return data
|
||||
|
||||
|
||||
class WebLink(BaseField):
    # A clickable hyperlink presented to the worker.
    contentType = 'link'

    def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
        self.label = label
        self.labelHint = labelHint

        self.linkText = linkText
        self.linkUrl = linkUrl

        self.width = width

    def serialize(self, name=None):
        data = super(WebLink, self).serialize(name)
        data['contentType'] = self.contentType

        if self.width:
            if self.width not in [100, 50, 33, 25]:
                raise InvalidWidthException(self.width)
            # NOTE(review): like SimpleText, 'inputOptions' stays empty and
            # 'width' goes at the top level, unlike TextInput/Textarea which
            # nest width inside inputOptions — confirm the intended schema.
            data['inputOptions'] = {}
            data['width'] = self.width

        data.update({'content': {'url': self.linkUrl,
                                 'text': self.linkText}})

        return data
|
||||
|
||||
|
||||
class TextInput(NameBaseField):
    """Single-line text input collected from the worker."""

    def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
        self.label = label
        self.labelHint = labelHint
        self.placeHolder = placeHolder
        self.width = width

    def serialize(self, name=None):
        data = super(TextInput, self).serialize(name)
        data['inputType'] = 'text'
        # inputOptions is always present, even when empty.
        options = {}
        data['inputOptions'] = options
        if self.width:
            if self.width not in (100, 50, 33, 25):
                raise InvalidWidthException(self.width)
            options['width'] = str(self.width)
        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        return data
|
||||
|
||||
|
||||
class Textarea(NameBaseField):
    """Multi-line text input; note width is not validated here, unlike
    TextInput."""

    def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.placeHolder = placeHolder
        self.rows = rows
        self.width = width

    def serialize(self, name=None):
        data = super(Textarea, self).serialize(name)
        data['inputType'] = 'textarea'
        options = {}
        if self.rows:
            options['rows'] = str(self.rows)
        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        if self.width:
            options['width'] = str(self.width)
        data['inputOptions'] = options
        return data
|
||||
|
||||
|
||||
class Checkbox(NameBaseField):
    """A yes/no checkbox presented with the given text."""

    def __init__(self, text, label=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.text = text

    def serialize(self, name=None):
        data = super(Checkbox, self).serialize(name)
        data.update({'inputType': 'checkbox',
                     'inputOptions': {'label': self.text}})
        return data
|
||||
|
||||
|
||||
class Select(NameBaseField):
    """Drop-down choice; choices are (value, caption) pairs, with bare
    strings doubling as both value and caption."""

    type = 'select'

    def __init__(self, label=None, choices=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.choices = choices or ()

    def get_choices(self):
        for choice in self.choices:
            if isinstance(choice, six.text_type):
                yield choice, choice
            else:
                yield choice

    def serialize(self, name=None):
        data = super(Select, self).serialize(name)
        data['inputType'] = self.type
        data['inputOptions'] = [{"value": value, "caption": caption}
                                for value, caption in self.get_choices()]
        return data
|
||||
|
||||
|
||||
class Radio(Select):
    # Same choices model as Select, rendered as radio buttons.
    type = 'radio'
|
||||
|
||||
|
||||
class ImageUpload(NameBaseField):
    """Asks the worker to upload an image file."""

    def __init__(self, label=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        data = super(ImageUpload, self).serialize(name)
        data.update({'inputType': 'imageUpload'})
        return data
|
28
libs/python_anticaptcha/proxy.py
Normal file
28
libs/python_anticaptcha/proxy.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
from six.moves.urllib_parse import urlparse
|
||||
|
||||
|
||||
class Proxy(object):
    """Proxy connection details in the camelCase shape the anti-captcha
    API expects."""

    def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
        self.proxyType = proxy_type
        self.proxyAddress = proxy_address
        self.proxyPort = proxy_port
        self.proxyLogin = proxy_login
        self.proxyPassword = proxy_password

    def serialize(self):
        payload = {
            'proxyType': self.proxyType,
            'proxyAddress': self.proxyAddress,
            'proxyPort': self.proxyPort,
        }
        # Credentials are only included when at least one part is set.
        if self.proxyLogin or self.proxyPassword:
            payload['proxyLogin'] = self.proxyLogin
            payload['proxyPassword'] = self.proxyPassword
        return payload

    @classmethod
    def parse_url(cls, url):
        """Build a Proxy from a URL such as http://user:pw@host:port."""
        parts = urlparse(url)
        return cls(proxy_type=parts.scheme,
                   proxy_address=parts.hostname,
                   proxy_port=parts.port,
                   proxy_login=parts.username,
                   proxy_password=parts.password)
|
128
libs/python_anticaptcha/tasks.py
Normal file
128
libs/python_anticaptcha/tasks.py
Normal file
|
@ -0,0 +1,128 @@
|
|||
import base64
|
||||
from .fields import BaseField
|
||||
|
||||
|
||||
class BaseTask(object):
    """Root of all task payload builders."""

    def serialize(self, **result):
        # Keyword arguments pass straight through as the payload dict.
        return result
|
||||
|
||||
|
||||
class ProxyMixin(BaseTask):
    """Mixin adding proxy, user agent and optional cookies to a task
    payload."""

    def __init__(self, *args, **kwargs):
        # proxy and user_agent are mandatory; cookies default to empty.
        self.proxy = kwargs.pop('proxy')
        self.userAgent = kwargs.pop('user_agent')
        self.cookies = kwargs.pop('cookies', '')
        super(ProxyMixin, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        payload = super(ProxyMixin, self).serialize(**result)
        payload.update(self.proxy.serialize())
        payload['userAgent'] = self.userAgent
        if self.cookies:
            payload['cookies'] = self.cookies
        return payload
|
||||
|
||||
|
||||
class NoCaptchaTaskProxylessTask(BaseTask):
    """Recaptcha v2 task solved from the service's own IPs (no proxy)."""

    type = "NoCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None
    websiteSToken = None

    def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
        self.websiteURL = website_url
        self.websiteKey = website_key
        self.websiteSToken = website_s_token
        self.isInvisible = is_invisible

    def serialize(self):
        # Optional fields are only emitted when explicitly provided.
        payload = {'type': self.type,
                   'websiteURL': self.websiteURL,
                   'websiteKey': self.websiteKey}
        if self.websiteSToken is not None:
            payload['websiteSToken'] = self.websiteSToken
        if self.isInvisible is not None:
            payload['isInvisible'] = self.isInvisible
        return payload
|
||||
|
||||
|
||||
class FunCaptchaTask(ProxyMixin):
    """FunCaptcha task; always proxied via ProxyMixin."""

    type = "FunCaptchaTask"
    websiteURL = None
    websiteKey = None

    def __init__(self, website_url, website_key, *args, **kwargs):
        self.websiteURL = website_url
        self.websiteKey = website_key
        super(FunCaptchaTask, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        payload = super(FunCaptchaTask, self).serialize(**result)
        payload.update({'type': self.type,
                        'websiteURL': self.websiteURL,
                        'websitePublicKey': self.websiteKey})
        return payload
|
||||
|
||||
|
||||
class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
    # Proxied recaptcha variant: ProxyMixin contributes proxy/userAgent/
    # cookies on top of the proxyless payload.
    type = "NoCaptchaTask"
|
||||
|
||||
|
||||
class ImageToTextTask(object):
    """Payload builder for plain image-captcha recognition."""

    type = "ImageToTextTask"
    fp = None        # file-like object holding the captcha image bytes
    phrase = None
    case = None
    numeric = None
    math = None
    minLength = None
    maxLength = None

    def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
        self.fp = fp
        self.phrase = phrase
        self.case = case
        self.numeric = numeric
        self.math = math
        self.minLength = min_length
        self.maxLength = max_length

    def serialize(self):
        # The image is read once here and shipped base64-encoded.
        return dict(type=self.type,
                    body=base64.b64encode(self.fp.read()).decode('utf-8'),
                    phrase=self.phrase,
                    case=self.case,
                    numeric=self.numeric,
                    math=self.math,
                    minLength=self.minLength,
                    maxLength=self.maxLength)
|
||||
|
||||
|
||||
class CustomCaptchaTask(BaseTask):
    """Free-form task: an image plus an optional form description and
    assignment text for the worker."""

    type = 'CustomCaptchaTask'
    imageUrl = None
    assignment = None
    form = None

    def __init__(self, imageUrl, form=None, assignment=None):
        self.imageUrl = imageUrl
        self.form = form or {}
        self.assignment = assignment

    def serialize(self):
        data = super(CustomCaptchaTask, self).serialize()
        data.update({'type': self.type,
                     'imageUrl': self.imageUrl})
        if self.form:
            # Form entries may be BaseField instances or plain dicts.
            forms = []
            for field_name, field in self.form.items():
                if isinstance(field, BaseField):
                    forms.append(field.serialize(field_name))
                else:
                    entry = field.copy()
                    entry['name'] = field_name
                    forms.append(entry)
            data['forms'] = forms
        if self.assignment:
            data['assignment'] = self.assignment
        return data
|
|
@ -518,10 +518,20 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
|
|||
hints["expected_title"] = [hints["title"]]
|
||||
|
||||
guessed_result = guessit(guess_from, options=hints)
|
||||
|
||||
logger.debug('GuessIt found: %s', json.dumps(guessed_result, cls=GuessitEncoder, indent=4, ensure_ascii=False))
|
||||
video = Video.fromguess(path, guessed_result)
|
||||
video.hints = hints
|
||||
|
||||
# get possibly alternative title from the filename itself
|
||||
alt_guess = guessit(filename, options=hints)
|
||||
if "title" in alt_guess and alt_guess["title"] != guessed_result["title"]:
|
||||
if video_type == "episode":
|
||||
video.alternative_series.append(alt_guess["title"])
|
||||
else:
|
||||
video.alternative_titles.append(alt_guess["title"])
|
||||
logger.debug("Adding alternative title: %s", alt_guess["title"])
|
||||
|
||||
if dont_use_actual_file:
|
||||
return video
|
||||
|
||||
|
|
|
@ -8,10 +8,18 @@ import requests
|
|||
import xmlrpclib
|
||||
import dns.resolver
|
||||
|
||||
from requests import Session, exceptions
|
||||
from requests import exceptions
|
||||
from urllib3.util import connection
|
||||
from retry.api import retry_call
|
||||
from exceptions import APIThrottled
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal.cache import region
|
||||
from cfscrape import CloudflareScraper
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from subzero.lib.io import get_viable_encoding
|
||||
|
||||
|
@ -30,24 +38,58 @@ custom_resolver = dns.resolver.Resolver(configure=False)
|
|||
custom_resolver.nameservers = ['8.8.8.8', '1.1.1.1']
|
||||
|
||||
|
||||
class CertifiSession(Session):
|
||||
class CertifiSession(CloudflareScraper):
|
||||
timeout = 10
|
||||
|
||||
def __init__(self):
|
||||
super(CertifiSession, self).__init__()
|
||||
self.verify = pem_file
|
||||
self.headers.update({
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Pragma': 'no-cache',
|
||||
'DNT': '1'
|
||||
})
|
||||
|
||||
def request(self, *args, **kwargs):
|
||||
def request(self, method, url, *args, **kwargs):
|
||||
if kwargs.get('timeout') is None:
|
||||
kwargs['timeout'] = self.timeout
|
||||
return super(CertifiSession, self).request(*args, **kwargs)
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
domain = parsed_url.netloc
|
||||
|
||||
cache_key = "cf_data_%s" % domain
|
||||
|
||||
if not self.cookies.get("__cfduid", "", domain=domain):
|
||||
cf_data = region.get(cache_key)
|
||||
if cf_data is not NO_VALUE:
|
||||
cf_cookies, user_agent = cf_data
|
||||
logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
|
||||
for cookie, value in cf_cookies.iteritems():
|
||||
self.cookies.set(cookie, value, domain=domain)
|
||||
|
||||
self.headers['User-Agent'] = user_agent
|
||||
|
||||
ret = super(CertifiSession, self).request(method, url, *args, **kwargs)
|
||||
try:
|
||||
cf_data = self.get_live_tokens(domain)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if cf_data != region.get(cache_key) and self.cookies.get("__cfduid", "", domain=domain)\
|
||||
and self.cookies.get("cf_clearance", "", domain=domain):
|
||||
logger.debug("Storing cf data for %s: %s", domain, cf_data)
|
||||
region.set(cache_key, cf_data)
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
class RetryingSession(CertifiSession):
|
||||
proxied_functions = ("get", "post")
|
||||
|
||||
def __init__(self):
|
||||
super(CertifiSession, self).__init__()
|
||||
super(RetryingSession, self).__init__()
|
||||
self.verify = pem_file
|
||||
|
||||
proxy = os.environ.get('SZ_HTTP_PROXY')
|
||||
|
@ -62,7 +104,7 @@ class RetryingSession(CertifiSession):
|
|||
# fixme: may be a little loud
|
||||
logger.debug("Using proxy %s for: %s", self.proxies["http"], args[0])
|
||||
|
||||
return retry_call(getattr(super(CertifiSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
|
||||
return retry_call(getattr(super(RetryingSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
|
||||
exceptions=(exceptions.ConnectionError,
|
||||
exceptions.ProxyError,
|
||||
exceptions.SSLError,
|
||||
|
|
257
libs/subliminal_patch/pitcher.py
Normal file
257
libs/subliminal_patch/pitcher.py
Normal file
|
@ -0,0 +1,257 @@
|
|||
# coding=utf-8
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import json
|
||||
from subliminal.cache import region
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
|
||||
Proxy
|
||||
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PitcherRegistry(object):
    """Registry of captcha-solver ("pitcher") classes, indexed both by
    pitcher name and by the captcha-protected site, combined with a
    needs-proxy flag."""

    def __init__(self):
        # Fixed: these were mutable CLASS attributes, silently shared by
        # every PitcherRegistry instance; keep them per-instance.
        self.pitchers = []
        self.pitchers_by_key = {}

    def register(self, cls):
        """Class decorator: index cls under "<name>_<needs_proxy>" and
        "<source>_<needs_proxy>", then return it unchanged."""
        idx = len(self.pitchers)
        self.pitchers.append(cls)
        key = "%s_%s" % (cls.name, cls.needs_proxy)
        key_by_source = "%s_%s" % (cls.source, cls.needs_proxy)
        self.pitchers_by_key[key] = idx
        self.pitchers_by_key[key_by_source] = idx
        return cls

    def get_pitcher(self, name_or_site=None, with_proxy=False):
        """Return the registered pitcher class for name_or_site (falling
        back to the ANTICAPTCHA_CLASS environment variable).

        Raises Exception when no name is given or no pitcher matches."""
        name_or_site = name_or_site or os.environ.get("ANTICAPTCHA_CLASS")
        if not name_or_site:
            raise Exception("AntiCaptcha class not given, exiting")

        key = "%s_%s" % (name_or_site, with_proxy)

        if key not in self.pitchers_by_key:
            raise Exception("Pitcher %s not found (proxy: %s)" % (name_or_site, with_proxy))

        return self.pitchers[self.pitchers_by_key.get(key)]


# Module-level singleton; 'pitchers' is a historical alias.
registry = pitchers = PitcherRegistry()
|
||||
|
||||
|
||||
class Pitcher(object):
    """Abstract captcha solver. Subclasses provide get_client() and
    get_job(); throw() runs one solve attempt and records timing when the
    attempt succeeded."""

    name = None
    source = None
    needs_proxy = False
    tries = 3
    job = None
    client = None
    client_key = None
    website_url = None
    website_key = None
    website_name = None
    solve_time = None
    success = False

    def __init__(self, website_name, website_url, website_key, tries=3, client_key=None, *args, **kwargs):
        self.tries = tries
        # The account key may fall back to the environment.
        self.client_key = client_key or os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
        if not self.client_key:
            raise Exception("AntiCaptcha key not given, exiting")

        self.website_name = website_name
        self.website_key = website_key
        self.website_url = website_url
        self.success = False
        self.solve_time = None

    def get_client(self):
        raise NotImplementedError

    def get_job(self):
        raise NotImplementedError

    def _throw(self):
        # Default solve step: create the client, then the job.
        self.client = self.get_client()
        self.job = self.get_job()

    def throw(self):
        started = time.time()
        data = self._throw()
        if self.success:
            self.solve_time = time.time() - started
            logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
        return data
|
||||
|
||||
|
||||
@registry.register
class AntiCaptchaProxyLessPitcher(Pitcher):
    # Recaptcha solver via anti-captcha.com, without a proxy.
    name = "AntiCaptchaProxyLess"
    source = "anti-captcha.com"
    host = "api.anti-captcha.com"
    language_pool = "en"
    tries = 5
    use_ssl = True
    is_invisible = False

    def __init__(self, website_name, website_url, website_key, tries=3, host=None, language_pool=None,
                 use_ssl=True, is_invisible=False, *args, **kwargs):
        super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
                                                          **kwargs)
        self.host = host or self.host
        self.language_pool = language_pool or self.language_pool
        self.use_ssl = use_ssl
        self.is_invisible = is_invisible

    def get_client(self):
        return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)

    def get_job(self):
        task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
                                          is_invisible=self.is_invisible)
        return self.client.createTask(task)

    def _throw(self):
        # Retry loop around job creation/solving, mapping service error
        # codes to bail-out, retry-with-backoff, or re-raise.
        for i in range(self.tries):
            try:
                super(AntiCaptchaProxyLessPitcher, self)._throw()
                self.job.join()
                ret = self.job.get_solution_response()
                if ret:
                    self.success = True
                    return ret
            except AnticaptchaException as e:
                if i >= self.tries - 1:
                    logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
                    return

                if e.error_code == 'ERROR_ZERO_BALANCE':
                    logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
                    return

                elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
                    logger.info("%s: No captcha solving slot available, retrying", self.website_name)
                    time.sleep(5.0)
                    continue

                elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
                    logger.error("%s: Bad AntiCaptcha API key", self.website_name)
                    return

                elif e.error_id is None and e.error_code == 250:
                    # Job.join() raises error_id None / code 250 on timeout.
                    if i < self.tries:
                        logger.info("%s: Captcha solving timed out, retrying", self.website_name)
                        time.sleep(1.0)
                        continue
                    else:
                        logger.error("%s: Captcha solving timed out three times; bailing out", self.website_name)
                        return
                raise
|
||||
|
||||
|
||||
@registry.register
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
    # Proxied anti-captcha.com solver; forwards proxy, user agent and
    # cookies to the task.
    name = "AntiCaptcha"
    proxy = None
    needs_proxy = True
    user_agent = None
    cookies = None

    def __init__(self, *args, **kwargs):
        self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
        self.user_agent = kwargs.pop("user_agent")
        cookies = kwargs.pop("cookies", {})
        if isinstance(cookies, dict):
            # dict cookies are flattened to "k=v;k2=v2" (Python 2 iteritems).
            self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.iteritems()])

        super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)

    def get_job(self):
        task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
                             user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
        return self.client.createTask(task)
|
||||
|
||||
|
||||
@registry.register
class DBCProxyLessPitcher(Pitcher):
    # Recaptcha token solver via deathbycaptcha.com, no proxy.
    name = "DeathByCaptchaProxyLess"
    source = "deathbycaptcha.com"
    username = None
    password = None

    def __init__(self, website_name, website_url, website_key,
                 timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
        super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)

        # client_key is expected in "username:password" form.
        self.username, self.password = self.client_key.split(":", 1)
        self.timeout = timeout

    def get_client(self):
        return DBCClient(self.username, self.password)

    def get_job(self):
        pass

    @property
    def payload_dict(self):
        return {
            "googlekey": self.website_key,
            "pageurl": self.website_url
        }

    def _throw(self):
        super(DBCProxyLessPitcher, self)._throw()
        payload = json.dumps(self.payload_dict)
        for i in range(self.tries):
            try:
                #balance = self.client.get_balance()
                # type=4 selects DBC's token (recaptcha) API.
                data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
                if data and data["is_correct"] and data["text"]:
                    self.success = True
                    return data["text"]
            except:
                # NOTE(review): a bare except that immediately re-raises is a
                # no-op, so the retry loop never actually retries on error.
                raise
|
||||
|
||||
|
||||
@registry.register
|
||||
class DBCPitcher(DBCProxyLessPitcher):
|
||||
name = "DeathByCaptcha"
|
||||
proxy = None
|
||||
needs_proxy = True
|
||||
proxy_type = "HTTP"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.proxy = kwargs.pop("proxy")
|
||||
super(DBCPitcher, self).__init__(*args, **kwargs)
|
||||
|
||||
@property
|
||||
def payload_dict(self):
|
||||
payload = super(DBCPitcher, self).payload_dict
|
||||
payload.update({
|
||||
"proxytype": self.proxy_type,
|
||||
"proxy": self.proxy
|
||||
})
|
||||
return payload
|
||||
|
||||
|
||||
def load_verification(site_name, session, callback=lambda x: None):
|
||||
ccks = region.get("%s_data" % site_name, expiration_time=15552000) # 6m
|
||||
if ccks != NO_VALUE:
|
||||
cookies, user_agent = ccks
|
||||
logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
|
||||
session.headers["User-Agent"] = user_agent
|
||||
try:
|
||||
session.cookies._cookies.update(cookies)
|
||||
return callback(region)
|
||||
except:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def store_verification(site_name, session):
|
||||
region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))
|
|
@ -4,18 +4,17 @@ import re
|
|||
import datetime
|
||||
import subliminal
|
||||
import time
|
||||
from random import randint
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from requests import Session
|
||||
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
|
||||
from random import randint
|
||||
from requests import Session
|
||||
from subliminal.cache import region
|
||||
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
|
||||
from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
|
||||
Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re
|
||||
from subliminal.cache import region
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subliminal_patch.utils import sanitize
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
|
||||
from subliminal_patch.pitcher import pitchers, load_verification, store_verification
|
||||
from subzero.language import Language
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -64,6 +63,7 @@ class Addic7edProvider(_Addic7edProvider):
|
|||
USE_ADDICTED_RANDOM_AGENTS = False
|
||||
hearing_impaired_verifiable = True
|
||||
subtitle_class = Addic7edSubtitle
|
||||
server_url = 'https://www.addic7ed.com/'
|
||||
|
||||
sanitize_characters = {'-', ':', '(', ')', '.', '/'}
|
||||
|
||||
|
@ -75,45 +75,76 @@ class Addic7edProvider(_Addic7edProvider):
|
|||
self.session = Session()
|
||||
self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__
|
||||
|
||||
if self.USE_ADDICTED_RANDOM_AGENTS:
|
||||
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
|
||||
logger.debug("Addic7ed: using random user agents")
|
||||
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
|
||||
self.session.headers['Referer'] = self.server_url
|
||||
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
|
||||
logger.debug("Addic7ed: using random user agents")
|
||||
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
|
||||
self.session.headers['Referer'] = self.server_url
|
||||
|
||||
# login
|
||||
if self.username and self.password:
|
||||
ccks = region.get("addic7ed_cookies", expiration_time=86400)
|
||||
if ccks != NO_VALUE:
|
||||
try:
|
||||
self.session.cookies._cookies.update(ccks)
|
||||
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
|
||||
if r.status_code == 302:
|
||||
logger.info('Addic7ed: Login expired')
|
||||
region.delete("addic7ed_cookies")
|
||||
else:
|
||||
logger.info('Addic7ed: Reusing old login')
|
||||
self.logged_in = True
|
||||
return
|
||||
except:
|
||||
pass
|
||||
def check_verification(cache_region):
|
||||
rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
|
||||
headers={"Referer": self.server_url})
|
||||
if rr.status_code == 302:
|
||||
logger.info('Addic7ed: Login expired')
|
||||
cache_region.delete("addic7ed_data")
|
||||
else:
|
||||
logger.info('Addic7ed: Re-using old login')
|
||||
self.logged_in = True
|
||||
return True
|
||||
|
||||
if load_verification("addic7ed", self.session, callback=check_verification):
|
||||
return
|
||||
|
||||
logger.info('Addic7ed: Logging in')
|
||||
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
|
||||
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
|
||||
headers={"Referer": self.server_url + "login.php"})
|
||||
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
|
||||
'remember': 'true'}
|
||||
|
||||
if "relax, slow down" in r.content:
|
||||
raise TooManyRequests(self.username)
|
||||
tries = 0
|
||||
while tries < 3:
|
||||
r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
|
||||
if "grecaptcha" in r.content:
|
||||
logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
|
||||
'happen once every so often')
|
||||
|
||||
if r.status_code != 302:
|
||||
raise AuthenticationError(self.username)
|
||||
site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1)
|
||||
if not site_key:
|
||||
logger.error("Addic7ed: Captcha site-key not found!")
|
||||
return
|
||||
|
||||
region.set("addic7ed_cookies", self.session.cookies._cookies)
|
||||
pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
|
||||
user_agent=self.session.headers["User-Agent"],
|
||||
cookies=self.session.cookies.get_dict(),
|
||||
is_invisible=True)
|
||||
|
||||
result = pitcher.throw()
|
||||
if not result:
|
||||
raise Exception("Addic7ed: Couldn't solve captcha!")
|
||||
|
||||
data["recaptcha_response"] = result
|
||||
|
||||
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
|
||||
headers={"Referer": self.server_url + "login.php"})
|
||||
|
||||
if "relax, slow down" in r.content:
|
||||
raise TooManyRequests(self.username)
|
||||
|
||||
if r.status_code != 302:
|
||||
if "User <b></b> doesn't exist" in r.content and tries <= 2:
|
||||
logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
|
||||
tries += 1
|
||||
continue
|
||||
|
||||
raise AuthenticationError(self.username)
|
||||
break
|
||||
|
||||
store_verification("addic7ed", self.session)
|
||||
|
||||
logger.debug('Addic7ed: Logged in')
|
||||
self.logged_in = True
|
||||
|
||||
def terminate(self):
|
||||
self.session.close()
|
||||
|
||||
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
|
||||
def _get_show_ids(self):
|
||||
|
@ -140,7 +171,7 @@ class Addic7edProvider(_Addic7edProvider):
|
|||
|
||||
# populate the show ids
|
||||
show_ids = {}
|
||||
for show in soup.select('td.version > h3 > a[href^="/show/"]'):
|
||||
for show in soup.select('td > h3 > a[href^="/show/"]'):
|
||||
show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
|
||||
try:
|
||||
show_id = int(show['href'][6:])
|
||||
|
|
|
@ -11,8 +11,8 @@ from babelfish import language_converters
|
|||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
|
||||
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
|
||||
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, ServerProxy, Unauthorized, NoSession, \
|
||||
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
|
||||
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
|
||||
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError, sanitize
|
||||
from mixins import ProviderRetryMixin
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subliminal_patch.http import SubZeroRequestsTransport
|
||||
|
@ -45,6 +45,19 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
|
|||
def get_matches(self, video, hearing_impaired=False):
|
||||
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
|
||||
|
||||
# episode
|
||||
if isinstance(video, Episode) and self.movie_kind == 'episode':
|
||||
# series
|
||||
if video.series and (sanitize(self.series_name) in (
|
||||
sanitize(name) for name in [video.series] + video.alternative_series)):
|
||||
matches.add('series')
|
||||
# movie
|
||||
elif isinstance(video, Movie) and self.movie_kind == 'movie':
|
||||
# title
|
||||
if video.title and (sanitize(self.movie_name) in (
|
||||
sanitize(name) for name in [video.title] + video.alternative_titles)):
|
||||
matches.add('title')
|
||||
|
||||
sub_fps = None
|
||||
try:
|
||||
sub_fps = float(self.fps)
|
||||
|
@ -205,19 +218,19 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
|||
|
||||
season = episode = None
|
||||
if isinstance(video, Episode):
|
||||
query = video.series
|
||||
query = [video.series] + video.alternative_series
|
||||
season = video.season
|
||||
episode = episode = min(video.episode) if isinstance(video.episode, list) else video.episode
|
||||
|
||||
if video.is_special:
|
||||
season = None
|
||||
episode = None
|
||||
query = u"%s %s" % (video.series, video.title)
|
||||
query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
|
||||
logger.info("%s: Searching for special: %r", self.__class__, query)
|
||||
# elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
|
||||
# query = video.name.split(os.sep)[-1]
|
||||
else:
|
||||
query = video.title
|
||||
query = [video.title] + video.alternative_titles
|
||||
|
||||
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
|
||||
query=query, season=season, episode=episode, tag=video.original_name,
|
||||
|
@ -238,9 +251,11 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
|||
else:
|
||||
criteria.append({'imdbid': imdb_id[2:]})
|
||||
if query and season and episode:
|
||||
criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
|
||||
for q in query:
|
||||
criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
|
||||
elif query:
|
||||
criteria.append({'query': query.replace('\'', '')})
|
||||
for q in query:
|
||||
criteria.append({'query': q.replace('\'', '')})
|
||||
if not criteria:
|
||||
raise ValueError('Not enough information')
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import logging
|
|||
import os
|
||||
import time
|
||||
import inflect
|
||||
import cfscrape
|
||||
|
||||
from random import randint
|
||||
from zipfile import ZipFile
|
||||
|
@ -12,7 +13,9 @@ from zipfile import ZipFile
|
|||
from babelfish import language_converters
|
||||
from guessit import guessit
|
||||
from requests import Session
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal import Episode, ProviderError
|
||||
from subliminal.cache import region
|
||||
from subliminal.utils import sanitize_release_group
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
|
||||
|
@ -125,6 +128,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
self.session = Session()
|
||||
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
|
||||
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
|
||||
self.session.headers['Referer'] = "https://subscene.com"
|
||||
|
||||
def terminate(self):
|
||||
logger.info("Closing session")
|
||||
|
@ -198,43 +202,48 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
subtitles = []
|
||||
logger.debug(u"Searching for: %s", vfn)
|
||||
film = search(vfn, session=self.session)
|
||||
|
||||
if film and film.subtitles:
|
||||
logger.debug('Release results found: %s', len(film.subtitles))
|
||||
subtitles = self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No release results found')
|
||||
|
||||
time.sleep(self.search_throttle)
|
||||
|
||||
# re-search for episodes without explicit release name
|
||||
if isinstance(video, Episode):
|
||||
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
|
||||
term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
|
||||
time.sleep(self.search_throttle)
|
||||
logger.debug('Searching for alternative results: %s', term)
|
||||
film = search(term, session=self.session, release=False)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Alternative results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No alternative results found')
|
||||
|
||||
# packs
|
||||
if video.season_fully_aired:
|
||||
term = u"%s S%02i" % (video.series, video.season)
|
||||
logger.debug('Searching for packs: %s', term)
|
||||
for series in [video.series] + video.alternative_series:
|
||||
term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
|
||||
time.sleep(self.search_throttle)
|
||||
film = search(term, session=self.session)
|
||||
logger.debug('Searching for alternative results: %s', term)
|
||||
film = search(term, session=self.session, release=False)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Pack results found: %s', len(film.subtitles))
|
||||
logger.debug('Alternative results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No pack results found')
|
||||
else:
|
||||
logger.debug("Not searching for packs, because the season hasn't fully aired")
|
||||
logger.debug('No alternative results found')
|
||||
|
||||
# packs
|
||||
if video.season_fully_aired:
|
||||
term = u"%s S%02i" % (series, video.season)
|
||||
logger.debug('Searching for packs: %s', term)
|
||||
time.sleep(self.search_throttle)
|
||||
film = search(term, session=self.session)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Pack results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No pack results found')
|
||||
else:
|
||||
logger.debug("Not searching for packs, because the season hasn't fully aired")
|
||||
else:
|
||||
logger.debug('Searching for movie results: %s', video.title)
|
||||
film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
|
||||
if film and film.subtitles:
|
||||
subtitles += self.parse_results(video, film)
|
||||
for title in [video.title] + video.alternative_titles:
|
||||
logger.debug('Searching for movie results: %s', title)
|
||||
film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
|
||||
if film and film.subtitles:
|
||||
subtitles += self.parse_results(video, film)
|
||||
|
||||
logger.info("%s subtitles found" % len(subtitles))
|
||||
return subtitles
|
||||
|
|
|
@ -118,7 +118,7 @@ class SubsSabBzProvider(Provider):
|
|||
for row in rows[:10]:
|
||||
a_element_wrapper = row.find('td', { 'class': 'c2field' })
|
||||
if a_element_wrapper:
|
||||
element = row.find('a')
|
||||
element = a_element_wrapper.find('a')
|
||||
if element:
|
||||
link = element.get('href')
|
||||
logger.info('Found subtitle link %r', link)
|
||||
|
|
|
@ -4,6 +4,7 @@ import io
|
|||
import logging
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
|
||||
import rarfile
|
||||
|
||||
|
@ -23,6 +24,7 @@ from subliminal.utils import sanitize_release_group
|
|||
from subliminal.subtitle import guess_matches
|
||||
from subliminal.video import Episode, Movie
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subliminal_patch.pitcher import pitchers, load_verification, store_verification
|
||||
from subzero.language import Language
|
||||
|
||||
from random import randint
|
||||
|
@ -142,6 +144,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
|
||||
self.session.headers['Referer'] = self.server_url
|
||||
logger.debug('Referer set to %s', self.session.headers['Referer'])
|
||||
load_verification("titlovi", self.session)
|
||||
|
||||
def terminate(self):
|
||||
self.session.close()
|
||||
|
@ -182,110 +185,144 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
|||
r = self.session.get(self.search_url, params=params, timeout=10)
|
||||
r.raise_for_status()
|
||||
except RequestException as e:
|
||||
logger.exception('RequestException %s', e)
|
||||
break
|
||||
captcha_passed = False
|
||||
if e.response.status_code == 403 and "data-sitekey" in e.response.content:
|
||||
logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
|
||||
'happen once every so often')
|
||||
|
||||
try:
|
||||
soup = BeautifulSoup(r.content, 'lxml')
|
||||
site_key = re.search(r'data-sitekey="(.+?)"', e.response.content).group(1)
|
||||
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content).group(1)
|
||||
challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content).group(1)
|
||||
if not all([site_key, challenge_s, challenge_ray]):
|
||||
raise Exception("titlovi: Captcha site-key not found!")
|
||||
|
||||
# number of results
|
||||
result_count = int(soup.select_one('.results_count b').string)
|
||||
except:
|
||||
result_count = None
|
||||
pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
|
||||
user_agent=self.session.headers["User-Agent"],
|
||||
cookies=self.session.cookies.get_dict(),
|
||||
is_invisible=True)
|
||||
|
||||
# exit if no results
|
||||
if not result_count:
|
||||
if not subtitles:
|
||||
logger.debug('No subtitles found')
|
||||
else:
|
||||
logger.debug("No more subtitles found")
|
||||
break
|
||||
result = pitcher.throw()
|
||||
if not result:
|
||||
raise Exception("titlovi: Couldn't solve captcha!")
|
||||
|
||||
# number of pages with results
|
||||
pages = int(math.ceil(result_count / float(items_per_page)))
|
||||
s_params = {
|
||||
"s": challenge_s,
|
||||
"id": challenge_ray,
|
||||
"g-recaptcha-response": result,
|
||||
}
|
||||
r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
|
||||
allow_redirects=False)
|
||||
r.raise_for_status()
|
||||
r = self.session.get(self.search_url, params=params, timeout=10)
|
||||
r.raise_for_status()
|
||||
store_verification("titlovi", self.session)
|
||||
captcha_passed = True
|
||||
|
||||
# get current page
|
||||
if 'pg' in params:
|
||||
current_page = int(params['pg'])
|
||||
if not captcha_passed:
|
||||
logger.exception('RequestException %s', e)
|
||||
break
|
||||
else:
|
||||
try:
|
||||
soup = BeautifulSoup(r.content, 'lxml')
|
||||
|
||||
try:
|
||||
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
|
||||
for sub in sublist:
|
||||
# subtitle id
|
||||
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
|
||||
# get download link
|
||||
download_link = self.download_url + sid
|
||||
# title and alternate title
|
||||
match = title_re.search(sub.a.string)
|
||||
if match:
|
||||
_title = match.group('title')
|
||||
alt_title = match.group('altitle')
|
||||
# number of results
|
||||
result_count = int(soup.select_one('.results_count b').string)
|
||||
except:
|
||||
result_count = None
|
||||
|
||||
# exit if no results
|
||||
if not result_count:
|
||||
if not subtitles:
|
||||
logger.debug('No subtitles found')
|
||||
else:
|
||||
continue
|
||||
logger.debug("No more subtitles found")
|
||||
break
|
||||
|
||||
# page link
|
||||
page_link = self.server_url + sub.a.attrs['href']
|
||||
# subtitle language
|
||||
match = lang_re.search(sub.select_one('.lang').attrs['src'])
|
||||
if match:
|
||||
try:
|
||||
# decode language
|
||||
lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
|
||||
except ValueError:
|
||||
# number of pages with results
|
||||
pages = int(math.ceil(result_count / float(items_per_page)))
|
||||
|
||||
# get current page
|
||||
if 'pg' in params:
|
||||
current_page = int(params['pg'])
|
||||
|
||||
try:
|
||||
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
|
||||
for sub in sublist:
|
||||
# subtitle id
|
||||
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
|
||||
# get download link
|
||||
download_link = self.download_url + sid
|
||||
# title and alternate title
|
||||
match = title_re.search(sub.a.string)
|
||||
if match:
|
||||
_title = match.group('title')
|
||||
alt_title = match.group('altitle')
|
||||
else:
|
||||
continue
|
||||
|
||||
# relase year or series start year
|
||||
match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
|
||||
if match:
|
||||
r_year = int(match.group('year'))
|
||||
# fps
|
||||
match = fps_re.search(sub.select_one('.fps').string)
|
||||
if match:
|
||||
fps = match.group('fps')
|
||||
# releases
|
||||
releases = str(sub.select_one('.fps').parent.contents[0].string)
|
||||
# page link
|
||||
page_link = self.server_url + sub.a.attrs['href']
|
||||
# subtitle language
|
||||
match = lang_re.search(sub.select_one('.lang').attrs['src'])
|
||||
if match:
|
||||
try:
|
||||
# decode language
|
||||
lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# handle movies and series separately
|
||||
if is_episode:
|
||||
# season and episode info
|
||||
sxe = sub.select_one('.s0xe0y').string
|
||||
r_season = None
|
||||
r_episode = None
|
||||
if sxe:
|
||||
match = season_re.search(sxe)
|
||||
if match:
|
||||
r_season = int(match.group('season'))
|
||||
match = episode_re.search(sxe)
|
||||
if match:
|
||||
r_episode = int(match.group('episode'))
|
||||
# relase year or series start year
|
||||
match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
|
||||
if match:
|
||||
r_year = int(match.group('year'))
|
||||
# fps
|
||||
match = fps_re.search(sub.select_one('.fps').string)
|
||||
if match:
|
||||
fps = match.group('fps')
|
||||
# releases
|
||||
releases = str(sub.select_one('.fps').parent.contents[0].string)
|
||||
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, season=r_season, episode=r_episode,
|
||||
year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group,
|
||||
asked_for_episode=episode)
|
||||
else:
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group)
|
||||
logger.debug('Found subtitle %r', subtitle)
|
||||
# handle movies and series separately
|
||||
if is_episode:
|
||||
# season and episode info
|
||||
sxe = sub.select_one('.s0xe0y').string
|
||||
r_season = None
|
||||
r_episode = None
|
||||
if sxe:
|
||||
match = season_re.search(sxe)
|
||||
if match:
|
||||
r_season = int(match.group('season'))
|
||||
match = episode_re.search(sxe)
|
||||
if match:
|
||||
r_episode = int(match.group('episode'))
|
||||
|
||||
# prime our matches so we can use the values later
|
||||
subtitle.get_matches(video)
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, season=r_season, episode=r_episode,
|
||||
year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group,
|
||||
asked_for_episode=episode)
|
||||
else:
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group)
|
||||
logger.debug('Found subtitle %r', subtitle)
|
||||
|
||||
# add found subtitles
|
||||
subtitles.append(subtitle)
|
||||
# prime our matches so we can use the values later
|
||||
subtitle.get_matches(video)
|
||||
|
||||
finally:
|
||||
soup.decompose()
|
||||
# add found subtitles
|
||||
subtitles.append(subtitle)
|
||||
|
||||
# stop on last page
|
||||
if current_page >= pages:
|
||||
break
|
||||
finally:
|
||||
soup.decompose()
|
||||
|
||||
# increment current page
|
||||
params['pg'] = current_page + 1
|
||||
logger.debug('Getting page %d', params['pg'])
|
||||
# stop on last page
|
||||
if current_page >= pages:
|
||||
break
|
||||
|
||||
# increment current page
|
||||
params['pg'] = current_page + 1
|
||||
logger.debug('Getting page %d', params['pg'])
|
||||
|
||||
return subtitles
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import subliminal
|
|||
import base64
|
||||
import zlib
|
||||
from subliminal import __short_version__
|
||||
from subliminal.refiners.omdb import OMDBClient, refine
|
||||
from subliminal.refiners.omdb import OMDBClient, refine as refine_orig, Episode, Movie
|
||||
|
||||
|
||||
class SZOMDBClient(OMDBClient):
|
||||
|
@ -63,5 +63,13 @@ class SZOMDBClient(OMDBClient):
|
|||
return j
|
||||
|
||||
|
||||
def refine(video, **kwargs):
|
||||
refine_orig(video, **kwargs)
|
||||
if isinstance(video, Episode) and video.series_imdb_id:
|
||||
video.series_imdb_id = video.series_imdb_id.strip()
|
||||
elif isinstance(video, Movie) and video.imdb_id:
|
||||
video.imdb_id = video.imdb_id.strip()
|
||||
|
||||
|
||||
omdb_client = SZOMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__})
|
||||
subliminal.refiners.omdb.omdb_client = omdb_client
|
||||
|
|
|
@ -38,6 +38,8 @@ class Subtitle(Subtitle_):
|
|||
plex_media_fps = None
|
||||
skip_wrong_fps = False
|
||||
wrong_fps = False
|
||||
wrong_series = False
|
||||
wrong_season_ep = False
|
||||
is_pack = False
|
||||
asked_for_release_group = None
|
||||
asked_for_episode = None
|
||||
|
@ -356,7 +358,8 @@ def guess_matches(video, guess, partial=False):
|
|||
matches = set()
|
||||
if isinstance(video, Episode):
|
||||
# series
|
||||
if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
|
||||
if video.series and 'title' in guess and sanitize(guess['title']) in (
|
||||
sanitize(name) for name in [video.series] + video.alternative_series):
|
||||
matches.add('series')
|
||||
# title
|
||||
if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
|
||||
|
@ -384,7 +387,8 @@ def guess_matches(video, guess, partial=False):
|
|||
if video.year and 'year' in guess and guess['year'] == video.year:
|
||||
matches.add('year')
|
||||
# title
|
||||
if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
|
||||
if video.title and 'title' in guess and sanitize(guess['title']) in (
|
||||
sanitize(name) for name in [video.title] + video.alternative_titles):
|
||||
matches.add('title')
|
||||
|
||||
# release_group
|
||||
|
|
|
@ -244,17 +244,20 @@
|
|||
url: url_notifications,
|
||||
success: function (data) {
|
||||
if (data !== "") {
|
||||
data = JSON.parse(data);
|
||||
var msg = data[0];
|
||||
var type = data[1];
|
||||
var duration = data[2];
|
||||
var button = data[3];
|
||||
var queue = data[4];
|
||||
data = JSON.parse(data);
|
||||
var msg = data[0];
|
||||
var type = data[1];
|
||||
var duration = data[2];
|
||||
var button = data[3];
|
||||
var queue = data[4];
|
||||
|
||||
if (duration === 'temporary') {
|
||||
timeout = 3000;
|
||||
killer = queue;
|
||||
} else {
|
||||
if (duration === 'temporary') {
|
||||
timeout = 3000;
|
||||
killer = queue;
|
||||
} else if (duration === 'long') {
|
||||
timeout = 15000;
|
||||
killer = queue;
|
||||
} else {
|
||||
timeout = false;
|
||||
killer = false;
|
||||
}
|
||||
|
|
|
@ -1228,12 +1228,104 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ui dividing header">Anti-captcha options</div>
|
||||
<div class="twelve wide column">
|
||||
<div class="ui grid">
|
||||
<div class="middle aligned row">
|
||||
<div class="right aligned four wide column">
|
||||
<label>Provider</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<select name="settings_anti_captcha_provider" id="settings_anti_captcha_provider" class="ui fluid selection dropdown">
|
||||
<option value="None">None</option>
|
||||
<option value="anti-captcha">Anti-Captcha</option>
|
||||
<option value="death-by-captcha">Death by Captcha</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="collapsed center aligned column">
|
||||
<div class="ui basic icon"
|
||||
data-tooltip='Choose the anti-captcha provider you want to use.'
|
||||
data-inverted="">
|
||||
<i class="help circle large icon"></i>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="middle aligned row anticaptcha">
|
||||
<div class="two wide column"></div>
|
||||
<div class="right aligned four wide column">
|
||||
<label>Provider website</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<a href="http://getcaptchasolution.com/eixxo1rsnw" target="_blank">Anti-Captcha.com</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="middle aligned row anticaptcha">
|
||||
<div class="two wide column"></div>
|
||||
<div class="right aligned four wide column">
|
||||
<label>Account addKey</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<div class='field'>
|
||||
<div class="ui fluid input">
|
||||
<input id="settings_anti_captcha_key" name="settings_anti_captcha_key"
|
||||
type="text" value="{{ settings.anticaptcha.anti_captcha_key }}">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="middle aligned row deathbycaptcha">
|
||||
<div class="two wide column"></div>
|
||||
<div class="right aligned four wide column">
|
||||
<label>Provider website</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<a href="https://www.deathbycaptcha.com" target="_blank">DeathByCaptcha.com</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="middle aligned row deathbycaptcha">
|
||||
<div class="two wide column"></div>
|
||||
<div class="right aligned four wide column">
|
||||
<label>Username</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<div class='field'>
|
||||
<div class="ui fluid input">
|
||||
<input id="settings_death_by_captcha_username" name="settings_death_by_captcha_username"
|
||||
type="text" value="{{ settings.deathbycaptcha.username }}">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="middle aligned row deathbycaptcha">
|
||||
<div class="two wide column"></div>
|
||||
<div class="right aligned four wide column">
|
||||
<label>Password</label>
|
||||
</div>
|
||||
<div class="five wide column">
|
||||
<div class='field'>
|
||||
<div class="ui fluid input">
|
||||
<input id="settings_death_by_captcha_password" name="settings_death_by_captcha_password"
|
||||
type="text" value="{{ settings.deathbycaptcha.password }}">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ui dividing header">Subtitles providers</div>
|
||||
<div class="twelve wide column">
|
||||
<div class="ui grid">
|
||||
<div class="middle aligned row">
|
||||
<div class="right aligned four wide column">
|
||||
<label>Addic7ed</label>
|
||||
<label>Addic7ed (require anti-captcha)</label>
|
||||
</div>
|
||||
<div class="one wide column">
|
||||
<div id="addic7ed" class="ui toggle checkbox provider">
|
||||
|
@ -1703,7 +1795,7 @@
|
|||
|
||||
<div class="middle aligned row">
|
||||
<div class="right aligned four wide column">
|
||||
<label>Titlovi</label>
|
||||
<label>Titlovi (requires anti-captcha)</label>
|
||||
</div>
|
||||
<div class="one wide column">
|
||||
<div id="titlovi" class="ui toggle checkbox provider">
|
||||
|
@ -2235,6 +2327,30 @@
|
|||
}
|
||||
});
|
||||
|
||||
// Show the credential fields for the currently selected anti-captcha
// provider and hide the others. The same rule must apply both at page
// load and whenever the dropdown changes, so it lives in one helper.
// (Fixes the load-time branch comparing against the misspelled value
// "death-by-cCaptcha", which left the DeathByCaptcha fields hidden.)
function updateAntiCaptchaVisibility() {
    var provider = $('#settings_anti_captcha_provider').val();
    // .toggle(bool) shows the element when true, hides it when false.
    $('.anticaptcha').toggle(provider === "anti-captcha");
    $('.deathbycaptcha').toggle(provider === "death-by-captcha");
}

// Apply once for the initial state rendered by the server.
updateAntiCaptchaVisibility();

// Re-apply whenever the user picks another provider.
$('#settings_anti_captcha_provider').dropdown('setting', 'onChange', function(){
    updateAntiCaptchaVisibility();
});
|
||||
|
||||
if ($('#settings_use_postprocessing').data("postprocessing") === "True") {
|
||||
$('.postprocessing').show();
|
||||
} else {
|
||||
|
@ -2445,6 +2561,8 @@
|
|||
$('#settings_page_size').dropdown('set selected','{{!settings.general.page_size}}');
|
||||
$('#settings_subfolder').dropdown('clear');
|
||||
$('#settings_subfolder').dropdown('set selected', '{{!settings.general.subfolder}}');
|
||||
$('#settings_anti_captcha_provider').dropdown('clear');
|
||||
$('#settings_anti_captcha_provider').dropdown('set selected', '{{!settings.general.anti_captcha_provider}}');
|
||||
$('#settings_proxy_type').dropdown('clear');
|
||||
$('#settings_proxy_type').dropdown('set selected','{{!settings.proxy.type}}');
|
||||
$('#settings_providers').dropdown('clear');
|
||||
|
@ -2616,6 +2734,7 @@
|
|||
]
|
||||
},
|
||||
settings_days_to_upgrade_subs : {
|
||||
depends: 'settings_upgrade_subs',
|
||||
rules : [
|
||||
{
|
||||
type : 'integer[1..30]'
|
||||
|
|
|
@ -414,7 +414,7 @@
|
|||
<div class="ui grid">
|
||||
<div class="middle aligned row">
|
||||
<div class="right aligned four wide column">
|
||||
<label>Addic7ed</label>
|
||||
<label>Addic7ed (requires anti-captcha)</label>
|
||||
</div>
|
||||
<div class="one wide column">
|
||||
<div id="addic7ed" class="ui toggle checkbox provider">
|
||||
|
@ -884,7 +884,7 @@
|
|||
|
||||
<div class="middle aligned row">
|
||||
<div class="right aligned four wide column">
|
||||
<label>Titlovi</label>
|
||||
<label>Titlovi (requires anti-captcha)</label>
|
||||
</div>
|
||||
<div class="one wide column">
|
||||
<div id="titlovi" class="ui toggle checkbox provider">
|
||||
|
|
Loading…
Reference in a new issue