mirror of
https://github.com/morpheus65535/bazarr.git
synced 2024-09-20 07:25:58 +08:00
Rolled back cloudscraper to fix captcha v1 solving issue.
This commit is contained in:
parent
1428edfb8b
commit
213a04405d
|
@ -1,6 +1,6 @@
|
||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: cloudscraper
|
Name: cloudscraper
|
||||||
Version: 1.2.71
|
Version: 1.2.58
|
||||||
Summary: A Python module to bypass Cloudflare's anti-bot page.
|
Summary: A Python module to bypass Cloudflare's anti-bot page.
|
||||||
Home-page: https://github.com/venomous/cloudscraper
|
Home-page: https://github.com/venomous/cloudscraper
|
||||||
Author: VeNoMouS
|
Author: VeNoMouS
|
||||||
|
@ -82,6 +82,31 @@ We support the following Javascript interpreters/engines.
|
||||||
- **[Node.js](https://nodejs.org/)**
|
- **[Node.js](https://nodejs.org/)**
|
||||||
- **[V8](https://github.com/sony/v8eval/):** We use Sony's [v8eval](https://v8.dev)() python module.
|
- **[V8](https://github.com/sony/v8eval/):** We use Sony's [v8eval](https://v8.dev)() python module.
|
||||||
|
|
||||||
|
# Updates
|
||||||
|
|
||||||
|
Cloudflare modifies their anti-bot protection page occasionally. So far it has changed maybe once per year on average.
|
||||||
|
|
||||||
|
If you notice that the anti-bot page has changed, or if this module suddenly stops working, please create a GitHub issue so that I can update the code accordingly.
|
||||||
|
|
||||||
|
- Many issues are a result of users not updating to the latest release of this project. Before filing an issue, please run the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip show cloudscraper
|
||||||
|
```
|
||||||
|
|
||||||
|
If the value of the version field is not the latest release, please run the following to update your package:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install cloudscraper -U
|
||||||
|
```
|
||||||
|
|
||||||
|
If you are still encountering a problem, open an issue and please include:
|
||||||
|
|
||||||
|
- The full exception and stack trace.
|
||||||
|
- The URL of the Cloudflare-protected page which the script does not work on.
|
||||||
|
- A Pastebin or Gist containing the HTML source of the protected page.
|
||||||
|
- The version number from `pip show cloudscraper`.
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
The simplest way to use cloudscraper is by calling `create_scraper()`.
|
The simplest way to use cloudscraper is by calling `create_scraper()`.
|
||||||
|
@ -104,26 +129,6 @@ Consult [Requests' documentation](http://docs.python-requests.org/en/latest/user
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
|
|
||||||
### Disable Cloudflare V1
|
|
||||||
#### Description
|
|
||||||
|
|
||||||
Use this if you don't want to even attempt Cloudflare v1 (deprecated) solving.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
|
|
||||||
|Parameter|Value|Default|
|
|
||||||
|-------------|:-------------:|:-----:|
|
|
||||||
|disableCloudflareV1|(boolean)|False|
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```python
|
|
||||||
scraper = cloudscraper.create_scraper(disableCloudflareV1=True)
|
|
||||||
```
|
|
||||||
|
|
||||||
------
|
|
||||||
|
|
||||||
### Brotli
|
### Brotli
|
||||||
|
|
||||||
#### Description
|
#### Description
|
||||||
|
@ -327,7 +332,6 @@ scraper = cloudscraper.create_scraper(interpreter='nodejs')
|
||||||
|
|
||||||
- **[2captcha](https://www.2captcha.com/)**
|
- **[2captcha](https://www.2captcha.com/)**
|
||||||
- **[anticaptcha](https://www.anti-captcha.com/)**
|
- **[anticaptcha](https://www.anti-captcha.com/)**
|
||||||
- **[CapSolver](https://capsolver.com/)**
|
|
||||||
- **[CapMonster Cloud](https://capmonster.cloud/)**
|
- **[CapMonster Cloud](https://capmonster.cloud/)**
|
||||||
- **[deathbycaptcha](https://www.deathbycaptcha.com/)**
|
- **[deathbycaptcha](https://www.deathbycaptcha.com/)**
|
||||||
- **[9kw](https://www.9kw.eu/)**
|
- **[9kw](https://www.9kw.eu/)**
|
||||||
|
@ -365,6 +369,7 @@ if proxies are set you can disable sending the proxies to 2captcha by setting `n
|
||||||
|
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={
|
captcha={
|
||||||
'provider': '2captcha',
|
'provider': '2captcha',
|
||||||
'api_key': 'your_2captcha_api_key'
|
'api_key': 'your_2captcha_api_key'
|
||||||
|
@ -392,6 +397,7 @@ if proxies are set you can disable sending the proxies to anticaptcha by setting
|
||||||
|
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={
|
captcha={
|
||||||
'provider': 'anticaptcha',
|
'provider': 'anticaptcha',
|
||||||
'api_key': 'your_anticaptcha_api_key'
|
'api_key': 'your_anticaptcha_api_key'
|
||||||
|
@ -401,29 +407,6 @@ scraper = cloudscraper.create_scraper(
|
||||||
|
|
||||||
------
|
------
|
||||||
|
|
||||||
#### CapSolver
|
|
||||||
|
|
||||||
##### Required `captcha` Parameters
|
|
||||||
|
|
||||||
|Parameter|Value|Required|Default|
|
|
||||||
|-------------|:-------------:|:-----:|:-----:|
|
|
||||||
|provider|(string) `capsolver`|yes||
|
|
||||||
|api_key|(string)|yes||
|
|
||||||
|
|
||||||
|
|
||||||
##### Example
|
|
||||||
|
|
||||||
```python
|
|
||||||
scraper = cloudscraper.create_scraper(
|
|
||||||
captcha={
|
|
||||||
'provider': 'capsolver',
|
|
||||||
'api_key': 'your_captchaai_api_key'
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
------
|
|
||||||
|
|
||||||
#### CapMonster Cloud
|
#### CapMonster Cloud
|
||||||
|
|
||||||
##### Required `captcha` Parameters
|
##### Required `captcha` Parameters
|
||||||
|
@ -442,6 +425,7 @@ if proxies are set you can disable sending the proxies to CapMonster by setting
|
||||||
|
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={
|
captcha={
|
||||||
'provider': 'capmonster',
|
'provider': 'capmonster',
|
||||||
'clientKey': 'your_capmonster_clientKey'
|
'clientKey': 'your_capmonster_clientKey'
|
||||||
|
@ -465,6 +449,7 @@ scraper = cloudscraper.create_scraper(
|
||||||
|
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={
|
captcha={
|
||||||
'provider': 'deathbycaptcha',
|
'provider': 'deathbycaptcha',
|
||||||
'username': 'your_deathbycaptcha_username',
|
'username': 'your_deathbycaptcha_username',
|
||||||
|
@ -489,6 +474,7 @@ scraper = cloudscraper.create_scraper(
|
||||||
|
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={
|
captcha={
|
||||||
'provider': '9kw',
|
'provider': '9kw',
|
||||||
'api_key': 'your_9kw_api_key',
|
'api_key': 'your_9kw_api_key',
|
||||||
|
@ -512,6 +498,7 @@ Use this if you want the requests response payload without solving the Captcha.
|
||||||
##### Example
|
##### Example
|
||||||
```python
|
```python
|
||||||
scraper = cloudscraper.create_scraper(
|
scraper = cloudscraper.create_scraper(
|
||||||
|
interpreter='nodejs',
|
||||||
captcha={'provider': 'return_response'}
|
captcha={'provider': 'return_response'}
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
@ -637,36 +624,3 @@ print(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Cryptography
|
|
||||||
|
|
||||||
#### Description
|
|
||||||
|
|
||||||
Control communication between client and server
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
Can be passed as an argument to `create_scraper()`.
|
|
||||||
|
|
||||||
|Parameter|Value|Default|
|
|
||||||
|-------------|:-------------:|:-----:|
|
|
||||||
|cipherSuite|(string)|None|
|
|
||||||
|ecdhCurve|(string)|prime256v1|
|
|
||||||
|server_hostname|(string)|None|
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Some servers require the use of a more complex ecdh curve than the default "prime256v1"
|
|
||||||
# Using a different curve may resolve handshake failures
|
|
||||||
scraper = cloudscraper.create_scraper(ecdhCurve='secp384r1')
|
|
||||||
```
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Manipulate server_hostname
|
|
||||||
scraper = cloudscraper.create_scraper(server_hostname='www.somesite.com')
|
|
||||||
scraper.get(
|
|
||||||
'https://backend.hosting.com/',
|
|
||||||
headers={'Host': 'www.somesite.com'}
|
|
||||||
)
|
|
||||||
```
|
|
|
@ -1,19 +1,17 @@
|
||||||
cloudscraper-1.2.71.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
cloudscraper-1.2.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||||
cloudscraper-1.2.71.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
|
cloudscraper-1.2.58.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
|
||||||
cloudscraper-1.2.71.dist-info/METADATA,sha256=ywzk5ZCEv-I8Y9gajnVCsiAR3DrdmeiRLam3EGTJ0UA,19942
|
cloudscraper-1.2.58.dist-info/METADATA,sha256=q25vkvMHkAxmuZRwak56i4CLAFUuG5EwEzz1oEXOY3U,19537
|
||||||
cloudscraper-1.2.71.dist-info/RECORD,,
|
cloudscraper-1.2.58.dist-info/RECORD,,
|
||||||
cloudscraper-1.2.71.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
cloudscraper-1.2.58.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||||
cloudscraper-1.2.71.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
cloudscraper-1.2.58.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
||||||
cloudscraper-1.2.71.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
|
cloudscraper-1.2.58.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
|
||||||
cloudscraper/__init__.py,sha256=Eg8AqKak2yYcraKqt7O3LJLNmppC2uL7dvAANiyxh5w,15960
|
cloudscraper/__init__.py,sha256=gsOMaKAKNfJUR4FkiEefAA2fAHVFuSwkblGgqxClsrw,32790
|
||||||
cloudscraper/captcha/2captcha.py,sha256=yyDWvL6HVK4pM69aRpOV9mwzbtPC0yGz_mWkQ7-mkmI,10643
|
cloudscraper/captcha/2captcha.py,sha256=CWF62VmLqb_KvSH-dqzo1XEwCBOQh1Aee-G18cX_7aw,10371
|
||||||
cloudscraper/captcha/9kw.py,sha256=5EAUyO_vBEuLKsr4sXYa25MSVOm3BXVAdcenF6ZPsgI,7701
|
cloudscraper/captcha/9kw.py,sha256=1dfhRHKeCx8yIE1opWyQ1Q7aHJlXDdkv1bV2Bfzbrf8,7387
|
||||||
cloudscraper/captcha/__init__.py,sha256=VORxm32xqLrEE-zxFWgEhSbtqfigjCfwodChg1VlQ6c,1511
|
cloudscraper/captcha/__init__.py,sha256=VORxm32xqLrEE-zxFWgEhSbtqfigjCfwodChg1VlQ6c,1511
|
||||||
cloudscraper/captcha/anticaptcha.py,sha256=YUsLviq3ZtbjTUnAPq6zVEieHmeSgnmiXKcqXZeO5qA,6152
|
cloudscraper/captcha/anticaptcha.py,sha256=cK8LON8M-8MN1wx_rSMTTqxrpwbL65Z2svH-LtGiA40,3478
|
||||||
cloudscraper/captcha/capmonster.py,sha256=_9AUr6vHG4c5XLc5XqvnnMqgcvuKnzz1ckJpSySjgKQ,6143
|
cloudscraper/captcha/capmonster.py,sha256=oVXdv2Wrgh2nWFrYttUzbqW9xZU1j6A4cDDcZINIoVg,5695
|
||||||
cloudscraper/captcha/capsolver.py,sha256=x38fO0m_k2W8nO3IppXADZsfCYl0iyvRgajZ5s5iTSU,6060
|
cloudscraper/captcha/deathbycaptcha.py,sha256=UJqkh35gcKVdIhwNqF7N_0ixpIPT2PHiMbT378wEM4w,8073
|
||||||
cloudscraper/captcha/deathbycaptcha.py,sha256=asUX_quUsjAyWVRc7_8o_ryHZFotN-NP60mQiuN-c1U,8673
|
|
||||||
cloudscraper/cloudflare.py,sha256=i1jyJcY-aRy3IQ-7YUly8qGUovO4Nx99M_FKfz4eivQ,19993
|
|
||||||
cloudscraper/exceptions.py,sha256=WSMgI8PRvU3g4KDFrjU-42p83lSAVOw8tN2NSqqIUfw,2397
|
cloudscraper/exceptions.py,sha256=WSMgI8PRvU3g4KDFrjU-42p83lSAVOw8tN2NSqqIUfw,2397
|
||||||
cloudscraper/help.py,sha256=fNYNGFQjiCL1d-gCpDoulBk4iHOuzNhLBudi7NrOHSg,2100
|
cloudscraper/help.py,sha256=fNYNGFQjiCL1d-gCpDoulBk4iHOuzNhLBudi7NrOHSg,2100
|
||||||
cloudscraper/interpreters/__init__.py,sha256=mWY8LuzDRYWGGnKz5vYSdrOnoVaeWlixmAtZN8Pq6bY,1734
|
cloudscraper/interpreters/__init__.py,sha256=mWY8LuzDRYWGGnKz5vYSdrOnoVaeWlixmAtZN8Pq6bY,1734
|
|
@ -1,14 +1,20 @@
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
import ssl
|
import ssl
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from requests.sessions import Session
|
from requests.sessions import Session
|
||||||
from requests_toolbelt.utils import dump
|
from requests_toolbelt.utils import dump
|
||||||
|
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -22,23 +28,37 @@ except ImportError:
|
||||||
import copy_reg as copyreg
|
import copy_reg as copyreg
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urlparse import urlparse
|
from HTMLParser import HTMLParser
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib.parse import urlparse
|
if sys.version_info >= (3, 4):
|
||||||
|
import html
|
||||||
|
else:
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urlparse import urlparse, urljoin
|
||||||
|
except ImportError:
|
||||||
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
CloudflareLoopProtection,
|
CloudflareLoopProtection,
|
||||||
CloudflareIUAMError
|
CloudflareCode1020,
|
||||||
|
CloudflareIUAMError,
|
||||||
|
CloudflareSolveError,
|
||||||
|
CloudflareChallengeError,
|
||||||
|
CloudflareCaptchaError,
|
||||||
|
CloudflareCaptchaProvider
|
||||||
)
|
)
|
||||||
|
|
||||||
from .cloudflare import Cloudflare
|
from .interpreters import JavaScriptInterpreter
|
||||||
|
from .captcha import Captcha
|
||||||
from .user_agent import User_Agent
|
from .user_agent import User_Agent
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
__version__ = '1.2.71'
|
__version__ = '1.2.58'
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@ -59,8 +79,6 @@ class CipherSuiteAdapter(HTTPAdapter):
|
||||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||||
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
||||||
self.source_address = kwargs.pop('source_address', None)
|
self.source_address = kwargs.pop('source_address', None)
|
||||||
self.server_hostname = kwargs.pop('server_hostname', None)
|
|
||||||
self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1')
|
|
||||||
|
|
||||||
if self.source_address:
|
if self.source_address:
|
||||||
if isinstance(self.source_address, str):
|
if isinstance(self.source_address, str):
|
||||||
|
@ -73,34 +91,14 @@ class CipherSuiteAdapter(HTTPAdapter):
|
||||||
|
|
||||||
if not self.ssl_context:
|
if not self.ssl_context:
|
||||||
self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
||||||
|
|
||||||
self.ssl_context.orig_wrap_socket = self.ssl_context.wrap_socket
|
|
||||||
self.ssl_context.wrap_socket = self.wrap_socket
|
|
||||||
|
|
||||||
if self.server_hostname:
|
|
||||||
self.ssl_context.server_hostname = self.server_hostname
|
|
||||||
|
|
||||||
self.ssl_context.set_ciphers(self.cipherSuite)
|
self.ssl_context.set_ciphers(self.cipherSuite)
|
||||||
self.ssl_context.set_ecdh_curve(self.ecdhCurve)
|
self.ssl_context.set_ecdh_curve('prime256v1')
|
||||||
|
self.ssl_context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
|
||||||
self.ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
|
|
||||||
self.ssl_context.maximum_version = ssl.TLSVersion.TLSv1_3
|
|
||||||
|
|
||||||
super(CipherSuiteAdapter, self).__init__(**kwargs)
|
super(CipherSuiteAdapter, self).__init__(**kwargs)
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
def wrap_socket(self, *args, **kwargs):
|
|
||||||
if hasattr(self.ssl_context, 'server_hostname') and self.ssl_context.server_hostname:
|
|
||||||
kwargs['server_hostname'] = self.ssl_context.server_hostname
|
|
||||||
self.ssl_context.check_hostname = False
|
|
||||||
else:
|
|
||||||
self.ssl_context.check_hostname = True
|
|
||||||
|
|
||||||
return self.ssl_context.orig_wrap_socket(*args, **kwargs)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
def init_poolmanager(self, *args, **kwargs):
|
def init_poolmanager(self, *args, **kwargs):
|
||||||
kwargs['ssl_context'] = self.ssl_context
|
kwargs['ssl_context'] = self.ssl_context
|
||||||
kwargs['source_address'] = self.source_address
|
kwargs['source_address'] = self.source_address
|
||||||
|
@ -120,21 +118,15 @@ class CloudScraper(Session):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.debug = kwargs.pop('debug', False)
|
self.debug = kwargs.pop('debug', False)
|
||||||
|
|
||||||
self.disableCloudflareV1 = kwargs.pop('disableCloudflareV1', False)
|
|
||||||
self.delay = kwargs.pop('delay', None)
|
self.delay = kwargs.pop('delay', None)
|
||||||
self.captcha = kwargs.pop('captcha', {})
|
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
||||||
self.doubleDown = kwargs.pop('doubleDown', True)
|
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||||
self.interpreter = kwargs.pop('interpreter', 'native')
|
self.interpreter = kwargs.pop('interpreter', 'native')
|
||||||
|
self.captcha = kwargs.pop('captcha', {})
|
||||||
self.requestPreHook = kwargs.pop('requestPreHook', None)
|
self.requestPreHook = kwargs.pop('requestPreHook', None)
|
||||||
self.requestPostHook = kwargs.pop('requestPostHook', None)
|
self.requestPostHook = kwargs.pop('requestPostHook', None)
|
||||||
|
|
||||||
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
|
||||||
self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1')
|
|
||||||
self.source_address = kwargs.pop('source_address', None)
|
self.source_address = kwargs.pop('source_address', None)
|
||||||
self.server_hostname = kwargs.pop('server_hostname', None)
|
self.doubleDown = kwargs.pop('doubleDown', True)
|
||||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
|
||||||
|
|
||||||
self.allow_brotli = kwargs.pop(
|
self.allow_brotli = kwargs.pop(
|
||||||
'allow_brotli',
|
'allow_brotli',
|
||||||
|
@ -167,10 +159,8 @@ class CloudScraper(Session):
|
||||||
'https://',
|
'https://',
|
||||||
CipherSuiteAdapter(
|
CipherSuiteAdapter(
|
||||||
cipherSuite=self.cipherSuite,
|
cipherSuite=self.cipherSuite,
|
||||||
ecdhCurve=self.ecdhCurve,
|
ssl_context=self.ssl_context,
|
||||||
server_hostname=self.server_hostname,
|
source_address=self.source_address
|
||||||
source_address=self.source_address,
|
|
||||||
ssl_context=self.ssl_context
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -211,6 +201,20 @@ class CloudScraper(Session):
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(f"Debug Error: {getattr(e, 'message', e)}")
|
print(f"Debug Error: {getattr(e, 'message', e)}")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# Unescape / decode html entities
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def unescape(html_text):
|
||||||
|
if sys.version_info >= (3, 0):
|
||||||
|
if sys.version_info >= (3, 4):
|
||||||
|
return html.unescape(html_text)
|
||||||
|
|
||||||
|
return HTMLParser().unescape(html_text)
|
||||||
|
|
||||||
|
return HTMLParser().unescape(html_text)
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
# Decode Brotli on older versions of urllib3 manually
|
# Decode Brotli on older versions of urllib3 manually
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
@ -271,24 +275,13 @@ class CloudScraper(Session):
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
if self.requestPostHook:
|
if self.requestPostHook:
|
||||||
newResponse = self.requestPostHook(self, response)
|
response = self.requestPostHook(self, response)
|
||||||
|
|
||||||
if response != newResponse: # Give me walrus in 3.7!!!
|
|
||||||
response = newResponse
|
|
||||||
if self.debug:
|
if self.debug:
|
||||||
print('==== requestPostHook Debug ====')
|
|
||||||
self.debugRequest(response)
|
self.debugRequest(response)
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# Check if Cloudflare anti-bot is on
|
||||||
|
if self.is_Challenge_Request(response):
|
||||||
if not self.disableCloudflareV1:
|
|
||||||
cloudflareV1 = Cloudflare(self)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
# Check if Cloudflare v1 anti-bot is on
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
if cloudflareV1.is_Challenge_Request(response):
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
# Try to solve the challenge and send it back
|
# Try to solve the challenge and send it back
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
@ -302,13 +295,460 @@ class CloudScraper(Session):
|
||||||
|
|
||||||
self._solveDepthCnt += 1
|
self._solveDepthCnt += 1
|
||||||
|
|
||||||
response = cloudflareV1.Challenge_Response(response, **kwargs)
|
response = self.Challenge_Response(response, **kwargs)
|
||||||
else:
|
else:
|
||||||
if not response.is_redirect and response.status_code not in [429, 503]:
|
if not response.is_redirect and response.status_code not in [429, 503]:
|
||||||
self._solveDepthCnt = 0
|
self._solveDepthCnt = 0
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains a valid Cloudflare Bot Fight Mode challenge
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_BFM_Challenge(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
resp.headers.get('Server', '').startswith('cloudflare')
|
||||||
|
and re.search(
|
||||||
|
r"\/cdn-cgi\/bm\/cv\/\d+\/api\.js.*?"
|
||||||
|
r"window\['__CF\$cv\$params'\]\s*=\s*{",
|
||||||
|
resp.text,
|
||||||
|
re.M | re.S
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains a valid Cloudflare challenge
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_IUAM_Challenge(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
resp.headers.get('Server', '').startswith('cloudflare')
|
||||||
|
and resp.status_code in [429, 503]
|
||||||
|
and re.search(
|
||||||
|
r'<form .*?="challenge-form" action="/.*?__cf_chl_jschl_tk__=\S+"',
|
||||||
|
resp.text,
|
||||||
|
re.M | re.S
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains new Cloudflare challenge
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_New_IUAM_Challenge(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
resp.headers.get('Server', '').startswith('cloudflare')
|
||||||
|
and resp.status_code in [429, 503]
|
||||||
|
and re.search(
|
||||||
|
r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1',
|
||||||
|
resp.text,
|
||||||
|
re.M | re.S
|
||||||
|
)
|
||||||
|
and re.search(r'window._cf_chl_enter\s*[\(=]', resp.text, re.M | re.S)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains a v2 hCaptcha Cloudflare challenge
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_New_Captcha_Challenge(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
CloudScraper.is_Captcha_Challenge(resp)
|
||||||
|
and re.search(
|
||||||
|
r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/\S+orchestrate/captcha/v1',
|
||||||
|
resp.text,
|
||||||
|
re.M | re.S
|
||||||
|
)
|
||||||
|
and re.search(r'\s*id="trk_captcha_js"', resp.text, re.M | re.S)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains a Cloudflare hCaptcha challenge
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_Captcha_Challenge(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
resp.headers.get('Server', '').startswith('cloudflare')
|
||||||
|
and resp.status_code == 403
|
||||||
|
and re.search(
|
||||||
|
r'action="/\S+__cf_chl_captcha_tk__=\S+',
|
||||||
|
resp.text,
|
||||||
|
re.M | re.DOTALL
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# check if the response contains Firewall 1020 Error
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_Firewall_Blocked(resp):
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
resp.headers.get('Server', '').startswith('cloudflare')
|
||||||
|
and resp.status_code == 403
|
||||||
|
and re.search(
|
||||||
|
r'<span class="cf-error-code">1020</span>',
|
||||||
|
resp.text,
|
||||||
|
re.M | re.DOTALL
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
def is_Challenge_Request(self, resp):
|
||||||
|
if self.is_Firewall_Blocked(resp):
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareCode1020,
|
||||||
|
'Cloudflare has blocked this request (Code 1020 Detected).'
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.is_New_Captcha_Challenge(resp):
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareChallengeError,
|
||||||
|
'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.is_New_IUAM_Challenge(resp):
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareChallengeError,
|
||||||
|
'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
|
||||||
|
if self.debug:
|
||||||
|
print('Detected a Cloudflare version 1 challenge.')
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# Try to solve cloudflare javascript challenge.
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
def IUAM_Challenge_Response(self, body, url, interpreter):
|
||||||
|
try:
|
||||||
|
formPayload = re.search(
|
||||||
|
r'<form (?P<form>.*?="challenge-form" '
|
||||||
|
r'action="(?P<challengeUUID>.*?'
|
||||||
|
r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
|
||||||
|
body,
|
||||||
|
re.M | re.DOTALL
|
||||||
|
).groupdict()
|
||||||
|
|
||||||
|
if not all(key in formPayload for key in ['form', 'challengeUUID']):
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareIUAMError,
|
||||||
|
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
|
||||||
|
)
|
||||||
|
|
||||||
|
payload = OrderedDict()
|
||||||
|
for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
|
||||||
|
inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
|
||||||
|
if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
|
||||||
|
payload.update({inputPayload['name']: inputPayload['value']})
|
||||||
|
|
||||||
|
except AttributeError:
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareIUAMError,
|
||||||
|
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
|
||||||
|
)
|
||||||
|
|
||||||
|
hostParsed = urlparse(url)
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
|
||||||
|
interpreter
|
||||||
|
).solveChallenge(body, hostParsed.netloc)
|
||||||
|
except Exception as e:
|
||||||
|
self.simpleException(
|
||||||
|
CloudflareIUAMError,
|
||||||
|
f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
|
||||||
|
'data': payload
|
||||||
|
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# Try to solve the Captcha challenge via 3rd party.
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
def captcha_Challenge_Response(self, provider, provider_params, body, url):
    """Build the POST request that answers a Cloudflare captcha challenge.

    The challenge form is located in ``body``, its site key is handed to a
    3rd party captcha provider, and the provider's answer is packed into
    the form data to be sent back to Cloudflare.

    :param provider: Name of the captcha provider; it is lower-cased and
        passed to ``Captcha.dynamicImport``.
    :param provider_params: Provider settings, forwarded unchanged as the
        last argument of the provider's ``solveCaptcha``.
    :param body: HTML text of the challenge page.
    :param url: URL the challenge page was served from; its scheme and
        netloc are reused for the submit URL.
    :return: ``{'url': <submit url>, 'data': <OrderedDict of form fields>}``.

    Any failure to extract the form parameters (no matching form, missing
    ``cf_captcha_kind`` or missing ``data-sitekey``) is reported through
    ``self.simpleException`` with ``CloudflareCaptchaError``
    (NOTE(review): simpleException presumably raises -- the code that
    follows relies on it not returning).
    """
    try:
        # re.search() returns None when no challenge form is present, so
        # .groupdict() raises AttributeError, which is handled below.
        formPayload = re.search(
            r'<form (?P<form>.*?="challenge-form" '
            r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
            body,
            re.M | re.DOTALL
        ).groupdict()

        if not all(key in formPayload for key in ['form', 'challengeUUID']):
            self.simpleException(
                CloudflareCaptchaError,
                "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
            )

        # Keys are the matched attribute text itself, e.g. 'name="r" value'.
        payload = OrderedDict(
            re.findall(
                r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
                formPayload['form']
            )
        )

        captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'

        # Resolved inside the guarded region so that a form without a site
        # key is reported as an extraction failure instead of escaping as a
        # bare KeyError once the provider call is being assembled.
        siteKey = payload['data-sitekey']

    except (AttributeError, KeyError):
        self.simpleException(
            CloudflareCaptchaError,
            "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
        )

    # ------------------------------------------------------------------------------- #
    # Pass proxy parameter to provider to solve captcha.
    # ------------------------------------------------------------------------------- #

    if self.proxies and self.proxies != self.captcha.get('proxy'):
        self.captcha['proxy'] = self.proxies

    # ------------------------------------------------------------------------------- #
    # Pass User-Agent if provider supports it to solve captcha.
    # ------------------------------------------------------------------------------- #

    self.captcha['User-Agent'] = self.headers['User-Agent']

    # ------------------------------------------------------------------------------- #
    # Submit job to provider to request captcha solve.
    # ------------------------------------------------------------------------------- #

    captchaResponse = Captcha.dynamicImport(
        provider.lower()
    ).solveCaptcha(
        captchaType,
        url,
        siteKey,
        provider_params
    )

    # ------------------------------------------------------------------------------- #
    # Parse and handle the response of solved captcha.
    # ------------------------------------------------------------------------------- #

    dataPayload = OrderedDict([
        ('r', payload.get('name="r" value', '')),
        ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
        ('id', payload.get('data-ray')),
        ('g-recaptcha-response', captchaResponse)
    ])

    # hCaptcha answers are sent under both response field names.
    if captchaType == 'hCaptcha':
        dataPayload.update({'h-captcha-response': captchaResponse})

    hostParsed = urlparse(url)

    return {
        'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
        'data': dataPayload
    }
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
# Attempt to handle and send the challenge response back to cloudflare
|
||||||
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
def Challenge_Response(self, resp, **kwargs):
    """Answer a Cloudflare challenge found in ``resp`` and return the result.

    Captcha challenges are delegated to ``self.captcha_Challenge_Response``;
    anything else is treated as an IUAM challenge and goes through
    ``self.IUAM_Challenge_Response`` after sleeping for ``self.delay``.
    The resulting form is POSTed back and, when Cloudflare replies with a
    redirect, the original request method is replayed against the redirect
    target.

    :param resp: Response object holding the challenge page.
    :param kwargs: Keyword arguments of the original request; they are
        deep-copied before being altered for the follow-up requests.
    :return: A response object (``resp`` itself on the early-return paths).
    """
    if self.is_Captcha_Challenge(resp):
        # Double down on the request: some websites only check whether
        # cfuid is populated before issuing a Captcha.
        if self.doubleDown:
            resp = self.decodeBrotli(
                self.perform_request(resp.request.method, resp.url, **kwargs)
            )

        if not self.is_Captcha_Challenge(resp):
            return resp

        # A usable captcha configuration is a non-empty dict naming a provider.
        if not (self.captcha and isinstance(self.captcha, dict) and self.captcha.get('provider')):
            self.simpleException(
                CloudflareCaptchaProvider,
                "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
                "correctly via the 'captcha' parameter."
            )

        provider_name = self.captcha.get('provider')

        # The 'return_response' provider hands the challenge page back untouched.
        if provider_name == 'return_response':
            return resp

        submit_url = self.captcha_Challenge_Response(
            provider_name,
            self.captcha,
            resp.text,
            resp.url
        )
    else:
        # Cloudflare requires a delay before solving the challenge; when none
        # is configured it is read (in milliseconds) from the challenge page.
        if not self.delay:
            try:
                millis = re.search(
                    r'submit\(\);\r?\n\s*},\s*([0-9]+)',
                    resp.text
                ).group(1)
                self.delay = float(millis) / float(1000)
            except (AttributeError, ValueError):
                self.simpleException(
                    CloudflareIUAMError,
                    "Cloudflare IUAM possibility malformed, issue extracing delay value."
                )

        sleep(self.delay)

        submit_url = self.IUAM_Challenge_Response(
            resp.text,
            resp.url,
            self.interpreter
        )

    if not submit_url:
        # We shouldn't be here...
        # Re-request the original query and/or process again....
        return self.request(resp.request.method, resp.url, **kwargs)

    def merge_option(options, key, extra):
        # Merge `extra` into options[key]; a missing entry, or one that has
        # no update() method, is replaced by a fresh dict first.
        try:
            options[key].update(extra)
        except (AttributeError, KeyError):
            options[key] = {}
            options[key].update(extra)
        return options[key]

    # Send the Challenge Response back to Cloudflare.
    submit_kwargs = deepcopy(kwargs)
    submit_kwargs['allow_redirects'] = False
    submit_kwargs['data'] = merge_option(submit_kwargs, 'data', submit_url['data'])

    origin = urlparse(resp.url)
    submit_kwargs['headers'] = merge_option(
        submit_kwargs,
        'headers',
        {
            'Origin': f'{origin.scheme}://{origin.netloc}',
            'Referer': resp.url
        }
    )

    answer = self.request('POST', submit_url['url'], **submit_kwargs)

    if answer.status_code == 400:
        self.simpleException(
            CloudflareSolveError,
            'Invalid challenge answer detected, Cloudflare broken?'
        )

    # Cloudflare may pass the content straight through instead of issuing a 3xx.
    if not answer.is_redirect:
        return answer

    # Otherwise replay the original method against the redirect target; a
    # Location without a host is resolved against the answer's URL, which
    # also covers a protocol scheme change (http -> https).
    follow_kwargs = deepcopy(kwargs)
    follow_kwargs['headers'] = merge_option(
        follow_kwargs,
        'headers',
        {'Referer': answer.url}
    )

    location = answer.headers['Location']
    if urlparse(location).netloc:
        redirect_location = location
    else:
        redirect_location = urljoin(answer.url, location)

    return self.request(
        resp.request.method,
        redirect_location,
        **follow_kwargs
    )
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -321,7 +761,7 @@ class CloudScraper(Session):
|
||||||
if sess:
|
if sess:
|
||||||
for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']:
|
for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']:
|
||||||
val = getattr(sess, attr, None)
|
val = getattr(sess, attr, None)
|
||||||
if val is not None:
|
if val:
|
||||||
setattr(scraper, attr, val)
|
setattr(scraper, attr, val)
|
||||||
|
|
||||||
return scraper
|
return scraper
|
||||||
|
@ -342,7 +782,7 @@ class CloudScraper(Session):
|
||||||
'doubleDown',
|
'doubleDown',
|
||||||
'captcha',
|
'captcha',
|
||||||
'interpreter',
|
'interpreter',
|
||||||
'source_address',
|
'source_address'
|
||||||
'requestPreHook',
|
'requestPreHook',
|
||||||
'requestPostHook'
|
'requestPostHook'
|
||||||
] if field in kwargs
|
] if field in kwargs
|
||||||
|
@ -366,7 +806,6 @@ class CloudScraper(Session):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
cls.simpleException(
|
cls.simpleException(
|
||||||
cls,
|
|
||||||
CloudflareIUAMError,
|
CloudflareIUAMError,
|
||||||
"Unable to find Cloudflare cookies. Does the site actually "
|
"Unable to find Cloudflare cookies. Does the site actually "
|
||||||
"have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
|
"have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
|
||||||
|
@ -374,6 +813,7 @@ class CloudScraper(Session):
|
||||||
|
|
||||||
return (
|
return (
|
||||||
{
|
{
|
||||||
|
'__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
|
||||||
'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
|
'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
|
||||||
},
|
},
|
||||||
scraper.headers['User-Agent']
|
scraper.headers['User-Agent']
|
||||||
|
@ -402,6 +842,5 @@ if ssl.OPENSSL_VERSION_INFO < (1, 1, 1):
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
create_scraper = CloudScraper.create_scraper
|
create_scraper = CloudScraper.create_scraper
|
||||||
session = CloudScraper.create_scraper
|
|
||||||
get_tokens = CloudScraper.get_tokens
|
get_tokens = CloudScraper.get_tokens
|
||||||
get_cookie_string = CloudScraper.get_cookie_string
|
get_cookie_string = CloudScraper.get_cookie_string
|
||||||
|
|
|
@ -29,11 +29,6 @@ class captchaSolver(Captcha):
|
||||||
super(captchaSolver, self).__init__('2captcha')
|
super(captchaSolver, self).__init__('2captcha')
|
||||||
self.host = 'https://2captcha.com'
|
self.host = 'https://2captcha.com'
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.captchaType = {
|
|
||||||
'reCaptcha': 'userrecaptcha',
|
|
||||||
'hCaptcha': 'hcaptcha',
|
|
||||||
'turnstile': 'turnstile'
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@ -180,16 +175,23 @@ class captchaSolver(Captcha):
|
||||||
'soft_id': 2905
|
'soft_id': 2905
|
||||||
}
|
}
|
||||||
|
|
||||||
data.update({
|
data.update(
|
||||||
'method': self.captchaType[captchaType],
|
{
|
||||||
'googlekey' if captchaType == 'reCaptcha' else 'sitekey': siteKey
|
'method': 'userrcaptcha',
|
||||||
})
|
'googlekey': siteKey
|
||||||
|
} if captchaType == 'reCaptcha' else {
|
||||||
|
'method': 'hcaptcha',
|
||||||
|
'sitekey': siteKey
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if self.proxy:
|
if self.proxy:
|
||||||
data.update({
|
data.update(
|
||||||
|
{
|
||||||
'proxy': self.proxy,
|
'proxy': self.proxy,
|
||||||
'proxytype': self.proxyType
|
'proxytype': self.proxyType
|
||||||
})
|
}
|
||||||
|
)
|
||||||
|
|
||||||
response = polling2.poll(
|
response = polling2.poll(
|
||||||
lambda: self.session.post(
|
lambda: self.session.post(
|
||||||
|
|
|
@ -12,35 +12,30 @@ except ImportError:
|
||||||
)
|
)
|
||||||
|
|
||||||
from ..exceptions import (
|
from ..exceptions import (
|
||||||
CaptchaException,
|
reCaptchaServiceUnavailable,
|
||||||
CaptchaServiceUnavailable,
|
reCaptchaAPIError,
|
||||||
CaptchaAPIError,
|
reCaptchaTimeout,
|
||||||
CaptchaTimeout,
|
reCaptchaParameter,
|
||||||
CaptchaParameter,
|
reCaptchaBadJobID
|
||||||
CaptchaBadJobID
|
|
||||||
)
|
)
|
||||||
|
|
||||||
from . import Captcha
|
from . import reCaptcha
|
||||||
|
|
||||||
|
|
||||||
class captchaSolver(Captcha):
|
class captchaSolver(reCaptcha):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(captchaSolver, self).__init__('9kw')
|
super(captchaSolver, self).__init__('9kw')
|
||||||
self.host = 'https://www.9kw.eu/index.cgi'
|
self.host = 'https://www.9kw.eu/index.cgi'
|
||||||
self.maxtimeout = 180
|
self.maxtimeout = 180
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.captchaType = {
|
|
||||||
'reCaptcha': 'recaptchav2',
|
|
||||||
'hCaptcha': 'hcaptcha'
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def checkErrorStatus(response):
|
def checkErrorStatus(response):
|
||||||
if response.status_code in [500, 502]:
|
if response.status_code in [500, 502]:
|
||||||
raise CaptchaServiceUnavailable(
|
raise reCaptchaServiceUnavailable(
|
||||||
f'9kw: Server Side Error {response.status_code}'
|
f'9kw: Server Side Error {response.status_code}'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -103,18 +98,18 @@ class captchaSolver(Captcha):
|
||||||
|
|
||||||
if response.text.startswith('{'):
|
if response.text.startswith('{'):
|
||||||
if response.json().get('error'):
|
if response.json().get('error'):
|
||||||
raise CaptchaAPIError(error_codes.get(int(response.json().get('error'))))
|
raise reCaptchaAPIError(error_codes.get(int(response.json().get('error'))))
|
||||||
else:
|
else:
|
||||||
error_code = int(re.search(r'^00(?P<error_code>\d+)', response.text).groupdict().get('error_code', 0))
|
error_code = int(re.search(r'^00(?P<error_code>\d+)', response.text).groupdict().get('error_code', 0))
|
||||||
if error_code:
|
if error_code:
|
||||||
raise CaptchaAPIError(error_codes.get(error_code))
|
raise reCaptchaAPIError(error_codes.get(error_code))
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
def requestJob(self, jobID):
|
def requestJob(self, jobID):
|
||||||
if not jobID:
|
if not jobID:
|
||||||
raise CaptchaBadJobID(
|
raise reCaptchaBadJobID(
|
||||||
"9kw: Error bad job id to request against."
|
"9kw: Error bad job id to request reCaptcha against."
|
||||||
)
|
)
|
||||||
|
|
||||||
def _checkRequest(response):
|
def _checkRequest(response):
|
||||||
|
@ -144,7 +139,7 @@ class captchaSolver(Captcha):
|
||||||
if response:
|
if response:
|
||||||
return response.json().get('answer')
|
return response.json().get('answer')
|
||||||
else:
|
else:
|
||||||
raise CaptchaTimeout("9kw: Error failed to solve.")
|
raise reCaptchaTimeout("9kw: Error failed to solve reCaptcha.")
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@ -157,6 +152,11 @@ class captchaSolver(Captcha):
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
captchaMap = {
|
||||||
|
'reCaptcha': 'recaptchav2',
|
||||||
|
'hCaptcha': 'hcaptcha'
|
||||||
|
}
|
||||||
|
|
||||||
response = polling.poll(
|
response = polling.poll(
|
||||||
lambda: self.session.post(
|
lambda: self.session.post(
|
||||||
self.host,
|
self.host,
|
||||||
|
@ -165,7 +165,7 @@ class captchaSolver(Captcha):
|
||||||
'action': 'usercaptchaupload',
|
'action': 'usercaptchaupload',
|
||||||
'interactive': 1,
|
'interactive': 1,
|
||||||
'file-upload-01': siteKey,
|
'file-upload-01': siteKey,
|
||||||
'oldsource': self.captchaType[captchaType],
|
'oldsource': captchaMap[captchaType],
|
||||||
'pageurl': url,
|
'pageurl': url,
|
||||||
'maxtimeout': self.maxtimeout,
|
'maxtimeout': self.maxtimeout,
|
||||||
'json': 1
|
'json': 1
|
||||||
|
@ -180,35 +180,33 @@ class captchaSolver(Captcha):
|
||||||
if response:
|
if response:
|
||||||
return response.json().get('captchaid')
|
return response.json().get('captchaid')
|
||||||
else:
|
else:
|
||||||
raise CaptchaBadJobID('9kw: Error no valid job id was returned.')
|
raise reCaptchaBadJobID('9kw: Error no valid job id was returned.')
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
|
||||||
|
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
|
||||||
jobID = None
|
jobID = None
|
||||||
|
|
||||||
if not captchaParams.get('api_key'):
|
if not reCaptchaParams.get('api_key'):
|
||||||
raise CaptchaParameter("9kw: Missing api_key parameter.")
|
raise reCaptchaParameter("9kw: Missing api_key parameter.")
|
||||||
|
|
||||||
self.api_key = captchaParams.get('api_key')
|
self.api_key = reCaptchaParams.get('api_key')
|
||||||
|
|
||||||
if captchaParams.get('maxtimeout'):
|
if reCaptchaParams.get('maxtimeout'):
|
||||||
self.maxtimeout = captchaParams.get('maxtimeout')
|
self.maxtimeout = reCaptchaParams.get('maxtimeout')
|
||||||
|
|
||||||
if captchaParams.get('proxy'):
|
if reCaptchaParams.get('proxy'):
|
||||||
self.session.proxies = captchaParams.get('proxies')
|
self.session.proxies = reCaptchaParams.get('proxies')
|
||||||
|
|
||||||
if captchaType not in self.captchaType:
|
|
||||||
raise CaptchaException(f'9kw: {captchaType} is not supported by this provider.')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
jobID = self.requestSolve(captchaType, url, siteKey)
|
jobID = self.requestSolve(captchaType, url, siteKey)
|
||||||
return self.requestJob(jobID)
|
return self.requestJob(jobID)
|
||||||
except polling.TimeoutException:
|
except polling.TimeoutException:
|
||||||
raise CaptchaTimeout(
|
raise reCaptchaTimeout(
|
||||||
f"9kw: solve took to long to execute 'captchaid' {jobID}, aborting."
|
f"9kw: reCaptcha solve took to long to execute 'captchaid' {jobID}, aborting."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
|
||||||
captchaSolver()
|
captchaSolver()
|
||||||
|
|
|
@ -1,24 +1,31 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
from ..exceptions import (
|
||||||
import requests
|
CaptchaParameter,
|
||||||
|
CaptchaTimeout,
|
||||||
|
CaptchaAPIError
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from ..exceptions import (
|
try:
|
||||||
CaptchaServiceUnavailable,
|
from python_anticaptcha import (
|
||||||
CaptchaAPIError,
|
AnticaptchaClient,
|
||||||
CaptchaTimeout,
|
NoCaptchaTaskProxylessTask,
|
||||||
CaptchaParameter,
|
HCaptchaTaskProxyless,
|
||||||
CaptchaBadJobID
|
NoCaptchaTask,
|
||||||
|
HCaptchaTask,
|
||||||
|
AnticaptchaException
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(
|
||||||
|
"Please install/upgrade the python module 'python_anticaptcha' via "
|
||||||
|
"pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
import sys
|
||||||
import polling2
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("Please install the python module 'polling2' via pip")
|
|
||||||
|
|
||||||
from . import Captcha
|
from . import Captcha
|
||||||
|
|
||||||
|
@ -26,172 +33,75 @@ from . import Captcha
|
||||||
class captchaSolver(Captcha):
|
class captchaSolver(Captcha):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
if sys.modules['python_anticaptcha'].__version__ < '0.6':
|
||||||
|
raise ImportError(
|
||||||
|
"Please upgrade the python module 'python_anticaptcha' via "
|
||||||
|
"pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||||
|
)
|
||||||
super(captchaSolver, self).__init__('anticaptcha')
|
super(captchaSolver, self).__init__('anticaptcha')
|
||||||
self.host = 'https://api.anti-captcha.com'
|
|
||||||
self.session = requests.Session()
|
|
||||||
self.captchaType = {
|
|
||||||
'reCaptcha': 'NoCaptchaTask',
|
|
||||||
'hCaptcha': 'HCaptchaTask',
|
|
||||||
'turnstile': 'TurnstileTask'
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
@staticmethod
|
def parseProxy(self, url, user_agent):
|
||||||
def checkErrorStatus(response):
|
parsed = urlparse(url)
|
||||||
if response.status_code in [500, 502]:
|
|
||||||
raise CaptchaServiceUnavailable(
|
|
||||||
f'anticaptcha: Server Side Error {response.status_code}'
|
|
||||||
)
|
|
||||||
|
|
||||||
payload = response.json()
|
return dict(
|
||||||
if payload['errorId'] >= 1:
|
proxy_type=parsed.scheme,
|
||||||
if 'errorDescription' in payload:
|
proxy_address=parsed.hostname,
|
||||||
raise CaptchaAPIError(
|
proxy_port=parsed.port,
|
||||||
payload['errorDescription']
|
proxy_login=parsed.username,
|
||||||
)
|
proxy_password=parsed.password,
|
||||||
else:
|
user_agent=user_agent
|
||||||
raise CaptchaAPIError(payload['errorCode'])
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
def requestJob(self, taskID):
|
|
||||||
if not taskID:
|
|
||||||
raise CaptchaBadJobID(
|
|
||||||
'anticaptcha: Error bad task id to request Captcha.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def _checkRequest(response):
|
|
||||||
self.checkErrorStatus(response)
|
|
||||||
|
|
||||||
if response.ok and response.json()['status'] == 'ready':
|
|
||||||
return True
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
response = polling2.poll(
|
|
||||||
lambda: self.session.post(
|
|
||||||
f'{self.host}/getTaskResult',
|
|
||||||
json={
|
|
||||||
'clientKey': self.clientKey,
|
|
||||||
'taskId': taskID
|
|
||||||
},
|
|
||||||
timeout=30
|
|
||||||
),
|
|
||||||
check_success=_checkRequest,
|
|
||||||
step=5,
|
|
||||||
timeout=180
|
|
||||||
)
|
|
||||||
|
|
||||||
if response:
|
|
||||||
payload = response.json()['solution']
|
|
||||||
if 'token' in payload:
|
|
||||||
return payload['token']
|
|
||||||
else:
|
|
||||||
return payload['gRecaptchaResponse']
|
|
||||||
else:
|
|
||||||
raise CaptchaTimeout(
|
|
||||||
"anticaptcha: Error failed to solve Captcha."
|
|
||||||
)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
def requestSolve(self, captchaType, url, siteKey):
|
|
||||||
def _checkRequest(response):
|
|
||||||
self.checkErrorStatus(response)
|
|
||||||
|
|
||||||
if response.ok and response.json()['taskId']:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'clientKey': self.clientKey,
|
|
||||||
'task': {
|
|
||||||
'websiteURL': url,
|
|
||||||
'websiteKey': siteKey,
|
|
||||||
'type': self.captchaType[captchaType]
|
|
||||||
},
|
|
||||||
'softId': 959
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.proxy:
|
|
||||||
data['task'].update(self.proxy)
|
|
||||||
else:
|
|
||||||
data['task']['type'] = f"{data['task']['type']}Proxyless"
|
|
||||||
|
|
||||||
response = polling2.poll(
|
|
||||||
lambda: self.session.post(
|
|
||||||
f'{self.host}/createTask',
|
|
||||||
json=data,
|
|
||||||
allow_redirects=False,
|
|
||||||
timeout=30
|
|
||||||
),
|
|
||||||
check_success=_checkRequest,
|
|
||||||
step=5,
|
|
||||||
timeout=180
|
|
||||||
)
|
|
||||||
|
|
||||||
if response:
|
|
||||||
return response.json()['taskId']
|
|
||||||
else:
|
|
||||||
raise CaptchaBadJobID(
|
|
||||||
'anticaptcha: Error no task id was returned.'
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
||||||
taskID = None
|
if not captchaParams.get('api_key'):
|
||||||
|
raise CaptchaParameter("anticaptcha: Missing api_key parameter.")
|
||||||
|
|
||||||
if not captchaParams.get('clientKey'):
|
client = AnticaptchaClient(captchaParams.get('api_key'))
|
||||||
raise CaptchaParameter(
|
|
||||||
"anticaptcha: Missing clientKey parameter."
|
|
||||||
)
|
|
||||||
|
|
||||||
self.clientKey = captchaParams.get('clientKey')
|
|
||||||
|
|
||||||
if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
|
if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
|
||||||
hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))
|
captchaMap = {
|
||||||
|
'reCaptcha': NoCaptchaTask,
|
||||||
if not hostParsed.scheme:
|
'hCaptcha': HCaptchaTask
|
||||||
raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')
|
|
||||||
|
|
||||||
if not hostParsed.netloc:
|
|
||||||
raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')
|
|
||||||
|
|
||||||
ports = {
|
|
||||||
'http': 80,
|
|
||||||
'https': 443
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.proxy = {
|
proxy = self.parseProxy(
|
||||||
'proxyType': hostParsed.scheme,
|
captchaParams.get('proxy', {}).get('https'),
|
||||||
'proxyAddress': hostParsed.hostname,
|
captchaParams.get('User-Agent', '')
|
||||||
'proxyPort': hostParsed.port if hostParsed.port else ports[self.proxy['proxyType']],
|
)
|
||||||
'proxyLogin': hostParsed.username,
|
|
||||||
'proxyPassword': hostParsed.password,
|
task = captchaMap[captchaType](
|
||||||
}
|
url,
|
||||||
|
siteKey,
|
||||||
|
**proxy
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self.proxy = None
|
captchaMap = {
|
||||||
|
'reCaptcha': NoCaptchaTaskProxylessTask,
|
||||||
|
'hCaptcha': HCaptchaTaskProxyless
|
||||||
|
}
|
||||||
|
task = captchaMap[captchaType](url, siteKey)
|
||||||
|
|
||||||
try:
|
if not hasattr(client, 'createTaskSmee'):
|
||||||
taskID = self.requestSolve(captchaType, url, siteKey)
|
raise NotImplementedError(
|
||||||
return self.requestJob(taskID)
|
"Please upgrade 'python_anticaptcha' via pip or download it from "
|
||||||
except polling2.TimeoutException:
|
"https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
|
||||||
try:
|
|
||||||
if taskID:
|
|
||||||
self.reportJob(taskID)
|
|
||||||
except polling2.TimeoutException:
|
|
||||||
raise CaptchaTimeout(
|
|
||||||
"anticaptcha: Captcha solve took to long and also failed "
|
|
||||||
f"reporting the task with task id {taskID}."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
raise CaptchaTimeout(
|
job = client.createTaskSmee(task, timeout=180)
|
||||||
"anticaptcha: Captcha solve took to long to execute "
|
|
||||||
f"task id {taskID}, aborting."
|
try:
|
||||||
)
|
job.join(maximum_time=180)
|
||||||
|
except (AnticaptchaException) as e:
|
||||||
|
raise CaptchaTimeout(f"{getattr(e, 'message', e)}")
|
||||||
|
|
||||||
|
if 'solution' in job._last_result:
|
||||||
|
return job.get_solution_response()
|
||||||
|
else:
|
||||||
|
raise CaptchaAPIError('Job did not return `solution` key in payload.')
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
|
@ -29,11 +29,6 @@ class captchaSolver(Captcha):
|
||||||
super(captchaSolver, self).__init__('capmonster')
|
super(captchaSolver, self).__init__('capmonster')
|
||||||
self.host = 'https://api.capmonster.cloud'
|
self.host = 'https://api.capmonster.cloud'
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.captchaType = {
|
|
||||||
'reCaptcha': 'NoCaptchaTask',
|
|
||||||
'hCaptcha': 'HCaptchaTask',
|
|
||||||
'turnstile': 'TurnstileTask'
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@ -84,11 +79,7 @@ class captchaSolver(Captcha):
|
||||||
)
|
)
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
payload = response.json()['solution']
|
return response.json()['solution']['gRecaptchaResponse']
|
||||||
if 'token' in payload:
|
|
||||||
return payload['token']
|
|
||||||
else:
|
|
||||||
return payload['gRecaptchaResponse']
|
|
||||||
else:
|
else:
|
||||||
raise CaptchaTimeout(
|
raise CaptchaTimeout(
|
||||||
"CapMonster: Error failed to solve Captcha."
|
"CapMonster: Error failed to solve Captcha."
|
||||||
|
@ -110,9 +101,9 @@ class captchaSolver(Captcha):
|
||||||
'task': {
|
'task': {
|
||||||
'websiteURL': url,
|
'websiteURL': url,
|
||||||
'websiteKey': siteKey,
|
'websiteKey': siteKey,
|
||||||
'type': self.captchaType[captchaType]
|
'softId': 37,
|
||||||
},
|
'type': 'NoCaptchaTask' if captchaType == 'reCaptcha' else 'HCaptchaTask'
|
||||||
'softId': 37
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.proxy:
|
if self.proxy:
|
||||||
|
|
|
@ -1,188 +0,0 @@
|
||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
try:
|
|
||||||
from urlparse import urlparse
|
|
||||||
except ImportError:
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from ..exceptions import (
|
|
||||||
CaptchaServiceUnavailable,
|
|
||||||
CaptchaAPIError,
|
|
||||||
CaptchaTimeout,
|
|
||||||
CaptchaParameter,
|
|
||||||
CaptchaBadJobID
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
import polling2
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("Please install the python module 'polling2' via pip")
|
|
||||||
|
|
||||||
from . import Captcha
|
|
||||||
|
|
||||||
|
|
||||||
class captchaSolver(Captcha):
    """Captcha provider that submits solve jobs to the CapSolver HTTP API.

    The provider creates a task through ``/createTask`` and then polls
    ``/getTaskResult`` until the task reports ``ready``.
    """

    def __init__(self):
        super(captchaSolver, self).__init__('capsolver')
        self.host = 'https://api.capsolver.com'
        self.session = requests.Session()
        # Maps the captcha names used by the caller to CapSolver task types.
        self.captchaType = {
            'reCaptcha': 'ReCaptchaV2Task',
            'hCaptcha': 'HCaptchaTask',
            'turnstile': 'AntiCloudflareTask'
        }

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response, fnct):
        """Raise when *response* carries a server-side or API-level error.

        Args:
            response: HTTP response returned by the CapSolver API.
            fnct: Name of the calling operation, used only in the error message.

        Raises:
            CaptchaServiceUnavailable: The HTTP status is 500 or 502.
            CaptchaAPIError: The JSON body has an ``errorDescription`` other
                than the "Current system busy" notice.
        """
        if response.status_code in [500, 502]:
            raise CaptchaServiceUnavailable(f'CapSolver: Server Side Error {response.status_code}')

        try:
            rPayload = response.json()
        except Exception:
            # A body that is not JSON carries no API error to report.
            return

        # "Current system busy" is tolerated so that the caller keeps polling.
        if rPayload.get('errorDescription', False) and 'Current system busy' not in rPayload['errorDescription']:
            raise CaptchaAPIError(
                f"CapSolver -> {fnct} -> {rPayload.get('errorDescription')}"
            )

    # ------------------------------------------------------------------------------- #

    def requestJob(self, jobID):
        """Poll for the result of task *jobID* and return the solved token.

        Returns:
            ``solution['token']`` when present, otherwise
            ``solution['gRecaptchaResponse']``.

        Raises:
            CaptchaBadJobID: *jobID* is empty.
            CaptchaTimeout: No usable solution could be read from the response.
        """
        if not jobID:
            raise CaptchaBadJobID("CapSolver: Error bad job id to request task result.")

        def _checkRequest(response):
            self.checkErrorStatus(response, 'requestJob')
            try:
                if response.ok and response.json()['status'] == 'ready':
                    return True
            except Exception:
                # Unparsable or incomplete body: treat as "not ready yet".
                pass
            return None

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/getTaskResult',
                json={
                    'clientKey': self.api_key,
                    'taskId': jobID
                },
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            try:
                rPayload = response.json()['solution']
                if 'token' in rPayload:
                    return rPayload['token']
                return rPayload['gRecaptchaResponse']
            except Exception:
                # Fall through to the generic failure below.
                pass

        raise CaptchaTimeout(
            "CapSolver: Error failed to solve Captcha."
        )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, captchaType, url, siteKey):
        """Create a CapSolver task and return its task id.

        Args:
            captchaType: Key of ``self.captchaType`` selecting the task type.
            url: Page URL the captcha was served on.
            siteKey: Site key of the captcha widget.

        Raises:
            CaptchaBadJobID: The API response did not contain a ``taskId``.
        """

        def _checkRequest(response):
            self.checkErrorStatus(response, 'createTask')
            try:
                rPayload = response.json()
                if response.ok:
                    if rPayload.get("taskId", False):
                        return True
            except Exception:
                # Unparsable body: keep polling.
                pass
            return None

        # ------------------------------------------------------------------------------- #

        payload = {
            'clientKey': self.api_key,
            'appId': '9E717405-8C70-49B3-B277-7C2F2196484B',
            'task': {
                'type': self.captchaType[captchaType],
                'websiteURL': url,
                'websiteKey': siteKey
            }
        }

        if captchaType == 'turnstile':
            payload['task']['metadata'] = {'type': 'turnstile'}

        if self.proxy:
            payload['task']['proxy'] = self.proxy
        else:
            # Without a proxy the "...Proxyless" variant of the task type is requested.
            payload['task']['type'] = f"{self.captchaType[captchaType]}Proxyless"

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/createTask',
                json=payload,
                allow_redirects=False,
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            rPayload = response.json()
            if rPayload.get('taskId'):
                return rPayload['taskId']

        raise CaptchaBadJobID(
            'CapSolver: Error no job id was returned.'
        )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
        """Solve a captcha through CapSolver and return the response token.

        Args:
            captchaType: One of the keys of ``self.captchaType``.
            url: Page URL the captcha was served on.
            siteKey: Site key of the captcha widget.
            captchaParams: Provider settings; ``api_key`` is required,
                ``proxy`` (dict with an ``https`` entry) and ``no_proxy``
                are optional.

        Raises:
            CaptchaParameter: ``api_key`` is missing, the captcha type is not
                supported, or the proxy URL has no scheme / netloc.
            CaptchaTimeout: Creating or polling the task timed out.
        """
        if not captchaParams.get('api_key'):
            raise CaptchaParameter("CapSolver: Missing api_key parameter.")
        self.api_key = captchaParams.get('api_key')

        # Fail with a provider error instead of a bare KeyError further down.
        if captchaType not in self.captchaType:
            raise CaptchaParameter(f'CapSolver: {captchaType} is not supported by this provider.')

        if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
            hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))

            if not hostParsed.scheme:
                raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')

            if not hostParsed.netloc:
                raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')

            self.proxy = captchaParams['proxy']['https']
        else:
            self.proxy = None

        # Bound before the try so the timeout message can be built even when
        # requestSolve() itself times out and no task id was ever assigned.
        jobID = None
        try:
            jobID = self.requestSolve(captchaType, url, siteKey)
            return self.requestJob(jobID)
        except polling2.TimeoutException:
            raise CaptchaTimeout(
                f"CapSolver: Captcha solve (task ID: {jobID}) took to long."
            )
|
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
captchaSolver()
|
|
|
@ -13,7 +13,6 @@ except ImportError:
|
||||||
raise ImportError("Please install the python module 'polling2' via pip")
|
raise ImportError("Please install the python module 'polling2' via pip")
|
||||||
|
|
||||||
from ..exceptions import (
|
from ..exceptions import (
|
||||||
CaptchaException,
|
|
||||||
CaptchaServiceUnavailable,
|
CaptchaServiceUnavailable,
|
||||||
CaptchaTimeout,
|
CaptchaTimeout,
|
||||||
CaptchaParameter,
|
CaptchaParameter,
|
||||||
|
@ -30,10 +29,6 @@ class captchaSolver(Captcha):
|
||||||
super(captchaSolver, self).__init__('deathbycaptcha')
|
super(captchaSolver, self).__init__('deathbycaptcha')
|
||||||
self.host = 'http://api.dbcapi.me/api'
|
self.host = 'http://api.dbcapi.me/api'
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.captchaType = {
|
|
||||||
'reCaptcha': '4',
|
|
||||||
'hCaptcha': '7'
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
@ -186,7 +181,7 @@ class captchaSolver(Captcha):
|
||||||
})
|
})
|
||||||
|
|
||||||
data.update({
|
data.update({
|
||||||
'type': self.captchaType[captchaType],
|
'type': '4',
|
||||||
'token_params': json.dumps(jPayload)
|
'token_params': json.dumps(jPayload)
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
@ -202,7 +197,7 @@ class captchaSolver(Captcha):
|
||||||
})
|
})
|
||||||
|
|
||||||
data.update({
|
data.update({
|
||||||
'type': self.captchaType[captchaType],
|
'type': '7',
|
||||||
'hcaptcha_params': json.dumps(jPayload)
|
'hcaptcha_params': json.dumps(jPayload)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -251,9 +246,6 @@ class captchaSolver(Captcha):
|
||||||
else:
|
else:
|
||||||
self.proxy = None
|
self.proxy = None
|
||||||
|
|
||||||
if captchaType not in self.captchaType:
|
|
||||||
raise CaptchaException(f'DeathByCaptcha: {captchaType} is not supported by this provider.')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
jobID = self.requestSolve(captchaType, url, siteKey)
|
jobID = self.requestSolve(captchaType, url, siteKey)
|
||||||
return self.requestJob(jobID)
|
return self.requestJob(jobID)
|
||||||
|
@ -270,7 +262,7 @@ class captchaSolver(Captcha):
|
||||||
f"DeathByCaptcha: Captcha solve took to long to execute job id {jobID}, aborting."
|
f"DeathByCaptcha: Captcha solve took to long to execute job id {jobID}, aborting."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
# ------------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
|
||||||
captchaSolver()
|
captchaSolver()
|
||||||
|
|
|
@ -1,490 +0,0 @@
|
||||||
# Cloudflare V1
|
|
||||||
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
from copy import deepcopy
|
|
||||||
from collections import OrderedDict
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
try:
|
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
except ImportError:
|
|
||||||
if sys.version_info >= (3, 4):
|
|
||||||
import html
|
|
||||||
else:
|
|
||||||
from html.parser import HTMLParser
|
|
||||||
|
|
||||||
try:
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
except ImportError:
|
|
||||||
from urllib.parse import urlparse, urljoin
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
from .exceptions import (
|
|
||||||
CloudflareCode1020,
|
|
||||||
CloudflareIUAMError,
|
|
||||||
CloudflareSolveError,
|
|
||||||
CloudflareChallengeError,
|
|
||||||
CloudflareCaptchaError,
|
|
||||||
CloudflareCaptchaProvider
|
|
||||||
)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
from .captcha import Captcha
|
|
||||||
from .interpreters import JavaScriptInterpreter
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
||||||
|
|
||||||
|
|
||||||
class Cloudflare():
    """Detect and answer Cloudflare "version 1" challenge pages.

    The instance keeps a reference to the owning scraper object and uses it
    for sending requests, reading configuration (``captcha``, ``proxies``,
    ``delay``, ``interpreter``, ``debug``, ``doubleDown``) and reporting
    errors through ``simpleException``.
    """

    def __init__(self, cloudscraper):
        self.cloudscraper = cloudscraper

    # ------------------------------------------------------------------------------- #
    # Unescape / decode html entities
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def unescape(html_text):
        """Return *html_text* with HTML entities decoded."""
        if sys.version_info >= (3, 4):
            return html.unescape(html_text)

        # Older interpreters only provide the HTMLParser based implementation.
        return HTMLParser().unescape(html_text)

    # ------------------------------------------------------------------------------- #
    # check if the response contains a valid Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_IUAM_Challenge(resp):
        """Return a truthy value when *resp* looks like an IUAM (JS) challenge.

        The result is the last evaluated operand (a regex match on success),
        so callers should rely on truthiness only.
        """
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code in [429, 503]
                and re.search(r'/cdn-cgi/images/trace/jsch/', resp.text, re.M | re.S)
                and re.search(
                    r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            # Not a response-like object.
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains new Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    def is_New_IUAM_Challenge(self, resp):
        """Return a truthy value when *resp* is a version 2 IUAM challenge."""
        try:
            return (
                self.is_IUAM_Challenge(resp)
                and re.search(
                    r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains a v2 hCaptcha Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    def is_New_Captcha_Challenge(self, resp):
        """Return a truthy value when *resp* is a version 2 captcha challenge."""
        try:
            return (
                self.is_Captcha_Challenge(resp)
                and re.search(
                    r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/(captcha|managed)/v1''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains a Cloudflare hCaptcha challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_Captcha_Challenge(resp):
        """Return a truthy value when *resp* looks like a captcha challenge."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(r'/cdn-cgi/images/trace/(captcha|managed)/', resp.text, re.M | re.S)
                and re.search(
                    r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains Firewall 1020 Error
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_Firewall_Blocked(resp):
        """Return a truthy value when *resp* is a Cloudflare 1020 block page."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(
                    r'<span class="cf-error-code">1020</span>',
                    resp.text,
                    re.M | re.DOTALL
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
    # ------------------------------------------------------------------------------- #

    def is_Challenge_Request(self, resp):
        """Return True when *resp* is a version 1 challenge that can be answered.

        Firewall blocks and version 2 challenges are reported through
        ``cloudscraper.simpleException`` before the version 1 check runs.
        """
        if self.is_Firewall_Blocked(resp):
            self.cloudscraper.simpleException(
                CloudflareCode1020,
                'Cloudflare has blocked this request (Code 1020 Detected).'
            )

        if self.is_New_Captcha_Challenge(resp):
            self.cloudscraper.simpleException(
                CloudflareChallengeError,
                'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
            )

        if self.is_New_IUAM_Challenge(resp):
            self.cloudscraper.simpleException(
                CloudflareChallengeError,
                'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
            )

        if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
            if self.cloudscraper.debug:
                print('Detected a Cloudflare version 1 challenge.')
            return True

        return False

    # ------------------------------------------------------------------------------- #
    # Try to solve cloudflare javascript challenge.
    # ------------------------------------------------------------------------------- #

    def IUAM_Challenge_Response(self, body, url, interpreter):
        """Build the submission for an IUAM (JavaScript) challenge.

        Args:
            body: HTML of the challenge page.
            url: URL the challenge page was served from.
            interpreter: Name handed to ``JavaScriptInterpreter.dynamicImport``.

        Returns:
            A dict with the absolute submit ``url`` and the form ``data``
            (the ``r``, ``jschl_vc`` and ``pass`` inputs plus ``jschl_answer``).
        """
        try:
            formPayload = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?'
                r'__cf_chl_f_tk=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.cloudscraper.simpleException(
                    CloudflareIUAMError,
                    "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict()
            for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
                inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
                if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
                    payload.update({inputPayload['name']: inputPayload['value']})

        except AttributeError:
            # re.search() returned None: the challenge form was not found.
            self.cloudscraper.simpleException(
                CloudflareIUAMError,
                "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
            )

        hostParsed = urlparse(url)

        try:
            payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
                interpreter
            ).solveChallenge(body, hostParsed.netloc)
        except Exception as e:
            self.cloudscraper.simpleException(
                CloudflareIUAMError,
                f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}"
            )

        return {
            'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
            'data': payload
        }

    # ------------------------------------------------------------------------------- #
    # Forward proxy / User-Agent settings to the captcha provider parameters.
    # ------------------------------------------------------------------------------- #

    def _update_captcha_params(self):
        """Copy the scraper's proxies and User-Agent into its captcha settings."""
        scraper = self.cloudscraper

        # The proxies live on the scraper object; this class has no
        # ``proxies`` attribute of its own.
        if scraper.proxies and scraper.proxies != scraper.captcha.get('proxy'):
            scraper.captcha['proxy'] = scraper.proxies

        scraper.captcha['User-Agent'] = scraper.headers['User-Agent']

    # ------------------------------------------------------------------------------- #
    # Try to solve the Captcha challenge via 3rd party.
    # ------------------------------------------------------------------------------- #

    def captcha_Challenge_Response(self, provider, provider_params, body, url):
        """Solve a captcha challenge through *provider* and build the submission.

        Args:
            provider: Provider name; lower-cased and handed to
                ``Captcha.dynamicImport``.
            provider_params: Settings passed on to the provider's ``solveCaptcha``.
            body: HTML of the challenge page.
            url: URL the challenge page was served from.

        Returns:
            A dict with the absolute submit ``url`` and the form ``data``.
        """
        try:
            formPayload = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.cloudscraper.simpleException(
                    CloudflareCaptchaError,
                    "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict(
                re.findall(
                    r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
                    formPayload['form']
                )
            )

            captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'

        except (AttributeError, KeyError):
            self.cloudscraper.simpleException(
                CloudflareCaptchaError,
                "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
            )

        # ------------------------------------------------------------------------------- #
        # Pass proxy parameter and User-Agent to provider to solve captcha.
        # ------------------------------------------------------------------------------- #

        self._update_captcha_params()

        # ------------------------------------------------------------------------------- #
        # Submit job to provider to request captcha solve.
        # ------------------------------------------------------------------------------- #

        captchaResponse = Captcha.dynamicImport(
            provider.lower()
        ).solveCaptcha(
            captchaType,
            url,
            payload['data-sitekey'],
            provider_params
        )

        # ------------------------------------------------------------------------------- #
        # Parse and handle the response of solved captcha.
        # ------------------------------------------------------------------------------- #

        dataPayload = OrderedDict([
            ('r', payload.get('name="r" value', '')),
            ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
            ('id', payload.get('data-ray')),
            ('g-recaptcha-response', captchaResponse)
        ])

        if captchaType == 'hCaptcha':
            dataPayload.update({'h-captcha-response': captchaResponse})

        hostParsed = urlparse(url)

        return {
            'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
            'data': dataPayload
        }

    # ------------------------------------------------------------------------------- #
    # Attempt to handle and send the challenge response back to cloudflare
    # ------------------------------------------------------------------------------- #

    def Challenge_Response(self, resp, **kwargs):
        """Answer the challenge in *resp* and return the follow-up response.

        Args:
            resp: Response that contained the challenge page.
            **kwargs: Keyword arguments of the original request; they are
                deep-copied before being adjusted for the challenge submission.
        """
        if self.is_Captcha_Challenge(resp):
            # ------------------------------------------------------------------------------- #
            # double down on the request as some websites are only checking
            # if cfuid is populated before issuing Captcha.
            # ------------------------------------------------------------------------------- #

            if self.cloudscraper.doubleDown:
                resp = self.cloudscraper.decodeBrotli(
                    self.cloudscraper.perform_request(resp.request.method, resp.url, **kwargs)
                )

            if not self.is_Captcha_Challenge(resp):
                return resp

            # ------------------------------------------------------------------------------- #
            # if no captcha provider raise a runtime error.
            # ------------------------------------------------------------------------------- #

            if (
                not self.cloudscraper.captcha
                or not isinstance(self.cloudscraper.captcha, dict)
                or not self.cloudscraper.captcha.get('provider')
            ):
                self.cloudscraper.simpleException(
                    CloudflareCaptchaProvider,
                    "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
                    "correctly via the 'captcha' parameter."
                )

            # ------------------------------------------------------------------------------- #
            # if provider is return_response, return the response without doing anything.
            # ------------------------------------------------------------------------------- #

            if self.cloudscraper.captcha.get('provider') == 'return_response':
                return resp

            # ------------------------------------------------------------------------------- #
            # Submit request to parser wrapper to solve captcha
            # ------------------------------------------------------------------------------- #

            submit_url = self.captcha_Challenge_Response(
                self.cloudscraper.captcha.get('provider'),
                self.cloudscraper.captcha,
                resp.text,
                resp.url
            )
        else:
            # ------------------------------------------------------------------------------- #
            # Cloudflare requires a delay before solving the challenge
            # ------------------------------------------------------------------------------- #

            if not self.cloudscraper.delay:
                try:
                    # The page states the delay in milliseconds; keep it in seconds.
                    self.cloudscraper.delay = float(
                        re.search(
                            r'submit\(\);\r?\n\s*},\s*([0-9]+)',
                            resp.text
                        ).group(1)
                    ) / float(1000)
                except (AttributeError, ValueError):
                    self.cloudscraper.simpleException(
                        CloudflareIUAMError,
                        "Cloudflare IUAM possibility malformed, issue extracing delay value."
                    )

            time.sleep(self.cloudscraper.delay)

            # ------------------------------------------------------------------------------- #

            submit_url = self.IUAM_Challenge_Response(
                resp.text,
                resp.url,
                self.cloudscraper.interpreter
            )

        # ------------------------------------------------------------------------------- #
        # Send the Challenge Response back to Cloudflare
        # ------------------------------------------------------------------------------- #

        if submit_url:

            def updateAttr(obj, name, newValue):
                # Merge newValue into obj[name]; start from an empty dict when
                # the entry is missing or does not support update().
                try:
                    obj[name].update(newValue)
                    return obj[name]
                except (AttributeError, KeyError):
                    obj[name] = {}
                    obj[name].update(newValue)
                    return obj[name]

            cloudflare_kwargs = deepcopy(kwargs)
            cloudflare_kwargs['allow_redirects'] = False
            cloudflare_kwargs['data'] = updateAttr(
                cloudflare_kwargs,
                'data',
                submit_url['data']
            )

            urlParsed = urlparse(resp.url)
            cloudflare_kwargs['headers'] = updateAttr(
                cloudflare_kwargs,
                'headers',
                {
                    'Origin': f'{urlParsed.scheme}://{urlParsed.netloc}',
                    'Referer': resp.url
                }
            )

            challengeSubmitResponse = self.cloudscraper.request(
                'POST',
                submit_url['url'],
                **cloudflare_kwargs
            )

            if challengeSubmitResponse.status_code == 400:
                self.cloudscraper.simpleException(
                    CloudflareSolveError,
                    'Invalid challenge answer detected, Cloudflare broken?'
                )

            # ------------------------------------------------------------------------------- #
            # Return response if Cloudflare is doing content pass through instead of 3xx
            # else request with redirect URL also handle protocol scheme change http -> https
            # ------------------------------------------------------------------------------- #

            if not challengeSubmitResponse.is_redirect:
                return challengeSubmitResponse

            cloudflare_kwargs = deepcopy(kwargs)
            cloudflare_kwargs['headers'] = updateAttr(
                cloudflare_kwargs,
                'headers',
                {'Referer': challengeSubmitResponse.url}
            )

            if not urlparse(challengeSubmitResponse.headers['Location']).netloc:
                # Relative redirect: resolve it against the submit URL.
                redirect_location = urljoin(
                    challengeSubmitResponse.url,
                    challengeSubmitResponse.headers['Location']
                )
            else:
                redirect_location = challengeSubmitResponse.headers['Location']

            return self.cloudscraper.request(
                resp.request.method,
                redirect_location,
                **cloudflare_kwargs
            )

        # ------------------------------------------------------------------------------- #
        # We shouldn't be here...
        # Re-request the original query and/or process again....
        # ------------------------------------------------------------------------------- #

        return self.cloudscraper.request(resp.request.method, resp.url, **kwargs)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------- #
|
|
|
@ -109,7 +109,7 @@ pysrt==1.1.2
|
||||||
stevedore==5.2.0
|
stevedore==5.2.0
|
||||||
|
|
||||||
# Required-by: subliminal_patch
|
# Required-by: subliminal_patch
|
||||||
cloudscraper==1.2.71
|
cloudscraper==1.2.58 # newer version dropped captcha v1 support
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
dnspython==2.6.1
|
dnspython==2.6.1
|
||||||
enzyme==0.4.1
|
enzyme==0.4.1
|
||||||
|
|
Loading…
Reference in a new issue