mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 16:26:34 +08:00
Merge pull request #506 from KernelPan1k/PR-fix-incorrect-type-mime-qwant-discovery
qwant search engine discovery: fix the incorrect MIME type warning
This commit is contained in:
commit
1ea9721277
|
@ -1,4 +1,6 @@
|
||||||
|
import json
|
||||||
import math
|
import math
|
||||||
|
from json.decoder import JSONDecodeError
|
||||||
|
|
||||||
from theHarvester.lib.core import *
|
from theHarvester.lib.core import *
|
||||||
from theHarvester.parsers import myparser
|
from theHarvester.parsers import myparser
|
||||||
|
@ -27,31 +29,34 @@ def get_start_offset(self) -> int:
|
||||||
return max(start, 0)
|
return max(start, 0)
|
||||||
|
|
||||||
async def do_search(self) -> None:
|
async def do_search(self) -> None:
|
||||||
headers = {
|
headers = {'User-agent': Core.get_user_agent()}
|
||||||
'Host': "api.qwant.com",
|
|
||||||
'User-agent': Core.get_user_agent()
|
|
||||||
}
|
|
||||||
|
|
||||||
start = self.get_start_offset()
|
start = self.get_start_offset()
|
||||||
limit = self.limit + start
|
limit = self.limit + start
|
||||||
step = 10
|
step = 10
|
||||||
# https://help.qwant.com/help/qwant-search/searching/refine-search-with-operators/
|
|
||||||
# during my tests I had better results with this operator
|
|
||||||
word = f"«@{self.word}»"
|
|
||||||
|
|
||||||
api_urls = [
|
api_urls = [
|
||||||
f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4"
|
f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={self.word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4"
|
||||||
for offset in range(start, limit, step)
|
for offset in range(start, limit, step)
|
||||||
]
|
]
|
||||||
|
|
||||||
responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, json=True, proxy=self.proxy)
|
responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, proxy=self.proxy)
|
||||||
|
|
||||||
for response in responses:
|
for response in responses:
|
||||||
try:
|
try:
|
||||||
response_items = response['data']['result']['items']
|
json_response = json.loads(response)
|
||||||
|
except JSONDecodeError:
|
||||||
|
# the server sometimes responds with a 502 error whose body is not valid JSON
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
response_items = json_response['data']['result']['items']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
# {"status":"error","error":24}
|
if json_response.get("status", None) \
|
||||||
# https://www.qwant.com/anti_robot
|
and json_response.get("error", None) == 24:
|
||||||
|
# https://www.qwant.com/anti_robot
|
||||||
|
print("Rate limit reached - IP Blocked until captcha is solved")
|
||||||
|
break
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for response_item in response_items:
|
for response_item in response_items:
|
||||||
|
|
Loading…
Reference in a new issue