Merge pull request #506 from KernelPan1k/PR-fix-incorrect-type-mime-qwant-discovery

Qwant search engine discovery: fix incorrect MIME type warning
This commit is contained in:
J.Townsend 2020-08-17 21:58:53 +01:00 committed by GitHub
commit 1ea9721277
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,4 +1,6 @@
import json
import math import math
from json.decoder import JSONDecodeError
from theHarvester.lib.core import * from theHarvester.lib.core import *
from theHarvester.parsers import myparser from theHarvester.parsers import myparser
@ -27,31 +29,34 @@ def get_start_offset(self) -> int:
return max(start, 0) return max(start, 0)
async def do_search(self) -> None: async def do_search(self) -> None:
headers = { headers = {'User-agent': Core.get_user_agent()}
'Host': "api.qwant.com",
'User-agent': Core.get_user_agent()
}
start = self.get_start_offset() start = self.get_start_offset()
limit = self.limit + start limit = self.limit + start
step = 10 step = 10
# https://help.qwant.com/help/qwant-search/searching/refine-search-with-operators/
# during my tests I had better results with this operator
word = f"«@{self.word}»"
api_urls = [ api_urls = [
f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4" f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={self.word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4"
for offset in range(start, limit, step) for offset in range(start, limit, step)
] ]
responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, json=True, proxy=self.proxy) responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, proxy=self.proxy)
for response in responses: for response in responses:
try: try:
response_items = response['data']['result']['items'] json_response = json.loads(response)
except JSONDecodeError:
# sometimes error 502 from server
continue
try:
response_items = json_response['data']['result']['items']
except KeyError: except KeyError:
# {"status":"error","error":24} if json_response.get("status", None) \
# https://www.qwant.com/anti_robot and json_response.get("error", None) == 24:
# https://www.qwant.com/anti_robot
print("Rate limit reached - IP Blocked until captcha is solved")
break
continue continue
for response_item in response_items: for response_item in response_items: