From 00b22ba5b31ba6eae4d838a8eba485d1ad9199bf Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 17 Aug 2020 17:39:53 +0200 Subject: [PATCH 1/2] qwant search engine discovery: fix warning incorrect mimetype --- theHarvester/discovery/qwantsearch.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/theHarvester/discovery/qwantsearch.py b/theHarvester/discovery/qwantsearch.py index 89539696..3b21e4ad 100644 --- a/theHarvester/discovery/qwantsearch.py +++ b/theHarvester/discovery/qwantsearch.py @@ -1,4 +1,6 @@ +import json import math +from json.decoder import JSONDecodeError from theHarvester.lib.core import * from theHarvester.parsers import myparser @@ -44,11 +46,16 @@ async def do_search(self) -> None: for offset in range(start, limit, step) ] - responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, json=True, proxy=self.proxy) + responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, proxy=self.proxy) for response in responses: try: - response_items = response['data']['result']['items'] + json_response = json.loads(response) + except JSONDecodeError: + continue + + try: + response_items = json_response['data']['result']['items'] except KeyError: # {"status":"error","error":24} # https://www.qwant.com/anti_robot From 31299083419d06d9ecb9f7fa18d64d8c1093e596 Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 17 Aug 2020 21:10:51 +0200 Subject: [PATCH 2/2] improve search and display a message if IP is blocked by qwant --- theHarvester/discovery/qwantsearch.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/theHarvester/discovery/qwantsearch.py b/theHarvester/discovery/qwantsearch.py index 3b21e4ad..58c6119c 100644 --- a/theHarvester/discovery/qwantsearch.py +++ b/theHarvester/discovery/qwantsearch.py @@ -29,20 +29,14 @@ def get_start_offset(self) -> int: return max(start, 0) async def do_search(self) -> None: - headers = { - 'Host': "api.qwant.com", - 'User-agent': Core.get_user_agent() - } + headers = {'User-agent': Core.get_user_agent()} start = self.get_start_offset() limit = self.limit + start step = 10 - # https://help.qwant.com/help/qwant-search/searching/refine-search-with-operators/ - # during my tests I had better results with this operator - word = f"«@{self.word}»" api_urls = [ - f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4" + f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={self.word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4" for offset in range(start, limit, step) ] @@ -52,13 +46,17 @@ async def do_search(self) -> None: try: json_response = json.loads(response) except JSONDecodeError: + # sometimes error 502 from server continue try: response_items = json_response['data']['result']['items'] except KeyError: - # {"status":"error","error":24} - # https://www.qwant.com/anti_robot + if json_response.get("status", None) \ + and json_response.get("error", None) == 24: + # https://www.qwant.com/anti_robot + print("Rate limit reached - IP Blocked until captcha is solved") + break continue for response_item in response_items: