Merge pull request #506 from KernelPan1k/PR-fix-incorrect-type-mime-qwant-discovery

Qwant search engine discovery: fix incorrect MIME type warning
2024-09-22 16:26:34 +08:00 · 2020-08-17 21:58:53 +01:00 · 2020-08-17 21:58:53 +01:00 · 1ea9721277
parent 6e9da05e38 3129908341
commit 1ea9721277
1 changed files with 17 additions and 12 deletions
--- a/theHarvester/discovery/qwantsearch.py
+++ b/theHarvester/discovery/qwantsearch.py
@@ -1,4 +1,6 @@
 import json
 import math
 from json.decoder import JSONDecodeError
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
@@ -27,31 +29,34 @@ def get_start_offset(self) -> int:
        return max(start, 0)
    async def do_search(self) -> None:
-        headers = {
+        headers = {'User-agent': Core.get_user_agent()}
            'Host': "api.qwant.com",
            'User-agent': Core.get_user_agent()
        }
        start = self.get_start_offset()
        limit = self.limit + start
        step = 10
        # https://help.qwant.com/help/qwant-search/searching/refine-search-with-operators/
        # during my tests I had better results with this operator
        word = f"«@{self.word}»"
        api_urls = [
-            f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4"
+            f"https://api.qwant.com/api/search/web?count=10&offset={str(offset)}&q={self.word}&t=web&r=US&device=desktop&safesearch=0&locale=en_US&uiv=4"
            for offset in range(start, limit, step)
        ]
-        responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, json=True, proxy=self.proxy)
+        responses = await AsyncFetcher.fetch_all(api_urls, headers=headers, proxy=self.proxy)
        for response in responses:
            try:
-                response_items = response['data']['result']['items']
+                json_response = json.loads(response)
            except JSONDecodeError:
                # sometimes error 502 from server
                continue
            try:
                response_items = json_response['data']['result']['items']
            except KeyError:
-                # {"status":"error","error":24}
+                if json_response.get("status", None) \
-                # https://www.qwant.com/anti_robot
+                        and json_response.get("error", None) == 24:
                    # https://www.qwant.com/anti_robot
                    print("Rate limit reached - IP Blocked until captcha is solved")
                    break
                continue
            for response_item in response_items: