Merge pull request #62 from NotoriousRebel/master

Ported intelx to be asynchronous
2025-02-24 22:42:56 +08:00 · 2019-12-31 01:34:47 +00:00 · 2019-12-31 01:34:47 +00:00 · 483dcafb4f
commit 483dcafb4f
parent dc71512dd8 4564db96b7
3 changed files with 19 additions and 26 deletions
--- a/theHarvester/discovery/intelxsearch.py
+++ b/theHarvester/discovery/intelxsearch.py
@ -1,8 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import intelxparser
-import requests
-import time
+import asyncio


 class SearchIntelx:
@ -18,39 +17,33 @@ def __init__(self, word, limit):
        self.info = ()
        self.limit = limit

-    def do_search(self):
+    async def do_search(self):
        try:
            user_agent = Core.get_user_agent()
            headers = {'User-Agent': user_agent, 'x-key': self.key}
            # data is json that corresponds to what we are searching for, sort:2 means sort by most relevant
            data = f'{{"term": "{self.word}", "maxresults": {self.limit}, "media": 0, "sort": 2 , "terminate": []}}'
-            r = requests.post(f'{self.database}phonebook/search', data=data, headers=headers)
-
-            if r.status_code == 400:
-                raise Exception('Invalid json was passed in.')
-            time.sleep(1)
-
+            resp = await AsyncFetcher.post_fetch(url=f'{self.database}phonebook/search', headers=headers, data=data,
+                                                 json=True)
+            uuid = resp['id']
            # grab uuid to send get request to fetch data
-            uuid = r.json()['id']
+            await asyncio.sleep(2)
            url = f'{self.database}phonebook/search/result?id={uuid}&offset=0&limit={self.limit}'
-            r = requests.get(url, headers=headers)
-            time.sleep(1)
-
-            # TODO: add in future grab status from r.text and check if more results can be gathered
-            if r.status_code != 200:
-                raise Exception('Error occurred while searching intelx.')
-            self.results = r.json()
+            resp = await AsyncFetcher.fetch_all([url], headers=headers, json=True)
+            resp = resp[0]
+            # TODO: Check if more results can be gathered depending on status
+            self.results = resp
        except Exception as e:
            print(f'An exception has occurred: {e}')

-    def process(self):
-        self.do_search()
+    async def process(self):
+        await self.do_search()
        intelx_parser = intelxparser.Parser()
-        self.info = intelx_parser.parse_dictionaries(self.results)
+        self.info = await intelx_parser.parse_dictionaries(self.results)
        # Create parser and set self.info to tuple returned from parsing text.

-    def get_emails(self):
+    async def get_emails(self):
        return self.info[0]

-    def get_hostnames(self):
+    async def get_hostnames(self):
        return self.info[1]
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@ -415,7 +415,7 @@ async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
            return ''

    @staticmethod
-    async def fetch_all(urls, headers='', params='') -> list:
+    async def fetch_all(urls, headers='', params='', json=False) -> list:
        # By default timeout is 5 minutes, 30 seconds should suffice
        timeout = aiohttp.ClientTimeout(total=30)

@ -423,10 +423,10 @@ async def fetch_all(urls, headers='', params='') -> list:
            headers = {'User-Agent': Core.get_user_agent()}
        if len(params) == 0:
            async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url) for url in urls])
+                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
                return texts
        else:
            # Indicates the request has certain params
            async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params) for url in urls])
+                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
                return texts
--- a/theHarvester/parsers/intelxparser.py
+++ b/theHarvester/parsers/intelxparser.py
@ -4,7 +4,7 @@ def __init__(self):
        self.emails = set()
        self.hosts = set()

-    def parse_dictionaries(self, results: dict) -> tuple:
+    async def parse_dictionaries(self, results: dict) -> tuple:
        """
        Parse method to parse json results
        :param results: Dictionary containing a list of dictionaries known as selectors