Merge pull request #14 from NotoriousRebel/dev

ported hunter & exalead
Matt 2019-12-26 00:02:32 -05:00 committed by GitHub
commit 407b3f01d3
7 changed files with 44 additions and 55 deletions
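For orientation: this port replaces the blocking grequests/requests calls in the hunter and exalead modules with the shared async_fetcher helper, turning their public methods into coroutines. A minimal driver sketch follows (hypothetical code: SearchHunter, its constructor arguments, and the coroutine names come from the diff below; the module path and sample values are assumptions):

import asyncio
from theHarvester.discovery import huntersearch  # module path assumed for this sketch

async def demo() -> None:
    # word, limit, start mirror the __init__ signature shown in the diff;
    # hunter needs an API key configured, the MissingKey handling in the first hunk covers that case
    search = huntersearch.SearchHunter('example.com', 100, 0)
    await search.process()              # process() now awaits do_search() once
    emails = await search.get_emails()  # parser helpers are coroutines after the port
    hosts = await search.get_hostnames()
    print(emails, hosts)

if __name__ == '__main__':
    asyncio.run(demo())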


@@ -103,6 +103,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
db_stash.store_all(word, all_hosts, 'host', source)
if store_emails:
email_list = filter(await search_engine.get_emails())
all_emails.extend(email_list)
db_stash.store_all(word, email_list, 'email', source)
if store_ip:
ips_list = await search_engine.get_ips()
@@ -175,7 +176,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
if isinstance(e, MissingKey):
print(e)
else:
pass
print(e)
elif engineitem == 'certspotter':
print('\033[94m[*] Searching CertSpotter. \033[0m')
@@ -669,4 +670,6 @@ async def entry_point():
if __name__ == '__main__':
#import uvloop
#uvloop.install()
asyncio.run(main=entry_point())


@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
# import grequests
from theHarvester.lib.core import async_fetcher
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
self.counter = start
async def do_search(self):
print('hello from bing do search')
headers = {
'Host': self.hostname,
'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',


@@ -1,10 +1,7 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import re
import time
import grequests
import requests
import asyncio
class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
self.limit = limit
self.counter = start
def do_search(self):
async def do_search(self):
base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
headers = {
'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
'User-agent': Core.get_user_agent()
}
urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
req = []
for url in urls:
req.append(grequests.get(url, headers=headers, timeout=5))
time.sleep(3)
responses = grequests.imap(tuple(req), size=3)
responses = await async_fetcher.fetch_all(urls, headers=headers)
for response in responses:
# TODO: if the decoded content mentions a captcha, print a message telling the user to visit the website
# TODO: and solve it, or to use a VPN, since the blocking appears to be IP based
self.total_results += response.content.decode('UTF-8')
self.total_results += response
def do_search_files(self, files):
async def do_search_files(self, files):
url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
f'=50&start_index={self.counter} '
f'=50&start_index={self.counter} '
headers = {
'Host': self.hostname,
'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
'User-agent': Core.get_user_agent()
}
h = requests.get(url=url, headers=headers)
self.results = h.text
responses = await async_fetcher.fetch_all([url], headers=headers)
self.results = responses[0]
self.total_results += self.results
def check_next(self):
async def check_next(self):
renext = re.compile('topNextUrl')
nextres = renext.findall(self.results)
if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
nexty = '0'
return nexty
def get_emails(self):
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.emails()
def get_hostnames(self):
async def get_hostnames(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.hostnames()
def get_files(self):
async def get_files(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.fileurls(self.files)
def process(self):
async def process(self):
print('Searching results')
self.do_search()
await self.do_search()
def process_files(self, files):
async def process_files(self, files):
while self.counter < self.limit:
self.do_search_files(files)
time.sleep(getDelay())
await self.do_search_files(files)
more = await self.check_next()
await asyncio.sleep(2)
if more == '1':
self.counter += 50
else:


@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import grequests
class SearchHunter:
@@ -17,22 +16,21 @@ def __init__(self, word, limit, start):
self.counter = start
self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}'
def do_search(self):
request = grequests.get(self.database)
response = grequests.map([request])
self.total_results = response[0].content.decode('UTF-8')
async def do_search(self):
responses = await async_fetcher.fetch_all([self.database], headers={'User-Agent': Core.get_user_agent()})
self.total_results += responses[0]
def process(self):
self.do_search() # Only need to do it once.
async def process(self):
await self.do_search() # Only need to do it once.
def get_emails(self):
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.emails()
def get_hostnames(self):
async def get_hostnames(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.hostnames()
def get_profiles(self):
async def get_profiles(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.profiles()


@@ -1,6 +1,5 @@
from theHarvester.lib.core import *
from bs4 import BeautifulSoup
import requests
import aiohttp
import asyncio
@@ -17,7 +16,7 @@ def __init__(self, word: str):
async def request(self, url, params):
headers = {'User-Agent': Core.get_user_agent()}
data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
timeout = aiohttp.ClientTimeout(total=360)
timeout = aiohttp.ClientTimeout(total=720)
# aiohttp's default timeout is 5 minutes; we raise it to 12 minutes here
# Depending on the domain and how many subdomains it has, you may want to tweak it
# The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
hosts: list = str(soup.find('pre')).splitlines()
await self.clean_hosts(hosts)
except Exception as e:
print('An exception has occurred: ', e)
import traceback as t
t.print_exc()
print(f'An exception has occurred: {e}')
async def get_hostnames(self) -> set:
return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
if host[0] == '.':
self.totalhosts.add(host[1:])
else:
self.totalhosts.add(host)
self.totalhosts.add(host)
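The timeout comment in the hunk above invites tuning per target domain; a minimal sketch of making that value configurable (only aiohttp.ClientTimeout and the 720-second total come from the diff, the environment-variable override is purely hypothetical):

import os
import aiohttp

# 720 seconds matches the total used above; the env var name is an assumption for illustration
total_seconds = int(os.environ.get('THEHARVESTER_TIMEOUT', '720'))
timeout = aiohttp.ClientTimeout(total=total_seconds)
# The timeout object would then be passed to the session, e.g. aiohttp.ClientSession(timeout=timeout)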


@@ -379,14 +379,17 @@ class async_fetcher:
async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
# This fetch method solely focuses on get requests
# TODO determine if method for post requests is necessary
if len(params) == 0:
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
try:
if params != '':
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
except Exception:
return ''
@staticmethod
async def fetch_all(urls, headers='', params='') -> list:
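The try/except added above means a failed GET now comes back as an empty string instead of propagating an exception. Assuming fetch_all() simply dispatches each URL through fetch() (its body is not shown here), a minimal consumption sketch with placeholder URLs:

import asyncio
from theHarvester.lib.core import async_fetcher, Core

async def demo() -> None:
    headers = {'User-Agent': Core.get_user_agent()}
    urls = ['https://example.com/a', 'https://example.com/b']  # placeholder URLs
    # fetch_all returns one response body per URL; per the wrapper above,
    # a request that raises yields '' rather than failing the whole batch
    responses = await async_fetcher.fetch_all(urls, headers=headers)
    for url, body in zip(urls, responses):
        print(url, len(body))

if __name__ == '__main__':
    asyncio.run(demo())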


@@ -9,7 +9,6 @@
except Exception as error:
print(f'{error}')
class GraphGenerator:
def __init__(self, domain):
@@ -92,4 +91,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
output_type='div')
return scatterchartcode
except Exception as e:
print(f'Error generating HTML for the historical graph for domain: {e}')
print(f'Error generating HTML for the historical graph for domain: {e}')