From 90ed486184e6f6777ce72496fbc8abbad86cee30 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Wed, 25 Dec 2019 17:54:32 -0500
Subject: [PATCH 1/4] Removed old code, ported exalead to use aiohttp.

---
 theHarvester/discovery/bingsearch.py    |  2 --
 theHarvester/discovery/exaleadsearch.py | 43 ++++++++++---------------
 theHarvester/discovery/suip.py          |  9 ++----
 theHarvester/lib/core.py                | 19 ++++++-----
 4 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/theHarvester/discovery/bingsearch.py b/theHarvester/discovery/bingsearch.py
index ef503f4c..cbdb9319 100644
--- a/theHarvester/discovery/bingsearch.py
+++ b/theHarvester/discovery/bingsearch.py
@@ -1,7 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-# import grequests
 from theHarvester.lib.core import async_fetcher
 
 
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
         self.counter = start
 
     async def do_search(self):
-        print('hello from bing do search')
        headers = {
             'Host': self.hostname,
             'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
diff --git a/theHarvester/discovery/exaleadsearch.py b/theHarvester/discovery/exaleadsearch.py
index 9bc4e243..68c55f47 100644
--- a/theHarvester/discovery/exaleadsearch.py
+++ b/theHarvester/discovery/exaleadsearch.py
@@ -1,10 +1,7 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
-import time
-import grequests
-import requests
+import asyncio
 
 
 class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start
 
-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
         headers = {
             'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
             'User-agent': Core.get_user_agent()
         }
         urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
-        req = []
-        for url in urls:
-            req.append(grequests.get(url, headers=headers, timeout=5))
-            time.sleep(3)
-        responses = grequests.imap(tuple(req), size=3)
+        responses = await async_fetcher.fetch_all(urls, headers=headers)
         for response in responses:
-            # TODO if decoded content contains information about solving captcha print message to user to visit website
-            # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += response.content.decode('UTF-8')
+            self.total_results += response
 
-    def do_search_files(self, files):
+    async def do_search_files(self, files):
         url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
-              f'=50&start_index={self.counter} '
+            f'=50&start_index={self.counter} '
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
+        responses = await async_fetcher.fetch_all(url, headers=headers)
+        self.results = responses[0]
         self.total_results += self.results
 
-    def check_next(self):
+    async def check_next(self):
         renext = re.compile('topNextUrl')
         nextres = renext.findall(self.results)
         if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
             nexty = '0'
         return nexty
 
-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()
 
-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()
 
-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.fileurls(self.files)
 
-    def process(self):
+    async def process(self):
         print('Searching results')
-        self.do_search()
+        await self.do_search()
 
-    def process_files(self, files):
+    async def process_files(self, files):
         while self.counter < self.limit:
-            self.do_search_files(files)
-            time.sleep(getDelay())
+            await self.do_search_files(files)
             more = self.check_next()
+            await asyncio.sleep(2)
             if more == '1':
                 self.counter += 50
             else:
diff --git a/theHarvester/discovery/suip.py b/theHarvester/discovery/suip.py
index a8c8e14e..0e735921 100644
--- a/theHarvester/discovery/suip.py
+++ b/theHarvester/discovery/suip.py
@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 from bs4 import BeautifulSoup
-import requests
 import aiohttp
 import asyncio
 
@@ -17,7 +16,7 @@ def __init__(self, word: str):
     async def request(self, url, params):
         headers = {'User-Agent': Core.get_user_agent()}
         data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=360)
+        timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes we will change that to 6 minutes
         # Depending on the domain and if it has a lot of subdomains you may want to tweak it
         # The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
                 hosts: list = str(soup.find('pre')).splitlines()
                 await self.clean_hosts(hosts)
         except Exception as e:
-            print('An exception has occurred: ', e)
-            import traceback as t
-            t.print_exc()
+            print(f'An exception has occurred: {e}')
 
     async def get_hostnames(self) -> set:
         return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
             if host[0] == '.':
                 self.totalhosts.add(host[1:])
             else:
-                self.totalhosts.add(host)
+                self.totalhosts.add(host)
\ No newline at end of file
diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py
index 07d4c169..dfdb20d9 100644
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@@ -378,14 +378,17 @@ class async_fetcher:
     async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         # This fetch method solely focuses on get requests
         # TODO determine if method for post requests is necessary
-        if len(params) == 0:
-            async with session.get(url, params=params) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
-        else:
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
+        try:
+            if params != '':
+                async with session.get(url, params=params) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+        except Exception:
+            return ''
 
     @staticmethod
     async def fetch_all(urls, headers='', params='') -> list:

From b169a00f45ed4c3e6bb3755a9acadaf8f1ca3e9b Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Wed, 25 Dec 2019 23:42:17 -0500
Subject: [PATCH 2/4] Added fix for storing emails.

---
 theHarvester/__main__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index c2fea6b9..647589de 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -103,6 +103,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
             db_stash.store_all(word, all_hosts, 'host', source)
         if store_emails:
             email_list = filter(await search_engine.get_emails())
+            all_emails.extend(email_list)
             db_stash.store_all(word, email_list, 'email', source)
         if store_ip:
             ips_list = await search_engine.get_ips()
@@ -175,7 +176,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                     if isinstance(e, MissingKey):
                         print(e)
                     else:
-                        pass
+                        print(e)
 
             elif engineitem == 'certspotter':
                 print('\033[94m[*] Searching CertSpotter. \033[0m')
@@ -664,4 +665,6 @@ async def entry_point():
 
 
 if __name__ == '__main__':
+    #import uvloop
+    #uvloop.install()
     asyncio.run(main=entry_point())

From 35dc8a86325ca94c81d8938260d7998daded3a12 Mon Sep 17 00:00:00 2001
From: Matt <36310667+NotoriousRebel@users.noreply.github.com>
Date: Wed, 25 Dec 2019 23:46:28 -0500
Subject: [PATCH 3/4] Update __main__.py

Added fix to make sure emails are properly stored.
---
 theHarvester/__main__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index 2c3cc5b4..c52eb245 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -97,6 +97,7 @@ def store(search_engine: Any, source: str, process_param: Any = None, store_host
             db_stash.store_all(word, all_hosts, 'host', source)
         if store_emails:
             email_list = filter(search_engine.get_emails())
+            all_emails.extend(email_list)
             db_stash.store_all(word, email_list, 'email', source)
         if store_ip:
             ips_list = search_engine.get_ips()

From 0a3265860a178e46254fac4c77bd477844b9b091 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Wed, 25 Dec 2019 23:51:28 -0500
Subject: [PATCH 4/4] Ported hunter+exalead from grequests to aiohttp.

---
 theHarvester/discovery/exaleadsearch.py |  2 +-
 theHarvester/discovery/huntersearch.py  | 18 ++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/theHarvester/discovery/exaleadsearch.py b/theHarvester/discovery/exaleadsearch.py
index 68c55f47..fae014e9 100644
--- a/theHarvester/discovery/exaleadsearch.py
+++ b/theHarvester/discovery/exaleadsearch.py
@@ -36,7 +36,7 @@ async def do_search_files(self, files):
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        responses = await async_fetcher.fetch_all(url, headers=headers)
+        responses = await async_fetcher.fetch_all([url], headers=headers)
         self.results = responses[0]
         self.total_results += self.results
 
diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py
index e4bbc8c9..8699a4e8 100644
--- a/theHarvester/discovery/huntersearch.py
+++ b/theHarvester/discovery/huntersearch.py
@@ -1,7 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import grequests
 
 
 class SearchHunter:
@@ -17,22 +16,21 @@ def __init__(self, word, limit, start):
         self.counter = start
         self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}'
 
-    def do_search(self):
-        request = grequests.get(self.database)
-        response = grequests.map([request])
-        self.total_results = response[0].content.decode('UTF-8')
+    async def do_search(self):
+        responses = await async_fetcher.fetch_all([self.database], headers={'User-Agent': Core.get_user_agent()})
+        self.total_results += responses[0]
 
-    def process(self):
-        self.do_search()  # Only need to do it once.
+    async def process(self):
+        await self.do_search()  # Only need to do it once.
 
-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()
 
-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()
 
-    def get_profiles(self):
+    async def get_profiles(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.profiles()
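
Note on the helper these patches build on: the hunks above only show the modified body of async_fetcher.fetch and the signature of async_fetcher.fetch_all in theHarvester/lib/core.py, yet every ported module (exalead, hunter) now calls fetch_all with a list of URLs and optional headers and concatenates the returned strings. The sketch below is a minimal, self-contained illustration of how such a helper can be written with aiohttp, consistent with the signatures and call sites shown in this series. The body of the real fetch_all is not part of this diff; the timeout value, fallback User-Agent, and the demo URLs below are assumptions made only for the example.

import asyncio
from typing import Union

import aiohttp


class async_fetcher:
    @staticmethod
    async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
        # GET-only helper, mirroring the core.py hunk above: on any error return ''.
        try:
            if params != '':
                async with session.get(url, params=params) as response:
                    await asyncio.sleep(2)
                    return await response.text() if json is False else await response.json()
            else:
                async with session.get(url) as response:
                    await asyncio.sleep(2)
                    return await response.text() if json is False else await response.json()
        except Exception:
            return ''

    @staticmethod
    async def fetch_all(urls, headers='', params='') -> list:
        # Assumed body: one shared ClientSession, all GET requests gathered concurrently.
        # Timeout and fallback headers are illustrative, not taken from the diff.
        timeout = aiohttp.ClientTimeout(total=60)
        if headers == '':
            headers = {'User-Agent': 'theHarvester-example'}
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            return await asyncio.gather(*(async_fetcher.fetch(session, url, params) for url in urls))


async def demo():
    # Hypothetical usage mirroring the ported do_search methods:
    # fetch several pages concurrently and concatenate the bodies for parsing.
    pages = await async_fetcher.fetch_all(['https://example.com/a', 'https://example.com/b'])
    return ''.join(pages)


if __name__ == '__main__':
    print(len(asyncio.run(demo())))

Under that assumption, a ported caller such as SearchHunter.do_search simply awaits fetch_all([self.database], ...) and reads responses[0]; fetch_all expects a list of URLs, which is also why patch 4 wraps the single exalead file-search URL in [url].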