From 720aa06080df965cd6edb4226006e27679da3da0 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Sun, 11 Aug 2019 22:19:01 -0400
Subject: [PATCH 1/6] Implemented grequests in multiple modules to increase
 speed.

---
 requirements.txt                        |  3 +-
 theHarvester/__main__.py                | 46 ++++++++++++++++++------
 theHarvester/discovery/baidusearch.py   | 23 ++++++------
 theHarvester/discovery/bingsearch.py    | 48 ++++++++++++-------------
 theHarvester/discovery/constants.py     |  3 +-
 theHarvester/discovery/crtsh.py         | 20 ++++++-----
 theHarvester/discovery/dogpilesearch.py | 26 ++++++--------
 theHarvester/discovery/huntersearch.py  | 21 ++++++-----
 theHarvester/discovery/netcraft.py      |  9 +++--
 theHarvester/discovery/yahoosearch.py   | 27 ++++++--------
 theHarvester/lib/core.py                |  1 +
 11 files changed, 120 insertions(+), 107 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 235c5e61..19e17d36 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,5 @@ pytest==5.0.1
 PyYaml==5.1.1
 requests==2.22.0
 shodan==1.14.0
-texttable==1.6.2
\ No newline at end of file
+texttable==1.6.2
+grequests>=0.4.0
\ No newline at end of file

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index fb43e147..105046f7 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -53,7 +53,7 @@ def start():
     parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
     parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
     parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
-                            dogpile, duckduckgo, github-code, google,
+                            dogpile, duckduckgo, exalead, github-code, google,
                             hunter, intelx, linkedin, netcraft, securityTrails, threatcrowd, trello,
                             twitter, vhost, virustotal, yahoo, all''')

@@ -144,14 +144,16 @@ def start():
                     db.store_all(word, all_ip, 'ip', 'censys')

             elif engineitem == 'crtsh':
-                print('\033[94m[*] Searching CRT.sh. \033[0m')
-                search = crtsh.SearchCrtsh(word)
-                search.process()
-                hosts = filter(search.get_data())
-                all_hosts.extend(hosts)
-                db = stash.stash_manager()
-                db.store_all(word, all_hosts, 'host', 'CRTsh')
-
+                try:
+                    print('\033[94m[*] Searching CRT.sh. \033[0m')
+                    search = crtsh.SearchCrtsh(word)
+                    search.process()
+                    hosts = filter(search.get_data())
+                    all_hosts.extend(hosts)
+                    db = stash.stash_manager()
+                    db.store_all(word, all_hosts, 'host', 'CRTsh')
+                except Exception as e:
+                    pass
             elif engineitem == 'dnsdumpster':
                 try:
                     print('\033[94m[*] Searching DNSdumpster. \033[0m')

@@ -211,6 +213,18 @@ def start():
                 else:
                     pass

+            elif engineitem == 'exalead':
+                print('\033[94m[*] Searching Exalead \033[0m')
+                search = exaleadsearch.search_exalead(word, limit, start)
+                search.process()
+                emails = filter(search.get_emails())
+                all_emails.extend(emails)
+                hosts = filter(search.get_hostnames())
+                all_hosts.extend(hosts)
+                db = stash.stash_manager()
+                db.store_all(word, all_hosts, 'host', 'exalead')
+                db.store_all(word, all_emails, 'email', 'exalead')
+
             elif engineitem == 'google':
                 print('\033[94m[*] Searching Google. \033[0m')
                 search = googlesearch.search_google(word, limit, start)

@@ -363,7 +377,7 @@ def start():
             elif engineitem == 'yahoo':
                 print('\033[94m[*] Searching Yahoo. \033[0m')
-                search = yahoosearch.search_yahoo(word, limit)
+                search = yahoosearch.SearchYahoo(word, limit)
                 search.process()
                 hosts = search.get_hostnames()
                 emails = search.get_emails()

@@ -467,6 +481,17 @@ def start():
         db.store_all(word, all_hosts, 'email', 'duckduckgo')
         db.store_all(word, all_hosts, 'host', 'duckduckgo')

+        print('\033[94m[*] Searching Exalead \033[0m')
+        search = exaleadsearch.search_exalead(word, limit, start)
+        search.process()
+        emails = filter(search.get_emails())
+        all_emails.extend(emails)
+        hosts = filter(search.get_hostnames())
+        all_hosts.extend(hosts)
+        db = stash.stash_manager()
+        db.store_all(word, all_hosts, 'host', 'exalead')
+        db.store_all(word, all_emails, 'email', 'exalead')
+
         print('\033[94m[*] Searching Google. \033[0m')
         search = googlesearch.search_google(word, limit, start)
         search.process(google_dorking)

@@ -945,7 +970,6 @@ def entry_point():
         print('\n\n\033[93m[!] ctrl+c detected from user, quitting.\n\n \033[0m')
     except Exception:
         import traceback
-        print(traceback.print_exc())
         sys.exit(1)

diff --git a/theHarvester/discovery/baidusearch.py b/theHarvester/discovery/baidusearch.py
index a371476a..a1f5becd 100644
--- a/theHarvester/discovery/baidusearch.py
+++ b/theHarvester/discovery/baidusearch.py
@@ -1,8 +1,6 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import grequests


 class SearchBaidu:
@@ -13,24 +11,21 @@ def __init__(self, word, limit):
         self.server = 'www.baidu.com'
         self.hostname = 'www.baidu.com'
         self.limit = limit
-        self.counter = 0

     def do_search(self):
-        url = 'http://' + self.server + '/s?wd=%40' + self.word + '&pn=' + str(self.counter) + '&oq=' + self.word
-        url = f'https://{self.server}/s?wd=%40{self.word}&pn{self.counter}&oq={self.word}'
         headers = {
             'Host': self.hostname,
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        time.sleep(getDelay())
-        self.total_results += h.text
+        base_url = f'https://{self.server}/s?wd=%40{self.word}&pnxx&oq={self.word}'
+        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
+        resp = grequests.imap(req, size=5)
+        for x in resp:
+            self.total_results += x.content.decode('UTF-8')

     def process(self):
-        while self.counter <= self.limit and self.counter <= 1000:
-            self.do_search()
-            print(f'\tSearching {self.counter} results.')
-            self.counter += 10
+        self.do_search()

     def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
@@ -39,3 +34,5 @@ def get_emails(self):
     def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()
+
+

diff --git a/theHarvester/discovery/bingsearch.py b/theHarvester/discovery/bingsearch.py
index 1147f2ab..683da2be 100644
--- a/theHarvester/discovery/bingsearch.py
+++ b/theHarvester/discovery/bingsearch.py
@@ -1,8 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import grequests


 class SearchBing:
@@ -10,11 +9,10 @@ class SearchBing:
     def __init__(self, word, limit, start):
         self.word = word.replace(' ', '%20')
         self.results = ""
-        self.totalresults = ""
+        self.total_results = ""
         self.server = 'www.bing.com'
         self.apiserver = 'api.search.live.net'
         self.hostname = 'www.bing.com'
-        self.quantity = '50'
         self.limit = int(limit)
         self.bingApi = Core.bing_key()
         self.counter = start

@@ -26,9 +24,12 @@ def do_search(self):
             'Accept-Language': 'en-us,en',
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=('https://' + self.server + '/search?q=%40"' + self.word + '"&count=50&first=' + str(self.counter)), headers=headers)
-        self.results = h.text
-        self.totalresults += self.results
+        base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
+        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
+        resp = grequests.imap(req, size=5)
+        for x in resp:
+            self.total_results += x.content.decode('UTF-8')

     def do_search_api(self):
         url = 'https://api.cognitive.microsoft.com/bing/v7.0/search?'
@@ -40,9 +41,10 @@ def do_search_api(self):
             'safesearch': 'Off'
         }
         headers = {'User-Agent': Core.get_user_agent(), 'Ocp-Apim-Subscription-Key': self.bingApi}
-        h = requests.get(url=url, headers=headers, params=params)
-        self.results = h.text
-        self.totalresults += self.results
+        h = grequests.get(url=url, headers=headers, params=params)
+        response = grequests.map([h])
+        self.results = response[0].content.decode('UTF-8')
+        self.total_results += self.results

     def do_search_vhost(self):
         headers = {
@@ -51,39 +53,35 @@ def do_search_vhost(self):
             'Accept-Language': 'en-us,en',
             'User-agent': Core.get_user_agent()
         }
-        url = 'http://' + self.server + '/search?q=ip:' + self.word + '&go=&count=50&FORM=QBHL&qs=n&first=' + str(self.counter)
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
-        self.totalresults += self.results
+        base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
+        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
+        resp = grequests.imap(req, size=5)
+        for x in resp:
+            self.total_results += x.content.decode('UTF-8')

     def get_emails(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()

     def get_hostnames(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()

     def get_allhostnames(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames_all()

     def process(self, api):
         if api == 'yes':
             if self.bingApi is None:
                 raise MissingKey(True)
-        while self.counter < self.limit:
+        else:
             if api == 'yes':
                 self.do_search_api()
-                time.sleep(getDelay())
             else:
                 self.do_search()
-                time.sleep(getDelay())
-            self.counter += 50
             print(f'\tSearching {self.counter} results.')

     def process_vhost(self):
-        # Maybe it is good to use other limit for this.
-        while self.counter < self.limit:
-            self.do_search_vhost()
-            self.counter += 50
+        self.do_search_vhost()

diff --git a/theHarvester/discovery/constants.py b/theHarvester/discovery/constants.py
index eeb8dcaf..005d9bca 100644
--- a/theHarvester/discovery/constants.py
+++ b/theHarvester/discovery/constants.py
@@ -10,7 +10,8 @@ def filter(lst):
     :param lst: list to be filtered
     :return: new filtered list
     """
-    lst = set(lst)  # Remove duplicates.
+    if not isinstance(lst, set):
+        lst = set(lst)  # Remove duplicates.
     new_lst = []
     for item in lst:
         item = str(item)

diff --git a/theHarvester/discovery/crtsh.py b/theHarvester/discovery/crtsh.py
index d5dc5b2d..1894a1e5 100644
--- a/theHarvester/discovery/crtsh.py
+++ b/theHarvester/discovery/crtsh.py
@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 import requests
-import urllib3


 class SearchCrtsh:
@@ -9,13 +8,18 @@ def __init__(self, word):
         self.data = set()

     def do_search(self):
-        url = f'https://crt.sh/?q=%25.{self.word}&output=json'
-        headers = {'User-Agent': Core.get_user_agent()}
-        request = requests.get(url, params=headers, timeout=30)
-        if request.ok:
-            content = request.json()
-            data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content])
-            return data
+        try:
+            data = set()
+            url = f'https://crt.sh/?q=%25.{self.word}&output=json'
+            headers = {'User-Agent': Core.get_user_agent()}
+            request = requests.get(url, headers=headers, timeout=15)
+            if request.ok:
+                content = request.json()
+                data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content])
+                return data
+            return data
+        except Exception as e:
+            print(f'An exception has occurred in crtsh: {e}')

     def process(self):
         print('\tSearching results.')

diff --git a/theHarvester/discovery/dogpilesearch.py b/theHarvester/discovery/dogpilesearch.py
index b7769f14..74eaece5 100644
--- a/theHarvester/discovery/dogpilesearch.py
+++ b/theHarvester/discovery/dogpilesearch.py
@@ -1,7 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
+import grequests
 import time


@@ -13,31 +13,24 @@ def __init__(self, word, limit):
         self.server = 'www.dogpile.com'
         self.hostname = 'www.dogpile.com'
         self.limit = limit
-        self.counter = 0

     def do_search(self):
         #import ssl
         #ssl._create_default_https_context = ssl._create_unverified_context
         # Dogpile is hardcoded to return 10 results.
-        url = 'https://' + self.server + "/search/web?qsi=" + str(self.counter) \
-            + "&q=\"%40" + self.word + "\""
-        headers = {
-            'Host': self.hostname,
-            'User-agent': Core.get_user_agent()
-        }
         try:
-            h = requests.get(url=url, headers=headers, verify=False)
-            #print(h.text)
-            self.total_results += h.text
+            headers = {'User-agent': Core.get_user_agent()}
+            base_url = f'https://{self.server}/search/web?qsi=xx&q=%40{self.word}'
+            urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+            req = (grequests.get(u, headers=headers, verify=False, timeout=5) for u in urls)
+            resp = grequests.imap(req, size=5)
+            for x in resp:
+                self.total_results += x.content.decode('UTF-8')
         except Exception as e:
             print(f'Error Occurred: {e}')

     def process(self):
-        while self.counter <= self.limit and self.counter <= 1000:
-            self.do_search()
-            time.sleep(getDelay())
-            print(f'\tSearching {self.counter} results.')
-            self.counter += 10
+        self.do_search()

     def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
@@ -46,3 +39,4 @@ def get_emails(self):
     def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()
+

diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py
index feaee187..f0eb2253 100644
--- a/theHarvester/discovery/huntersearch.py
+++ b/theHarvester/discovery/huntersearch.py
@@ -1,30 +1,28 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
+import grequests


 class SearchHunter:

     def __init__(self, word, limit, start):
         self.word = word
-        self.limit = 100
+        self.limit = limit
         self.start = start
         self.key = Core.hunter_key()
+        #self.key = "e802ef64e560430c3612ab7e9f2d018fd9946177"
         if self.key is None:
             raise MissingKey(True)
-        self.results = ""
-        self.totalresults = ""
+        self.total_results = ""
         self.counter = start
-        self.database = "https://api.hunter.io/v2/domain-search?domain=" + word + "&api_key=" + self.key + "&limit=" + str(self.limit)
+        self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}'

     def do_search(self):
-        try:
-            r = requests.get(self.database)
-        except Exception as e:
-            print(e)
-        self.results = r.text
-        self.totalresults += self.results
+        request = grequests.get(self.database)
+        response = grequests.map([request])
+        self.total_results = response[0].content.decode('UTF-8')
+

     def process(self):
         self.do_search()  # Only need to do it once.
@@ -40,3 +38,4 @@ def get_hostnames(self):
     def get_profiles(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.profiles()
+

diff --git a/theHarvester/discovery/netcraft.py b/theHarvester/discovery/netcraft.py
index f3d001d5..a65453b8 100644
--- a/theHarvester/discovery/netcraft.py
+++ b/theHarvester/discovery/netcraft.py
@@ -12,7 +12,7 @@ def __init__(self, word):
         self.word = word.replace(' ', '%20')
         self.totalresults = ""
         self.server = 'netcraft.com'
-        self.base_url = 'https://searchdns.netcraft.com/?restriction=site+ends+with&host={domain}'
+        self.base_url = f'https://searchdns.netcraft.com/?restriction=site+ends+with&host={word}'
         self.session = requests.session()
         self.headers = {
             'User-Agent': Core.get_user_agent()
@@ -33,7 +33,7 @@ def get_next(self, resp):
         link_regx = re.compile('<a href="(.*?)"><b>Next page</b></a>')
         link = link_regx.findall(resp)
         link = re.sub(f'host=.*?{self.word}', f'host={self.domain}', link[0])
-        url = f'http://searchdns.netcraft.com{link}'
+        url = f'https://searchdns.netcraft.com{link.replace(" ", "%20")}'
         return url

     def create_cookies(self, cookie):
@@ -56,13 +56,12 @@ def do_search(self):
         start_url = self.base_url
         resp = self.request(start_url)
         cookies = self.get_cookies(resp.headers)
-        url = self.base_url.format(domain="yale.edu")
         while True:
-            resp = self.request(url, cookies).text
+            resp = self.request(self.base_url, cookies).text
             self.totalresults += resp
             if 'Next page' not in resp or resp is None:
                 break
-            url = self.get_next(resp)
+            self.base_url = self.get_next(resp)

     def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)

diff --git a/theHarvester/discovery/yahoosearch.py b/theHarvester/discovery/yahoosearch.py
index 65f510bb..a03a7a97 100644
--- a/theHarvester/discovery/yahoosearch.py
+++ b/theHarvester/discovery/yahoosearch.py
@@ -1,35 +1,30 @@
-from theHarvester.discovery.constants import *
+import grequests
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time


-class search_yahoo:
+class SearchYahoo:

     def __init__(self, word, limit):
         self.word = word
         self.total_results = ""
         self.server = 'search.yahoo.com'
-        self.hostname = 'search.yahoo.com'
         self.limit = limit
-        self.counter = 0

     def do_search(self):
-        url = 'http://' + self.server + '/search?p=\"%40' + self.word + '\"&b=' + str(self.counter) + '&pz=10'
+        base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10'
         headers = {
-            'Host': self.hostname,
+            'Host': self.server,
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.total_results += h.text
+        urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+        request = (grequests.get(url, headers=headers) for url in urls)
+        response = grequests.imap(request, size=5)
+        for entry in response:
+            self.total_results += entry.content.decode('UTF-8')

     def process(self):
-        while self.counter <= self.limit and self.counter <= 1000:
-            self.do_search()
-            time.sleep(getDelay())
-            print(f'\tSearching {self.counter} results.')
-            self.counter += 10
+        self.do_search()

     def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
@@ -46,4 +41,4 @@ def get_emails(self):

     def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
-        return rawres.hostnames()
+        return rawres.hostnames()
\ No newline at end of file

diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py
index 1038a560..1bb3eadc 100644
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@@ -72,6 +72,7 @@ def get_supportedengines():
 'dnsdumpster',
 'dogpile',
 'duckduckgo',
+'exalead',
 'github-code',
 'google',
 'hunter',

From f30742dbc042b18abf39504209f197a9973fd530 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Sun, 11 Aug 2019 22:26:10 -0400
Subject: [PATCH 2/6] Removed key.

---
 theHarvester/discovery/huntersearch.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py
index f0eb2253..40d41a81 100644
--- a/theHarvester/discovery/huntersearch.py
+++ b/theHarvester/discovery/huntersearch.py
@@ -11,7 +11,6 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.start = start
         self.key = Core.hunter_key()
-        #self.key = "e802ef64e560430c3612ab7e9f2d018fd9946177"
         if self.key is None:
             raise MissingKey(True)
         self.total_results = ""

From 0c3dac58a41d55ac9242c94efa9f0e6e23af5af9 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Thu, 15 Aug 2019 23:15:59 -0400
Subject: [PATCH 3/6] Updated parser to use str.replace instead of re.sub,
 added a small check to constants.py, and wrapped exalead in try/except.

---
 theHarvester/__main__.py            | 22 ++++++++++----------
 theHarvester/discovery/constants.py |  2 ++
 theHarvester/parsers/myparser.py    | 19 ++++---------------
 3 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index df041033..162ff856 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -482,16 +482,18 @@ def start():
         db.store_all(word, all_hosts, 'host', 'duckduckgo')

         print('\033[94m[*] Searching Exalead \033[0m')
-        search = exaleadsearch.search_exalead(word, limit, start)
-        search.process()
-        emails = filter(search.get_emails())
-        all_emails.extend(emails)
-        hosts = filter(search.get_hostnames())
-        all_hosts.extend(hosts)
-        db = stash.stash_manager()
-        db.store_all(word, all_hosts, 'host', 'exalead')
-        db.store_all(word, all_emails, 'email', 'exalead')
-
+        try:
+            search = exaleadsearch.search_exalead(word, limit, start)
+            search.process()
+            emails = filter(search.get_emails())
+            all_emails.extend(emails)
+            hosts = filter(search.get_hostnames())
+            all_hosts.extend(hosts)
+            db = stash.stash_manager()
+            db.store_all(word, all_hosts, 'host', 'exalead')
+            db.store_all(word, all_emails, 'email', 'exalead')
+        except Exception:
+            pass
         print('\033[94m[*] Searching Google. \033[0m')
         search = googlesearch.search_google(word, limit, start)
         search.process(google_dorking)

diff --git a/theHarvester/discovery/constants.py b/theHarvester/discovery/constants.py
index 005d9bca..7549c4c6 100644
--- a/theHarvester/discovery/constants.py
+++ b/theHarvester/discovery/constants.py
@@ -10,6 +10,8 @@ def filter(lst):
     :param lst: list to be filtered
     :return: new filtered list
     """
+    if lst is None:
+        return []
     if not isinstance(lst, set):
         lst = set(lst)  # Remove duplicates.
     new_lst = []

diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py
index a63b0612..0f212730 100644
--- a/theHarvester/parsers/myparser.py
+++ b/theHarvester/parsers/myparser.py
@@ -9,26 +9,15 @@ def __init__(self, results, word):
         self.temp = []

     def genericClean(self):
-        self.results = re.sub('<em>', '', self.results)
-        self.results = re.sub('<b>', '', self.results)
-        self.results = re.sub('</b>', '', self.results)
-        self.results = re.sub('</em>', '', self.results)
-        self.results = re.sub('%2f', ' ', self.results)
-        self.results = re.sub('%3a', ' ', self.results)
-        self.results = re.sub('<strong>', '', self.results)
-        self.results = re.sub('</strong>', '', self.results)
-        self.results = re.sub('<wbr>', '', self.results)
-        self.results = re.sub('</wbr>', '', self.results)
+        self.results = self.results.replace('<em>', '').replace('<b>', '').replace('</b>', '').replace('</em>', '')\
+            .replace('%2f', '').replace('%3a', '').replace('<strong>', '').replace('</strong>', '')\
+            .replace('<wbr>', '').replace('</wbr>', '')

         for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
             self.results = self.results.replace(e, ' ')

     def urlClean(self):
-        self.results = re.sub('<em>', '', self.results)
-        self.results = re.sub('</em>', '', self.results)
-        self.results = re.sub('%2f', ' ', self.results)
-        self.results = re.sub('%3a', ' ', self.results)
-
+        self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
         for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
             self.results = self.results.replace(e, ' ')

From a758757229811fa5c5d2322c059225e021a6ae92 Mon Sep 17 00:00:00 2001
From: Matt <36310667+NotoriousRebel@users.noreply.github.com>
Date: Sat, 17 Aug 2019 18:06:14 -0400
Subject: [PATCH 4/6] Update requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 37c1891a..241cbe1d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,4 @@ PyYaml==5.1.2
 requests==2.22.0
 shodan==1.14.0
 texttable==1.6.2
-grequests>=0.4.0
\ No newline at end of file
+grequests==0.4.0

From 3478aa3b15d942d48a5f9dc50201899bcc511fe4 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Sat, 17 Aug 2019 23:18:36 -0400
Subject: [PATCH 5/6] Implemented grequests in exaleadsearch and fixed module.
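
The core change here is the same one the rest of this series applies: serial
requests.get calls are swapped for batched grequests calls. As a rough sketch
of the pattern (the function name and the example.com URL below are
illustrative placeholders, not code from this patch), assuming grequests 0.4.x:

    import grequests

    def fetch_pages(base_url, limit, step=50, size=3):
        # One URL per results page; 'xx' marks where the page offset goes.
        urls = [base_url.replace('xx', str(num)) for num in range(0, limit, step)]
        # grequests.get() only builds AsyncRequest objects; nothing is sent
        # until imap() drains them, at most `size` connections at a time.
        reqs = (grequests.get(url, timeout=5) for url in urls)
        combined = ''
        for response in grequests.imap(reqs, size=size):
            combined += response.content.decode('UTF-8')
        return combined

    # e.g. fetch_pages('https://example.com/search?q=term&start_index=xx', 250)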
---
 theHarvester/discovery/exaleadsearch.py | 40 ++++++++++++++-----------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/theHarvester/discovery/exaleadsearch.py b/theHarvester/discovery/exaleadsearch.py
index 5cb1c1d7..6dc3e30e 100644
--- a/theHarvester/discovery/exaleadsearch.py
+++ b/theHarvester/discovery/exaleadsearch.py
@@ -2,9 +2,9 @@
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
-import requests
 import time
-
+import grequests
+import requests

 class search_exalead:

@@ -12,27 +12,33 @@ def __init__(self, word, limit, start):
         self.word = word
         self.files = 'pdf'
         self.results = ""
-        self.totalresults = ""
+        self.total_results = ""
         self.server = 'www.exalead.com'
         self.hostname = 'www.exalead.com'
         self.limit = limit
         self.counter = start

     def do_search(self):
-        url = 'http:// ' + self.server + '/search/web/results/?q=%40' + self.word \
-            + '&elements_per_page=50&start_index=' + str(self.counter)
+        base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
-        self.totalresults += self.results
+        urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
+        req = []
+        for u in urls:
+            req.append(grequests.get(u, headers=headers, timeout=5))
+            time.sleep(3)
+        resp = grequests.imap(tuple(req), size=3)
+        for x in resp:
+            # TODO if decoded content contains information about solving captcha print message to user to visit website
+            # TODO to solve it or use a vpn as it appears to be ip based
+            self.total_results += x.content.decode('UTF-8')

     def do_search_files(self, files):
-        url = 'http:// ' + self.server + '/search/web/results/?q=%40' + self.word \
-            + 'filetype:' + self.files + '&elements_per_page=50&start_index=' + str(self.counter)
+        url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
+              f'=50&start_index={self.counter} '
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
@@ -40,7 +46,7 @@ def do_search_files(self, files):
         }
         h = requests.get(url=url, headers=headers)
         self.results = h.text
-        self.totalresults += self.results
+        self.total_results += self.results

     def check_next(self):
         renext = re.compile('topNextUrl')
@@ -53,22 +59,20 @@ def check_next(self):
         return nexty

     def get_emails(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()

     def get_hostnames(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()

     def get_files(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
+        rawres = myparser.Parser(self.total_results, self.word)
         return rawres.fileurls(self.files)

     def process(self):
-        while self.counter <= self.limit:
-            self.do_search()
-            self.counter += 50
-            print(f'\tSearching {self.counter} results.')
+        print('Searching 0 results')
+        self.do_search()

     def process_files(self, files):
         while self.counter < self.limit:

From fdfb137eb2d066aa0b355aa4c62859c5b9777c58 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Sun, 18 Aug 2019 21:03:41 -0400
Subject: [PATCH 6/6] Changed variable names to be more helpful
 and added local import for exalead.

---
 theHarvester/__main__.py                |  2 ++
 theHarvester/discovery/baidusearch.py   |  8 ++++----
 theHarvester/discovery/bingsearch.py    | 20 ++++++++++----------
 theHarvester/discovery/dogpilesearch.py |  8 ++++----
 theHarvester/discovery/exaleadsearch.py | 12 ++++++------
 5 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index 53a49668..318bf2e4 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -224,6 +224,7 @@ def start():
             elif engineitem == 'exalead':
                 print('\033[94m[*] Searching Exalead \033[0m')
+                from theHarvester.discovery import exaleadsearch
                 search = exaleadsearch.search_exalead(word, limit, start)
                 search.process()
                 emails = filter(search.get_emails())
@@ -506,6 +507,7 @@ def start():
         print('\033[94m[*] Searching Exalead \033[0m')
         try:
+            from theHarvester.discovery import exaleadsearch
             search = exaleadsearch.search_exalead(word, limit, start)
             search.process()
             emails = filter(search.get_emails())

diff --git a/theHarvester/discovery/baidusearch.py b/theHarvester/discovery/baidusearch.py
index a1f5becd..b23e01be 100644
--- a/theHarvester/discovery/baidusearch.py
+++ b/theHarvester/discovery/baidusearch.py
@@ -19,10 +19,10 @@ def do_search(self):
         }
         base_url = f'https://{self.server}/s?wd=%40{self.word}&pnxx&oq={self.word}'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
-        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
-        resp = grequests.imap(req, size=5)
-        for x in resp:
-            self.total_results += x.content.decode('UTF-8')
+        req = (grequests.get(url, headers=headers, timeout=5) for url in urls)
+        responses = grequests.imap(req, size=5)
+        for response in responses:
+            self.total_results += response.content.decode('UTF-8')

     def process(self):
         self.do_search()

diff --git a/theHarvester/discovery/bingsearch.py b/theHarvester/discovery/bingsearch.py
index 683da2be..490f5c06 100644
--- a/theHarvester/discovery/bingsearch.py
+++ b/theHarvester/discovery/bingsearch.py
@@ -26,10 +26,10 @@ def do_search(self):
         }
         base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
-        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
-        resp = grequests.imap(req, size=5)
-        for x in resp:
-            self.total_results += x.content.decode('UTF-8')
+        req = (grequests.get(url, headers=headers, timeout=5) for url in urls)
+        responses = grequests.imap(req, size=5)
+        for response in responses:
+            self.total_results += response.content.decode('UTF-8')

     def do_search_api(self):
         url = 'https://api.cognitive.microsoft.com/bing/v7.0/search?'
@@ -41,8 +41,8 @@ def do_search_api(self):
             'safesearch': 'Off'
         }
         headers = {'User-Agent': Core.get_user_agent(), 'Ocp-Apim-Subscription-Key': self.bingApi}
-        h = grequests.get(url=url, headers=headers, params=params)
-        response = grequests.map([h])
+        grequests_resp = grequests.get(url=url, headers=headers, params=params)
+        response = grequests.map([grequests_resp])
         self.results = response[0].content.decode('UTF-8')
         self.total_results += self.results

@@ -55,10 +55,10 @@ def do_search_vhost(self):
         }
         base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
-        req = (grequests.get(u, headers=headers, timeout=5) for u in urls)
-        resp = grequests.imap(req, size=5)
-        for x in resp:
-            self.total_results += x.content.decode('UTF-8')
+        req = (grequests.get(url, headers=headers, timeout=5) for url in urls)
+        responses = grequests.imap(req, size=5)
+        for response in responses:
+            self.total_results += response.content.decode('UTF-8')

     def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)

diff --git a/theHarvester/discovery/dogpilesearch.py b/theHarvester/discovery/dogpilesearch.py
index 74eaece5..5ced83bf 100644
--- a/theHarvester/discovery/dogpilesearch.py
+++ b/theHarvester/discovery/dogpilesearch.py
@@ -22,10 +22,10 @@ def do_search(self):
             headers = {'User-agent': Core.get_user_agent()}
             base_url = f'https://{self.server}/search/web?qsi=xx&q=%40{self.word}'
             urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
-            req = (grequests.get(u, headers=headers, verify=False, timeout=5) for u in urls)
-            resp = grequests.imap(req, size=5)
-            for x in resp:
-                self.total_results += x.content.decode('UTF-8')
+            req = (grequests.get(url, headers=headers, verify=False, timeout=5) for url in urls)
+            responses = grequests.imap(req, size=5)
+            for response in responses:
+                self.total_results += response.content.decode('UTF-8')
         except Exception as e:
             print(f'Error Occurred: {e}')

diff --git a/theHarvester/discovery/exaleadsearch.py b/theHarvester/discovery/exaleadsearch.py
index 6dc3e30e..5a8617e6 100644
--- a/theHarvester/discovery/exaleadsearch.py
+++ b/theHarvester/discovery/exaleadsearch.py
@@ -27,14 +27,14 @@ def do_search(self):
         }
         urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
         req = []
-        for u in urls:
-            req.append(grequests.get(u, headers=headers, timeout=5))
+        for url in urls:
+            req.append(grequests.get(url, headers=headers, timeout=5))
             time.sleep(3)
-        resp = grequests.imap(tuple(req), size=3)
-        for x in resp:
+        responses = grequests.imap(tuple(req), size=3)
+        for response in responses:
             # TODO if decoded content contains information about solving captcha print message to user to visit website
             # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += x.content.decode('UTF-8')
+            self.total_results += response.content.decode('UTF-8')

     def process(self):
-        print('Searching 0 results')
+        print('Searching results')
         self.do_search()

     def process_files(self, files):
         while self.counter < self.limit:
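
A note on the one-shot lookups in this series (hunter.io and the Bing API):
those go through grequests.map with a single request instead of the paged
imap pattern. A minimal sketch of that variant, again with a placeholder URL
(api.example.com) rather than anything from these patches:

    import grequests

    def fetch_once(url, headers=None, params=None):
        request = grequests.get(url, headers=headers, params=params, timeout=5)
        # map() sends the batch and returns a list of responses; a request
        # that fails comes back as None instead of raising, so guard for it.
        response = grequests.map([request])[0]
        return '' if response is None else response.content.decode('UTF-8')

    # e.g. fetch_once('https://api.example.com/v2/domain-search', params={'domain': 'example.com'})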