From 79d5eaef75c28b2eb5a963a9ed5bf726aaff3df4 Mon Sep 17 00:00:00 2001 From: L1ghtn1ng Date: Mon, 31 May 2021 16:55:13 +0100 Subject: [PATCH] Add new modules and tweaks everywhere --- api-keys.yaml | 8 +- theHarvester/__main__.py | 219 ++++++++++++--------- theHarvester/discovery/binaryedgesearch.py | 40 ++++ theHarvester/discovery/intelxsearch.py | 8 +- theHarvester/discovery/rocketreach.py | 66 +++++-- theHarvester/discovery/threatcrowd.py | 21 +- theHarvester/discovery/threatminer.py | 7 + theHarvester/discovery/urlscan.py | 21 +- theHarvester/discovery/zoomeyesearch.py | 199 +++++++++++++++++++ theHarvester/lib/api/api.py | 33 ++-- theHarvester/lib/core.py | 10 + 11 files changed, 480 insertions(+), 152 deletions(-) create mode 100644 theHarvester/discovery/binaryedgesearch.py create mode 100644 theHarvester/discovery/zoomeyesearch.py diff --git a/api-keys.yaml b/api-keys.yaml index c441cc7b..4306fe37 100644 --- a/api-keys.yaml +++ b/api-keys.yaml @@ -1,4 +1,7 @@ apikeys: + binaryedge: + key: + bing: key: @@ -7,7 +10,7 @@ apikeys: secret: github: - key: + key: hunter: key: @@ -32,3 +35,6 @@ apikeys: spyse: key: + + zoomeye: + key: diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index 6e3bb1eb..8ecf8b7b 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -1,23 +1,24 @@ #!/usr/bin/env python3 +from typing import Dict, List from theHarvester.discovery import * from theHarvester.discovery import dnssearch, takeover, shodansearch from theHarvester.discovery.constants import * from theHarvester.lib import hostchecker -from theHarvester.lib import reportgraph from theHarvester.lib import stash -from theHarvester.lib import statichtmlgenerator from theHarvester.lib.core import * import argparse import asyncio -import datetime +import aiofiles import json import netaddr import re import sys +import string +import secrets -async def start(): +async def start(rest_args=None): """Main program function""" parser = argparse.ArgumentParser(description='theHarvester is used to gather open source intelligence (OSINT) on a company or domain.') parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True) @@ -33,36 +34,54 @@ async def start(): parser.add_argument('-r', '--take-over', help='Check for takeovers.', default=False, action='store_true') parser.add_argument('-n', '--dns-lookup', help='Enable DNS server lookup, default False.', default=False, action='store_true') parser.add_argument('-c', '--dns-brute', help='Perform a DNS brute force on the domain.', default=False, action='store_true') - parser.add_argument('-f', '--filename', help='Save the results to an HTML,XML and JSON file.', default='', type=str) - parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, censys, certspotter, crtsh, + parser.add_argument('-f', '--filename', help='Save the results to an XML and JSON file.', default='', type=str) + parser.add_argument('-b', '--source', help='''baidu, bing, binaryedge, bingapi, bufferoverun, censys, certspotter, crtsh, dnsdumpster, duckduckgo, exalead, github-code, google, hackertarget, hunter, intelx, linkedin, linkedin_links, netcraft, omnisint, otx, pentesttools, projectdiscovery, qwant, rapiddns, rocketreach, securityTrails, spyse, sublist3r, threatcrowd, threatminer, - trello, twitter, urlscan, virustotal, yahoo''') + trello, twitter, urlscan, virustotal, yahoo, zoomeye''') - args = parser.parse_args() - filename: str = args.filename - dnsbrute = (args.dns_brute, False) + # determines if 
filename is coming from rest api or user + rest_filename = '' + # indicates this from the rest API + if rest_args: + if rest_args.source and rest_args.source == "getsources": + return list(sorted(Core.get_supportedengines())) + elif rest_args.dns_brute: + args = rest_args + dnsbrute = (rest_args.dns_brute, True) + else: + args = rest_args + # We need to make sure the filename is random as to not overwrite other files + filename: str = args.filename + alphabet = string.ascii_letters + string.digits + rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \ + if len(filename) != 0 else "" + + else: + args = parser.parse_args() + filename: str = args.filename + dnsbrute = (args.dns_brute, False) try: db = stash.StashManager() await db.do_init() except Exception: pass - all_emails: list = [] - all_hosts: list = [] - all_ip: list = [] + all_emails: List = [] + all_hosts: List = [] + all_ip: List = [] dnslookup = args.dns_lookup dnsserver = args.dns_server dnstld = args.dns_tld - engines = [] + engines: List = [] # If the user specifies - full: list = [] - ips: list = [] + full: List = [] + ips: List = [] google_dorking = args.google_dork - host_ip: list = [] + host_ip: List = [] limit: int = args.limit shodan = args.shodan start: int = args.start @@ -72,13 +91,16 @@ async def start(): word: str = args.domain takeover_status = args.take_over use_proxy = args.proxies - linkedin_people_list_tracker: list = [] - linkedin_links_tracker: list = [] - twitter_people_list_tracker: list = [] + linkedin_people_list_tracker: List = [] + linkedin_links_tracker: List = [] + twitter_people_list_tracker: List = [] + interesting_urls: list = [] + total_asns: list = [] async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False, store_emails: bool = False, store_ip: bool = False, store_people: bool = False, - store_links: bool = False, store_results: bool = False) -> None: + store_links: bool = False, store_results: bool = False, + store_interestingurls: bool = False, store_asns: bool = False) -> None: """ Persist details into the database. The details to be stored is controlled by the parameters passed to the method. 
@@ -92,6 +114,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor :param store_people: whether to store user details :param store_links: whether to store links :param store_results: whether to fetch details from get_results() and persist + :param store_interestingurls: whether to store interesting urls + :param store_asns: whether to store asns """ await search_engine.process(use_proxy) if process_param is None else await \ search_engine.process(process_param, use_proxy) @@ -128,24 +152,28 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor await db.store_all(word, all_emails, 'email', source) if store_people: people_list = await search_engine.get_people() + if source == 'twitter': + twitter_people_list_tracker.extend(people_list) + if source == 'linkedin': + linkedin_people_list_tracker.extend(people_list) await db_stash.store_all(word, people_list, 'people', source) - if len(people_list) == 0: - print('\n[*] No users found.\n\n') - else: - print('\n[*] Users found: ' + str(len(people_list))) - print('---------------------') - for usr in sorted(list(set(people_list))): - print(usr) + if store_links: links = await search_engine.get_links() - await db.store_all(word, links, 'name', engineitem) - if len(links) == 0: - print('\n[*] No links found.\n\n') - else: - print(f'\n[*] Links found: {len(links)}') - print('---------------------') - for link in sorted(list(set(links))): - print(link) + linkedin_links_tracker.extend(links) + if len(links) > 0: + await db.store_all(word, links, 'linkedinlinks', engineitem) + + if store_interestingurls: + iurls = await search_engine.get_interestingurls() + interesting_urls.extend(iurls) + if len(iurls) > 0: + await db.store_all(word, iurls, 'interestingurl', engineitem) + if store_asns: + fasns = await search_engine.get_asns() + total_asns.extend(fasns) + if len(fasns) > 0: + await db.store_all(word, fasns, 'asns', engineitem) stor_lst = [] if args.source is not None: @@ -163,8 +191,16 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor try: baidu_search = baidusearch.SearchBaidu(word, limit) stor_lst.append(store(baidu_search, engineitem, store_host=True, store_emails=True)) - except Exception: - pass + except Exception as e: + print(e) + + elif engineitem == 'binaryedge': + from theHarvester.discovery import binaryedgesearch + try: + binaryedge_search = binaryedgesearch.SearchBinaryEdge(word, limit) + stor_lst.append(store(binaryedge_search, engineitem, store_host=True)) + except Exception as e: + print(e) elif engineitem == 'bing' or engineitem == 'bingapi': from theHarvester.discovery import bingsearch @@ -220,7 +256,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor try: from theHarvester.discovery import dnsdumpster dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word) - stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True)) + stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True, store_ip=True)) except Exception as e: print(f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m') @@ -272,7 +308,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor # Import locally or won't work. 
try: intelx_search = intelxsearch.SearchIntelx(word) - stor_lst.append(store(intelx_search, engineitem, store_host=True, store_emails=True)) + stor_lst.append(store(intelx_search, engineitem, store_interestingurls=True, store_emails=True)) except Exception as e: if isinstance(e, MissingKey): print(e) @@ -387,7 +423,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor from theHarvester.discovery import threatcrowd try: threatcrowd_search = threatcrowd.SearchThreatcrowd(word) - stor_lst.append(store(threatcrowd_search, engineitem, store_host=True)) + stor_lst.append(store(threatcrowd_search, engineitem, store_host=True, store_ip=True)) except Exception as e: print(e) @@ -395,7 +431,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor from theHarvester.discovery import threatminer try: threatminer_search = threatminer.SearchThreatminer(word) - stor_lst.append(store(threatminer_search, engineitem, store_host=True)) + stor_lst.append(store(threatminer_search, engineitem, store_host=True, store_ip=True)) except Exception as e: print(e) @@ -414,7 +450,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor from theHarvester.discovery import urlscan try: urlscan_search = urlscan.SearchUrlscan(word) - stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True)) + stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True, + store_interestingurls=True, store_asns=True)) except Exception as e: print(e) @@ -424,13 +461,22 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor stor_lst.append(store(virustotal_search, engineitem, store_host=True)) elif engineitem == 'yahoo': - from theHarvester.discovery import yahoosearch yahoo_search = yahoosearch.SearchYahoo(word, limit) stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True)) + + elif engineitem == 'zoomeye': + from theHarvester.discovery import zoomeyesearch + zoomeye_search = zoomeyesearch.SearchZoomEye(word, limit) + stor_lst.append(store(zoomeye_search, engineitem, store_host=True, store_emails=True, + store_ip=True, store_interestingurls=True, store_asns=True)) else: - print('\033[93m[!] Invalid source.\n\n \033[0m') - sys.exit(1) + try: + # Check if dns_brute is defined + rest_args.dns_brute + except Exception: + print('\033[93m[!] Invalid source.\n\n \033[0m') + sys.exit(1) async def worker(queue): while True: @@ -465,6 +511,15 @@ async def handler(lst): await asyncio.gather(*tasks, return_exceptions=True) await handler(lst=stor_lst) + return_ips: List = [] + if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False: + # Indicates user is using rest api but not wanting output to be saved to a file + full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full] + full = list({host for host in full if host}) + full.sort() + # cast to string so Rest API can understand type + return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])]) + return list(set(all_emails)), return_ips, full, '', '' # Sanity check to see if all_emails and all_hosts are defined. 
try: all_emails @@ -526,6 +581,9 @@ async def handler(lst): hosts, ips = await dns_force.run() hosts = list({host for host in hosts if ':' in host}) hosts.sort(key=lambda el: el.split(':')[0]) + # Check if Rest API is being used if so return found hosts + if dnsbrute[1]: + return hosts print('\n[*] Hosts found after DNS brute force:') db = stash.StashManager() for host in hosts: @@ -677,59 +735,24 @@ async def handler(lst): if args.dns_tld is not False: counter = 0 for word in vhost: - search = googlesearch.SearchGoogle(word, limit, counter) - await search.process(google_dorking) - emails = await search.get_emails() - hosts = await search.get_hostnames() + search_google = googlesearch.SearchGoogle(word, limit, counter) + await search_google.process(google_dorking) + emails = await search_google.get_emails() + hosts = await search_google.get_hostnames() print(emails) print(hosts) else: pass # Reporting - if filename != "": + if filename != '': + print('\n[*] Reporting started.') try: - print('\n[*] Reporting started.') - db = stash.StashManager() - scanboarddata = await db.getscanboarddata() - latestscanresults = await db.getlatestscanresults(word) - previousscanresults = await db.getlatestscanresults(word, previousday=True) - latestscanchartdata = await db.latestscanchartdata(word) - scanhistorydomain = await db.getscanhistorydomain(word) - pluginscanstatistics = await db.getpluginscanstatistics() - generator = statichtmlgenerator.HtmlGenerator(word) - html_code = await generator.beginhtml() - html_code += await generator.generatedashboardcode(scanboarddata) - html_code += await generator.generatelatestscanresults(latestscanresults) - if len(screenshot_tups) > 0: - html_code += await generator.generatescreenshots(screenshot_tups) - html_code += await generator.generatepreviousscanresults(previousscanresults) - graph = reportgraph.GraphGenerator(word) - await graph.init_db() - html_code += await graph.drawlatestscangraph(word, latestscanchartdata) - html_code += await graph.drawscattergraphscanhistory(word, scanhistorydomain) - html_code += await generator.generatepluginscanstatistics(pluginscanstatistics) - html_code += '
<p><span style="color: #000000;">Report generated on ' + str( datetime.datetime.now()) + '</span></p>
' - html_code += ''' - - - ''' - except Exception as e: - print(e) - print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m') - sys.exit(1) - - html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w') - html_file.write(html_code) - html_file.close() - print('[*] Reporting finished.') - print('[*] Saving files.') - - try: - # XML REPORT SECTION - filename = filename.rsplit('.', 1)[0] + '.xml' - + if len(rest_filename) == 0: + filename = filename.rsplit('.', 1)[0] + '.xml' + else: + filename = 'theHarvester/app/static/' + rest_filename.rsplit('.', 1)[0] + '.xml' + # TODO use aiofiles if user is using rest api with open(filename, 'w+') as file: file.write('') for x in all_emails: @@ -767,16 +790,16 @@ async def handler(lst): file.write('') file.write('') - print('[*] XML File saved.') - except Exception as er: - print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m') + print('[*] XML File saved.') + except Exception as error: + print(f'\033[93m[!] An error occurred while saving the XML file: {error} \033[0m') try: # JSON REPORT SECTION filename = filename.rsplit('.', 1)[0] + '.json' # create dict with values for json output - json_dict = dict() + json_dict: Dict = dict() json_dict["emails"] = [email for email in all_emails] json_dict["hosts"] = [host for host in full] @@ -791,9 +814,9 @@ async def handler(lst): if len(linkedin_links_tracker) > 0: json_dict["linkedin_links"] = [link for link in list(sorted(set(linkedin_links_tracker)))] - shodan_dict = dict() + shodan_dict: Dict = dict() if shodanres != []: - shodanalysis = [] + shodanalysis: List = [] for x in shodanres: res = x.split('SAPO') shodan_dict[res[0]] = [res[2], [res[1]]] diff --git a/theHarvester/discovery/binaryedgesearch.py b/theHarvester/discovery/binaryedgesearch.py new file mode 100644 index 00000000..8382e9c6 --- /dev/null +++ b/theHarvester/discovery/binaryedgesearch.py @@ -0,0 +1,40 @@ +from theHarvester.discovery.constants import * +import asyncio + + +class SearchBinaryEdge: + + def __init__(self, word, limit): + self.word = word + self.totalhosts = set() + self.proxy = False + self.key = Core.binaryedge_key() + self.limit = 501 if limit >= 501 else limit + self.limit = 2 if self.limit == 1 else self.limit + if self.key is None: + raise MissingKey('binaryedge') + + async def do_search(self): + base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}' + headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()} + for page in range(1, self.limit): + params = {'page': page} + response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers) + responses = response[0] + dct = responses + if ('status' in dct.keys() and 'message' in dct.keys()) and \ + (dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']): + # 400 status code means no more results + break + if 'events' in dct.keys(): + if len(dct['events']) == 0: + break + self.totalhosts.update({host for host in dct['events']}) + await asyncio.sleep(get_delay()) + + async def get_hostnames(self) -> set: + return self.totalhosts + + async def process(self, proxy=False): + self.proxy = proxy + await self.do_search() diff --git a/theHarvester/discovery/intelxsearch.py b/theHarvester/discovery/intelxsearch.py index 5d2851bb..af32f561 100644 --- a/theHarvester/discovery/intelxsearch.py +++ b/theHarvester/discovery/intelxsearch.py @@ -25,8 +25,8 @@ async def do_search(self): # Based on: 
https://github.com/IntelligenceX/SDK/blob/master/Python/intelxapi.py # API requests self identification # https://intelx.io/integrations - headers: dict = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'} - data: dict = { + headers = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'} + data = { "term": self.word, "buckets": [], "lookuplevel": 0, @@ -59,8 +59,8 @@ async def process(self, proxy=False): intelx_parser = intelxparser.Parser() self.info = await intelx_parser.parse_dictionaries(self.results) - async def get_emails(self) -> Set: + async def get_emails(self): return self.info[0] - async def get_hostnames(self) -> Set: + async def get_interestingurls(self): return self.info[1] diff --git a/theHarvester/discovery/rocketreach.py b/theHarvester/discovery/rocketreach.py index ee1ab76f..55fe35a8 100644 --- a/theHarvester/discovery/rocketreach.py +++ b/theHarvester/discovery/rocketreach.py @@ -1,31 +1,61 @@ -from theHarvester.discovery.constants import MissingKey +from theHarvester.discovery.constants import * from theHarvester.lib.core import * -import rocketreach +import asyncio -class SearchRocketreach: +class SearchRocketReach: - def __init__(self, word): + def __init__(self, word, limit): + self.ips = set() self.word = word self.key = Core.rocketreach_key() if self.key is None: - raise MissingKey('Rocketreach') - self.total_results = "" + raise MissingKey('RocketReach') + self.hosts = set() self.proxy = False + self.baseurl = 'https://api.rocketreach.co/v2/api/search' + self.links = set() + self.limit = limit async def do_search(self): - rr = rocketreach.Gateway(rocketreach.GatewayConfig(self.key)) - s = rr.person.search().filter(current_employer=self.word) - result = s.execute() - if result.is_success: - lookup = rr.person.lookup(result.people[0].id) - if lookup.is_success: - print(repr(lookup.person)) + try: + headers = { + 'Api-Key': self.key, + 'Content-Type': 'application/json', + 'User-Agent': Core.get_user_agent() + } + + import pprint as pp + + # linkedin_urls = set() + for page in range(1, self.limit): + data = f'{{"query":{{"company_website_url": ["{self.word}"]}}, "start": {page}}}' + result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True) + + if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result[ + 'detail']: + # No more results can be fetched + break + if 'detail' in result.keys() and 'Request was throttled.' in result['detail']: + # Rate limit has been triggered need to sleep extra + print(f'RocketReach requests have been throttled; ' + f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}') + break + if 'profiles' in dict(result).keys(): + if len(result['profiles']) == 0: + break + for profile in result['profiles']: + if 'linkedin_url' in dict(profile).keys(): + self.links.add(profile['linkedin_url']) + + await asyncio.sleep(get_delay() + 2) + + except Exception as e: + print(f'An exception has occurred: {e}') + + async def get_links(self): + return self.links async def process(self, proxy=False): self.proxy = proxy - await self.do_search() # Only need to do it once. 
- - # async def get_emails(self): - # rawres = myparser.Parser(self.total_results, self.word) - # return await rawres.emails() + await self.do_search() diff --git a/theHarvester/discovery/threatcrowd.py b/theHarvester/discovery/threatcrowd.py index 1cad8f21..78cbbfc3 100644 --- a/theHarvester/discovery/threatcrowd.py +++ b/theHarvester/discovery/threatcrowd.py @@ -1,28 +1,31 @@ -from typing import Coroutine +from typing import List from theHarvester.lib.core import * -from theHarvester.parsers import myparser class SearchThreatcrowd: def __init__(self, word): self.word = word.replace(' ', '%20') - self.results: str = "" - self.totalresults: str = "" + self.hostnames = list() + self.ips = list() self.proxy = False async def do_search(self): base_url = f'https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={self.word}' headers = {'User-Agent': Core.get_user_agent()} try: - responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy) - self.results = responses[0] + responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy, json=True) + resp = responses[0] + self.ips = {ip['ip_address'] for ip in resp['resolutions'] if len(ip['ip_address']) > 4} + self.hostnames = set(list(resp['subdomains'])) except Exception as e: print(e) - self.totalresults += self.results - async def get_hostnames(self) -> Coroutine: - return await myparser.Parser(self.results, self.word).hostnames() + async def get_ips(self) -> List: + return self.ips + + async def get_hostnames(self) -> List: + return self.hostnames async def process(self, proxy=False): self.proxy = proxy diff --git a/theHarvester/discovery/threatminer.py b/theHarvester/discovery/threatminer.py index e94ab205..d5ee7069 100644 --- a/theHarvester/discovery/threatminer.py +++ b/theHarvester/discovery/threatminer.py @@ -7,16 +7,23 @@ class SearchThreatminer: def __init__(self, word): self.word = word self.totalhosts = list + self.totalips = list self.proxy = False async def do_search(self): url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) self.totalhosts: set = {host for host in response[0]['results']} + second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2' + secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy) + self.totalips: set = {resp['ip'] for resp in secondresp[0]['results']} async def get_hostnames(self) -> Type[list]: return self.totalhosts + async def get_ips(self) -> Type[list]: + return self.totalips + async def process(self, proxy=False): self.proxy = proxy await self.do_search() diff --git a/theHarvester/discovery/urlscan.py b/theHarvester/discovery/urlscan.py index 9b08b1c9..24aa3c6e 100644 --- a/theHarvester/discovery/urlscan.py +++ b/theHarvester/discovery/urlscan.py @@ -1,12 +1,14 @@ -from typing import Type +from typing import List from theHarvester.lib.core import * class SearchUrlscan: def __init__(self, word): self.word = word - self.totalhosts = list - self.totalips = list + self.totalhosts = list() + self.totalips = list() + self.interestingurls = list() + self.totalasns = list() self.proxy = False async def do_search(self): @@ -15,13 +17,22 @@ async def do_search(self): resp = response[0] self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']} self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()} + self.interestingurls = {f"{page['page']['url']}" for 
page in resp['results'] if self.word in page['page']['url'] + and 'url' in page['page'].keys()} + self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()} - async def get_hostnames(self) -> Type[list]: + async def get_hostnames(self) -> List: return self.totalhosts - async def get_ips(self) -> Type[list]: + async def get_ips(self) -> List: return self.totalips + async def get_interestingurls(self) -> List: + return self.interestingurls + + async def get_asns(self) -> List: + return self.totalasns + async def process(self, proxy=False): self.proxy = proxy await self.do_search() diff --git a/theHarvester/discovery/zoomeyesearch.py b/theHarvester/discovery/zoomeyesearch.py new file mode 100644 index 00000000..5df69d43 --- /dev/null +++ b/theHarvester/discovery/zoomeyesearch.py @@ -0,0 +1,199 @@ +from theHarvester.discovery.constants import * +from theHarvester.lib.core import * +from theHarvester.parsers import myparser +import asyncio +import re + + +class SearchZoomEye: + + def __init__(self, word, limit): + self.word = word + self.limit = limit + self.key = Core.zoomeye_key() + if self.key is None: + raise MissingKey('zoomeye') + self.baseurl = 'https://api.zoomeye.org/host/search' + self.proxy = False + self.totalasns = list() + self.totalhosts = list() + self.interestingurls = list() + self.totalips = list() + self.totalemails = list() + # Regex used is directly from: https://github.com/GerbenJavado/LinkFinder/blob/master/linkfinder.py#L29 + # Maybe one day it will be a pip package + # Regardless LinkFinder is an amazing tool! + self.iurl_regex = r""" + (?:"|') # Start newline delimiter + ( + ((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or // + [^"'/]{1,}\. # Match a domainname (any character + dot) + [a-zA-Z]{2,}[^"']{0,}) # The domainextension and/or path + | + ((?:/|\.\./|\./) # Start with /,../,./ + [^"'><,;| *()(%%$^/\\\[\]] # Next character can't be... + [^"'><,;|()]{1,}) # Rest of the characters can't be + | + ([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with / + [a-zA-Z0-9_\-/]{1,} # Resource name + \.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action) + (?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters + | + ([a-zA-Z0-9_\-/]{1,}/ # REST API (no extension) with / + [a-zA-Z0-9_\-/]{3,} # Proper REST endpoints usually have 3+ chars + (?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters + | + ([a-zA-Z0-9_\-]{1,} # filename + \.(?:php|asp|aspx|jsp|json| + action|html|js|txt|xml) # . + extension + (?:[\?|#][^"|']{0,}|)) # ? 
or # mark with parameters + ) + (?:"|') # End newline delimiter + """ + self.iurl_regex = re.compile(self.iurl_regex, re.VERBOSE) + + async def do_search(self): + headers = { + 'API-KEY': self.key, + 'User-Agent': Core.get_user_agent() + } + params = ( + ('query', f'site:{self.word}'), + ('page', '1'), + ) + # TODO add: https://www.zoomeye.org/profile/domain to fetch subdomains more easily once + # once api endpoint is created + response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, + params=params) + # First request determines how many pages there in total + resp = response[0] + total_pages = int(resp['available']) + self.limit = self.limit if total_pages > self.limit else total_pages + self.limit = 3 if self.limit == 2 else self.limit + cur_page = 2 if self.limit >= 2 else -1 + # Means there is only one page + # hostnames, emails, ips, asns, iurls + nomatches_counter = 0 + # cur_page = -1 + if cur_page == -1: + # No need to do loop just parse and leave + if 'matches' in resp.keys(): + hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches']) + self.totalhosts.extend(hostnames) + self.totalemails.extend(emails) + self.totalips.extend(ips) + self.totalasns.extend(asns) + self.interestingurls.extend(iurls) + else: + if 'matches' in resp.keys(): + # Parse out initial results and then continue to loop + hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches']) + self.totalhosts.extend(hostnames) + self.totalemails.extend(emails) + self.totalips.extend(ips) + self.totalasns.extend(asns) + self.interestingurls.extend(iurls) + + for num in range(2, self.limit): + print(f'Currently on page: {num}') + params = ( + ('query', f'site:{self.word}'), + ('page', f'{num}'), + ) + response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, + params=params) + resp = response[0] + if 'matches' not in resp.keys(): + print(f'Your resp: {resp}') + print('Match not found in keys') + break + + hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches']) + + if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 \ + and len(asns) == 0 and len(iurls) == 0: + nomatches_counter += 1 + + if nomatches_counter >= 5: + break + + self.totalhosts.extend(hostnames) + self.totalemails.extend(emails) + self.totalips.extend(ips) + self.totalasns.extend(asns) + self.interestingurls.extend(iurls) + + await asyncio.sleep(get_delay() + 2) + + async def parse_matchs(self, matches): + # Helper function to parse items from match json + # ips = {match["ip"] for match in matches} + ips = set() + iurls = set() + hostnames = set() + asns = set() + emails = set() + for match in matches: + try: + ips.add(match['ip']) + + if 'geoinfo' in match.keys(): + asns.add(int(match['geoinfo']['asn'])) + + if 'rdns_new' in match.keys(): + rdns_new = match['rdns_new'] + + if ',' in rdns_new: + parts = str(rdns_new).split(',') + rdns_new = parts[0] + if len(parts) == 2: + hostnames.add(parts[1]) + rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new + hostnames.add(rdns_new) + else: + rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new + hostnames.add(rdns_new) + + if 'rdns' in match.keys(): + rdns = match['rdns'] + rdns = rdns[:-1] if rdns[-1] == '.' else rdns + hostnames.add(rdns) + + if 'portinfo' in match.keys(): + # re. 
+ temp_emails = set(await self.parse_emails(match['portinfo']['banner'])) + emails.update(temp_emails) + hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner']))) + iurls = {str(iurl.group(1)).replace('"', '') for iurl + in re.finditer(self.iurl_regex, match['portinfo']['banner']) + if self.word in str(iurl.group(1))} + except Exception as e: + print(f'An exception has occurred: {e}') + return hostnames, emails, ips, asns, iurls + + async def process(self, proxy=False): + self.proxy = proxy + await self.do_search() # Only need to do it once. + + async def parse_emails(self, content): + rawres = myparser.Parser(content, self.word) + return await rawres.emails() + + async def parse_hostnames(self, content): + rawres = myparser.Parser(content, self.word) + return await rawres.hostnames() + + async def get_hostnames(self): + return set(self.totalhosts) + + async def get_emails(self): + return set(self.totalemails) + + async def get_ips(self): + return set(self.totalips) + + async def get_asns(self): + return set(self.totalasns) + + async def get_interestingurls(self): + return set(self.interestingurls) diff --git a/theHarvester/lib/api/api.py b/theHarvester/lib/api/api.py index 032017fe..7b6e6373 100644 --- a/theHarvester/lib/api/api.py +++ b/theHarvester/lib/api/api.py @@ -78,20 +78,20 @@ async def dnsbrute(request: Request, user_agent: str = Header(None), if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent): response = RedirectResponse(app.url_path_for('bot')) return response - dns_bruteforce = await __main__.entry_point(argparse.Namespace(dns_brute=True, - dns_lookup=False, - dns_server=False, - dns_tld=False, - domain=domain, - filename='', - google_dork=False, - limit=500, - proxies=False, - shodan=False, - source=','.join([]), - start=0, - take_over=False, - virtual_host=False)) + dns_bruteforce = await __main__.start(argparse.Namespace(dns_brute=True, + dns_lookup=False, + dns_server=False, + dns_tld=False, + domain=domain, + filename='', + google_dork=False, + limit=500, + proxies=False, + shodan=False, + source=','.join([]), + start=0, + take_over=False, + virtual_host=False)) return {'dns_bruteforce': dns_bruteforce} @@ -115,7 +115,7 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str = response = RedirectResponse(app.url_path_for('bot')) return response try: - emails, ips, urls, html_filename, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute, + emails, ips, urls, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute, dns_lookup=dns_lookup, dns_server=dns_server, dns_tld=dns_tld, @@ -130,7 +130,6 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str = take_over=take_over, virtual_host=virtual_host)) - return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'html_file': f'{html_filename}', - 'xml_file': f'{xml_filename}'} + return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'xml_file': f'{xml_filename}'} except Exception as e: return {'exception': f'{e}'} diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py index 13935d0e..6d343eb6 100644 --- a/theHarvester/lib/core.py +++ b/theHarvester/lib/core.py @@ -28,6 +28,10 @@ def api_keys() -> dict: keys = yaml.safe_load(api_keys) return keys['apikeys'] + @staticmethod + def binaryedge_key() -> str: + return Core.api_keys()['binaryedge']['key'] + @staticmethod def bing_key() -> str: return 
Core.api_keys()['bing']['key'] @@ -72,6 +76,10 @@ def shodan_key() -> str: def spyse_key() -> str: return Core.api_keys()['spyse']['key'] + @staticmethod + def zoomeye_key() -> str: + return Core.api_keys()['zoomeye']['key'] + @staticmethod def proxy_list() -> List: try: @@ -106,6 +114,7 @@ def banner() -> None: @staticmethod def get_supportedengines() -> Set[Union[str, Any]]: supportedengines = {'baidu', + 'binaryedge', 'bing', 'bingapi', 'bufferoverun', @@ -140,6 +149,7 @@ def get_supportedengines() -> Set[Union[str, Any]]: 'urlscan', 'virustotal', 'yahoo', + 'zoomeye' } return supportedengines
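
Smoke test (not part of the patch): a minimal sketch of driving the two new modules directly, assuming valid binaryedge and zoomeye keys are set in api-keys.yaml. The class names, constructors, and getters are the ones added in this diff; the domain and limit values are placeholders.

    import asyncio
    from theHarvester.discovery import binaryedgesearch, zoomeyesearch

    async def demo():
        # SearchBinaryEdge(word, limit) pages the subdomain endpoint and collects hostnames
        be = binaryedgesearch.SearchBinaryEdge('example.com', 5)
        await be.process(proxy=False)
        print(await be.get_hostnames())

        # SearchZoomEye(word, limit) also yields emails, IPs, ASNs and interesting URLs
        ze = zoomeyesearch.SearchZoomEye('example.com', 3)
        await ze.process(proxy=False)
        print(await ze.get_hostnames(), await ze.get_ips(), await ze.get_asns())

    asyncio.run(demo())

Both classes raise MissingKey when the corresponding key is absent, so the sketch assumes the keys exist before the constructors run.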