Merge pull request #146 from NotoriousRebel/dev

Initial commit for rest API.
2024-09-21 07:46:32 +08:00 · 2020-06-05 10:58:50 +01:00 · 2020-06-05 10:58:50 +01:00 · a8cad8a74e
parent ee77074508 f6561c4081
commit a8cad8a74e
7 changed files with 258 additions and 89 deletions
--- a/restfulHarvest.py
+++ b/restfulHarvest.py
@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 import uvicorn
 import theHarvester.lib.web.api as api
 import argparse
@ -7,7 +6,8 @@
 parser = argparse.ArgumentParser()
 parser.add_argument('-H', '--host', default='127.0.0.1', help='IP address to listen on default is 127.0.0.1')
 parser.add_argument('-p', '--port', default=5000, help='Port to bind the web server to, default is 5000')
-parser.add_argument('-l', '--log-level', default='info', help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set')
+parser.add_argument('-l', '--log-level', default='info',
+                    help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set')
 parser.add_argument('-r', '--reload', default=False, help='Enable auto-reload.', action='store_true')

 args = parser.parse_args()
--- a/theHarvester/main.py
+++ b/theHarvester/main.py
@ -17,30 +17,50 @@
 Core.banner()


-async def start():
+async def start(rest_args=None):
    parser = argparse.ArgumentParser(
        description='theHarvester is used to gather open source intelligence (OSINT) on a\n'
                    'company or domain.')
    parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True)
    parser.add_argument('-l', '--limit', help='Limit the number of search results, default=500.', default=500, type=int)
    parser.add_argument('-S', '--start', help='Start with result number X, default=0.', default=0, type=int)
-    parser.add_argument('-g', '--google-dork', help='Use Google Dorks for Google search.', default=False, action='store_true')
-    parser.add_argument('-p', '--proxies', help='Use proxies for requests, enter proxies in proxies.yaml.', default=False, action='store_true')
-    parser.add_argument('-s', '--shodan', help='Use Shodan to query discovered hosts.', default=False, action='store_true')
-    parser.add_argument('-v', '--virtual-host', help='Verify host name via DNS resolution and search for virtual hosts.', action='store_const', const='basic', default=False)
+    parser.add_argument('-g', '--google-dork', help='Use Google Dorks for Google search.', default=False,
+                        action='store_true')
+    parser.add_argument('-p', '--proxies', help='Use proxies for requests, enter proxies in proxies.yaml.',
+                        default=False, action='store_true')
+    parser.add_argument('-s', '--shodan', help='Use Shodan to query discovered hosts.', default=False,
+                        action='store_true')
+    parser.add_argument('-v', '--virtual-host',
+                        help='Verify host name via DNS resolution and search for virtual hosts.', action='store_const',
+                        const='basic', default=False)
    parser.add_argument('-e', '--dns-server', help='DNS server to use for lookup.')
    parser.add_argument('-t', '--dns-tld', help='Perform a DNS TLD expansion discovery, default False.', default=False)
    parser.add_argument('-r', '--take-over', help='Check for takeovers.', default=False, action='store_true')
-    parser.add_argument('-n', '--dns-lookup', help='Enable DNS server lookup, default False.', default=False, action='store_true')
-    parser.add_argument('-c', '--dns-brute', help='Perform a DNS brute force on the domain.', default=False, action='store_true')
+    parser.add_argument('-n', '--dns-lookup', help='Enable DNS server lookup, default False.', default=False,
+                        action='store_true')
+    parser.add_argument('-c', '--dns-brute', help='Perform a DNS brute force on the domain.', default=False,
+                        action='store_true')
    parser.add_argument('-f', '--filename', help='Save the results to an HTML and/or XML file.', default='', type=str)
    parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, certspotter, crtsh, dnsdumpster,
                        dogpile, duckduckgo, exalead, github-code, google,
                        hackertarget, hunter, intelx, linkedin, linkedin_links, netcraft, otx, pentesttools,
-                        rapiddns, securityTrails, spyse, sublist3r, suip, threatcrowd, threatminer,
-                        trello, twitter, urlscan, virustotal, yahoo, all''')
-
+                        rapiddns, securityTrails, spyse, suip, threatcrowd,
+                        trello, twitter, vhost, virustotal, yahoo, all''')
+    # determines if filename is coming from rest api or user
+    rest_filename = ""
+    # indicates this call is coming from the rest API
+    if rest_args:
+        args = rest_args
+        # We need to make sure the filename is random so as not to overwrite other files
+        filename: str = args.filename
+        import string
+        import secrets
+        alphabet = string.ascii_letters + string.digits
+        rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" if len(filename) != 0 \
+            else ""
+    else:
        args = parser.parse_args()
+        filename: str = args.filename
    try:
        db = stash.StashManager()
        await db.do_init()
@ -55,7 +75,8 @@ async def start():
    dnsserver = args.dns_server
    dnstld = args.dns_tld
    engines = []
-    filename: str = args.filename
+    # If the user specifies
+
    full: list = []
    ips: list = []
    google_dorking = args.google_dork
@ -72,7 +93,7 @@ async def start():

    async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False,
                    store_emails: bool = False, store_ip: bool = False, store_people: bool = False,
-                    store_data: bool = False, store_links: bool = False, store_results: bool = False) -> None:
+                    store_links: bool = False, store_results: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored is controlled by the parameters passed to the method.
@ -85,7 +106,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
-        :param store_data: whether to fetch host from method get_data() and persist
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        """
@ -437,6 +457,17 @@ async def handler(lst):

    await handler(lst=stor_lst)

+    return_ips = []
+    print("rest_filename: ", rest_filename)
+    print("rest_args: ", rest_args)
+    if rest_args is not None and len(rest_filename) == 0:
+        # Indicates user is using rest api but not wanting output to be saved to a file
+        full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full]
+        full = list({host for host in full if host})
+        full.sort()
+        # cast to string so Rest API can understand type
+        return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
+        return list(set(all_emails)), return_ips, full, "", ""
    # Sanity check to see if all_emails and all_hosts are defined.
    try:
        all_emails
@ -535,7 +566,7 @@ async def handler(lst):
                        target=word,
                        local_results=dnsrev,
                        overall_results=full),
-                    nameservers=[dnsserver] if dnsserver else None))
+                    nameservers=list(map(str, dnsserver.split(','))) if dnsserver else None))

        # run all the reversing tasks concurrently
        await asyncio.gather(*__reverse_dns_tasks.values())
@ -646,19 +677,41 @@ async def handler(lst):
               </body>
               </html>
               '''
+            if len(rest_filename) == 0:
                Html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
                Html_file.write(HTMLcode)
                Html_file.close()
                print('[*] Reporting finished.')
                print('[*] Saving files.')
+            else:
+                # indicates the rest api is being used; in that case we asynchronously write the file to our static directory
+                try:
+                    import aiofiles
+                    async with aiofiles.open(
+                            f'theHarvester/lib/web/static/{rest_filename}.html' if '.html' not in rest_filename
+                            else f'theHarvester/lib/web/static/{rest_filename}', 'w+') as Html_file:
+                        await Html_file.write(HTMLcode)
+                except Exception as ex:
+                    print(f"An excpetion has occurred: {ex}")
+                    list(set(all_emails)), return_ips, full, f'{ex}', ""
+                # Html_file = async with aiofiles.open(f'{filename}.html' if '.html' not in filename else filename, 'w')
+                # Html_file.write(HTMLcode)
+                # Html_file.close()
        except Exception as e:
            print(e)
            print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
            sys.exit(1)

        try:
+            # filename = filename.rsplit('.', 1)[0] + '.xml'
+            # file = open(filename, 'w')
+            if len(rest_filename) == 0:
                filename = filename.rsplit('.', 1)[0] + '.xml'
-            file = open(filename, 'w')
+            else:
+                filename = 'theHarvester/lib/web/static/' \
+                           + rest_filename.rsplit('.', 1)[0] + '.xml'
+            # TODO use aiofiles if user is using rest api
+            with open(filename, 'w+') as file:
                file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
                for x in all_emails:
                    file.write('<email>' + x + '</email>')
@ -695,8 +748,10 @@ async def handler(lst):
                        file.write('</servers>')

                file.write('</theHarvester>')
-            file.flush()
-            file.close()
+            if len(rest_filename) > 0:
+                return list(set(all_emails)), return_ips, full, f'/static/{rest_filename}.html', \
+                       f'/static/{filename[filename.find("/static/") + 8:]}' if '/static/' in filename \
+                           else f'/static/{filename}'
            print('[*] Files saved.')
        except Exception as er:
            print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
@ -707,7 +762,6 @@ async def handler(lst):
 async def entry_point():
    try:
        await start()
-        # await handler()
    except KeyboardInterrupt:
        print('\n\n\033[93m[!] ctrl+c detected from user, quitting.\n\n \033[0m')
    except Exception as error_entry_point:
--- a/theHarvester/discovery/dnssearch.py
+++ b/theHarvester/discovery/dnssearch.py
@ -16,7 +16,6 @@
 from typing import Callable, List, Optional
 from theHarvester.lib import hostchecker

-# TODO: need big focus on performance and results parsing, now does the basic.

 #####################################################################
 # DNS FORCE
@ -29,7 +28,8 @@ def __init__(self, domain, dnsserver, verbose=False):
        self.domain = domain
        self.subdo = False
        self.verbose = verbose
-        self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
+        # self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
+        self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
        try:
            with open('wordlists/dns-names.txt', 'r') as file:
                self.list = file.readlines()
@ -41,9 +41,13 @@ def __init__(self, domain, dnsserver, verbose=False):

    async def run(self):
        print(f'Created checker with this many words {len(self.list)}')
-        checker = hostchecker.Checker(self.list)
+        checker = hostchecker.Checker(
+            self.list) if self.dnsserver == [] or self.dnsserver == "" or self.dnsserver is None \
+            else hostchecker.Checker(self.list, nameserver=self.dnsserver)
        hosts, ips = await checker.check()
        return hosts, ips
+
+
 #####################################################################
 # DNS REVERSE
 #####################################################################
@ -159,6 +163,7 @@ async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers
        callback(__host)
        log_result(__host)

+
 #####################################################################
 # IO
 #####################################################################
@ -217,6 +222,7 @@ def generate_postprocessing_callback(target: str, **allhosts: List[str]) -> Call
        A function that will update the collection of target subdomains
        when the query result is satisfying.
    """
+
    def append_matching_hosts(host: str) -> None:
        if host and target in host:
            for __name, __hosts in allhosts.items():
--- a/theHarvester/lib/web/api.py
+++ b/theHarvester/lib/web/api.py
--- a/theHarvester/lib/web/parser.py
+++ b/theHarvester/lib/web/parser.py
@ -0,0 +1,60 @@
+"""
+Example script to query theHarvester rest API, obtain results, and write out to stdout as well as an html & xml file
+"""
+
+import asyncio
+import pprint
+
+import aiohttp
+
+
+async def fetch_json(session, url):
+    async with session.get(url) as response:
+        return await response.json()
+
+
+async def fetch(session, url):
+    async with session.get(url) as response:
+        return await response.text()
+
+
+async def main():
+    """
+    Just a simple example of how to interact with the rest api
+    you can easily use requests instead of aiohttp or whatever you best see fit
+    """
+    url = "http://127.0.0.1:5000"
+    domain = "netflix.com"
+    query_url = f'{url}/query?limit=300&filename=helloworld&source=bing,baidu,duckduckgo,dogpile&domain={domain}'
+    async with aiohttp.ClientSession() as session:
+        fetched_json = await fetch_json(session, query_url)
+        emails = fetched_json["emails"]
+        ips = fetched_json["ips"]
+        urls = fetched_json["urls"]
+        html_filename = fetched_json["html_file"]
+        xml_filename = fetched_json["xml_file"]
+
+    async with aiohttp.ClientSession() as session:
+        html_file = await fetch(session, f"{url}{html_filename}")
+        xml_file = await fetch(session, f"{url}{xml_filename}")
+
+    if len(html_file) > 0:
+        with open('results.html', 'w+') as fp:
+            fp.write(html_file)
+
+    if len(xml_file) > 0:
+        with open('results.xml', 'w+') as fp:
+            fp.write(xml_file)
+
+    print('Emails found: ')
+    pprint.pprint(emails, indent=4)
+    print('\n')
+    print('Ips found: ')
+    pprint.pprint(ips, indent=4)
+    print('\n')
+    print('Urls found: ')
+    pprint.pprint(urls, indent=4)
+
+
+# Run the example client only when this module is executed as a script
+if __name__ == '__main__':
+    asyncio.run(main())
--- a/theHarvester/lib/web/requirements.txt
+++ b/theHarvester/lib/web/requirements.txt
@ -0,0 +1,6 @@
+aiofiles
+argparse
+fastapi==0.54.1
+slowapi
+uvicorn
+uvloop
--- a/theHarvester/lib/web/static/test.txt
+++ b/theHarvester/lib/web/static/test.txt
@ -0,0 +1 @@
+hello world