diff --git a/restfulHarvest.py b/restfulHarvest.py index de7c4756..e772df6f 100755 --- a/restfulHarvest.py +++ b/restfulHarvest.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 from theHarvester.restfulHarvest import main -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/theHarvester.py b/theHarvester.py index 5f3355bc..bc1af5e2 100755 --- a/theHarvester.py +++ b/theHarvester.py @@ -5,10 +5,8 @@ from theHarvester.theHarvester import main if sys.version_info.major < 3 or sys.version_info.minor < 10: - print( - "\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m" - ) + print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m') sys.exit(1) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index ff1173b7..0bd074ef 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -63,93 +63,91 @@ async def start(rest_args: argparse.Namespace | None = None): """Main program function""" parser = argparse.ArgumentParser( - description="theHarvester is used to gather open source intelligence (OSINT) on a company or domain." + description='theHarvester is used to gather open source intelligence (OSINT) on a company or domain.' ) + parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True) parser.add_argument( - "-d", "--domain", help="Company name or domain to search.", required=True - ) - parser.add_argument( - "-l", - "--limit", - help="Limit the number of search results, default=500.", + '-l', + '--limit', + help='Limit the number of search results, default=500.', default=500, type=int, ) parser.add_argument( - "-S", - "--start", - help="Start with result number X, default=0.", + '-S', + '--start', + help='Start with result number X, default=0.', default=0, type=int, ) parser.add_argument( - "-p", - "--proxies", - help="Use proxies for requests, enter proxies in proxies.yaml.", + '-p', + '--proxies', + help='Use proxies for requests, enter proxies in proxies.yaml.', default=False, - action="store_true", + action='store_true', ) parser.add_argument( - "-s", - "--shodan", - help="Use Shodan to query discovered hosts.", + '-s', + '--shodan', + help='Use Shodan to query discovered hosts.', default=False, - action="store_true", + action='store_true', ) parser.add_argument( - "--screenshot", - help="Take screenshots of resolved domains specify output directory: --screenshot output_directory", - default="", + '--screenshot', + help='Take screenshots of resolved domains specify output directory: --screenshot output_directory', + default='', type=str, ) parser.add_argument( - "-v", - "--virtual-host", - help="Verify host name via DNS resolution and search for virtual hosts.", - action="store_const", - const="basic", + '-v', + '--virtual-host', + help='Verify host name via DNS resolution and search for virtual hosts.', + action='store_const', + const='basic', default=False, ) - parser.add_argument("-e", "--dns-server", help="DNS server to use for lookup.") + parser.add_argument('-e', '--dns-server', help='DNS server to use for lookup.') parser.add_argument( - "-t", - "--take-over", - help="Check for takeovers.", + '-t', + '--take-over', + help='Check for takeovers.', default=False, - action="store_true", + action='store_true', ) parser.add_argument( - "-r", - "--dns-resolve", - help="Perform DNS resolution on subdomains with a resolver list or passed in resolvers, default False.", - default="", + '-r', + 
'--dns-resolve', + help='Perform DNS resolution on subdomains with a resolver list or passed in resolvers, default False.', + default='', type=str, - nargs="?", + nargs='?', ) parser.add_argument( - "-n", - "--dns-lookup", - help="Enable DNS server lookup, default False.", + '-n', + '--dns-lookup', + help='Enable DNS server lookup, default False.', default=False, - action="store_true", + action='store_true', ) parser.add_argument( - "-c", - "--dns-brute", - help="Perform a DNS brute force on the domain.", + '-c', + '--dns-brute', + help='Perform a DNS brute force on the domain.', default=False, - action="store_true", + action='store_true', ) parser.add_argument( - "-f", - "--filename", - help="Save the results to an XML and JSON file.", - default="", + '-f', + '--filename', + help='Save the results to an XML and JSON file.', + default='', type=str, ) parser.add_argument( - "-b", - "--source", + '-b', + '--source', help="""anubis, baidu, bevigil, binaryedge, bing, bingapi, bufferoverun, brave, censys, certspotter, criminalip, crtsh, dnsdumpster, duckduckgo, fullhunt, github-code, hackertarget, hunter, hunterhow, intelx, netlas, onyphe, otx, pentesttools, projectdiscovery, @@ -158,10 +156,10 @@ async def start(rest_args: argparse.Namespace | None = None): ) # determines if filename is coming from rest api or user - rest_filename = "" + rest_filename = '' # indicates this from the rest API if rest_args: - if rest_args.source and rest_args.source == "getsources": + if rest_args.source and rest_args.source == 'getsources': return list(sorted(Core.get_supportedengines())) elif rest_args.dns_brute: args = rest_args @@ -171,11 +169,7 @@ async def start(rest_args: argparse.Namespace | None = None): # We need to make sure the filename is random as to not overwrite other files filename: str = args.filename alphabet = string.ascii_letters + string.digits - rest_filename += ( - f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" - if len(filename) != 0 - else "" - ) + rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" if len(filename) != 0 else '' else: args = parser.parse_args() filename = args.filename @@ -186,16 +180,14 @@ async def start(rest_args: argparse.Namespace | None = None): except Exception: pass - if len(filename) > 2 and filename[:2] == "~/": + if len(filename) > 2 and filename[:2] == '~/': filename = os.path.expanduser(filename) all_emails: list = [] all_hosts: list = [] all_ip: list = [] dnslookup = args.dns_lookup - dnsserver = ( - args.dns_server - ) # TODO arg is not used anywhere replace with resolvers wordlist arg dnsresolve + dnsserver = args.dns_server # TODO arg is not used anywhere replace with resolvers wordlist arg dnsresolve dnsresolve = args.dns_resolve final_dns_resolver_list = [] if dnsresolve is not None and len(dnsresolve) > 0: @@ -204,7 +196,7 @@ async def start(rest_args: argparse.Namespace | None = None): # 1.1.1.1,8.8.8.8 or 1.1.1.1, 8.8.8.8 # resolvers.txt if os.path.exists(dnsresolve): - with open(dnsresolve, encoding="UTF-8") as fp: + with open(dnsresolve, encoding='UTF-8') as fp: for line in fp: line = line.strip() try: @@ -212,16 +204,14 @@ async def start(rest_args: argparse.Namespace | None = None): _ = netaddr.IPAddress(line) final_dns_resolver_list.append(line) except Exception as e: - print( - f"An exception has occurred while reading from: {dnsresolve}, {e}" - ) - print(f"Current line: {line}") + print(f'An exception has occurred while reading from: {dnsresolve}, {e}') + print(f'Current line: {line}') 
return else: try: - if "," in dnsresolve: - cleaned = dnsresolve.replace(" ", "") - for item in cleaned.split(","): + if ',' in dnsresolve: + cleaned = dnsresolve.replace(' ', '') + for item in cleaned.split(','): _ = netaddr.IPAddress(item) final_dns_resolver_list.append(item) else: @@ -229,10 +219,8 @@ async def start(rest_args: argparse.Namespace | None = None): _ = netaddr.IPAddress(dnsresolve) final_dns_resolver_list.append(dnsresolve) except Exception as e: - print( - f"Passed in DNS resolvers are invalid double check, got error: {e}" - ) - print(f"Dumping resolvers passed in: {e}") + print(f'Passed in DNS resolvers are invalid double check, got error: {e}') + print(f'Dumping resolvers passed in: {e}') sys.exit(0) # if for some reason, there are duplicates @@ -249,7 +237,7 @@ async def start(rest_args: argparse.Namespace | None = None): all_urls: list = [] vhost: list = [] virtual = args.virtual_host - word: str = args.domain.rstrip("\n") + word: str = args.domain.rstrip('\n') takeover_status = args.take_over use_proxy = args.proxies linkedin_people_list_tracker: list = [] @@ -302,29 +290,17 @@ async def store( db_stash = stash.StashManager() if source: - print(f"\033[94m[*] Searching {source[0].upper() + source[1:]}. ") + print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. ') if store_host: - host_names = list( - { - host - for host in await search_engine.get_hostnames() - if f".{word}" in host - } - ) + host_names = list({host for host in await search_engine.get_hostnames() if f'.{word}' in host}) host_names = list(host_names) - if ( - source != "hackertarget" - and source != "pentesttools" - and source != "rapiddns" - ): + if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns': # If a source is inside this conditional, it means the hosts returned must be resolved to obtain ip # This should only be checked if --dns-resolve has a wordlist if dnsresolve is None or len(final_dns_resolver_list) > 0: # indicates that -r was passed in if dnsresolve is None - full_hosts_checker = hostchecker.Checker( - host_names, final_dns_resolver_list - ) + full_hosts_checker = hostchecker.Checker(host_names, final_dns_resolver_list) # If full, this is only getting resolved hosts ( resolved_pair, @@ -339,70 +315,68 @@ async def store( else: full.extend(host_names) all_hosts.extend(host_names) - await db_stash.store_all(word, all_hosts, "host", source) + await db_stash.store_all(word, all_hosts, 'host', source) if store_emails: email_list = await search_engine.get_emails() all_emails.extend(email_list) - await db_stash.store_all(word, email_list, "email", source) + await db_stash.store_all(word, email_list, 'email', source) if store_ip: ips_list = await search_engine.get_ips() all_ip.extend(ips_list) - await db_stash.store_all(word, all_ip, "ip", source) + await db_stash.store_all(word, all_ip, 'ip', source) if store_results: email_list, host_names, urls = await search_engine.get_results() all_emails.extend(email_list) - host_names = list({host for host in host_names if f".{word}" in host}) + host_names = list({host for host in host_names if f'.{word}' in host}) all_urls.extend(urls) all_hosts.extend(host_names) - await db.store_all(word, all_hosts, "host", source) - await db.store_all(word, all_emails, "email", source) + await db.store_all(word, all_hosts, 'host', source) + await db.store_all(word, all_emails, 'email', source) if store_people: people_list = await search_engine.get_people() - await db_stash.store_all(word, people_list, "people", source) + await 
db_stash.store_all(word, people_list, 'people', source) if store_links: links = await search_engine.get_links() linkedin_links_tracker.extend(links) if len(links) > 0: - await db.store_all(word, links, "linkedinlinks", engineitem) + await db.store_all(word, links, 'linkedinlinks', engineitem) if store_interestingurls: iurls = await search_engine.get_interestingurls() interesting_urls.extend(iurls) if len(iurls) > 0: - await db.store_all(word, iurls, "interestingurls", engineitem) + await db.store_all(word, iurls, 'interestingurls', engineitem) if store_asns: fasns = await search_engine.get_asns() total_asns.extend(fasns) if len(fasns) > 0: - await db.store_all(word, fasns, "asns", engineitem) + await db.store_all(word, fasns, 'asns', engineitem) stor_lst = [] if args.source is not None: - if args.source.lower() != "all": - engines = sorted(set(map(str.strip, args.source.split(",")))) + if args.source.lower() != 'all': + engines = sorted(set(map(str.strip, args.source.split(',')))) else: engines = Core.get_supportedengines() # Iterate through search engines in order if set(engines).issubset(Core.get_supportedengines()): - print(f"\n[*] Target: {word} \n") + print(f'\n[*] Target: {word} \n') for engineitem in engines: - if engineitem == "anubis": + if engineitem == 'anubis': try: anubis_search = anubis.SearchAnubis(word) - stor_lst.append( - store(anubis_search, engineitem, store_host=True) - ) + stor_lst.append(store(anubis_search, engineitem, store_host=True)) except Exception as e: print(e) - elif engineitem == "baidu": + elif engineitem == 'baidu': try: baidu_search = baidusearch.SearchBaidu(word, limit) stor_lst.append( @@ -416,7 +390,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "bevigil": + elif engineitem == 'bevigil': try: bevigil_search = bevigil.SearchBeVigil(word) stor_lst.append( @@ -430,29 +404,25 @@ async def store( except Exception as e: print(e) - elif engineitem == "binaryedge": + elif engineitem == 'binaryedge': try: - binaryedge_search = binaryedgesearch.SearchBinaryEdge( - word, limit - ) - stor_lst.append( - store(binaryedge_search, engineitem, store_host=True) - ) + binaryedge_search = binaryedgesearch.SearchBinaryEdge(word, limit) + stor_lst.append(store(binaryedge_search, engineitem, store_host=True)) except Exception as e: print(e) - elif engineitem == "bing" or engineitem == "bingapi": + elif engineitem == 'bing' or engineitem == 'bingapi': try: bing_search = bingsearch.SearchBing(word, limit, start) - bingapi = "" - if engineitem == "bingapi": - bingapi += "yes" + bingapi = '' + if engineitem == 'bingapi': + bingapi += 'yes' else: - bingapi += "no" + bingapi += 'no' stor_lst.append( store( bing_search, - "bing", + 'bing', process_param=bingapi, store_host=True, store_emails=True, @@ -464,7 +434,7 @@ async def store( else: print(e) - elif engineitem == "bufferoverun": + elif engineitem == 'bufferoverun': try: bufferoverun_search = bufferoverun.SearchBufferover(word) stor_lst.append( @@ -478,7 +448,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "brave": + elif engineitem == 'brave': try: brave_search = bravesearch.SearchBrave(word, limit) stor_lst.append( @@ -492,7 +462,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "censys": + elif engineitem == 'censys': try: censys_search = censysearch.SearchCensys(word, limit) stor_lst.append( @@ -507,16 +477,14 @@ async def store( if isinstance(e, MissingKey): print(e) - elif engineitem == "certspotter": + elif engineitem == 'certspotter': try: 
certspotter_search = certspottersearch.SearchCertspoter(word) - stor_lst.append( - store(certspotter_search, engineitem, None, store_host=True) - ) + stor_lst.append(store(certspotter_search, engineitem, None, store_host=True)) except Exception as e: print(e) - elif engineitem == "criminalip": + elif engineitem == 'criminalip': try: criminalip_search = criminalip.SearchCriminalIP(word) stor_lst.append( @@ -532,18 +500,16 @@ async def store( if isinstance(e, MissingKey): print(e) else: - print(f"An excepion has occurred in criminalip: {e}") + print(f'An exception has occurred in criminalip: {e}') - elif engineitem == "crtsh": + elif engineitem == 'crtsh': try: crtsh_search = crtsh.SearchCrtsh(word) - stor_lst.append(store(crtsh_search, "CRTsh", store_host=True)) + stor_lst.append(store(crtsh_search, 'CRTsh', store_host=True)) except Exception as e: - print( - f"[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}" - ) + print(f'[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}') - elif engineitem == "dnsdumpster": + elif engineitem == 'dnsdumpster': try: dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word) stor_lst.append( @@ -555,9 +521,9 @@ async def store( ) ) except Exception as e: - print(f"[!] An error occurred with dnsdumpster: {e}") + print(f'[!] An error occurred with dnsdumpster: {e}') - elif engineitem == "duckduckgo": + elif engineitem == 'duckduckgo': duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(word, limit) stor_lst.append( store( duckduckgo_search, engineitem, store_host=True, store_emails=True, ) ) - elif engineitem == "fullhunt": + elif engineitem == 'fullhunt': try: fullhunt_search = fullhuntsearch.SearchFullHunt(word) - stor_lst.append( - store(fullhunt_search, engineitem, store_host=True) - ) + stor_lst.append(store(fullhunt_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) - elif engineitem == "github-code": + elif engineitem == 'github-code': try: github_search = githubcode.SearchGithubCode(word, limit) stor_lst.append( @@ -592,13 +556,11 @@ async def store( ) ) except MissingKey as ex: print(ex) - elif engineitem == "hackertarget": + elif engineitem == 'hackertarget': hackertarget_search = hackertarget.SearchHackerTarget(word) - stor_lst.append( - store(hackertarget_search, engineitem, store_host=True) - ) + stor_lst.append(store(hackertarget_search, engineitem, store_host=True)) - elif engineitem == "hunter": + elif engineitem == 'hunter': try: hunter_search = huntersearch.SearchHunter(word, limit, start) stor_lst.append( @@ -613,19 +575,17 @@ async def store( ) ) except Exception as e: if isinstance(e, MissingKey): print(e) - elif engineitem == "hunterhow": + elif engineitem == 'hunterhow': try: hunterhow_search = searchhunterhow.SearchHunterHow(word) - stor_lst.append( - store(hunterhow_search, engineitem, store_host=True) - ) + stor_lst.append(store(hunterhow_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print(f"An exception has occurred in hunterhow search: {e}") + print(f'An exception has occurred in hunterhow search: {e}') - elif engineitem == "intelx": + elif engineitem == 'intelx': try: intelx_search = intelxsearch.SearchIntelx(word) stor_lst.append( @@ -640,9 +600,9 @@ async def store( ) ) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print(f"An exception has occurred in Intelx search: {e}") + print(f'An exception has occurred in Intelx search: {e}') - elif engineitem == "netlas": + elif engineitem == 'netlas': try: netlas_search = netlas.SearchNetlas(word) 
stor_lst.append( @@ -657,7 +617,7 @@ async def store( if isinstance(e, MissingKey): print(e) - elif engineitem == "onyphe": + elif engineitem == 'onyphe': try: onyphe_search = onyphe.SearchOnyphe(word) stor_lst.append( @@ -672,7 +632,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "otx": + elif engineitem == 'otx': try: otxsearch_search = otxsearch.SearchOtx(word) stor_lst.append( @@ -686,67 +646,53 @@ async def store( except Exception as e: print(e) - elif engineitem == "pentesttools": + elif engineitem == 'pentesttools': try: pentesttools_search = pentesttools.SearchPentestTools(word) - stor_lst.append( - store(pentesttools_search, engineitem, store_host=True) - ) + stor_lst.append(store(pentesttools_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print( - f"An exception has occurred in PentestTools search: {e}" - ) + print(f'An exception has occurred in PentestTools search: {e}') - elif engineitem == "projectdiscovery": + elif engineitem == 'projectdiscovery': try: projectdiscovery_search = projectdiscovery.SearchDiscovery(word) - stor_lst.append( - store(projectdiscovery_search, engineitem, store_host=True) - ) + stor_lst.append(store(projectdiscovery_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print("An exception has occurred in ProjectDiscovery") + print('An exception has occurred in ProjectDiscovery') - elif engineitem == "rapiddns": + elif engineitem == 'rapiddns': try: rapiddns_search = rapiddns.SearchRapidDns(word) - stor_lst.append( - store(rapiddns_search, engineitem, store_host=True) - ) + stor_lst.append(store(rapiddns_search, engineitem, store_host=True)) except Exception as e: print(e) - elif engineitem == "rocketreach": + elif engineitem == 'rocketreach': try: rocketreach_search = rocketreach.SearchRocketReach(word, limit) - stor_lst.append( - store(rocketreach_search, engineitem, store_links=True) - ) + stor_lst.append(store(rocketreach_search, engineitem, store_links=True)) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print(f"An exception has occurred in RocketReach: {e}") + print(f'An exception has occurred in RocketReach: {e}') - elif engineitem == "subdomaincenter": + elif engineitem == 'subdomaincenter': try: subdomaincenter_search = subdomaincenter.SubdomainCenter(word) - stor_lst.append( - store(subdomaincenter_search, engineitem, store_host=True) - ) + stor_lst.append(store(subdomaincenter_search, engineitem, store_host=True)) except Exception as e: print(e) - elif engineitem == "securityTrails": + elif engineitem == 'securityTrails': try: - securitytrails_search = ( - securitytrailssearch.SearchSecuritytrail(word) - ) + securitytrails_search = securitytrailssearch.SearchSecuritytrail(word) stor_lst.append( store( securitytrails_search, @@ -759,34 +705,24 @@ async def store( if isinstance(e, MissingKey): print(e) - elif engineitem == "sitedossier": + elif engineitem == 'sitedossier': try: sitedossier_search = sitedossier.SearchSitedossier(word) - stor_lst.append( - store(sitedossier_search, engineitem, store_host=True) - ) + stor_lst.append(store(sitedossier_search, engineitem, store_host=True)) except Exception as e: print(e) - elif engineitem == "subdomainfinderc99": + elif engineitem == 'subdomainfinderc99': try: - subdomainfinderc99_search = ( - subdomainfinderc99.SearchSubdomainfinderc99(word) - ) - stor_lst.append( - store( - subdomainfinderc99_search, engineitem, store_host=True - ) - 
) + subdomainfinderc99_search = subdomainfinderc99.SearchSubdomainfinderc99(word) + stor_lst.append(store(subdomainfinderc99_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) else: - print( - f"An exception has occurred in Subdomainfinderc99 search: {e}" - ) + print(f'An exception has occurred in Subdomainfinderc99 search: {e}') - elif engineitem == "threatminer": + elif engineitem == 'threatminer': try: threatminer_search = threatminer.SearchThreatminer(word) stor_lst.append( @@ -800,7 +736,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "tomba": + elif engineitem == 'tomba': try: tomba_search = tombasearch.SearchTomba(word, limit, start) stor_lst.append( @@ -815,7 +751,7 @@ async def store( if isinstance(e, MissingKey): print(e) - elif engineitem == "urlscan": + elif engineitem == 'urlscan': try: urlscan_search = urlscan.SearchUrlscan(word) stor_lst.append( @@ -831,17 +767,15 @@ async def store( except Exception as e: print(e) - elif engineitem == "virustotal": + elif engineitem == 'virustotal': try: virustotal_search = virustotal.SearchVirustotal(word) - stor_lst.append( - store(virustotal_search, engineitem, store_host=True) - ) + stor_lst.append(store(virustotal_search, engineitem, store_host=True)) except Exception as e: if isinstance(e, MissingKey): print(e) - elif engineitem == "yahoo": + elif engineitem == 'yahoo': try: yahoo_search = yahoosearch.SearchYahoo(word, limit) stor_lst.append( @@ -855,7 +789,7 @@ async def store( except Exception as e: print(e) - elif engineitem == "zoomeye": + elif engineitem == 'zoomeye': try: zoomeye_search = zoomeyesearch.SearchZoomEye(word, limit) stor_lst.append( @@ -877,10 +811,10 @@ async def store( try: rest_args.dns_brute except Exception: - print("\n[!] Invalid source.\n") + print('\n[!] Invalid source.\n') sys.exit(1) else: - print("\n[!] Invalid source.\n") + print('\n[!] Invalid source.\n') sys.exit(1) async def worker(queue): @@ -916,25 +850,12 @@ async def handler(lst): await handler(lst=stor_lst) return_ips: list = [] - if ( - rest_args is not None - and len(rest_filename) == 0 - and rest_args.dns_brute is False - ): + if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False: # Indicates user is using REST api but not wanting output to be saved to a file # cast to string so Rest API can understand the type - return_ips.extend( - [ - str(ip) - for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)]) - ] - ) + return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])]) # return list(set(all_emails)), return_ips, full, '', '' - all_hosts = [ - host.replace("www.", "") - for host in all_hosts - if host.replace("www.", "") in all_hosts - ] + all_hosts = [host.replace('www.', '') for host in all_hosts if host.replace('www.', '') in all_hosts] all_hosts = list(sorted(set(all_hosts))) return ( total_asns, @@ -951,152 +872,140 @@ async def handler(lst): try: all_emails except NameError: - print("\n\n[!] No emails found because all_emails is not defined.\n\n ") + print('\n\n[!] No emails found because all_emails is not defined.\n\n ') sys.exit(1) try: all_hosts except NameError: - print("\n\n[!] No hosts found because all_hosts is not defined.\n\n ") + print('\n\n[!] 
No hosts found because all_hosts is not defined.\n\n ') sys.exit(1) # Results if len(total_asns) > 0: - print(f"\n[*] ASNS found: {len(total_asns)}") - print("--------------------") + print(f'\n[*] ASNS found: {len(total_asns)}') + print('--------------------') total_asns = list(sorted(set(total_asns))) for asn in total_asns: print(asn) if len(interesting_urls) > 0: - print(f"\n[*] Interesting Urls found: {len(interesting_urls)}") - print("--------------------") + print(f'\n[*] Interesting Urls found: {len(interesting_urls)}') + print('--------------------') interesting_urls = list(sorted(set(interesting_urls))) for iurl in interesting_urls: print(iurl) - if len(twitter_people_list_tracker) == 0 and "twitter" in engines: - print("\n[*] No Twitter users found.\n\n") + if len(twitter_people_list_tracker) == 0 and 'twitter' in engines: + print('\n[*] No Twitter users found.\n\n') else: if len(twitter_people_list_tracker) >= 1: - print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker))) - print("---------------------") + print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker))) + print('---------------------') twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker))) for usr in twitter_people_list_tracker: print(usr) - if len(linkedin_people_list_tracker) == 0 and "linkedin" in engines: - print("\n[*] No LinkedIn users found.\n\n") + if len(linkedin_people_list_tracker) == 0 and 'linkedin' in engines: + print('\n[*] No LinkedIn users found.\n\n') else: if len(linkedin_people_list_tracker) >= 1: - print( - "\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker)) - ) - print("---------------------") - linkedin_people_list_tracker = list( - sorted(set(linkedin_people_list_tracker)) - ) + print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker))) + print('---------------------') + linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker))) for usr in linkedin_people_list_tracker: print(usr) - if len(linkedin_links_tracker) == 0 and ( - "linkedin" in engines or "rocketreach" in engines - ): - print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}") + if len(linkedin_links_tracker) == 0 and ('linkedin' in engines or 'rocketreach' in engines): + print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}') linkedin_links_tracker = list(sorted(set(linkedin_links_tracker))) - print("---------------------") + print('---------------------') for link in linkedin_people_list_tracker: print(link) length_urls = len(all_urls) if length_urls == 0: - if len(engines) >= 1 and "trello" in engines: - print("\n[*] No Trello URLs found.") + if len(engines) >= 1 and 'trello' in engines: + print('\n[*] No Trello URLs found.') else: total = length_urls - print("\n[*] Trello URLs found: " + str(total)) - print("--------------------") + print('\n[*] Trello URLs found: ' + str(total)) + print('--------------------') all_urls = list(sorted(set(all_urls))) for url in sorted(all_urls): print(url) if len(all_ip) == 0: - print("\n[*] No IPs found.") + print('\n[*] No IPs found.') else: - print("\n[*] IPs found: " + str(len(all_ip))) - print("-------------------") + print('\n[*] IPs found: ' + str(len(all_ip))) + print('-------------------') # use netaddr as the list may contain ipv4 and ipv6 addresses ip_list = [] for ip in set(all_ip): try: ip = ip.strip() if len(ip) > 0: - if "/" in ip: + if '/' in ip: ip_list.append(str(netaddr.IPNetwork(ip))) else: ip_list.append(str(netaddr.IPAddress(ip))) except 
Exception as e: - print(f"An exception has occurred while adding: {ip} to ip_list: {e}") + print(f'An exception has occurred while adding: {ip} to ip_list: {e}') continue ip_list = list(sorted(ip_list)) - print("\n".join(map(str, ip_list))) + print('\n'.join(map(str, ip_list))) if len(all_emails) == 0: - print("\n[*] No emails found.") + print('\n[*] No emails found.') else: - print("\n[*] Emails found: " + str(len(all_emails))) - print("----------------------") + print('\n[*] Emails found: ' + str(len(all_emails))) + print('----------------------') all_emails = sorted(list(set(all_emails))) - print("\n".join(all_emails)) + print('\n'.join(all_emails)) if len(all_hosts) == 0: - print("\n[*] No hosts found.\n\n") + print('\n[*] No hosts found.\n\n') else: db = stash.StashManager() if dnsresolve is None or len(final_dns_resolver_list) > 0: temp = set() for host in full: - if ":" in host: + if ':' in host: # TODO parse addresses and sort them as they are IPs - subdomain, addr = host.split(":", 1) + subdomain, addr = host.split(':', 1) if subdomain.endswith(word): - temp.add(subdomain + ":" + addr) + temp.add(subdomain + ':' + addr) continue if host.endswith(word): - if host[:4] == "www.": + if host[:4] == 'www.': if host[4:] in all_hosts or host[4:] in full: temp.add(host[4:]) continue temp.add(host) full = list(sorted(temp)) - full.sort(key=lambda el: el.split(":")[0]) - print("\n[*] Hosts found: " + str(len(full))) - print("---------------------") + full.sort(key=lambda el: el.split(':')[0]) + print('\n[*] Hosts found: ' + str(len(full))) + print('---------------------') for host in full: print(host) try: - if ":" in host: - _, addr = host.split(":", 1) - await db.store(word, addr, "ip", "DNS-resolver") + if ':' in host: + _, addr = host.split(':', 1) + await db.store(word, addr, 'ip', 'DNS-resolver') except Exception as e: - print( - f"An exception has occurred while attempting to insert: {host} IP into DB: {e}" - ) + print(f'An exception has occurred while attempting to insert: {host} IP into DB: {e}') continue else: - all_hosts = [ - host.replace("www.", "") - for host in all_hosts - if host.replace("www.", "") in all_hosts - ] + all_hosts = [host.replace('www.', '') for host in all_hosts if host.replace('www.', '') in all_hosts] all_hosts = list(sorted(set(all_hosts))) - print("\n[*] Hosts found: " + str(len(all_hosts))) - print("---------------------") + print('\n[*] Hosts found: ' + str(len(all_hosts))) + print('---------------------') for host in all_hosts: print(host) # DNS brute force if dnsbrute and dnsbrute[0] is True: - print("\n[*] Starting DNS brute force.") + print('\n[*] Starting DNS brute force.') dns_force = dnssearch.DnsForce(word, final_dns_resolver_list, verbose=True) resolved_pair, hosts, ips = await dns_force.run() # Check if Rest API is being used if so return found hosts @@ -1105,19 +1014,19 @@ async def handler(lst): db = stash.StashManager() temp = set() for host in resolved_pair: - if ":" in host: + if ':' in host: # TODO parse addresses and sort them as they are IPs - subdomain, addr = host.split(":", 1) + subdomain, addr = host.split(':', 1) if subdomain.endswith(word): # Append to full, so it's within JSON/XML at the end if output file is requested if host not in full: full.append(host) - temp.add(subdomain + ":" + addr) + temp.add(subdomain + ':' + addr) if host not in all_hosts: all_hosts.append(host) continue if host.endswith(word): - if host[:4] == "www.": + if host[:4] == 'www.': if host[4:] in all_hosts or host[4:] in full: continue if host not in full: 
@@ -1125,16 +1034,16 @@ async def handler(lst): temp.add(host) if host not in all_hosts: all_hosts.append(host) - print("\n[*] Hosts found after DNS brute force:") + print('\n[*] Hosts found after DNS brute force:') for sub in temp: print(sub) - await db.store_all(word, list(sorted(temp)), "host", "dns_bruteforce") + await db.store_all(word, list(sorted(temp)), 'host', 'dns_bruteforce') takeover_results = dict() # TakeOver Checking if takeover_status: - print("\n[*] Performing subdomain takeover check") - print("\n[*] Subdomain Takeover checking IS ACTIVE RECON") + print('\n[*] Performing subdomain takeover check') + print('\n[*] Subdomain Takeover checking IS ACTIVE RECON') search_take = takeover.TakeOver(all_hosts) await search_take.populate_fingerprints() await search_take.process(proxy=use_proxy) @@ -1143,25 +1052,21 @@ async def handler(lst): dnsrev: list = [] # print(f'DNSlookup: {dnslookup}') if dnslookup is True: - print("\n[*] Starting active queries for DNSLookup.") + print('\n[*] Starting active queries for DNSLookup.') # reverse each iprange in a separate task __reverse_dns_tasks: dict = {} for entry in host_ip: - __ip_range = dnssearch.serialize_ip_range(ip=entry, netmask="24") + __ip_range = dnssearch.serialize_ip_range(ip=entry, netmask='24') if __ip_range and __ip_range not in set(__reverse_dns_tasks.keys()): - print("\n[*] Performing reverse lookup on " + __ip_range) + print('\n[*] Performing reverse lookup on ' + __ip_range) __reverse_dns_tasks[__ip_range] = asyncio.create_task( dnssearch.reverse_all_ips_in_range( iprange=__ip_range, callback=dnssearch.generate_postprocessing_callback( target=word, local_results=dnsrev, overall_results=full ), - nameservers=( - final_dns_resolver_list - if len(final_dns_resolver_list) > 0 - else None - ), + nameservers=(final_dns_resolver_list if len(final_dns_resolver_list) > 0 else None), ) ) # nameservers=list(map(str, dnsserver.split(','))) if dnsserver else None)) @@ -1169,26 +1074,26 @@ async def handler(lst): # run all the reversing tasks concurrently await asyncio.gather(*__reverse_dns_tasks.values()) # Display the newly found hosts - print("\n[*] Hosts found after reverse lookup (in target domain):") - print("--------------------------------------------------------") + print('\n[*] Hosts found after reverse lookup (in target domain):') + print('--------------------------------------------------------') for xh in dnsrev: print(xh) # Virtual hosts search - if virtual == "basic": - print("\n[*] Virtual hosts:") - print("------------------") + if virtual == 'basic': + print('\n[*] Virtual hosts:') + print('------------------') for data in host_ip: basic_search = bingsearch.SearchBing(data, limit, start) await basic_search.process_vhost() results = await basic_search.get_allhostnames() for result in results: - result = re.sub(r"[[]*", "", result) - result = re.sub("<", "", result) - result = re.sub(">", "", result) - print(data + "\t" + result) - vhost.append(data + ":" + result) - full.append(data + ":" + result) + result = re.sub(r'[[]*', '', result) + result = re.sub('<', '', result) + result = re.sub('>', '', result) + print(data + '\t' + result) + vhost.append(data + ':' + result) + full.append(data + ':' + result) vhost = sorted(set(vhost)) else: pass @@ -1201,187 +1106,153 @@ async def handler(lst): # Verify the path exists, if not create it or if user does not create it skips screenshot if path_exists: await screen_shotter.verify_installation() - print( - f"\nScreenshots can be found in: 
{screen_shotter.output}{screen_shotter.slash}" - ) + print(f'\nScreenshots can be found in: {screen_shotter.output}{screen_shotter.slash}') start_time = time.perf_counter() - print("Filtering domains for ones we can reach") + print('Filtering domains for ones we can reach') if dnsresolve is None or len(final_dns_resolver_list) > 0: - unique_resolved_domains = { - url.split(":")[0] - for url in full - if ":" in url and "www." not in url - } + unique_resolved_domains = {url.split(':')[0] for url in full if ':' in url and 'www.' not in url} else: # Technically not resolved in this case, which is not ideal # You should always use dns resolve when doing screenshotting - print( - "NOTE for future use cases you should only use screenshotting in tandem with DNS resolving" - ) + print('NOTE for future use cases you should only use screenshotting in tandem with DNS resolving') unique_resolved_domains = set(all_hosts) if len(unique_resolved_domains) > 0: # First filter out ones that didn't resolve - print( - "Attempting to visit unique resolved domains, this is ACTIVE RECON" - ) + print('Attempting to visit unique resolved domains, this is ACTIVE RECON') async with Pool(10) as pool: - results = await pool.map( - screen_shotter.visit, list(unique_resolved_domains) - ) + results = await pool.map(screen_shotter.visit, list(unique_resolved_domains)) # Filter out domains that we couldn't connect to - unique_resolved_domains_list = list( - sorted({tup[0] for tup in results if len(tup[1]) > 0}) - ) + unique_resolved_domains_list = list(sorted({tup[0] for tup in results if len(tup[1]) > 0})) async with Pool(3) as pool: - print( - f"Length of unique resolved domains: {len(unique_resolved_domains_list)} chunking now!\n" - ) + print(f'Length of unique resolved domains: {len(unique_resolved_domains_list)} chunking now!\n') # If you have the resources, you could make the function faster by increasing the chunk number chunk_number = 14 - for chunk in screen_shotter.chunk_list( - unique_resolved_domains_list, chunk_number - ): + for chunk in screen_shotter.chunk_list(unique_resolved_domains_list, chunk_number): try: - screenshot_tups.extend( - await pool.map(screen_shotter.take_screenshot, chunk) - ) + screenshot_tups.extend(await pool.map(screen_shotter.take_screenshot, chunk)) except Exception as ee: - print(f"An exception has occurred while mapping: {ee}") + print(f'An exception has occurred while mapping: {ee}') end = time.perf_counter() # There is probably an easier way to do this total = int(end - start_time) mon, sec = divmod(total, 60) hr, mon = divmod(mon, 60) - total_time = "%02d:%02d" % (mon, sec) - print(f"Finished taking screenshots in {total_time} seconds") - print( - "[+] Note there may be leftover chrome processes you may have to kill manually\n" - ) + total_time = '%02d:%02d' % (mon, sec) + print(f'Finished taking screenshots in {total_time} seconds') + print('[+] Note there may be leftover chrome processes you may have to kill manually\n') # Shodan shodanres = [] if shodan is True: - print("\033[94m[*] Searching Shodan. ") + print('\033[94m[*] Searching Shodan. 
') try: for ip in host_ip: # TODO fix shodan - print("\tSearching for " + ip) + print('\tSearching for ' + ip) shodan = shodansearch.SearchShodan() shodandict = await shodan.search_ip(ip) await asyncio.sleep(5) rowdata = [] for key, value in shodandict[ip].items(): - if str( - value - ) == "Not in Shodan" or "Error occurred in the Shodan IP search module" in str( - value - ): + if str(value) == 'Not in Shodan' or 'Error occurred in the Shodan IP search module' in str(value): break if isinstance(value, int): value = str(value) if isinstance(value, list): - value = ", ".join(map(str, value)) + value = ', '.join(map(str, value)) rowdata.append(value) shodanres.append(rowdata) print(ujson.dumps(shodandict[ip], indent=4, sort_keys=True)) - print("\n") + print('\n') except Exception as e: - print(f"[!] An error occurred with Shodan: {e} ") + print(f'[!] An error occurred with Shodan: {e} ') else: pass - if filename != "": - print("\n[*] Reporting started.") + if filename != '': + print('\n[*] Reporting started.') try: if len(rest_filename) == 0: - filename = filename.rsplit(".", 1)[0] + ".xml" + filename = filename.rsplit('.', 1)[0] + '.xml' else: - filename = ( - "theHarvester/app/static/" - + rest_filename.rsplit(".", 1)[0] - + ".xml" - ) + filename = 'theHarvester/app/static/' + rest_filename.rsplit('.', 1)[0] + '.xml' # TODO use aiofiles if user is using rest api # XML REPORT SECTION - with open(filename, "w+") as file: + with open(filename, 'w+') as file: file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>') for x in all_emails: - file.write("<email>" + x + "</email>") + file.write('<email>' + x + '</email>') for x in full: - host, ip = x.split(":", 1) if ":" in x else (x, "") + host, ip = x.split(':', 1) if ':' in x else (x, '') if ip and len(ip) > 3: - file.write( - f"<host><ip>{ip}</ip><hostname>{host}</hostname></host>" - ) + file.write(f'<host><ip>{ip}</ip><hostname>{host}</hostname></host>') else: - file.write(f"<host>{host}</host>") + file.write(f'<host>{host}</host>') for x in vhost: - host, ip = x.split(":", 1) if ":" in x else (x, "") + host, ip = x.split(':', 1) if ':' in x else (x, '') if ip and len(ip) > 3: - file.write( - f"<vhost><ip>{ip}</ip> <hostname>{host}</hostname></vhost>" - ) + file.write(f'<vhost><ip>{ip}</ip> <hostname>{host}</hostname></vhost>') else: - file.write(f"<vhost>{host}</vhost>") + file.write(f'<vhost>{host}</vhost>') # TODO add Shodan output into XML report - file.write("</theHarvester>") - print("[*] XML File saved.") + file.write('</theHarvester>') + print('[*] XML File saved.') except Exception as error: - print(f"[!] An error occurred while saving the XML file: {error}") + print(f'[!] 
An error occurred while saving the XML file: {error}') try: # JSON REPORT SECTION - filename = filename.rsplit(".", 1)[0] + ".json" + filename = filename.rsplit('.', 1)[0] + '.json' # create dict with values for json output json_dict: dict = dict() # determine if a variable exists # it should but just a validation check - if "ip_list" in locals(): + if 'ip_list' in locals(): if all_ip and len(all_ip) >= 1 and ip_list and len(ip_list) > 0: - json_dict["ips"] = ip_list + json_dict['ips'] = ip_list if len(all_emails) > 0: - json_dict["emails"] = all_emails + json_dict['emails'] = all_emails if dnsresolve is None or len(final_dns_resolver_list) > 0 and len(full) > 0: - json_dict["hosts"] = full + json_dict['hosts'] = full elif len(all_hosts) > 0: - json_dict["hosts"] = all_hosts + json_dict['hosts'] = all_hosts else: - json_dict["hosts"] = [] + json_dict['hosts'] = [] if vhost and len(vhost) > 0: - json_dict["vhosts"] = vhost + json_dict['vhosts'] = vhost if len(interesting_urls) > 0: - json_dict["interesting_urls"] = interesting_urls + json_dict['interesting_urls'] = interesting_urls if len(all_urls) > 0: - json_dict["trello_urls"] = all_urls + json_dict['trello_urls'] = all_urls if len(total_asns) > 0: - json_dict["asns"] = total_asns + json_dict['asns'] = total_asns if len(twitter_people_list_tracker) > 0: - json_dict["twitter_people"] = twitter_people_list_tracker + json_dict['twitter_people'] = twitter_people_list_tracker if len(linkedin_people_list_tracker) > 0: - json_dict["linkedin_people"] = linkedin_people_list_tracker + json_dict['linkedin_people'] = linkedin_people_list_tracker if len(linkedin_links_tracker) > 0: - json_dict["linkedin_links"] = linkedin_links_tracker + json_dict['linkedin_links'] = linkedin_links_tracker if takeover_status and len(takeover_results) > 0: - json_dict["takeover_results"] = takeover_results + json_dict['takeover_results'] = takeover_results - json_dict["shodan"] = shodanres - with open(filename, "w+") as fp: + json_dict['shodan'] = shodanres + with open(filename, 'w+') as fp: dumped_json = ujson.dumps(json_dict, sort_keys=True) fp.write(dumped_json) - print("[*] JSON File saved.") + print('[*] JSON File saved.') except Exception as er: - print(f"[!] An error occurred while saving the JSON file: {er} ") - print("\n\n") + print(f'[!] An error occurred while saving the JSON file: {er} ') + print('\n\n') sys.exit(0) @@ -1390,7 +1261,7 @@ async def entry_point() -> None: Core.banner() await start() except KeyboardInterrupt: - print("\n\n[!] ctrl+c detected from user, quitting.\n\n ") + print('\n\n[!] 
ctrl+c detected from user, quitting.\n\n ') except Exception as error_entry_point: print(error_entry_point) sys.exit(1) diff --git a/theHarvester/discovery/anubis.py b/theHarvester/discovery/anubis.py index 5d8a9fc0..d3a1e8ca 100644 --- a/theHarvester/discovery/anubis.py +++ b/theHarvester/discovery/anubis.py @@ -8,7 +8,7 @@ def __init__(self, word) -> None: self.proxy = False async def do_search(self) -> None: - url = f"https://jldc.me/anubis/subdomains/{self.word}" + url = f'https://jldc.me/anubis/subdomains/{self.word}' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) self.totalhosts = response[0] diff --git a/theHarvester/discovery/baidusearch.py b/theHarvester/discovery/baidusearch.py index 687a8721..e640bc11 100644 --- a/theHarvester/discovery/baidusearch.py +++ b/theHarvester/discovery/baidusearch.py @@ -5,23 +5,17 @@ class SearchBaidu: def __init__(self, word, limit) -> None: self.word = word - self.total_results = "" - self.server = "www.baidu.com" - self.hostname = "www.baidu.com" + self.total_results = '' + self.server = 'www.baidu.com' + self.hostname = 'www.baidu.com' self.limit = limit self.proxy = False async def do_search(self) -> None: - headers = {"Host": self.hostname, "User-agent": Core.get_user_agent()} - base_url = f"https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}" - urls = [ - base_url.replace("xx", str(num)) - for num in range(0, self.limit, 10) - if num <= self.limit - ] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + headers = {'Host': self.hostname, 'User-agent': Core.get_user_agent()} + base_url = f'https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}' + urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit] + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) for response in responses: self.total_results += response diff --git a/theHarvester/discovery/bevigil.py b/theHarvester/discovery/bevigil.py index cc17c0e8..09f5ed25 100644 --- a/theHarvester/discovery/bevigil.py +++ b/theHarvester/discovery/bevigil.py @@ -9,27 +9,23 @@ def __init__(self, word) -> None: self.interestingurls: set = set() self.key = Core.bevigil_key() if self.key is None: - self.key = "" - raise MissingKey("bevigil") + self.key = '' + raise MissingKey('bevigil') self.proxy = False async def do_search(self) -> None: - subdomain_endpoint = f"https://osint.bevigil.com/api/{self.word}/subdomains/" - url_endpoint = f"https://osint.bevigil.com/api/{self.word}/urls/" - headers = {"X-Access-Token": self.key} + subdomain_endpoint = f'https://osint.bevigil.com/api/{self.word}/subdomains/' + url_endpoint = f'https://osint.bevigil.com/api/{self.word}/urls/' + headers = {'X-Access-Token': self.key} - responses = await AsyncFetcher.fetch_all( - [subdomain_endpoint], json=True, proxy=self.proxy, headers=headers - ) + responses = await AsyncFetcher.fetch_all([subdomain_endpoint], json=True, proxy=self.proxy, headers=headers) response = responses[0] - for subdomain in response["subdomains"]: + for subdomain in response['subdomains']: self.totalhosts.add(subdomain) - responses = await AsyncFetcher.fetch_all( - [url_endpoint], json=True, proxy=self.proxy, headers=headers - ) + responses = await AsyncFetcher.fetch_all([url_endpoint], json=True, proxy=self.proxy, headers=headers) response = responses[0] - for url in response["urls"]: + for url in response['urls']: self.interestingurls.add(url) async def get_hostnames(self) -> set: diff --git 
a/theHarvester/discovery/binaryedgesearch.py b/theHarvester/discovery/binaryedgesearch.py index 77e04923..895e0cd4 100644 --- a/theHarvester/discovery/binaryedgesearch.py +++ b/theHarvester/discovery/binaryedgesearch.py @@ -13,29 +13,25 @@ def __init__(self, word, limit) -> None: self.limit = 501 if limit >= 501 else limit self.limit = 2 if self.limit == 1 else self.limit if self.key is None: - raise MissingKey("binaryedge") + raise MissingKey('binaryedge') async def do_search(self) -> None: - base_url = f"https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}" - headers = {"X-KEY": self.key, "User-Agent": Core.get_user_agent()} + base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}' + headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()} for page in range(1, self.limit): - params = {"page": page} - response = await AsyncFetcher.fetch_all( - [base_url], json=True, proxy=self.proxy, params=params, headers=headers - ) + params = {'page': page} + response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers) responses = response[0] dct = responses - if ("status" in dct.keys() and "message" in dct.keys()) and ( - dct["status"] == 400 - or "Bad Parameter" in dct["message"] - or "Error" in dct["message"] + if ('status' in dct.keys() and 'message' in dct.keys()) and ( + dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message'] ): # 400 status code means no more results break - if "events" in dct.keys(): - if len(dct["events"]) == 0: + if 'events' in dct.keys(): + if len(dct['events']) == 0: break - self.totalhosts.update({host for host in dct["events"]}) + self.totalhosts.update({host for host in dct['events']}) await asyncio.sleep(get_delay()) async def get_hostnames(self) -> set: diff --git a/theHarvester/discovery/bingsearch.py b/theHarvester/discovery/bingsearch.py index f7cf25ae..ebfc06fb 100644 --- a/theHarvester/discovery/bingsearch.py +++ b/theHarvester/discovery/bingsearch.py @@ -7,12 +7,12 @@ class SearchBing: def __init__(self, word, limit, start) -> None: - self.word = word.replace(" ", "%20") + self.word = word.replace(' ', '%20') self.results: list[Any] = [] - self.total_results = "" - self.server = "www.bing.com" - self.apiserver = "api.search.live.net" - self.hostname = "www.bing.com" + self.total_results = '' + self.server = 'www.bing.com' + self.apiserver = 'api.search.live.net' + self.hostname = 'www.bing.com' self.limit = int(limit) self.bingApi = Core.bing_key() self.counter = start @@ -20,58 +20,44 @@ def __init__(self, word, limit, start) -> None: async def do_search(self) -> None: headers = { - "Host": self.hostname, - "Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50", - "Accept-Language": "en-us,en", - "User-agent": Core.get_user_agent(), + 'Host': self.hostname, + 'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50', + 'Accept-Language': 'en-us,en', + 'User-agent': Core.get_user_agent(), } base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx' - urls = [ - base_url.replace("xx", str(num)) - for num in range(0, self.limit, 50) - if num <= self.limit - ] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit] + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) for response in responses: self.total_results += response async def do_search_api(self) -> None: - url = 
"https://api.bing.microsoft.com/v7.0/search?" + url = 'https://api.bing.microsoft.com/v7.0/search?' params = { - "q": self.word, - "count": str(self.limit), - "offset": "0", - "mkt": "en-us", - "safesearch": "Off", + 'q': self.word, + 'count': str(self.limit), + 'offset': '0', + 'mkt': 'en-us', + 'safesearch': 'Off', } headers = { - "User-Agent": Core.get_user_agent(), - "Ocp-Apim-Subscription-Key": self.bingApi, + 'User-Agent': Core.get_user_agent(), + 'Ocp-Apim-Subscription-Key': self.bingApi, } - self.results = await AsyncFetcher.fetch_all( - [url], headers=headers, params=params, proxy=self.proxy - ) + self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy) for res in self.results: self.total_results += res async def do_search_vhost(self) -> None: headers = { - "Host": self.hostname, - "Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50", - "Accept-Language": "en-us,en", - "User-agent": Core.get_user_agent(), + 'Host': self.hostname, + 'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50', + 'Accept-Language': 'en-us,en', + 'User-agent': Core.get_user_agent(), } - base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx" - urls = [ - base_url.replace("xx", str(num)) - for num in range(0, self.limit, 50) - if num <= self.limit - ] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx' + urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit] + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) for response in responses: self.total_results += response @@ -89,13 +75,13 @@ async def get_allhostnames(self): async def process(self, api, proxy: bool = False) -> None: self.proxy = proxy - if api == "yes": + if api == 'yes': if self.bingApi is None: - raise MissingKey("BingAPI") + raise MissingKey('BingAPI') await self.do_search_api() else: await self.do_search() - print(f"\tSearching {self.counter} results.") + print(f'\tSearching {self.counter} results.') async def process_vhost(self) -> None: await self.do_search_vhost() diff --git a/theHarvester/discovery/bravesearch.py b/theHarvester/discovery/bravesearch.py index 5a174b77..39eef9d8 100644 --- a/theHarvester/discovery/bravesearch.py +++ b/theHarvester/discovery/bravesearch.py @@ -8,37 +8,34 @@ class SearchBrave: def __init__(self, word, limit): self.word = word - self.results = "" - self.totalresults = "" - self.server = "https://search.brave.com/search?q=" + self.results = '' + self.totalresults = '' + self.server = 'https://search.brave.com/search?q=' self.limit = limit self.proxy = False async def do_search(self): - headers = {"User-Agent": Core.get_user_agent()} - for query in [f'"{self.word}"', f"site:{self.word}"]: + headers = {'User-Agent': Core.get_user_agent()} + for query in [f'"{self.word}"', f'site:{self.word}']: try: for offset in range(0, 50): # To reduce the total number of requests, only two queries are made "self.word" and site:self.word - current_url = f"{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0" - resp = await AsyncFetcher.fetch_all( - [current_url], headers=headers, proxy=self.proxy - ) + current_url = f'{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0' + resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy) 
self.results = resp[0] self.totalresults += self.results # if 'Results from Microsoft Bing.' in resp[0] \ if ( - "Not many great matches came back for your search" in resp[0] - or "Your request has been flagged as being suspicious and Brave Search" - in resp[0] - or "Prove" in resp[0] - and "robot" in resp[0] - or "Robot" in resp[0] + 'Not many great matches came back for your search' in resp[0] + or 'Your request has been flagged as being suspicious and Brave Search' in resp[0] + or 'Prove' in resp[0] + and 'robot' in resp[0] + or 'Robot' in resp[0] ): break await asyncio.sleep(get_delay() + 15) except Exception as e: - print(f"An exception has occurred in bravesearch: {e}") + print(f'An exception has occurred in bravesearch: {e}') await asyncio.sleep(get_delay() + 80) continue diff --git a/theHarvester/discovery/bufferoverun.py b/theHarvester/discovery/bufferoverun.py index 843b2ca6..4e731f65 100644 --- a/theHarvester/discovery/bufferoverun.py +++ b/theHarvester/discovery/bufferoverun.py @@ -11,33 +11,30 @@ def __init__(self, word) -> None: self.totalips: set = set() self.key = Core.bufferoverun_key() if self.key is None: - raise MissingKey("bufferoverun") + raise MissingKey('bufferoverun') self.proxy = False async def do_search(self) -> None: - url = f"https://tls.bufferover.run/dns?q={self.word}" + url = f'https://tls.bufferover.run/dns?q={self.word}' response = await AsyncFetcher.fetch_all( [url], json=True, - headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"}, + headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'}, proxy=self.proxy, ) dct = response[0] - if dct["Results"]: + if dct['Results']: self.totalhosts = { ( - host.split(",") - if "," in host - and self.word.replace("www.", "") in host.split(",")[0] in host - else host.split(",")[4] + host.split(',') + if ',' in host and self.word.replace('www.', '') in host.split(',')[0] in host + else host.split(',')[4] ) - for host in dct["Results"] + for host in dct['Results'] } self.totalips = { - ip.split(",")[0] - for ip in dct["Results"] - if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip.split(",")[0]) + ip.split(',')[0] for ip in dct['Results'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip.split(',')[0]) } async def get_hostnames(self) -> set: diff --git a/theHarvester/discovery/censysearch.py b/theHarvester/discovery/censysearch.py index e303e469..ff771402 100644 --- a/theHarvester/discovery/censysearch.py +++ b/theHarvester/discovery/censysearch.py @@ -15,7 +15,7 @@ def __init__(self, domain, limit: int = 500) -> None: self.word = domain self.key = Core.censys_key() if self.key[0] is None or self.key[1] is None: - raise MissingKey("Censys ID and/or Secret") + raise MissingKey('Censys ID and/or Secret') self.totalhosts: set = set() self.emails: set = set() self.limit = limit @@ -26,26 +26,24 @@ async def do_search(self) -> None: cert_search = CensysCerts( api_id=self.key[0], api_secret=self.key[1], - user_agent=f"censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)", + user_agent=f'censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)', ) except CensysUnauthorizedException: - raise MissingKey("Censys ID and/or Secret") + raise MissingKey('Censys ID and/or Secret') - query = f"names: {self.word}" + query = f'names: {self.word}' try: response = cert_search.search( query=query, - fields=["names", "parsed.subject.email_address"], + fields=['names', 
'parsed.subject.email_address'], max_records=self.limit, ) for cert in response(): - self.totalhosts.update(cert.get("names", [])) - email_address = ( - cert.get("parsed", {}).get("subject", {}).get("email_address", []) - ) + self.totalhosts.update(cert.get('names', [])) + email_address = cert.get('parsed', {}).get('subject', {}).get('email_address', []) self.emails.update(email_address) except CensysRateLimitExceededException: - print("Censys rate limit exceeded") + print('Censys rate limit exceeded') async def get_hostnames(self) -> set: return self.totalhosts diff --git a/theHarvester/discovery/certspottersearch.py b/theHarvester/discovery/certspottersearch.py index 4783cd4a..56ae3e3e 100644 --- a/theHarvester/discovery/certspottersearch.py +++ b/theHarvester/discovery/certspottersearch.py @@ -8,21 +8,19 @@ def __init__(self, word) -> None: self.proxy = False async def do_search(self) -> None: - base_url = f"https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names" + base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names' try: - response = await AsyncFetcher.fetch_all( - [base_url], json=True, proxy=self.proxy - ) + response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy) response = response[0] if isinstance(response, list): for dct in response: for key, value in dct.items(): - if key == "dns_names": + if key == 'dns_names': self.totalhosts.update({name for name in value if name}) elif isinstance(response, dict): - self.totalhosts.update({response["dns_names"] if "dns_names" in response.keys() else ""}) # type: ignore + self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''}) # type: ignore else: - self.totalhosts.update({""}) + self.totalhosts.update({''}) except Exception as e: print(e) @@ -32,4 +30,4 @@ async def get_hostnames(self) -> set: async def process(self, proxy: bool = False) -> None: self.proxy = proxy await self.do_search() - print("\tSearching results.") + print('\tSearching results.') diff --git a/theHarvester/discovery/constants.py b/theHarvester/discovery/constants.py index cf9fc28a..6ec47430 100644 --- a/theHarvester/discovery/constants.py +++ b/theHarvester/discovery/constants.py @@ -14,10 +14,10 @@ async def splitter(links): unique_list = [] name_check = [] for url in links: - tail = url.split("/")[-1] - if len(tail) == 2 or tail == "zh-cn": - tail = url.split("/")[-2] - name = tail.split("-") + tail = url.split('/')[-1] + if len(tail) == 2 or tail == 'zh-cn': + tail = url.split('/')[-2] + name = tail.split('-') if len(name) > 1: joined_name = name[0] + name[1] else: @@ -41,12 +41,8 @@ def filter(lst): new_lst = [] for item in lst: item = str(item) - if ( - (item[0].isalpha() or item[0].isdigit()) - and ("xxx" not in item) - and (".." not in item) - ): - item = item.replace("252f", "").replace("2F", "").replace("2f", "") + if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' 
not in item):
+            item = item.replace('252f', '').replace('2F', '').replace('2f', '')
             new_lst.append(item.lower())
     return new_lst
@@ -63,10 +59,9 @@ async def search(text: str) -> bool:
     """
     for line in text.strip().splitlines():
         if (
-            "This page appears when Google automatically detects requests coming from your computer network"
-            in line
-            or "http://www.google.com/sorry/index" in line
-            or "https://www.google.com/sorry/index" in line
+            'This page appears when Google automatically detects requests coming from your computer network' in line
+            or 'http://www.google.com/sorry/index' in line
+            or 'https://www.google.com/sorry/index' in line
         ):
             # print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
             return True
@@ -79,47 +74,37 @@ async def google_workaround(visit_url: str) -> bool | str:
     :param visit_url: Url to scrape
     :return: Correct html that can be parsed by BS4
     """
-    url = "https://websniffer.cc/"
+    url = 'https://websniffer.cc/'
     data = {
-        "Cookie": "",
-        "url": visit_url,
-        "submit": "Submit",
-        "type": "GET&http=1.1",
-        "uak": str(random.randint(4, 8)),  # select random UA to send to Google
+        'Cookie': '',
+        'url': visit_url,
+        'submit': 'Submit',
+        'type': 'GET&http=1.1',
+        'uak': str(random.randint(4, 8)),  # select random UA to send to Google
     }
-    returned_html = await AsyncFetcher.post_fetch(
-        url, headers={"User-Agent": Core.get_user_agent()}, data=data
-    )
+    returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
     returned_html = (
-        "This page appears when Google automatically detects requests coming from your computer network"
-        if returned_html == ""
+        'This page appears when Google automatically detects requests coming from your computer network'
+        if returned_html == ''
         else returned_html[0]
     )
-    returned_html = (
-        "" if "Please Wait... | Cloudflare" in returned_html else returned_html
-    )
+    returned_html = '' if 'Please Wait... | Cloudflare' in returned_html else returned_html

-    if (
-        len(returned_html) == 0
-        or await search(returned_html)
-        or "&lt;html" not in returned_html
-    ):
+    if len(returned_html) == 0 or await search(returned_html) or '&lt;html' not in returned_html:
         # indicates that google is serving workaround a captcha
         # That means we will try out second option which will utilize proxies
         return True
     # the html we get is malformed for BS4 as there are no greater than or less than signs
-    if "&lt;html&gt;" in returned_html:
-        start_index = returned_html.index("&lt;html&gt;")
+    if '&lt;html&gt;' in returned_html:
+        start_index = returned_html.index('&lt;html&gt;')
     else:
-        start_index = returned_html.index("&lt;html")
+        start_index = returned_html.index('&lt;html')

-    end_index = returned_html.index("&lt;/html&gt;") + 1
+    end_index = returned_html.index('&lt;/html&gt;') + 1
     correct_html = returned_html[start_index:end_index]
     # Slice list to get the response's html
-    correct_html = "".join(
-        [ch.strip().replace("&lt;", "<").replace("&gt;", ">") for ch in correct_html]
-    )
+    correct_html = ''.join([ch.strip().replace('&lt;', '<').replace('&gt;', '>') for ch in correct_html])
     return correct_html
@@ -130,9 +115,9 @@ class MissingKey(Exception):
     def __init__(self, source: str | None) -> None:
         if source:
-            self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
+            self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
         else:
-            self.message = "\n\033[93m[!] Missing CSE id. \033[0m"
+            self.message = '\n\033[93m[!] Missing CSE id. 
\033[0m' def __str__(self) -> str: return self.message diff --git a/theHarvester/discovery/criminalip.py b/theHarvester/discovery/criminalip.py index 194f741e..b1260026 100644 --- a/theHarvester/discovery/criminalip.py +++ b/theHarvester/discovery/criminalip.py @@ -13,64 +13,56 @@ def __init__(self, word) -> None: self.asns: set = set() self.key = Core.criminalip_key() if self.key is None: - raise MissingKey("criminalip") + raise MissingKey('criminalip') self.proxy = False async def do_search(self) -> None: # https://www.criminalip.io/developer/api/post-domain-scan # https://www.criminalip.io/developer/api/get-domain-status-id # https://www.criminalip.io/developer/api/get-domain-report-id - url = "https://api.criminalip.io/v1/domain/scan" + url = 'https://api.criminalip.io/v1/domain/scan' data = f'{{"query": "{self.word}"}}' # print(f'Current key: {self.key}') user_agent = Core.get_user_agent() response = await AsyncFetcher.post_fetch( url, json=True, - headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"}, + headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'}, data=data, proxy=self.proxy, ) # print(f'My response: {response}') # Expected response format: # {'data': {'scan_id': scan_id}, 'message': 'api success', 'status': 200} - if "status" in response.keys(): - status = response["status"] + if 'status' in response.keys(): + status = response['status'] if status != 200: - print( - f"An error has occurred searching criminalip dumping response: {response}" - ) + print(f'An error has occurred searching criminalip dumping response: {response}') else: - scan_id = response["data"]["scan_id"] + scan_id = response['data']['scan_id'] scan_percentage = 0 counter = 0 while scan_percentage != 100: - status_url = f"https://api.criminalip.io/v1/domain/status/{scan_id}" + status_url = f'https://api.criminalip.io/v1/domain/status/{scan_id}' status_response = await AsyncFetcher.fetch_all( [status_url], json=True, - headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"}, + headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'}, proxy=self.proxy, ) status = status_response[0] # print(f'Status response: {status}') # Expected format: # {"data": {"scan_percentage": 100}, "message": "api success", "status": 200} - scan_percentage = status["data"]["scan_percentage"] + scan_percentage = status['data']['scan_percentage'] if scan_percentage == 100: break if scan_percentage == -2: - print( - f"CriminalIP failed to scan: {self.word} does not exist, verify manually" - ) - print( - f"Dumping data: scan_response: {response} status_response: {status}" - ) + print(f'CriminalIP failed to scan: {self.word} does not exist, verify manually') + print(f'Dumping data: scan_response: {response} status_response: {status}') return if scan_percentage == -1: - print( - f"CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}" - ) + print(f'CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}') return # Wait for scan to finish if counter >= 5: @@ -80,18 +72,18 @@ async def do_search(self) -> None: counter += 1 if counter == 10: print( - "Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop." + 'Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop.' 
) print( - f"Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}" + f'Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}' ) return - report_url = f"https://api.criminalip.io/v1/domain/report/{scan_id}" + report_url = f'https://api.criminalip.io/v1/domain/report/{scan_id}' scan_response = await AsyncFetcher.fetch_all( [report_url], json=True, - headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"}, + headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'}, proxy=self.proxy, ) scan = scan_response[0] @@ -100,125 +92,113 @@ async def do_search(self) -> None: try: await self.parser(scan) except Exception as e: - print(f"An exception occurred while parsing criminalip result: {e}") - print("Dumping json: ") + print(f'An exception occurred while parsing criminalip result: {e}') + print('Dumping json: ') print(scan) async def parser(self, jlines): # TODO when new scope field is added to parse lines for potential new scope! # TODO map as_name to asn for asn data # TODO determine if worth storing interesting urls - if "data" not in jlines.keys(): - print(f"Error with criminalip data, dumping: {jlines}") + if 'data' not in jlines.keys(): + print(f'Error with criminalip data, dumping: {jlines}') return - data = jlines["data"] - for cert in data["certificates"]: + data = jlines['data'] + for cert in data['certificates']: # print(f'Current cert: {cert}') - if cert["subject"].endswith("." + self.word): - self.totalhosts.add(cert["subject"]) + if cert['subject'].endswith('.' + self.word): + self.totalhosts.add(cert['subject']) - for connected_domain in data["connected_domain_subdomain"]: + for connected_domain in data['connected_domain_subdomain']: try: - main_domain = connected_domain["main_domain"]["domain"] - subdomains = [sub["domain"] for sub in connected_domain["subdomains"]] - if main_domain.endswith("." + self.word): + main_domain = connected_domain['main_domain']['domain'] + subdomains = [sub['domain'] for sub in connected_domain['subdomains']] + if main_domain.endswith('.' + self.word): self.totalhosts.add(main_domain) for sub in subdomains: # print(f'Current sub: {sub}') - if sub.endswith("." + self.word): + if sub.endswith('.' + self.word): self.totalhosts.add(sub) except Exception as e: - print(f"An exception has occurred: {e}") - print(f"Main line: {connected_domain}") + print(f'An exception has occurred: {e}') + print(f'Main line: {connected_domain}') - for ip_info in data["connected_ip_info"]: - self.asns.add(str(ip_info["asn"])) - domains = [sub["domain"] for sub in ip_info["domain_list"]] + for ip_info in data['connected_ip_info']: + self.asns.add(str(ip_info['asn'])) + domains = [sub['domain'] for sub in ip_info['domain_list']] for sub in domains: - if sub.endswith("." + self.word): + if sub.endswith('.' + self.word): self.totalhosts.add(sub) - self.totalips.add(ip_info["ip"]) + self.totalips.add(ip_info['ip']) - for cookie in data["cookies"]: - if cookie["domain"] != "." + self.word and cookie["domain"].endswith( - "." + self.word - ): - self.totalhosts.add(cookie["domain"]) + for cookie in data['cookies']: + if cookie['domain'] != '.' + self.word and cookie['domain'].endswith('.' + self.word): + self.totalhosts.add(cookie['domain']) - for country in data["country"]: - if country["domain"].endswith("." 
+ self.word): - self.totalhosts.add(country["domain"]) - for ip in country["mapped_ips"]: - self.totalips.add(ip["ip"]) + for country in data['country']: + if country['domain'].endswith('.' + self.word): + self.totalhosts.add(country['domain']) + for ip in country['mapped_ips']: + self.totalips.add(ip['ip']) - for k, v in data["dns_record"].items(): - if k == "dns_record_type_a": - for ip in data["dns_record"][k]["ipv4"]: - self.totalips.add(ip["ip"]) + for k, v in data['dns_record'].items(): + if k == 'dns_record_type_a': + for ip in data['dns_record'][k]['ipv4']: + self.totalips.add(ip['ip']) else: if isinstance(v, list): for item in v: if isinstance(item, list): for subitem in item: - if subitem.endswith("." + self.word): + if subitem.endswith('.' + self.word): self.totalhosts.add(subitem) else: - if item.endswith("." + self.word): + if item.endswith('.' + self.word): self.totalhosts.add(item) - for domain_list in data["domain_list"]: - self.asns.add(str(domain_list["asn"])) - domains = [sub["domain"] for sub in domain_list["domain_list"]] + for domain_list in data['domain_list']: + self.asns.add(str(domain_list['asn'])) + domains = [sub['domain'] for sub in domain_list['domain_list']] for sub in domains: - if sub.endswith("." + self.word): + if sub.endswith('.' + self.word): self.totalhosts.add(sub) - self.totalips.add(domain_list["ip"]) + self.totalips.add(domain_list['ip']) - for html_page_links in data["html_page_link_domains"]: - domain = html_page_links["domain"] - if domain.endswith("." + self.word): + for html_page_links in data['html_page_link_domains']: + domain = html_page_links['domain'] + if domain.endswith('.' + self.word): self.totalhosts.add(domain) - for ip in html_page_links["mapped_ips"]: - self.totalips.add(ip["ip"]) + for ip in html_page_links['mapped_ips']: + self.totalips.add(ip['ip']) # TODO combine data['links'] and data['network_logs'] urls into one list for one run through - for link in data["links"]: - url = link["url"] + for link in data['links']: + url = link['url'] parsed_url = urlparse(url) netloc = parsed_url.netloc if self.word in netloc: - if ( - ":" in netloc and netloc.split(":")[0].endswith(self.word) - ) or netloc.endswith(self.word): + if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word): self.totalhosts.add(netloc) - for log in data["network_logs"]: - url = log["url"] + for log in data['network_logs']: + url = log['url'] parsed_url = urlparse(url) netloc = parsed_url.netloc if self.word in netloc: - if ( - ":" in netloc and netloc.split(":")[0].endswith(self.word) - ) or netloc.endswith(self.word): + if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word): self.totalhosts.add(netloc) - self.asns.add(str(log["as_number"])) + self.asns.add(str(log['as_number'])) - for redirects in data["page_redirections"]: + for redirects in data['page_redirections']: for redirect in redirects: - url = redirect["url"] + url = redirect['url'] parsed_url = urlparse(url) netloc = parsed_url.netloc if self.word in netloc: - if ( - ":" in netloc and netloc.split(":")[0].endswith(self.word) - ) or netloc.endswith(self.word): + if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word): self.totalhosts.add(netloc) - self.totalhosts = { - host.replace("www.", "") - for host in self.totalhosts - if "*." + self.word != host - } + self.totalhosts = {host.replace('www.', '') for host in self.totalhosts if '*.' 
+ self.word != host} # print(f'hostnames: {self.totalhosts}') # print(f'asns: {self.asns}') diff --git a/theHarvester/discovery/crtsh.py b/theHarvester/discovery/crtsh.py index 9d03630e..3ba662f5 100644 --- a/theHarvester/discovery/crtsh.py +++ b/theHarvester/discovery/crtsh.py @@ -10,24 +10,11 @@ def __init__(self, word) -> None: async def do_search(self) -> list: data: set = set() try: - url = f"https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json" + url = f'https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) response = response[0] - data = set( - [ - ( - dct["name_value"][2:] - if "*." == dct["name_value"][:2] - else dct["name_value"] - ) - for dct in response - ] - ) - data = { - domain - for domain in data - if (domain[0] != "*" and str(domain[0:4]).isnumeric() is False) - } + data = set([(dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value']) for dct in response]) + data = {domain for domain in data if (domain[0] != '*' and str(domain[0:4]).isnumeric() is False)} except Exception as e: print(e) clean: list = [] diff --git a/theHarvester/discovery/dnsdumpster.py b/theHarvester/discovery/dnsdumpster.py index 3e8c4c11..effa606e 100644 --- a/theHarvester/discovery/dnsdumpster.py +++ b/theHarvester/discovery/dnsdumpster.py @@ -8,51 +8,49 @@ class SearchDnsDumpster: def __init__(self, word) -> None: - self.word = word.replace(" ", "%20") - self.results = "" - self.totalresults = "" - self.server = "dnsdumpster.com" + self.word = word.replace(' ', '%20') + self.results = '' + self.totalresults = '' + self.server = 'dnsdumpster.com' self.proxy = False async def do_search(self) -> None: try: agent = Core.get_user_agent() - headers = {"User-Agent": agent} + headers = {'User-Agent': agent} session = aiohttp.ClientSession(headers=headers) # create a session to properly verify - url = f"https://{self.server}" - csrftoken = "" + url = f'https://{self.server}' + csrftoken = '' if self.proxy is False: async with session.get(url, headers=headers) as resp: resp_cookies = str(resp.cookies) - cookies = resp_cookies.split("csrftoken=") - csrftoken += cookies[1][: cookies[1].find(";")] + cookies = resp_cookies.split('csrftoken=') + csrftoken += cookies[1][: cookies[1].find(';')] else: async with session.get(url, headers=headers, proxy=self.proxy) as resp: resp_cookies = str(resp.cookies) - cookies = resp_cookies.split("csrftoken=") - csrftoken += cookies[1][: cookies[1].find(";")] + cookies = resp_cookies.split('csrftoken=') + csrftoken += cookies[1][: cookies[1].find(';')] await asyncio.sleep(5) # extract csrftoken from cookies data = { - "Cookie": f"csfrtoken={csrftoken}", - "csrfmiddlewaretoken": csrftoken, - "targetip": self.word, - "user": "free", + 'Cookie': f'csfrtoken={csrftoken}', + 'csrfmiddlewaretoken': csrftoken, + 'targetip': self.word, + 'user': 'free', } - headers["Referer"] = url + headers['Referer'] = url if self.proxy is False: async with session.post(url, headers=headers, data=data) as resp: self.results = await resp.text() else: - async with session.post( - url, headers=headers, data=data, proxy=self.proxy - ) as resp: + async with session.post(url, headers=headers, data=data, proxy=self.proxy) as resp: self.results = await resp.text() await session.close() except Exception as e: - print(f"An exception occurred: {e}") + print(f'An exception occurred: {e}') self.totalresults += self.results async def get_hostnames(self): diff --git 
a/theHarvester/discovery/dnssearch.py b/theHarvester/discovery/dnssearch.py index 263d19c5..c7ce3e8f 100644 --- a/theHarvester/discovery/dnssearch.py +++ b/theHarvester/discovery/dnssearch.py @@ -21,7 +21,7 @@ # DNS FORCE ##################################################################### -DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt" +DNS_NAMES = DATA_DIR / 'wordlists' / 'dns-names.txt' class DnsForce: @@ -32,13 +32,13 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None: # self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver # self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver self.dnsserver = dnsserver - with DNS_NAMES.open("r") as file: + with DNS_NAMES.open('r') as file: self.list = file.readlines() - self.domain = domain.replace("www.", "") - self.list = [f"{word.strip()}.{self.domain}" for word in self.list] + self.domain = domain.replace('www.', '') + self.list = [f'{word.strip()}.{self.domain}' for word in self.list] async def run(self): - print(f"Starting DNS brute forcing with {len(self.list)} words") + print(f'Starting DNS brute forcing with {len(self.list)} words') checker = hostchecker.Checker(self.list, nameserver=self.dnsserver) resolved_pair, hosts, ips = await checker.check() return resolved_pair, hosts, ips @@ -49,13 +49,13 @@ async def run(self): ##################################################################### -IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}" -PORT_REGEX = r"\d{1,5}" -NETMASK_REGEX: str = r"\d{1,2}|" + IP_REGEX -NETWORK_REGEX: str = rf"\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b" +IP_REGEX = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}' +PORT_REGEX = r'\d{1,5}' +NETMASK_REGEX: str = r'\d{1,2}|' + IP_REGEX +NETWORK_REGEX: str = rf'\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b' -def serialize_ip_range(ip: str, netmask: str = "24") -> str: +def serialize_ip_range(ip: str, netmask: str = '24') -> str: """ Serialize a network range in a constant format, 'x.x.x.x/y'. @@ -78,12 +78,12 @@ def serialize_ip_range(ip: str, netmask: str = "24") -> str: __ip = __ip_matches.group(1) __netmask = netmask if netmask else __ip_matches.group(3) if __ip and __netmask: - return str(IPv4Network(f"{__ip}/{__netmask}", strict=False)) + return str(IPv4Network(f'{__ip}/{__netmask}', strict=False)) elif __ip: - return str(IPv4Network("{}/{}".format(__ip, "24"), strict=False)) + return str(IPv4Network('{}/{}'.format(__ip, '24'), strict=False)) # invalid input ip - return "" + return '' def list_ips_in_network_range(iprange: str) -> list[str]: @@ -122,14 +122,12 @@ async def reverse_single_ip(ip: str, resolver: DNSResolver) -> str: """ try: __host = await resolver.gethostbyaddr(ip) - return __host.name if __host else "" + return __host.name if __host else '' except Exception: - return "" + return '' -async def reverse_all_ips_in_range( - iprange: str, callback: Callable, nameservers: list[str] | None = None -) -> None: +async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers: list[str] | None = None) -> None: """ Reverse all the IPs stored in a network range. All the queries are made concurrently. @@ -176,8 +174,8 @@ def log_query(ip: str) -> None: ------- out: None. 
""" - sys.stdout.write(chr(27) + "[2K" + chr(27) + "[G") - sys.stdout.write("\r" + ip + " - ") + sys.stdout.write(chr(27) + '[2K' + chr(27) + '[G') + sys.stdout.write('\r' + ip + ' - ') sys.stdout.flush() diff --git a/theHarvester/discovery/duckduckgosearch.py b/theHarvester/discovery/duckduckgosearch.py index 196ecc96..5ed4512f 100644 --- a/theHarvester/discovery/duckduckgosearch.py +++ b/theHarvester/discovery/duckduckgosearch.py @@ -7,29 +7,27 @@ class SearchDuckDuckGo: def __init__(self, word, limit) -> None: self.word = word - self.results = "" - self.totalresults = "" + self.results = '' + self.totalresults = '' self.dorks: list = [] self.links: list = [] - self.database = "https://duckduckgo.com/?q=" - self.api = "https://api.duckduckgo.com/?q=x&format=json&pretty=1" # Currently using API. - self.quantity = "100" + self.database = 'https://duckduckgo.com/?q=' + self.api = 'https://api.duckduckgo.com/?q=x&format=json&pretty=1' # Currently using API. + self.quantity = '100' self.limit = limit self.proxy = False async def do_search(self) -> None: # Do normal scraping. - url = self.api.replace("x", self.word) - headers = {"User-Agent": Core.get_user_agent()} - first_resp = await AsyncFetcher.fetch_all( - [url], headers=headers, proxy=self.proxy - ) + url = self.api.replace('x', self.word) + headers = {'User-Agent': Core.get_user_agent()} + first_resp = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy) self.results = first_resp[0] self.totalresults += self.results urls = await self.crawl(self.results) urls = {url for url in urls if len(url) > 5} all_resps = await AsyncFetcher.fetch_all(urls) - self.totalresults += "".join(all_resps) + self.totalresults += ''.join(all_resps) async def crawl(self, text): """ @@ -54,39 +52,27 @@ async def crawl(self, text): if isinstance(val, dict): # Validation check. 
                    for key in val.keys():
                        value = val.get(key)
-                        if (
-                            isinstance(value, str)
-                            and value != ""
-                            and "https://" in value
-                            or "http://" in value
-                        ):
+                        if isinstance(value, str) and value != '' and 'https://' in value or 'http://' in value:
                            urls.add(value)
-                if (
-                    isinstance(val, str)
-                    and val != ""
-                    and "https://" in val
-                    or "http://" in val
-                ):
+                if isinstance(val, str) and val != '' and 'https://' in val or 'http://' in val:
                    urls.add(val)

            tmp = set()
            for url in urls:
-                if (
-                    "<" in url and "href=" in url
-                ):  # Format is <a href="https://www.website.com"/>
-                    equal_index = url.index("=")
-                    true_url = ""
+                if '<' in url and 'href=' in url:  # Format is <a href="https://www.website.com"/>
+                    equal_index = url.index('=')
+                    true_url = ''
                     for ch in url[equal_index + 1 :]:
                         if ch == '"':
                             tmp.add(true_url)
                             break
                         true_url += ch
                 else:
-                    if url != "":
+                    if url != '':
                         tmp.add(url)
             return tmp
         except Exception as e:
-            print(f"Exception occurred: {e}")
+            print(f'Exception occurred: {e}')
             return []

     async def get_emails(self):
diff --git a/theHarvester/discovery/fullhuntsearch.py b/theHarvester/discovery/fullhuntsearch.py
index de7b7daf..1eee55f8 100644
--- a/theHarvester/discovery/fullhuntsearch.py
+++ b/theHarvester/discovery/fullhuntsearch.py
@@ -7,19 +7,19 @@ def __init__(self, word) -> None:
         self.word = word
         self.key = Core.fullhunt_key()
         if self.key is None:
-            raise MissingKey("fullhunt")
+            raise MissingKey('fullhunt')
         self.total_results = None
         self.proxy = False

     async def do_search(self) -> None:
-        url = f"https://fullhunt.io/api/v1/domain/{self.word}/subdomains"
+        url = f'https://fullhunt.io/api/v1/domain/{self.word}/subdomains'
         response = await AsyncFetcher.fetch_all(
             [url],
             json=True,
-            headers={"User-Agent": Core.get_user_agent(), "X-API-KEY": self.key},
+            headers={'User-Agent': Core.get_user_agent(), 'X-API-KEY': self.key},
             proxy=self.proxy,
         )
-        self.total_results = response[0]["hosts"]
+        self.total_results = response[0]['hosts']

     async def get_hostnames(self):
         return self.total_results
diff --git a/theHarvester/discovery/githubcode.py b/theHarvester/discovery/githubcode.py
index 0cf469ab..64aa31aa 100644
--- a/theHarvester/discovery/githubcode.py
+++ b/theHarvester/discovery/githubcode.py
@@ -28,8 +28,8 @@ class ErrorResult(NamedTuple):
 class SearchGithubCode:
     def __init__(self, word, limit) -> None:
         self.word = word
-        self.total_results = ""
-        self.server = "api.github.com"
+        self.total_results = ''
+        self.server = 'api.github.com'
         self.limit = limit
         self.counter: int = 0
         self.page: int | None = 1
@@ -38,17 +38,17 @@ def __init__(self, word, limit) -> None:
         # rate limits you more severely
         # https://developer.github.com/v3/search/#rate-limit
         if self.key is None:
-            raise MissingKey("Github")
+            raise MissingKey('Github')
         self.proxy = False

     @staticmethod
     async def fragments_from_response(json_data: dict) -> list[str]:
-        items: list[dict[str, Any]] = json_data.get("items") or list()
+        items: list[dict[str, Any]] = json_data.get('items') or list()
         fragments: list[str] = list()
         for item in items:
-            matches = item.get("text_matches") or list()
+            matches = item.get('text_matches') or list()
             for match in matches:
-                fragments.append(match.get("fragment"))
+                fragments.append(match.get('fragment'))

         return [fragment for fragment in fragments if fragment is not None]

@@ -56,22 +56,20 @@ async def page_from_response(page: str, links) -> int | None:
         page_link = links.get(page)
         if page_link:
-            parsed = urlparse.urlparse(str(page_link.get("url")))
+            parsed = urlparse.urlparse(str(page_link.get('url')))
             params = 
urlparse.parse_qs(parsed.query) - pages: list[Any] = params.get("page", [None]) + pages: list[Any] = params.get('page', [None]) page_number = pages[0] and int(pages[0]) return page_number else: return None - async def handle_response( - self, response: tuple[str, dict, int, Any] - ) -> ErrorResult | RetryResult | SuccessResult: + async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult: text, json_data, status, links = response if status == 200: results = await self.fragments_from_response(json_data) - next_page = await self.page_from_response("next", links) - last_page = await self.page_from_response("last", links) + next_page = await self.page_from_response('next', links) + last_page = await self.page_from_response('last', links) return SuccessResult(results, next_page, last_page) elif status == 429 or status == 403: return RetryResult(60) @@ -87,17 +85,15 @@ async def do_search(self, page: int) -> tuple[str, dict, int, Any]: else: url = f'https://{self.server}/search/code?q="{self.word}"&page={page}' headers = { - "Host": self.server, - "User-agent": Core.get_user_agent(), - "Accept": "application/vnd.github.v3.text-match+json", - "Authorization": f"token {self.key}", + 'Host': self.server, + 'User-agent': Core.get_user_agent(), + 'Accept': 'application/vnd.github.v3.text-match+json', + 'Authorization': f'token {self.key}', } async with aiohttp.ClientSession(headers=headers) as sess: if self.proxy: - async with sess.get( - url, proxy=random.choice(Core.proxy_list()) - ) as resp: + async with sess.get(url, proxy=random.choice(Core.proxy_list())) as resp: return await resp.text(), await resp.json(), resp.status, resp.links else: async with sess.get(url) as resp: @@ -117,7 +113,7 @@ async def process(self, proxy: bool = False) -> None: api_response = await self.do_search(self.page) result = await self.handle_response(api_response) if isinstance(result, SuccessResult): - print(f"\tSearching {self.counter} results.") + print(f'\tSearching {self.counter} results.') for fragment in result.fragments: self.total_results += fragment self.counter = self.counter + 1 @@ -125,16 +121,14 @@ async def process(self, proxy: bool = False) -> None: await asyncio.sleep(get_delay()) elif isinstance(result, RetryResult): sleepy_time = get_delay() + result.time - print(f"\tRetrying page in {sleepy_time} seconds...") + print(f'\tRetrying page in {sleepy_time} seconds...') await asyncio.sleep(sleepy_time) elif isinstance(result, ErrorResult): - raise Exception( - f"\tException occurred: status_code: {result.status_code} reason: {result.body}" - ) + raise Exception(f'\tException occurred: status_code: {result.status_code} reason: {result.body}') else: - raise Exception("\tUnknown exception occurred") + raise Exception('\tUnknown exception occurred') except Exception as e: - print(f"An exception has occurred: {e}") + print(f'An exception has occurred: {e}') async def get_emails(self): rawres = myparser.Parser(self.total_results, self.word) diff --git a/theHarvester/discovery/hackertarget.py b/theHarvester/discovery/hackertarget.py index 4980ee5c..b4403928 100644 --- a/theHarvester/discovery/hackertarget.py +++ b/theHarvester/discovery/hackertarget.py @@ -8,30 +8,24 @@ class SearchHackerTarget: def __init__(self, word) -> None: self.word = word - self.total_results = "" - self.hostname = "https://api.hackertarget.com" + self.total_results = '' + self.hostname = 'https://api.hackertarget.com' self.proxy = False self.results = None async def do_search(self) -> 
None: - headers = {"User-agent": Core.get_user_agent()} + headers = {'User-agent': Core.get_user_agent()} urls = [ - f"{self.hostname}/hostsearch/?q={self.word}", - f"{self.hostname}/reversedns/?q={self.word}", + f'{self.hostname}/hostsearch/?q={self.word}', + f'{self.hostname}/reversedns/?q={self.word}', ] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) for response in responses: - self.total_results += response.replace(",", ":") + self.total_results += response.replace(',', ':') async def process(self, proxy: bool = False) -> None: self.proxy = proxy await self.do_search() async def get_hostnames(self) -> list: - return [ - result - for result in self.total_results.splitlines() - if "No PTR records found" not in result - ] + return [result for result in self.total_results.splitlines() if 'No PTR records found' not in result] diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py index d7554ce6..575798bb 100644 --- a/theHarvester/discovery/huntersearch.py +++ b/theHarvester/discovery/huntersearch.py @@ -12,10 +12,10 @@ def __init__(self, word, limit, start) -> None: self.start = start self.key = Core.hunter_key() if self.key is None: - raise MissingKey("Hunter") - self.total_results = "" + raise MissingKey('Hunter') + self.total_results = '' self.counter = start - self.database = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10" + self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10' self.proxy = False self.hostnames: list = [] self.emails: list = [] @@ -23,76 +23,56 @@ def __init__(self, word, limit, start) -> None: async def do_search(self) -> None: # First determine if a user account is not a free account, this call is free is_free = True - headers = {"User-Agent": Core.get_user_agent()} - acc_info_url = f"https://api.hunter.io/v2/account?api_key={self.key}" - response = await AsyncFetcher.fetch_all( - [acc_info_url], headers=headers, json=True - ) + headers = {'User-Agent': Core.get_user_agent()} + acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}' + response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True) is_free = ( - is_free - if "plan_name" in response[0]["data"].keys() - and response[0]["data"]["plan_name"].lower() == "free" - else False + is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() == 'free' else False ) # Extract the total number of requests that are available for an account total_requests_avail = ( - response[0]["data"]["requests"]["searches"]["available"] - - response[0]["data"]["requests"]["searches"]["used"] + response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used'] ) if is_free: - response = await AsyncFetcher.fetch_all( - [self.database], headers=headers, proxy=self.proxy, json=True - ) + response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True) self.emails, self.hostnames = await self.parse_resp(json_resp=response[0]) else: # Determine the total number of emails that are available # As the most emails you can get within one query are 100 # This is only done where paid accounts are in play - hunter_dinfo_url = ( - f"https://api.hunter.io/v2/email-count?domain={self.word}" - ) - response = await AsyncFetcher.fetch_all( - 
[hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True - ) - total_number_reqs = response[0]["data"]["total"] // 100 + hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}' + response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True) + total_number_reqs = response[0]['data']['total'] // 100 # Parse out meta field within initial JSON response to determine the total number of results if total_requests_avail < total_number_reqs: + print('WARNING: account does not have enough requests to gather all emails') print( - "WARNING: account does not have enough requests to gather all emails" - ) - print( - f"Total requests available: {total_requests_avail}, total requests " - f"needed to be made: {total_number_reqs}" - ) - print( - "RETURNING current results, if you would still like to " - "run this module comment out the if request" + f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}' ) + print('RETURNING current results, if you would still like to ' 'run this module comment out the if request') return self.limit = 100 # max number of emails you can get per request is 100 # increments of 100 with offset determining where to start # See docs for more details: https://hunter.io/api-documentation/v2#domain-search for offset in range(0, 100 * total_number_reqs, 100): - req_url = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}" - response = await AsyncFetcher.fetch_all( - [req_url], headers=headers, proxy=self.proxy, json=True - ) + req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}' + response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True) temp_emails, temp_hostnames = await self.parse_resp(response[0]) self.emails.extend(temp_emails) self.hostnames.extend(temp_hostnames) await asyncio.sleep(1) async def parse_resp(self, json_resp): - emails = list(sorted({email["value"] for email in json_resp["data"]["emails"]})) + emails = list(sorted({email['value'] for email in json_resp['data']['emails']})) domains = list( sorted( { - source["domain"] - for email in json_resp["data"]["emails"] - for source in email["sources"] - if self.word in source["domain"] + source['domain'] + for email in json_resp['data']['emails'] + for source in email['sources'] + if self.word in source['domain'] } ) ) diff --git a/theHarvester/discovery/intelxsearch.py b/theHarvester/discovery/intelxsearch.py index f2f382bc..272c26a7 100644 --- a/theHarvester/discovery/intelxsearch.py +++ b/theHarvester/discovery/intelxsearch.py @@ -14,8 +14,8 @@ def __init__(self, word) -> None: self.word = word self.key = Core.intelx_key() if self.key is None: - raise MissingKey("Intelx") - self.database = "https://2.intelx.io" + raise MissingKey('Intelx') + self.database = 'https://2.intelx.io' self.results: Any = None self.info: tuple[Any, ...] 
= () self.limit: int = 10000 @@ -28,34 +28,30 @@ async def do_search(self) -> None: # API requests self identification # https://intelx.io/integrations headers = { - "x-key": self.key, - "User-Agent": f"{Core.get_user_agent()}-theHarvester", + 'x-key': self.key, + 'User-Agent': f'{Core.get_user_agent()}-theHarvester', } data = { - "term": self.word, - "buckets": [], - "lookuplevel": 0, - "maxresults": self.limit, - "timeout": 5, - "datefrom": "", - "dateto": "", - "sort": 2, - "media": 0, - "terminate": [], - "target": 0, + 'term': self.word, + 'buckets': [], + 'lookuplevel': 0, + 'maxresults': self.limit, + 'timeout': 5, + 'datefrom': '', + 'dateto': '', + 'sort': 2, + 'media': 0, + 'terminate': [], + 'target': 0, } - total_resp = requests.post( - f"{self.database}/phonebook/search", headers=headers, json=data - ) - phonebook_id = ujson.loads(total_resp.text)["id"] + total_resp = requests.post(f'{self.database}/phonebook/search', headers=headers, json=data) + phonebook_id = ujson.loads(total_resp.text)['id'] await asyncio.sleep(5) # Fetch results from phonebook based on ID resp = await AsyncFetcher.fetch_all( - [ - f"{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}" - ], + [f'{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}'], headers=headers, json=True, proxy=self.proxy, @@ -63,7 +59,7 @@ async def do_search(self) -> None: resp = resp[0] self.results = resp # TODO: give self.results more appropriate typing except Exception as e: - print(f"An exception has occurred in Intelx: {e}") + print(f'An exception has occurred in Intelx: {e}') async def process(self, proxy: bool = False): self.proxy = proxy diff --git a/theHarvester/discovery/netlas.py b/theHarvester/discovery/netlas.py index b5151aec..05d3798e 100644 --- a/theHarvester/discovery/netlas.py +++ b/theHarvester/discovery/netlas.py @@ -9,17 +9,15 @@ def __init__(self, word) -> None: self.totalips: list = [] self.key = Core.netlas_key() if self.key is None: - raise MissingKey("netlas") + raise MissingKey('netlas') self.proxy = False async def do_search(self) -> None: - api = f"https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*" - headers = {"X-API-Key": self.key} - response = await AsyncFetcher.fetch_all( - [api], json=True, headers=headers, proxy=self.proxy - ) - for domain in response[0]["items"]: - self.totalhosts.append(domain["data"]["domain"]) + api = f'https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*' + headers = {'X-API-Key': self.key} + response = await AsyncFetcher.fetch_all([api], json=True, headers=headers, proxy=self.proxy) + for domain in response[0]['items']: + self.totalhosts.append(domain['data']['domain']) async def get_hostnames(self) -> list: return self.totalhosts diff --git a/theHarvester/discovery/onyphe.py b/theHarvester/discovery/onyphe.py index 7f0cef86..e807a687 100644 --- a/theHarvester/discovery/onyphe.py +++ b/theHarvester/discovery/onyphe.py @@ -9,28 +9,26 @@ class SearchOnyphe: def __init__(self, word) -> None: self.word = word - self.response = "" + self.response = '' self.totalhosts: set = set() self.totalips: set = set() self.asns: set = set() self.key = Core.onyphe_key() if self.key is None: - raise MissingKey("onyphe") + raise MissingKey('onyphe') self.proxy = False async def do_search(self) -> None: # https://www.onyphe.io/docs/apis/search # https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT # base_url = 
f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}' - base_url = f"https://www.onyphe.io/api/v2/search/?q=domain:{self.word}" + base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:{self.word}' headers = { - "User-Agent": Core.get_user_agent(), - "Content-Type": "application/json", - "Authorization": f"bearer {self.key}", + 'User-Agent': Core.get_user_agent(), + 'Content-Type': 'application/json', + 'Authorization': f'bearer {self.key}', } - response = await AsyncFetcher.fetch_all( - [base_url], json=True, headers=headers, proxy=self.proxy - ) + response = await AsyncFetcher.fetch_all([base_url], json=True, headers=headers, proxy=self.proxy) self.response = response[0] await self.parse_onyphe_resp_json() @@ -38,74 +36,52 @@ async def parse_onyphe_resp_json(self): if isinstance(self.response, list): self.response = self.response[0] if not isinstance(self.response, dict): - raise Exception(f"An exception has occurred {self.response} is not a dict") - if "Success" == self.response["text"]: - if "results" in self.response.keys(): - for result in self.response["results"]: + raise Exception(f'An exception has occurred {self.response} is not a dict') + if 'Success' == self.response['text']: + if 'results' in self.response.keys(): + for result in self.response['results']: try: - if "alternativeip" in result.keys(): - self.totalips.update( - {altip for altip in result["alternativeip"]} - ) - if "url" in result.keys() and isinstance(result["url"], list): + if 'alternativeip' in result.keys(): + self.totalips.update({altip for altip in result['alternativeip']}) + if 'url' in result.keys() and isinstance(result['url'], list): self.totalhosts.update( - urlparse(url).netloc - for url in result["url"] - if urlparse(url).netloc.endswith(self.word) + urlparse(url).netloc for url in result['url'] if urlparse(url).netloc.endswith(self.word) ) - self.asns.add(result["asn"]) - self.asns.add(result["geolocus"]["asn"]) - self.totalips.add(result["geolocus"]["subnet"]) - self.totalips.add(result["ip"]) - self.totalips.add(result["subnet"]) + self.asns.add(result['asn']) + self.asns.add(result['geolocus']['asn']) + self.totalips.add(result['geolocus']['subnet']) + self.totalips.add(result['ip']) + self.totalips.add(result['subnet']) # Shouldn't be needed as API autoparses urls from html raw data # rawres = myparser.Parser(result['data'], self.word) # if await rawres.hostnames(): # self.totalhosts.update(set(await rawres.hostnames())) for subdomain_key in [ - "domain", - "hostname", - "subdomains", - "subject", - "reverse", - "geolocus", + 'domain', + 'hostname', + 'subdomains', + 'subject', + 'reverse', + 'geolocus', ]: if subdomain_key in result.keys(): - if subdomain_key == "subject": + if subdomain_key == 'subject': self.totalhosts.update( - { - domain - for domain in result[subdomain_key][ - "altname" - ] - if domain.endswith(self.word) - } + {domain for domain in result[subdomain_key]['altname'] if domain.endswith(self.word)} ) - elif subdomain_key == "geolocus": + elif subdomain_key == 'geolocus': self.totalhosts.update( - { - domain - for domain in result[subdomain_key][ - "domain" - ] - if domain.endswith(self.word) - } + {domain for domain in result[subdomain_key]['domain'] if domain.endswith(self.word)} ) else: self.totalhosts.update( - { - domain - for domain in result[subdomain_key] - if domain.endswith(self.word) - } + {domain for domain in result[subdomain_key] if domain.endswith(self.word)} ) except Exception as e: - print(f"An exception has occurred on result: {result}: {e}") 
+ print(f'An exception has occurred on result: {result}: {e}') continue else: - print( - f"Onhyphe API query did not succeed dumping current response: {self.response}" - ) + print(f'Onhyphe API query did not succeed dumping current response: {self.response}') async def get_asns(self) -> set: return self.asns diff --git a/theHarvester/discovery/otxsearch.py b/theHarvester/discovery/otxsearch.py index f87a1bc3..afbd01e5 100644 --- a/theHarvester/discovery/otxsearch.py +++ b/theHarvester/discovery/otxsearch.py @@ -11,16 +11,14 @@ def __init__(self, word) -> None: self.proxy = False async def do_search(self) -> None: - url = f"https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns" + url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) responses = response[0] dct = responses - self.totalhosts = {host["hostname"] for host in dct["passive_dns"]} + self.totalhosts = {host['hostname'] for host in dct['passive_dns']} # filter out ips that are just called NXDOMAIN self.totalips = { - ip["address"] - for ip in dct["passive_dns"] - if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip["address"]) + ip['address'] for ip in dct['passive_dns'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip['address']) } async def get_hostnames(self) -> set: diff --git a/theHarvester/discovery/pentesttools.py b/theHarvester/discovery/pentesttools.py index d351df17..a39a6620 100644 --- a/theHarvester/discovery/pentesttools.py +++ b/theHarvester/discovery/pentesttools.py @@ -12,54 +12,41 @@ def __init__(self, word) -> None: self.word = word self.key = Core.pentest_tools_key() if self.key is None: - raise MissingKey("PentestTools") + raise MissingKey('PentestTools') self.total_results: list = [] - self.api = f"https://pentest-tools.com/api?key={self.key}" + self.api = f'https://pentest-tools.com/api?key={self.key}' self.proxy = False async def poll(self, scan_id): while True: time.sleep(3) # Get the status of our scan - scan_status_data = {"op": "get_scan_status", "scan_id": scan_id} - responses = await AsyncFetcher.post_fetch( - url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy - ) + scan_status_data = {'op': 'get_scan_status', 'scan_id': scan_id} + responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy) res_json = ujson.loads(responses.strip()) - if res_json["op_status"] == "success": - if ( - res_json["scan_status"] != "waiting" - and res_json["scan_status"] != "running" - ): + if res_json['op_status'] == 'success': + if res_json['scan_status'] != 'waiting' and res_json['scan_status'] != 'running': getoutput_data = { - "op": "get_output", - "scan_id": scan_id, - "output_format": "json", + 'op': 'get_output', + 'scan_id': scan_id, + 'output_format': 'json', } - responses = await AsyncFetcher.post_fetch( - url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy - ) + responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy) - res_json = ujson.loads(responses.strip("\n")) + res_json = ujson.loads(responses.strip('\n')) self.total_results = await self.parse_json(res_json) break else: - print( - f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}" - ) + print(f"Operation get_scan_status failed because: {res_json['error']}. 
{res_json['details']}") break @staticmethod async def parse_json(json_results): - status = json_results["op_status"] - if status == "success": - scan_tests = json_results["scan_output"]["output_json"] - output_data = scan_tests[0]["output_data"] - host_to_ip = [ - f"{subdomain[0]}:{subdomain[1]}" - for subdomain in output_data - if len(subdomain) > 0 - ] + status = json_results['op_status'] + if status == 'success': + scan_tests = json_results['scan_output']['output_json'] + output_data = scan_tests[0]['output_data'] + host_to_ip = [f'{subdomain[0]}:{subdomain[1]}' for subdomain in output_data if len(subdomain) > 0] return host_to_ip return [] @@ -68,20 +55,18 @@ async def get_hostnames(self) -> list: async def do_search(self) -> None: subdomain_payload = { - "op": "start_scan", - "tool_id": 20, - "tool_params": { - "target": f"{self.word}", - "web_details": "off", - "do_smart_search": "off", + 'op': 'start_scan', + 'tool_id': 20, + 'tool_params': { + 'target': f'{self.word}', + 'web_details': 'off', + 'do_smart_search': 'off', }, } - responses = await AsyncFetcher.post_fetch( - url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy - ) + responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy) res_json = ujson.loads(responses.strip()) - if res_json["op_status"] == "success": - scan_id = res_json["scan_id"] + if res_json['op_status'] == 'success': + scan_id = res_json['scan_id'] await self.poll(scan_id) async def process(self, proxy: bool = False) -> None: diff --git a/theHarvester/discovery/projectdiscovery.py b/theHarvester/discovery/projectdiscovery.py index 3f1bde9c..39963cea 100644 --- a/theHarvester/discovery/projectdiscovery.py +++ b/theHarvester/discovery/projectdiscovery.py @@ -7,21 +7,19 @@ def __init__(self, word) -> None: self.word = word self.key = Core.projectdiscovery_key() if self.key is None: - raise MissingKey("ProjectDiscovery") + raise MissingKey('ProjectDiscovery') self.total_results = None self.proxy = False async def do_search(self): - url = f"https://dns.projectdiscovery.io/dns/{self.word}/subdomains" + url = f'https://dns.projectdiscovery.io/dns/{self.word}/subdomains' response = await AsyncFetcher.fetch_all( [url], json=True, - headers={"User-Agent": Core.get_user_agent(), "Authorization": self.key}, + headers={'User-Agent': Core.get_user_agent(), 'Authorization': self.key}, proxy=self.proxy, ) - self.total_results = [ - f"{domains}.{self.word}" for domains in response[0]["subdomains"] - ] + self.total_results = [f'{domains}.{self.word}' for domains in response[0]['subdomains']] async def get_hostnames(self): return self.total_results diff --git a/theHarvester/discovery/rapiddns.py b/theHarvester/discovery/rapiddns.py index 01bcd13b..92fbaf9e 100644 --- a/theHarvester/discovery/rapiddns.py +++ b/theHarvester/discovery/rapiddns.py @@ -11,33 +11,29 @@ def __init__(self, word) -> None: async def do_search(self): try: - headers = {"User-agent": Core.get_user_agent()} + headers = {'User-agent': Core.get_user_agent()} # TODO see if it's worth adding sameip searches # f'{self.hostname}/sameip/{self.word}?full=1#result' - urls = [f"https://rapiddns.io/subdomain/{self.word}?full=1#result"] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + urls = [f'https://rapiddns.io/subdomain/{self.word}?full=1#result'] + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) if len(responses[0]) <= 1: return self.total_results - soup = 
BeautifulSoup(responses[0], "html.parser") - rows = soup.find("table").find("tbody").find_all("tr") + soup = BeautifulSoup(responses[0], 'html.parser') + rows = soup.find('table').find('tbody').find_all('tr') if rows: # Validation check for row in rows: - cells = row.find_all("td") + cells = row.find_all('td') if len(cells) > 0: # sanity check subdomain = str(cells[0].get_text()) - if cells[-1].get_text() == "CNAME": - self.total_results.append(f"{subdomain}") + if cells[-1].get_text() == 'CNAME': + self.total_results.append(f'{subdomain}') else: - self.total_results.append( - f"{subdomain}:{str(cells[1].get_text()).strip()}" - ) + self.total_results.append(f'{subdomain}:{str(cells[1].get_text()).strip()}') self.total_results = list({domain for domain in self.total_results}) except Exception as e: - print(f"An exception has occurred: {str(e)}") + print(f'An exception has occurred: {str(e)}') async def process(self, proxy: bool = False) -> None: self.proxy = proxy diff --git a/theHarvester/discovery/rocketreach.py b/theHarvester/discovery/rocketreach.py index f41c2697..c5b7d233 100644 --- a/theHarvester/discovery/rocketreach.py +++ b/theHarvester/discovery/rocketreach.py @@ -10,59 +10,50 @@ def __init__(self, word, limit) -> None: self.word = word self.key = Core.rocketreach_key() if self.key is None: - raise MissingKey("RocketReach") + raise MissingKey('RocketReach') self.hosts: set = set() self.proxy = False - self.baseurl = "https://rocketreach.co/api/v2/person/search" + self.baseurl = 'https://rocketreach.co/api/v2/person/search' self.links: set = set() self.limit = limit async def do_search(self) -> None: try: headers = { - "Api-Key": self.key, - "Content-Type": "application/json", - "User-Agent": Core.get_user_agent(), + 'Api-Key': self.key, + 'Content-Type': 'application/json', + 'User-Agent': Core.get_user_agent(), } next_page = 1 # track pagination for count in range(1, self.limit): data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}' - result = await AsyncFetcher.post_fetch( - self.baseurl, headers=headers, data=data, json=True - ) - if ( - "detail" in result.keys() - and "error" in result.keys() - and "Subscribe to a plan to access" in result["detail"] - ): + result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True) + if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']: # No more results can be fetched break - if ( - "detail" in result.keys() - and "Request was throttled." in result["detail"] - ): + if 'detail' in result.keys() and 'Request was throttled.' 
in result['detail']: # Rate limit has been triggered need to sleep extra print( f"RocketReach requests have been throttled; " f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}' ) break - if "profiles" in dict(result).keys(): - if len(result["profiles"]) == 0: + if 'profiles' in dict(result).keys(): + if len(result['profiles']) == 0: break - for profile in result["profiles"]: - if "linkedin_url" in dict(profile).keys(): - self.links.add(profile["linkedin_url"]) - if "pagination" in dict(result).keys(): - next_page = int(result["pagination"]["next"]) - if next_page > int(result["pagination"]["total"]): + for profile in result['profiles']: + if 'linkedin_url' in dict(profile).keys(): + self.links.add(profile['linkedin_url']) + if 'pagination' in dict(result).keys(): + next_page = int(result['pagination']['next']) + if next_page > int(result['pagination']['total']): break await asyncio.sleep(get_delay() + 5) except Exception as e: - print(f"An exception has occurred: {e}") + print(f'An exception has occurred: {e}') async def get_links(self): return self.links diff --git a/theHarvester/discovery/searchhunterhow.py b/theHarvester/discovery/searchhunterhow.py index ff7b1707..bb1b24ee 100644 --- a/theHarvester/discovery/searchhunterhow.py +++ b/theHarvester/discovery/searchhunterhow.py @@ -13,55 +13,52 @@ def __init__(self, word) -> None: self.total_hostnames: set = set() self.key = Core.hunterhow_key() if self.key is None: - raise MissingKey("hunterhow") + raise MissingKey('hunterhow') self.proxy = False async def do_search(self) -> None: # https://hunter.how/search-api query = f'domain.suffix="{self.word}"' # second_query = f'domain="{self.word}"' - encoded_query = base64.urlsafe_b64encode(query.encode("utf-8")).decode("ascii") + encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii') page = 1 page_size = 100 # can be either: 10,20,50,100) # The interval between the start time and the end time cannot exceed one year # Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice today = datetime.today() one_year_ago = today - relativedelta(days=364) - start_time = one_year_ago.strftime("%Y-%m-%d") - end_time = today.strftime("%Y-%m-%d") + start_time = one_year_ago.strftime('%Y-%m-%d') + end_time = today.strftime('%Y-%m-%d') # two_years_ago = one_year_ago - relativedelta(days=364) # start_time = two_years_ago.strftime('%Y-%m-%d') # end_time = one_year_ago.strftime('%Y-%m-%d') - url = ( - "https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s" - % ( - # self.key, encoded_query, page, page_size, start_time, end_time - self.key, - encoded_query, - page, - page_size, - start_time, - end_time, - ) + url = 'https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s' % ( + # self.key, encoded_query, page, page_size, start_time, end_time + self.key, + encoded_query, + page, + page_size, + start_time, + end_time, ) # print(f'Sending url: {url}') response = await AsyncFetcher.fetch_all( [url], json=True, - headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"}, + headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'}, proxy=self.proxy, ) dct = response[0] # print(f'json response: ') # print(dct) - if "code" in dct.keys(): - if dct["code"] == 40001: + if 'code' in dct.keys(): + if dct['code'] == 40001: print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}') return # total = dct['data']['total'] # TODO 
determine if total is ever 100 how to get more subdomains? - for sub in dct["data"]["list"]: - self.total_hostnames.add(sub["domain"]) + for sub in dct['data']['list']: + self.total_hostnames.add(sub['domain']) async def get_hostnames(self) -> set: return self.total_hostnames diff --git a/theHarvester/discovery/securitytrailssearch.py b/theHarvester/discovery/securitytrailssearch.py index 552b598c..4f972c26 100644 --- a/theHarvester/discovery/securitytrailssearch.py +++ b/theHarvester/discovery/securitytrailssearch.py @@ -10,41 +10,33 @@ def __init__(self, word) -> None: self.word = word self.key = Core.security_trails_key() if self.key is None: - raise MissingKey("Securitytrail") - self.results = "" - self.totalresults = "" - self.api = "https://api.securitytrails.com/v1/" + raise MissingKey('Securitytrail') + self.results = '' + self.totalresults = '' + self.api = 'https://api.securitytrails.com/v1/' self.info: tuple[set, set] = (set(), set()) self.proxy = False async def authenticate(self) -> None: # Method to authenticate API key before sending requests. - headers = {"APIKEY": self.key} - url = f"{self.api}ping" - auth_responses = await AsyncFetcher.fetch_all( - [url], headers=headers, proxy=self.proxy - ) + headers = {'APIKEY': self.key} + url = f'{self.api}ping' + auth_responses = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy) auth_responses = auth_responses[0] - if "False" in auth_responses or "Invalid authentication" in auth_responses: - print("\tKey could not be authenticated exiting program.") + if 'False' in auth_responses or 'Invalid authentication' in auth_responses: + print('\tKey could not be authenticated exiting program.') await asyncio.sleep(5) async def do_search(self) -> None: # https://api.securitytrails.com/v1/domain/domain.com - url = f"{self.api}domain/{self.word}" - headers = {"APIKEY": self.key} - response = await AsyncFetcher.fetch_all( - [url], headers=headers, proxy=self.proxy - ) - await asyncio.sleep( - 5 - ) # Not random delay because 2 seconds is required due to rate limit. + url = f'{self.api}domain/{self.word}' + headers = {'APIKEY': self.key} + response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy) + await asyncio.sleep(5) # Not random delay because 2 seconds is required due to rate limit. self.results = response[0] self.totalresults += self.results - url += "/subdomains" # Get subdomains now. - subdomain_response = await AsyncFetcher.fetch_all( - [url], headers=headers, proxy=self.proxy - ) + url += '/subdomains' # Get subdomains now. + subdomain_response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy) await asyncio.sleep(5) self.results = subdomain_response[0] self.totalresults += self.results @@ -56,7 +48,7 @@ async def process(self, proxy: bool = False) -> None: parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults) self.info = await parser.parse_text() # Create parser and set self.info to tuple returned from parsing text. 
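# A minimal standalone sketch of the SecurityTrails flow shown above: ping to
# validate the key, then fetch /domain/<word> followed by /domain/<word>/subdomains,
# sleeping between calls for the rate limit. It assumes plain aiohttp in place of
# the project's AsyncFetcher, and the key and domain values are placeholders, so
# read it as an illustration of the call sequence rather than the module itself.
import asyncio

import aiohttp

API = 'https://api.securitytrails.com/v1/'


async def securitytrails_sketch(key: str, word: str) -> str:
    headers = {'APIKEY': key}
    combined = ''
    async with aiohttp.ClientSession(headers=headers) as session:
        # Key check first; the module above looks for 'Invalid authentication' in the body.
        async with session.get(f'{API}ping') as resp:
            if 'Invalid authentication' in await resp.text():
                raise RuntimeError('SecurityTrails key could not be authenticated')
        # Domain details, then its subdomains, accumulating raw text for the parser.
        for endpoint in (f'{API}domain/{word}', f'{API}domain/{word}/subdomains'):
            async with session.get(endpoint) as resp:
                combined += await resp.text()
            await asyncio.sleep(5)  # fixed delay to stay under the rate limit
    return combined

# asyncio.run(securitytrails_sketch('YOUR_KEY', 'example.com'))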
- print("\tDone Searching Results") + print('\tDone Searching Results') async def get_ips(self) -> set: return self.info[0] diff --git a/theHarvester/discovery/shodansearch.py b/theHarvester/discovery/shodansearch.py index a767f8fa..26ea5ca6 100644 --- a/theHarvester/discovery/shodansearch.py +++ b/theHarvester/discovery/shodansearch.py @@ -10,7 +10,7 @@ class SearchShodan: def __init__(self) -> None: self.key = Core.shodan_key() if self.key is None: - raise MissingKey("Shodan") + raise MissingKey('Shodan') self.api = Shodan(self.key) self.hostdatarow: list = [] self.tracker: OrderedDict = OrderedDict() @@ -19,81 +19,81 @@ async def search_ip(self, ip) -> OrderedDict: try: ipaddress = ip results = self.api.host(ipaddress) - asn = "" + asn = '' domains: list = list() hostnames: list = list() - ip_str = "" - isp = "" - org = "" + ip_str = '' + isp = '' + org = '' ports: list = list() - title = "" - server = "" - product = "" + title = '' + server = '' + product = '' technologies: list = list() - data_first_dict = dict(results["data"][0]) + data_first_dict = dict(results['data'][0]) - if "ip_str" in data_first_dict.keys(): - ip_str += data_first_dict["ip_str"] + if 'ip_str' in data_first_dict.keys(): + ip_str += data_first_dict['ip_str'] - if "http" in data_first_dict.keys(): - http_results_dict = dict(data_first_dict["http"]) - if "title" in http_results_dict.keys(): - title_val = str(http_results_dict["title"]).strip() - if title_val != "None": + if 'http' in data_first_dict.keys(): + http_results_dict = dict(data_first_dict['http']) + if 'title' in http_results_dict.keys(): + title_val = str(http_results_dict['title']).strip() + if title_val != 'None': title += title_val - if "components" in http_results_dict.keys(): - for key in http_results_dict["components"].keys(): + if 'components' in http_results_dict.keys(): + for key in http_results_dict['components'].keys(): technologies.append(key) - if "server" in http_results_dict.keys(): - server_val = str(http_results_dict["server"]).strip() - if server_val != "None": + if 'server' in http_results_dict.keys(): + server_val = str(http_results_dict['server']).strip() + if server_val != 'None': server += server_val for key, value in results.items(): - if key == "asn": + if key == 'asn': asn += value - if key == "domains": + if key == 'domains': value = list(value) value.sort() domains.extend(value) - if key == "hostnames": + if key == 'hostnames': value = [host.strip() for host in list(value)] value.sort() hostnames.extend(value) - if key == "isp": + if key == 'isp': isp += value - if key == "org": + if key == 'org': org += str(value) - if key == "ports": + if key == 'ports': value = list(value) value.sort() ports.extend(value) - if key == "product": + if key == 'product': product += value technologies = list(set(technologies)) self.tracker[ip] = { - "asn": asn.strip(), - "domains": domains, - "hostnames": hostnames, - "ip_str": ip_str.strip(), - "isp": isp.strip(), - "org": org.strip(), - "ports": ports, - "product": product.strip(), - "server": server.strip(), - "technologies": technologies, - "title": title.strip(), + 'asn': asn.strip(), + 'domains': domains, + 'hostnames': hostnames, + 'ip_str': ip_str.strip(), + 'isp': isp.strip(), + 'org': org.strip(), + 'ports': ports, + 'product': product.strip(), + 'server': server.strip(), + 'technologies': technologies, + 'title': title.strip(), } return self.tracker except exception.APIError: - print(f"{ip}: Not in Shodan") - self.tracker[ip] = "Not in Shodan" + print(f'{ip}: Not in Shodan') + 
self.tracker[ip] = 'Not in Shodan' except Exception as e: # print(f'Error occurred in the Shodan IP search module: {e}') - self.tracker[ip] = f"Error occurred in the Shodan IP search module: {e}" + self.tracker[ip] = f'Error occurred in the Shodan IP search module: {e}' finally: return self.tracker diff --git a/theHarvester/discovery/sitedossier.py b/theHarvester/discovery/sitedossier.py index 4dbcedf7..3e9259a0 100644 --- a/theHarvester/discovery/sitedossier.py +++ b/theHarvester/discovery/sitedossier.py @@ -10,7 +10,7 @@ class SearchSitedossier: def __init__(self, word): self.word = word self.totalhosts = set() - self.server = "www.sitedossier.com" + self.server = 'www.sitedossier.com' self.proxy = False async def do_search(self): @@ -18,92 +18,75 @@ async def do_search(self): # This site seems to yield a lot of results but is a bit annoying to scrape # Hence the need for delays after each request to get the most results # Feel free to tweak the delays as needed - url = f"http://{self.server}/parentdomain/{self.word}" - headers = {"User-Agent": Core.get_user_agent()} - response = await AsyncFetcher.fetch_all( - [url], headers=headers, proxy=self.proxy - ) + url = f'http://{self.server}/parentdomain/{self.word}' + headers = {'User-Agent': Core.get_user_agent()} + response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy) base_response = response[0] - soup = BeautifulSoup(base_response, "html.parser") + soup = BeautifulSoup(base_response, 'html.parser') # iter_counter = 1 # iterations_needed = total_number // 100 # iterations_needed += 1 flagged_counter = 0 - stop_conditions = ["End of list.", "No data currently available."] + stop_conditions = ['End of list.', 'No data currently available.'] bot_string = ( - "Our web servers have detected unusual or excessive requests " + 'Our web servers have detected unusual or excessive requests ' 'from your computer or network. Please enter the unique "word"' - " below to confirm that you are a human interactively using this site." + ' below to confirm that you are a human interactively using this site.' 
) if ( - stop_conditions[0] not in base_response - and stop_conditions[1] not in base_response + stop_conditions[0] not in base_response and stop_conditions[1] not in base_response ) and bot_string not in base_response: - total_number = soup.find("i") - total_number = int( - total_number.text.strip().split(" ")[-1].replace(",", "") - ) - hrefs = soup.find_all("a", href=True) + total_number = soup.find('i') + total_number = int(total_number.text.strip().split(' ')[-1].replace(',', '')) + hrefs = soup.find_all('a', href=True) for a in hrefs: - unparsed = a["href"] - if "/site/" in unparsed: - subdomain = str(unparsed.split("/")[-1]).lower() + unparsed = a['href'] + if '/site/' in unparsed: + subdomain = str(unparsed.split('/')[-1]).lower() self.totalhosts.add(subdomain) await asyncio.sleep(get_delay() + 15 + get_delay()) for i in range(101, total_number, 100): - headers = {"User-Agent": Core.get_user_agent()} - iter_url = f"http://{self.server}/parentdomain/{self.word}/{i}" - print(f"My current iter_url: {iter_url}") - response = await AsyncFetcher.fetch_all( - [iter_url], headers=headers, proxy=self.proxy - ) + headers = {'User-Agent': Core.get_user_agent()} + iter_url = f'http://{self.server}/parentdomain/{self.word}/{i}' + print(f'My current iter_url: {iter_url}') + response = await AsyncFetcher.fetch_all([iter_url], headers=headers, proxy=self.proxy) response = response[0] - if ( - stop_conditions[0] in response - or stop_conditions[1] in response - or flagged_counter >= 3 - ): + if stop_conditions[0] in response or stop_conditions[1] in response or flagged_counter >= 3: break if bot_string in response: new_sleep_time = get_delay() * 30 - print( - f"Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds" - ) + print(f'Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds') flagged_counter += 1 await asyncio.sleep(new_sleep_time) response = await AsyncFetcher.fetch_all( [iter_url], - headers={"User-Agent": Core.get_user_agent()}, + headers={'User-Agent': Core.get_user_agent()}, proxy=self.proxy, ) response = response[0] if bot_string in response: new_sleep_time = get_delay() * 30 * get_delay() print( - f"Still triggering a captcha, sleeping longer for: {new_sleep_time}" - f" and skipping this batch: {iter_url}" + f'Still triggering a captcha, sleeping longer for: {new_sleep_time}' + f' and skipping this batch: {iter_url}' ) await asyncio.sleep(new_sleep_time) flagged_counter += 1 if flagged_counter >= 3: break - soup = BeautifulSoup(response, "html.parser") - hrefs = soup.find_all("a", href=True) + soup = BeautifulSoup(response, 'html.parser') + hrefs = soup.find_all('a', href=True) for a in hrefs: - unparsed = a["href"] - if "/site/" in unparsed: - subdomain = str(unparsed.split("/")[-1]).lower() + unparsed = a['href'] + if '/site/' in unparsed: + subdomain = str(unparsed.split('/')[-1]).lower() self.totalhosts.add(subdomain) await asyncio.sleep(get_delay() + 15 + get_delay()) - print(f"In total found: {len(self.totalhosts)}") + print(f'In total found: {len(self.totalhosts)}') print(self.totalhosts) else: - print( - "Sitedossier module has triggered a captcha on first iteration, no results can be found." 
- ) - print( - "Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module" - ) + print('Sitedossier module has triggered a captcha on first iteration, no results can be found.') + print('Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module') async def get_hostnames(self): return self.totalhosts diff --git a/theHarvester/discovery/subdomaincenter.py b/theHarvester/discovery/subdomaincenter.py index cfa09a5c..7785cf1f 100644 --- a/theHarvester/discovery/subdomaincenter.py +++ b/theHarvester/discovery/subdomaincenter.py @@ -5,23 +5,18 @@ class SubdomainCenter: def __init__(self, word): self.word = word self.results = set() - self.server = "https://api.subdomain.center/?domain=" + self.server = 'https://api.subdomain.center/?domain=' self.proxy = False async def do_search(self): - headers = {"User-Agent": Core.get_user_agent()} + headers = {'User-Agent': Core.get_user_agent()} try: - current_url = f"{self.server}{self.word}" - resp = await AsyncFetcher.fetch_all( - [current_url], headers=headers, proxy=self.proxy, json=True - ) + current_url = f'{self.server}{self.word}' + resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy, json=True) self.results = resp[0] - self.results = { - sub[4:] if sub[:4] == "www." and sub[4:] else sub - for sub in self.results - } + self.results = {sub[4:] if sub[:4] == 'www.' and sub[4:] else sub for sub in self.results} except Exception as e: - print(f"An exception has occurred in SubdomainCenter on : {e}") + print(f'An exception has occurred in SubdomainCenter on : {e}') async def get_hostnames(self): return self.results diff --git a/theHarvester/discovery/subdomainfinderc99.py b/theHarvester/discovery/subdomainfinderc99.py index ee086617..2b897cd7 100644 --- a/theHarvester/discovery/subdomainfinderc99.py +++ b/theHarvester/discovery/subdomainfinderc99.py @@ -14,24 +14,20 @@ def __init__(self, word) -> None: self.total_results: set = set() self.proxy = False # TODO add api support - self.server = "https://subdomainfinder.c99.nl/" - self.totalresults = "" + self.server = 'https://subdomainfinder.c99.nl/' + self.totalresults = '' async def do_search(self) -> None: # Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962 - headers = {"User-Agent": Core.get_user_agent()} - resp = await AsyncFetcher.fetch_all( - [self.server], headers=headers, proxy=self.proxy - ) + headers = {'User-Agent': Core.get_user_agent()} + resp = await AsyncFetcher.fetch_all([self.server], headers=headers, proxy=self.proxy) data = await self.get_csrf_params(resp[0]) - data["scan_subdomains"] = "" - data["domain"] = self.word - data["privatequery"] = "on" + data['scan_subdomains'] = '' + data['domain'] = self.word + data['privatequery'] = 'on' await asyncio.sleep(get_delay()) - second_resp = await AsyncFetcher.post_fetch( - self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data) - ) + second_resp = await AsyncFetcher.post_fetch(self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data)) # print(second_resp) self.totalresults += second_resp @@ -55,10 +51,10 @@ async def process(self, proxy: bool = False) -> None: @staticmethod async def get_csrf_params(data): csrf_params = {} - html = BeautifulSoup(data, "html.parser").find("div", {"class": "input-group"}) - for c in html.find_all("input"): + html = BeautifulSoup(data, 'html.parser').find('div', {'class': 'input-group'}) + for c in html.find_all('input'): try: - csrf_params[c.get("name")] = c.get("value") + 
csrf_params[c.get('name')] = c.get('value') except Exception: continue diff --git a/theHarvester/discovery/takeover.py b/theHarvester/discovery/takeover.py index d7d7d732..a1a0f302 100644 --- a/theHarvester/discovery/takeover.py +++ b/theHarvester/discovery/takeover.py @@ -18,70 +18,59 @@ def __init__(self, hosts) -> None: async def populate_fingerprints(self): # Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints - populate_url = "https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json" - headers = {"User-Agent": Core.get_user_agent()} + populate_url = 'https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json' + headers = {'User-Agent': Core.get_user_agent()} response = await AsyncFetcher.fetch_all([populate_url], headers=headers) try: resp = response[0] unparsed_json = ujson.loads(resp) for unparsed_fingerprint in unparsed_json: - if unparsed_fingerprint["service"] in ["Smugsmug"]: + if unparsed_fingerprint['service'] in ['Smugsmug']: # Subdomain must be in format domain.smugsmug.com # This will never happen as subdomains are parsed and filtered to be in format of *.word.com continue - if ( - unparsed_fingerprint["status"] == "Vulnerable" - or unparsed_fingerprint["status"] == "Edge case" - ): - self.fingerprints[unparsed_fingerprint["fingerprint"]] = ( - unparsed_fingerprint["service"] - ) + if unparsed_fingerprint['status'] == 'Vulnerable' or unparsed_fingerprint['status'] == 'Edge case': + self.fingerprints[unparsed_fingerprint['fingerprint']] = unparsed_fingerprint['service'] except Exception as e: - print( - f"An exception has occurred populating takeover fingerprints: {e}, defaulting to static list" - ) + print(f'An exception has occurred populating takeover fingerprints: {e}, defaulting to static list') self.fingerprints = { - "'Trying to access your account?'": "Campaign Monitor", - "404 Not Found": "Fly.io", - "404 error unknown site!": "Pantheon", - "Do you want to register *.wordpress.com?": "Wordpress", - "Domain uses DO name serves with no records in DO.": "Digital Ocean", - "It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": "LaunchRock", - "No Site For Domain": "Kinsta", - "No settings were found for this company:": "Help Scout", - "Project doesnt exist... yet!": "Readme.io", - "Repository not found": "Bitbucket", - "The feed has not been found.": "Feedpress", - "No such app": "Heroku", - "The specified bucket does not exist": "AWS/S3", - "The thing you were looking for is no longer here, or never was": "Ghost", - "There isn't a Github Pages site here.": "Github", - "This UserVoice subdomain is currently available!": "UserVoice", - "Uh oh. That page doesn't exist.": "Intercom", - "We could not find what you're looking for.": "Help Juice", - "Whatever you were looking for doesn't currently exist at this address": "Tumblr", - "is not a registered InCloud YouTrack": "JetBrains", - "page not found": "Uptimerobot", - "project not found": "Surge.sh", + "'Trying to access your account?'": 'Campaign Monitor', + '404 Not Found': 'Fly.io', + '404 error unknown site!': 'Pantheon', + 'Do you want to register *.wordpress.com?': 'Wordpress', + 'Domain uses DO name serves with no records in DO.': 'Digital Ocean', + "It looks like you may have taken a wrong turn somewhere. 
Don't worry...it happens to all of us.": 'LaunchRock', + 'No Site For Domain': 'Kinsta', + 'No settings were found for this company:': 'Help Scout', + 'Project doesnt exist... yet!': 'Readme.io', + 'Repository not found': 'Bitbucket', + 'The feed has not been found.': 'Feedpress', + 'No such app': 'Heroku', + 'The specified bucket does not exist': 'AWS/S3', + 'The thing you were looking for is no longer here, or never was': 'Ghost', + "There isn't a Github Pages site here.": 'Github', + 'This UserVoice subdomain is currently available!': 'UserVoice', + "Uh oh. That page doesn't exist.": 'Intercom', + "We could not find what you're looking for.": 'Help Juice', + "Whatever you were looking for doesn't currently exist at this address": 'Tumblr', + 'is not a registered InCloud YouTrack': 'JetBrains', + 'page not found': 'Uptimerobot', + 'project not found': 'Surge.sh', } async def check(self, url, resp) -> None: # Simple function that takes response and checks if any fingerprints exist # If a fingerprint exists figures out which one and prints it out - regex = re.compile( - "(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))" - ) + regex = re.compile('(?=(' + '|'.join(map(re.escape, list(self.fingerprints.keys()))) + '))') # Sanitize fingerprints matches = re.findall(regex, resp) matches = list(set(matches)) for match in matches: - print(f"\t\033[91m Takeover detected: {url}\033[1;32;40m") + print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m') if match in self.fingerprints.keys(): # Validation check as to not error out service = self.fingerprints[match] - print( - f"\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m" - ) + print(f'\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m') self.results[url].append({match: service}) async def do_take(self) -> None: @@ -89,13 +78,11 @@ async def do_take(self) -> None: if len(self.hosts) > 0: # Returns a list of tuples in this format: (url, response) # Filter out responses whose responses are empty strings (indicates errored) - https_hosts = [f"https://{host}" for host in self.hosts] - http_hosts = [f"http://{host}" for host in self.hosts] + https_hosts = [f'https://{host}' for host in self.hosts] + http_hosts = [f'http://{host}' for host in self.hosts] all_hosts = https_hosts + http_hosts shuffle(all_hosts) - resps: list = await AsyncFetcher.fetch_all( - all_hosts, takeover=True, proxy=self.proxy - ) + resps: list = await AsyncFetcher.fetch_all(all_hosts, takeover=True, proxy=self.proxy) for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1): await self.check(url, resp) else: diff --git a/theHarvester/discovery/threatminer.py b/theHarvester/discovery/threatminer.py index fd8acc0c..944ff327 100644 --- a/theHarvester/discovery/threatminer.py +++ b/theHarvester/discovery/threatminer.py @@ -9,15 +9,13 @@ def __init__(self, word) -> None: self.proxy = False async def do_search(self) -> None: - url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5" + url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) - self.totalhosts = {host for host in response[0]["results"]} - second_url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2" - secondresp = await AsyncFetcher.fetch_all( - [second_url], json=True, proxy=self.proxy - ) + self.totalhosts = {host for host in response[0]['results']} + second_url = 
f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2' + secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy) try: - self.totalips = {resp["ip"] for resp in secondresp[0]["results"]} + self.totalips = {resp['ip'] for resp in secondresp[0]['results']} except TypeError: pass diff --git a/theHarvester/discovery/tombasearch.py b/theHarvester/discovery/tombasearch.py index cdc49442..7897243c 100644 --- a/theHarvester/discovery/tombasearch.py +++ b/theHarvester/discovery/tombasearch.py @@ -12,12 +12,10 @@ def __init__(self, word, limit, start) -> None: self.start = start self.key = Core.tomba_key() if self.key[0] is None or self.key[1] is None: - raise MissingKey("Tomba Key and/or Secret") - self.total_results = "" + raise MissingKey('Tomba Key and/or Secret') + self.total_results = '' self.counter = start - self.database = ( - f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10" - ) + self.database = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10' self.proxy = False self.hostnames: list = [] self.emails: list = [] @@ -26,49 +24,38 @@ async def do_search(self) -> None: # First determine if a user account is not a free account, this call is free is_free = True headers = { - "User-Agent": Core.get_user_agent(), - "X-Tomba-Key": self.key[0], - "X-Tomba-Secret": self.key[1], + 'User-Agent': Core.get_user_agent(), + 'X-Tomba-Key': self.key[0], + 'X-Tomba-Secret': self.key[1], } - acc_info_url = "https://api.tomba.io/v1/me" - response = await AsyncFetcher.fetch_all( - [acc_info_url], headers=headers, json=True - ) + acc_info_url = 'https://api.tomba.io/v1/me' + response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True) is_free = ( is_free - if "name" in response[0]["data"]["pricing"].keys() - and response[0]["data"]["pricing"]["name"].lower() == "free" + if 'name' in response[0]['data']['pricing'].keys() and response[0]['data']['pricing']['name'].lower() == 'free' else False ) # Extract the total number of requests that are available for an account total_requests_avail = ( - response[0]["data"]["requests"]["domains"]["available"] - - response[0]["data"]["requests"]["domains"]["used"] + response[0]['data']['requests']['domains']['available'] - response[0]['data']['requests']['domains']['used'] ) if is_free: - response = await AsyncFetcher.fetch_all( - [self.database], headers=headers, proxy=self.proxy, json=True - ) + response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True) self.emails, self.hostnames = await self.parse_resp(json_resp=response[0]) else: # Determine the total number of emails that are available # As the most emails you can get within one query are 100 # This is only done where paid accounts are in play - tomba_counter = f"https://api.tomba.io/v1/email-count?domain={self.word}" - response = await AsyncFetcher.fetch_all( - [tomba_counter], headers=headers, proxy=self.proxy, json=True - ) - total_number_reqs = response[0]["data"]["total"] // 100 + tomba_counter = f'https://api.tomba.io/v1/email-count?domain={self.word}' + response = await AsyncFetcher.fetch_all([tomba_counter], headers=headers, proxy=self.proxy, json=True) + total_number_reqs = response[0]['data']['total'] // 100 # Parse out meta field within initial JSON response to determine the total number of results if total_requests_avail < total_number_reqs: + print('WARNING: The account does not have enough requests to gather all the emails.') print( - "WARNING: The account does not 
have enough requests to gather all the emails." - ) - print( - f"Total requests available: {total_requests_avail}, total requests " - f"needed to be made: {total_number_reqs}" + f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}' ) print( 'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.' @@ -79,24 +66,22 @@ async def do_search(self) -> None: # increments of max number with page determining where to start # See docs for more details: https://developer.tomba.io/#domain-search for page in range(0, total_number_reqs + 1): - req_url = f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}" - response = await AsyncFetcher.fetch_all( - [req_url], headers=headers, proxy=self.proxy, json=True - ) + req_url = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}' + response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True) temp_emails, temp_hostnames = await self.parse_resp(response[0]) self.emails.extend(temp_emails) self.hostnames.extend(temp_hostnames) await asyncio.sleep(1) async def parse_resp(self, json_resp): - emails = list(sorted({email["email"] for email in json_resp["data"]["emails"]})) + emails = list(sorted({email['email'] for email in json_resp['data']['emails']})) domains = list( sorted( { - source["website_url"] - for email in json_resp["data"]["emails"] - for source in email["sources"] - if self.word in source["website_url"] + source['website_url'] + for email in json_resp['data']['emails'] + for source in email['sources'] + if self.word in source['website_url'] } ) ) diff --git a/theHarvester/discovery/urlscan.py b/theHarvester/discovery/urlscan.py index 062ea525..74975fcf 100644 --- a/theHarvester/discovery/urlscan.py +++ b/theHarvester/discovery/urlscan.py @@ -11,25 +11,17 @@ def __init__(self, word) -> None: self.proxy = False async def do_search(self) -> None: - url = f"https://urlscan.io/api/v1/search/?q=domain:{self.word}" + url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}' response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) resp = response[0] - self.totalhosts = {f"{page['page']['domain']}" for page in resp["results"]} - self.totalips = { - f"{page['page']['ip']}" - for page in resp["results"] - if "ip" in page["page"].keys() - } + self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']} + self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()} self.interestingurls = { f"{page['page']['url']}" - for page in resp["results"] - if self.word in page["page"]["url"] and "url" in page["page"].keys() - } - self.totalasns = { - f"{page['page']['asn']}" - for page in resp["results"] - if "asn" in page["page"].keys() + for page in resp['results'] + if self.word in page['page']['url'] and 'url' in page['page'].keys() } + self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()} async def get_hostnames(self) -> set: return self.totalhosts diff --git a/theHarvester/discovery/virustotal.py b/theHarvester/discovery/virustotal.py index 9e511070..1fe47c53 100644 --- a/theHarvester/discovery/virustotal.py +++ b/theHarvester/discovery/virustotal.py @@ -8,7 +8,7 @@ class SearchVirustotal: def __init__(self, word) -> None: self.key = Core.virustotal_key() if self.key is None: - raise 
MissingKey("virustotal") + raise MissingKey('virustotal') self.word = word self.proxy = False self.hostnames: list = [] @@ -18,14 +18,12 @@ async def do_search(self) -> None: # based on: https://developers.virustotal.com/reference/domains-relationships # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40" headers = { - "User-Agent": Core.get_user_agent(), - "Accept": "application/json", - "x-apikey": self.key, + 'User-Agent': Core.get_user_agent(), + 'Accept': 'application/json', + 'x-apikey': self.key, } - base_url = ( - f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40" - ) - cursor = "" + base_url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40' + cursor = '' count = 0 fail_counter = 0 counter = 0 @@ -37,42 +35,29 @@ async def do_search(self) -> None: # TODO add timer logic if proven to be needed # in the meantime sleeping 16 seconds should eliminate hitting the rate limit # in case rate limit is hit, fail counter exists and sleep for 65 seconds - send_url = ( - base_url + "&cursor=" + cursor - if cursor != "" and len(cursor) > 2 - else base_url - ) - responses = await AsyncFetcher.fetch_all( - [send_url], headers=headers, proxy=self.proxy, json=True - ) + send_url = base_url + '&cursor=' + cursor if cursor != '' and len(cursor) > 2 else base_url + responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True) jdata = responses[0] - if "data" not in jdata.keys(): + if 'data' not in jdata.keys(): await asyncio.sleep(60 + 5) fail_counter += 1 - if "meta" in jdata.keys(): - cursor = ( - jdata["meta"]["cursor"] if "cursor" in jdata["meta"].keys() else "" - ) - if len(cursor) == 0 and "data" in jdata.keys(): + if 'meta' in jdata.keys(): + cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else '' + if len(cursor) == 0 and 'data' in jdata.keys(): # if cursor no longer is within the meta field have hit last entry breakcon = True - count += jdata["meta"]["count"] + count += jdata['meta']['count'] if count == 0 or fail_counter >= 2: break - if "data" in jdata.keys(): - data = jdata["data"] + if 'data' in jdata.keys(): + data = jdata['data'] self.hostnames.extend(await self.parse_hostnames(data, self.word)) counter += 1 await asyncio.sleep(16) self.hostnames = list(sorted(set(self.hostnames))) # verify domains such as x.x.com.multicdn.x.com are parsed properly self.hostnames = [ - host - for host in self.hostnames - if ( - (len(host.split(".")) >= 3) - and host.split(".")[-2] == self.word.split(".")[-2] - ) + host for host in self.hostnames if ((len(host.split('.')) >= 3) and host.split('.')[-2] == self.word.split('.')[-2]) ] async def get_hostnames(self) -> list: @@ -82,22 +67,20 @@ async def get_hostnames(self) -> list: async def parse_hostnames(data, word): total_subdomains = set() for attribute in data: - total_subdomains.add(attribute["id"].replace('"', "").replace("www.", "")) - attributes = attribute["attributes"] + total_subdomains.add(attribute['id'].replace('"', '').replace('www.', '')) + attributes = attribute['attributes'] total_subdomains.update( { - value["value"].replace('"', "").replace("www.", "") - for value in attributes["last_dns_records"] - if word in value["value"] + value['value'].replace('"', '').replace('www.', '') + for value in attributes['last_dns_records'] + if word in value['value'] } ) - if "last_https_certificate" in attributes.keys(): + if 'last_https_certificate' in attributes.keys(): total_subdomains.update( { - 
value.replace('"', "").replace("www.", "") - for value in attributes["last_https_certificate"]["extensions"][ - "subject_alternative_name" - ] + value.replace('"', '').replace('www.', '') + for value in attributes['last_https_certificate']['extensions']['subject_alternative_name'] if word in value } ) @@ -108,9 +91,7 @@ async def parse_hostnames(data, word): total_subdomains = [ x for x in total_subdomains - if "edgekey.net" not in str(x) - and "akadns.net" not in str(x) - and "include:_spf" not in str(x) + if 'edgekey.net' not in str(x) and 'akadns.net' not in str(x) and 'include:_spf' not in str(x) ] total_subdomains.sort() return total_subdomains diff --git a/theHarvester/discovery/yahoosearch.py b/theHarvester/discovery/yahoosearch.py index 7ac9b42d..ea7f5b93 100644 --- a/theHarvester/discovery/yahoosearch.py +++ b/theHarvester/discovery/yahoosearch.py @@ -5,22 +5,16 @@ class SearchYahoo: def __init__(self, word, limit) -> None: self.word = word - self.total_results = "" - self.server = "search.yahoo.com" + self.total_results = '' + self.server = 'search.yahoo.com' self.limit = limit self.proxy = False async def do_search(self) -> None: - base_url = f"https://{self.server}/search?p=%40{self.word}&b=xx&pz=10" - headers = {"Host": self.server, "User-agent": Core.get_user_agent()} - urls = [ - base_url.replace("xx", str(num)) - for num in range(0, self.limit, 10) - if num <= self.limit - ] - responses = await AsyncFetcher.fetch_all( - urls, headers=headers, proxy=self.proxy - ) + base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10' + headers = {'Host': self.server, 'User-agent': Core.get_user_agent()} + urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit] + responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy) for response in responses: self.total_results += response @@ -35,8 +29,8 @@ async def get_emails(self): # strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld for email in toparse_emails: email = str(email) - if "-" in email and email[0].isdigit() and email.index("-") <= 9: - while email[0] == "-" or email[0].isdigit(): + if '-' in email and email[0].isdigit() and email.index('-') <= 9: + while email[0] == '-' or email[0].isdigit(): email = email[1:] emails.add(email) return list(emails) diff --git a/theHarvester/discovery/zoomeyesearch.py b/theHarvester/discovery/zoomeyesearch.py index a7a1bfbe..e693868b 100644 --- a/theHarvester/discovery/zoomeyesearch.py +++ b/theHarvester/discovery/zoomeyesearch.py @@ -16,8 +16,8 @@ def __init__(self, word, limit) -> None: # If you wish to extract as many subdomains as possible visit the fetch_subdomains # To see how if self.key is None: - raise MissingKey("zoomeye") - self.baseurl = "https://api.zoomeye.org/host/search" + raise MissingKey('zoomeye') + self.baseurl = 'https://api.zoomeye.org/host/search' self.proxy = False self.totalasns: list = list() self.totalhosts: list = list() @@ -58,40 +58,38 @@ def __init__(self, word, limit) -> None: async def fetch_subdomains(self) -> None: # Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip - headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()} + headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()} - subdomain_search_endpoint = ( - f"https://api.zoomeye.org/domain/search?q={self.word}&type=0&" - ) + subdomain_search_endpoint = f'https://api.zoomeye.org/domain/search?q={self.word}&type=0&' response = await AsyncFetcher.fetch_all( - 
[subdomain_search_endpoint + "page=1"], + [subdomain_search_endpoint + 'page=1'], json=True, proxy=self.proxy, headers=headers, ) # Make initial request to determine total number of subdomains resp = response[0] - if resp["status"] != 200: + if resp['status'] != 200: return - total = resp["total"] + total = resp['total'] # max number of results per request seems to be 30 # NOTE: If you wish to get as many subdomains as possible # Change the line below to: # self.limit = (total // 30) + 1 self.limit = self.limit if total > self.limit else (total // 30) + 1 - self.totalhosts.extend([item["name"] for item in resp["list"]]) + self.totalhosts.extend([item['name'] for item in resp['list']]) for i in range(2, self.limit): response = await AsyncFetcher.fetch_all( - [subdomain_search_endpoint + f"page={i}"], + [subdomain_search_endpoint + f'page={i}'], json=True, proxy=self.proxy, headers=headers, ) resp = response[0] - if resp["status"] != 200: + if resp['status'] != 200: return - found_subdomains = [item["name"] for item in resp["list"]] + found_subdomains = [item['name'] for item in resp['list']] if len(found_subdomains) == 0: break self.totalhosts.extend(found_subdomains) @@ -99,19 +97,17 @@ async def fetch_subdomains(self) -> None: await asyncio.sleep(get_delay() + 1) async def do_search(self) -> None: - headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()} + headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()} # Fetch subdomains first await self.fetch_subdomains() params = ( - ("query", f"site:{self.word}"), - ("page", "1"), - ) - response = await AsyncFetcher.fetch_all( - [self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params + ('query', f'site:{self.word}'), + ('page', '1'), ) + response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params) # The First request determines how many pages there in total resp = response[0] - total_pages = int(resp["available"]) + total_pages = int(resp['available']) self.limit = self.limit if total_pages > self.limit else total_pages self.limit = 3 if self.limit == 2 else self.limit cur_page = 2 if self.limit >= 2 else -1 @@ -121,21 +117,17 @@ async def do_search(self) -> None: # cur_page = -1 if cur_page == -1: # No need to do loop just parse and leave - if "matches" in resp.keys(): - hostnames, emails, ips, asns, iurls = await self.parse_matches( - resp["matches"] - ) + if 'matches' in resp.keys(): + hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches']) self.totalhosts.extend(hostnames) self.totalemails.extend(emails) self.totalips.extend(ips) self.totalasns.extend(asns) self.interestingurls.extend(iurls) else: - if "matches" in resp.keys(): + if 'matches' in resp.keys(): # Parse out initial results and then continue to loop - hostnames, emails, ips, asns, iurls = await self.parse_matches( - resp["matches"] - ) + hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches']) self.totalhosts.extend(hostnames) self.totalemails.extend(emails) self.totalips.extend(ips) @@ -145,8 +137,8 @@ async def do_search(self) -> None: for num in range(2, self.limit): # print(f'Currently on page: {num}') params = ( - ("query", f"site:{self.word}"), - ("page", f"{num}"), + ('query', f'site:{self.word}'), + ('page', f'{num}'), ) response = await AsyncFetcher.fetch_all( [self.baseurl], @@ -156,22 +148,14 @@ async def do_search(self) -> None: params=params, ) resp = response[0] - if "matches" not in resp.keys(): - print(f"Your resp: 
{resp}") - print("Match not found in keys") + if 'matches' not in resp.keys(): + print(f'Your resp: {resp}') + print('Match not found in keys') break - hostnames, emails, ips, asns, iurls = await self.parse_matches( - resp["matches"] - ) + hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches']) - if ( - len(hostnames) == 0 - and len(emails) == 0 - and len(ips) == 0 - and len(asns) == 0 - and len(iurls) == 0 - ): + if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 and len(asns) == 0 and len(iurls) == 0: nomatches_counter += 1 if nomatches_counter >= 5: @@ -196,48 +180,42 @@ async def parse_matches(self, matches): emails = set() for match in matches: try: - ips.add(match["ip"]) + ips.add(match['ip']) - if "geoinfo" in match.keys(): + if 'geoinfo' in match.keys(): asns.add(f"AS{match['geoinfo']['asn']}") - if "rdns_new" in match.keys(): - rdns_new = match["rdns_new"] + if 'rdns_new' in match.keys(): + rdns_new = match['rdns_new'] - if "," in rdns_new: - parts = str(rdns_new).split(",") + if ',' in rdns_new: + parts = str(rdns_new).split(',') rdns_new = parts[0] if len(parts) == 2: hostnames.add(parts[1]) - rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new + rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new hostnames.add(rdns_new) else: - rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new + rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new hostnames.add(rdns_new) - if "rdns" in match.keys(): - rdns = match["rdns"] - rdns = rdns[:-1] if rdns[-1] == "." else rdns + if 'rdns' in match.keys(): + rdns = match['rdns'] + rdns = rdns[:-1] if rdns[-1] == '.' else rdns hostnames.add(rdns) - if "portinfo" in match.keys(): + if 'portinfo' in match.keys(): # re. - temp_emails = set( - await self.parse_emails(match["portinfo"]["banner"]) - ) + temp_emails = set(await self.parse_emails(match['portinfo']['banner'])) emails.update(temp_emails) - hostnames.update( - set(await self.parse_hostnames(match["portinfo"]["banner"])) - ) + hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner']))) iurls = { - str(iurl.group(1)).replace('"', "") - for iurl in re.finditer( - self.iurl_regex, match["portinfo"]["banner"] - ) + str(iurl.group(1)).replace('"', '') + for iurl in re.finditer(self.iurl_regex, match['portinfo']['banner']) if self.word in str(iurl.group(1)) } except Exception as e: - print(f"An exception has occurred: {e}") + print(f'An exception has occurred: {e}') return hostnames, emails, ips, asns, iurls async def process(self, proxy: bool = False) -> None: diff --git a/theHarvester/lib/__init__.py b/theHarvester/lib/__init__.py index 7145285d..8fc0aea3 100644 --- a/theHarvester/lib/__init__.py +++ b/theHarvester/lib/__init__.py @@ -1 +1 @@ -__all__ = ["hostchecker"] +__all__ = ['hostchecker'] diff --git a/theHarvester/lib/api/api.py b/theHarvester/lib/api/api.py index e29983f6..53d903a7 100644 --- a/theHarvester/lib/api/api.py +++ b/theHarvester/lib/api/api.py @@ -12,36 +12,32 @@ limiter = Limiter(key_func=get_remote_address) app = FastAPI( - title="Restful Harvest", - description="Rest API for theHarvester powered by FastAPI", - version="0.0.2", + title='Restful Harvest', + description='Rest API for theHarvester powered by FastAPI', + version='0.0.2', ) app.state.limiter = limiter app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore # This is where we will host files that arise if the user specifies a filename try: - app.mount( - "/static", 
StaticFiles(directory="theHarvester/lib/api/static/"), name="static" - ) + app.mount('/static', StaticFiles(directory='theHarvester/lib/api/static/'), name='static') except RuntimeError: - static_path = os.path.expanduser("~/.local/share/theHarvester/static/") + static_path = os.path.expanduser('~/.local/share/theHarvester/static/') if not os.path.isdir(static_path): os.makedirs(static_path) app.mount( - "/static", + '/static', StaticFiles(directory=static_path), - name="static", + name='static', ) -@app.get("/") +@app.get('/') async def root(*, user_agent: str = Header(None)) -> Response: # very basic user agent filtering - if user_agent and ( - "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent - ): - response = RedirectResponse(app.url_path_for("bot")) + if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent): + response = RedirectResponse(app.url_path_for('bot')) return response return HTMLResponse( @@ -70,36 +66,34 @@ async def root(*, user_agent: str = Header(None)) -> Response: ) -@app.get("/nicebot") +@app.get('/nicebot') async def bot() -> dict[str, str]: # nice bot - string = {"bot": "These are not the droids you are looking for"} + string = {'bot': 'These are not the droids you are looking for'} return string -@app.get("/sources", response_class=UJSONResponse) -@limiter.limit("5/minute") +@app.get('/sources', response_class=UJSONResponse) +@limiter.limit('5/minute') async def getsources(request: Request): # Endpoint for user to query for available sources theHarvester supports # Rate limit of 5 requests per minute sources = __main__.Core.get_supportedengines() - return {"sources": sources} + return {'sources': sources} -@app.get("/dnsbrute") -@limiter.limit("5/minute") +@app.get('/dnsbrute') +@limiter.limit('5/minute') async def dnsbrute( request: Request, user_agent: str = Header(None), - domain: str = Query(..., description="Domain to be brute forced"), + domain: str = Query(..., description='Domain to be brute forced'), ) -> Response: # Endpoint for user to signal to do DNS brute forcing # Rate limit of 5 requests per minute # basic user agent filtering - if user_agent and ( - "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent - ): - response = RedirectResponse(app.url_path_for("bot")) + if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent): + response = RedirectResponse(app.url_path_for('bot')) return response dns_bruteforce = await __main__.start( argparse.Namespace( @@ -108,49 +102,45 @@ async def dnsbrute( dns_server=False, dns_tld=False, domain=domain, - filename="", + filename='', google_dork=False, limit=500, proxies=False, shodan=False, - source=",".join([]), + source=','.join([]), start=0, take_over=False, virtual_host=False, ) ) - return UJSONResponse({"dns_bruteforce": dns_bruteforce}) + return UJSONResponse({'dns_bruteforce': dns_bruteforce}) -@app.get("/query") -@limiter.limit("2/minute") +@app.get('/query') +@limiter.limit('2/minute') async def query( request: Request, - dns_server: str = Query(""), + dns_server: str = Query(''), user_agent: str = Header(None), dns_brute: bool = Query(False), dns_lookup: bool = Query(False), dns_tld: bool = Query(False), - filename: str = Query(""), + filename: str = Query(''), google_dork: bool = Query(False), proxies: bool = Query(False), shodan: bool = Query(False), take_over: bool = Query(False), virtual_host: bool = Query(False), - source: list[str] = Query( - ..., 
description="Data sources to query comma separated with no space" - ), + source: list[str] = Query(..., description='Data sources to query comma separated with no space'), limit: int = Query(500), start: int = Query(0), - domain: str = Query(..., description="Domain to be harvested"), + domain: str = Query(..., description='Domain to be harvested'), ) -> Response: # Query function that allows user to query theHarvester rest API # Rate limit of 2 requests per minute # basic user agent filtering - if user_agent and ( - "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent - ): - response = RedirectResponse(app.url_path_for("bot")) + if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent): + response = RedirectResponse(app.url_path_for('bot')) return response try: ( @@ -175,7 +165,7 @@ async def query( limit=limit, proxies=proxies, shodan=shodan, - source=",".join(source), + source=','.join(source), start=start, take_over=take_over, virtual_host=virtual_host, @@ -184,18 +174,16 @@ async def query( return UJSONResponse( { - "asns": asns, - "interesting_urls": iurls, - "twitter_people": twitter_people_list, - "linkedin_people": linkedin_people_list, - "linkedin_links": linkedin_links, - "trello_urls": aurls, - "ips": aips, - "emails": aemails, - "hosts": ahosts, + 'asns': asns, + 'interesting_urls': iurls, + 'twitter_people': twitter_people_list, + 'linkedin_people': linkedin_people_list, + 'linkedin_links': linkedin_links, + 'trello_urls': aurls, + 'ips': aips, + 'emails': aemails, + 'hosts': ahosts, } ) except Exception: - return UJSONResponse( - {"exception": "Please contact the server administrator to check the issue"} - ) + return UJSONResponse({'exception': 'Please contact the server administrator to check the issue'}) diff --git a/theHarvester/lib/api/api_example.py b/theHarvester/lib/api/api_example.py index 9174cfd3..af7b5cdf 100644 --- a/theHarvester/lib/api/api_example.py +++ b/theHarvester/lib/api/api_example.py @@ -23,100 +23,94 @@ async def main() -> None: Just a simple example of how to interact with the rest api you can easily use requests instead of aiohttp or whatever you best see fit """ - url = "http://127.0.0.1:5000" - domain = "netflix.com" - query_url = ( - f"{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}" - ) + url = 'http://127.0.0.1:5000' + domain = 'netflix.com' + query_url = f'{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}' async with aiohttp.ClientSession() as session: fetched_json = await fetch_json(session, query_url) - total_asns = fetched_json["asns"] - interesting_urls = fetched_json["interesting_urls"] - twitter_people_list_tracker = fetched_json["twitter_people"] - linkedin_people_list_tracker = fetched_json["linkedin_people"] - linkedin_links_tracker = fetched_json["linkedin_links"] - trello_urls = fetched_json["trello_urls"] - ips = fetched_json["ips"] - emails = fetched_json["emails"] - hosts = fetched_json["hosts"] + total_asns = fetched_json['asns'] + interesting_urls = fetched_json['interesting_urls'] + twitter_people_list_tracker = fetched_json['twitter_people'] + linkedin_people_list_tracker = fetched_json['linkedin_people'] + linkedin_links_tracker = fetched_json['linkedin_links'] + trello_urls = fetched_json['trello_urls'] + ips = fetched_json['ips'] + emails = fetched_json['emails'] + hosts = fetched_json['hosts'] if len(total_asns) > 0: - print(f"\n[*] ASNS found: {len(total_asns)}") - print("--------------------") + 
print(f'\n[*] ASNS found: {len(total_asns)}') + print('--------------------') total_asns = list(sorted(set(total_asns))) for asn in total_asns: print(asn) if len(interesting_urls) > 0: - print(f"\n[*] Interesting Urls found: {len(interesting_urls)}") - print("--------------------") + print(f'\n[*] Interesting Urls found: {len(interesting_urls)}') + print('--------------------') interesting_urls = list(sorted(set(interesting_urls))) for iurl in interesting_urls: print(iurl) if len(twitter_people_list_tracker) == 0: - print("\n[*] No Twitter users found.\n\n") + print('\n[*] No Twitter users found.\n\n') else: if len(twitter_people_list_tracker) >= 1: - print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker))) - print("---------------------") + print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker))) + print('---------------------') twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker))) for usr in twitter_people_list_tracker: print(usr) if len(linkedin_people_list_tracker) == 0: - print("\n[*] No LinkedIn users found.\n\n") + print('\n[*] No LinkedIn users found.\n\n') else: if len(linkedin_people_list_tracker) >= 1: - print( - "\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker)) - ) - print("---------------------") - linkedin_people_list_tracker = list( - sorted(set(linkedin_people_list_tracker)) - ) + print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker))) + print('---------------------') + linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker))) for usr in linkedin_people_list_tracker: print(usr) if len(linkedin_links_tracker) == 0: - print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}") + print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}') linkedin_links_tracker = list(sorted(set(linkedin_links_tracker))) - print("---------------------") + print('---------------------') for link in linkedin_links_tracker: print(link) length_urls = len(trello_urls) total = length_urls - print("\n[*] Trello URLs found: " + str(total)) - print("--------------------") + print('\n[*] Trello URLs found: ' + str(total)) + print('--------------------') all_urls = list(sorted(set(trello_urls))) for url in sorted(all_urls): print(url) if len(ips) == 0: - print("\n[*] No IPs found.") + print('\n[*] No IPs found.') else: - print("\n[*] IPs found: " + str(len(ips))) - print("-------------------") + print('\n[*] IPs found: ' + str(len(ips))) + print('-------------------') # use netaddr as the list may contain ipv4 and ipv6 addresses ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)]) - print("\n".join(map(str, ip_list))) + print('\n'.join(map(str, ip_list))) if len(emails) == 0: - print("\n[*] No emails found.") + print('\n[*] No emails found.') else: - print("\n[*] Emails found: " + str(len(emails))) - print("----------------------") + print('\n[*] Emails found: ' + str(len(emails))) + print('----------------------') all_emails = sorted(list(set(emails))) - print("\n".join(all_emails)) + print('\n'.join(all_emails)) if len(hosts) == 0: - print("\n[*] No hosts found.\n\n") + print('\n[*] No hosts found.\n\n') else: - print("\n[*] Hosts found: " + str(len(hosts))) - print("---------------------") - print("\n".join(hosts)) + print('\n[*] Hosts found: ' + str(len(hosts))) + print('---------------------') + print('\n'.join(hosts)) -if __name__ == "__main__": +if __name__ == '__main__': asyncio.run(main()) diff --git a/theHarvester/lib/core.py 
b/theHarvester/lib/core.py index 5897b5a1..84a656d9 100644 --- a/theHarvester/lib/core.py +++ b/theHarvester/lib/core.py @@ -19,11 +19,11 @@ if TYPE_CHECKING: from collections.abc import Sized -DATA_DIR = Path(__file__).parents[1] / "data" +DATA_DIR = Path(__file__).parents[1] / 'data' CONFIG_DIRS = [ - Path("/etc/theHarvester/"), - Path("/usr/local/etc/theHarvester/"), - Path("~/.theHarvester"), + Path('/etc/theHarvester/'), + Path('/usr/local/etc/theHarvester/'), + Path('~/.theHarvester'), ] @@ -35,7 +35,7 @@ def _read_config(filename: str) -> str: with contextlib.suppress(FileNotFoundError): file = path.expanduser() / filename config = file.read_text() - print(f"Read {filename} from {file}") + print(f'Read {filename} from {file}') return config # Fallback to creating default in user's home dir @@ -43,168 +43,160 @@ def _read_config(filename: str) -> str: dest = CONFIG_DIRS[-1].expanduser() / filename dest.parent.mkdir(exist_ok=True) dest.write_text(default) - print(f"Created default {filename} at {dest}") + print(f'Created default {filename} at {dest}') return default @staticmethod def api_keys() -> dict: - keys = yaml.safe_load(Core._read_config("api-keys.yaml")) - return keys["apikeys"] + keys = yaml.safe_load(Core._read_config('api-keys.yaml')) + return keys['apikeys'] @staticmethod def bevigil_key() -> str: - return Core.api_keys()["bevigil"]["key"] + return Core.api_keys()['bevigil']['key'] @staticmethod def binaryedge_key() -> str: - return Core.api_keys()["binaryedge"]["key"] + return Core.api_keys()['binaryedge']['key'] @staticmethod def bing_key() -> str: - return Core.api_keys()["bing"]["key"] + return Core.api_keys()['bing']['key'] @staticmethod def bufferoverun_key() -> str: - return Core.api_keys()["bufferoverun"]["key"] + return Core.api_keys()['bufferoverun']['key'] @staticmethod def censys_key() -> tuple: - return Core.api_keys()["censys"]["id"], Core.api_keys()["censys"]["secret"] + return Core.api_keys()['censys']['id'], Core.api_keys()['censys']['secret'] @staticmethod def criminalip_key() -> str: - return Core.api_keys()["criminalip"]["key"] + return Core.api_keys()['criminalip']['key'] @staticmethod def fullhunt_key() -> str: - return Core.api_keys()["fullhunt"]["key"] + return Core.api_keys()['fullhunt']['key'] @staticmethod def github_key() -> str: - return Core.api_keys()["github"]["key"] + return Core.api_keys()['github']['key'] @staticmethod def hunter_key() -> str: - return Core.api_keys()["hunter"]["key"] + return Core.api_keys()['hunter']['key'] @staticmethod def hunterhow_key() -> str: - return Core.api_keys()["hunterhow"]["key"] + return Core.api_keys()['hunterhow']['key'] @staticmethod def intelx_key() -> str: - return Core.api_keys()["intelx"]["key"] + return Core.api_keys()['intelx']['key'] @staticmethod def netlas_key() -> str: - return Core.api_keys()["netlas"]["key"] + return Core.api_keys()['netlas']['key'] @staticmethod def pentest_tools_key() -> str: - return Core.api_keys()["pentestTools"]["key"] + return Core.api_keys()['pentestTools']['key'] @staticmethod def onyphe_key() -> str: - return Core.api_keys()["onyphe"]["key"] + return Core.api_keys()['onyphe']['key'] @staticmethod def projectdiscovery_key() -> str: - return Core.api_keys()["projectDiscovery"]["key"] + return Core.api_keys()['projectDiscovery']['key'] @staticmethod def rocketreach_key() -> str: - return Core.api_keys()["rocketreach"]["key"] + return Core.api_keys()['rocketreach']['key'] @staticmethod def security_trails_key() -> str: - return Core.api_keys()["securityTrails"]["key"] 
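# A minimal sketch of the lookup pattern these accessors share: read api-keys.yaml
# from the first config dir that has it, yaml.safe_load it, and index
# apikeys -> <service> -> field. The paths mirror CONFIG_DIRS above; lookup_key is
# an assumed helper name for illustration, not part of theHarvester.
from pathlib import Path

import yaml

CONFIG_DIRS = [
    Path('/etc/theHarvester/'),
    Path('/usr/local/etc/theHarvester/'),
    Path('~/.theHarvester'),
]


def lookup_key(service: str, field: str = 'key') -> str | None:
    # Return the configured value for a service, or None if no config file exists.
    for path in CONFIG_DIRS:
        file = path.expanduser() / 'api-keys.yaml'
        if file.exists():
            keys = yaml.safe_load(file.read_text())
            return keys['apikeys'][service][field]
    return None

# lookup_key('securityTrails') -> the value of apikeys.securityTrails.key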
+ return Core.api_keys()['securityTrails']['key'] @staticmethod def shodan_key() -> str: - return Core.api_keys()["shodan"]["key"] + return Core.api_keys()['shodan']['key'] @staticmethod def zoomeye_key() -> str: - return Core.api_keys()["zoomeye"]["key"] + return Core.api_keys()['zoomeye']['key'] @staticmethod def tomba_key() -> tuple[str, str]: - return Core.api_keys()["tomba"]["key"], Core.api_keys()["tomba"]["secret"] + return Core.api_keys()['tomba']['key'], Core.api_keys()['tomba']['secret'] @staticmethod def virustotal_key() -> str: - return Core.api_keys()["virustotal"]["key"] + return Core.api_keys()['virustotal']['key'] @staticmethod def proxy_list() -> list: - keys = yaml.safe_load(Core._read_config("proxies.yaml")) - http_list = ( - [f"http://{proxy}" for proxy in keys["http"]] - if keys["http"] is not None - else [] - ) + keys = yaml.safe_load(Core._read_config('proxies.yaml')) + http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else [] return http_list @staticmethod def banner() -> None: - print("*******************************************************************") - print("* _ _ _ *") - print(r"* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *") + print('*******************************************************************') + print('* _ _ _ *') + print(r'* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *') print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *") - print(r"* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *") - print(r"* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *") - print("* *") - print( - "* theHarvester {version}{filler}*".format( - version=version(), filler=" " * (51 - len(version())) - ) - ) - print("* Coded by Christian Martorella *") - print("* Edge-Security Research *") - print("* cmartorella@edge-security.com *") - print("* *") - print("*******************************************************************") + print(r'* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *') + print(r'* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *') + print('* *') + print('* theHarvester {version}{filler}*'.format(version=version(), filler=' ' * (51 - len(version())))) + print('* Coded by Christian Martorella *') + print('* Edge-Security Research *') + print('* cmartorella@edge-security.com *') + print('* *') + print('*******************************************************************') @staticmethod def get_supportedengines() -> list[str | Any]: supportedengines = [ - "anubis", - "baidu", - "bevigil", - "binaryedge", - "bing", - "bingapi", - "bufferoverun", - "brave", - "censys", - "certspotter", - "criminalip", - "crtsh", - "dnsdumpster", - "duckduckgo", - "fullhunt", - "github-code", - "hackertarget", - "hunter", - "hunterhow", - "intelx", - "netlas", - "onyphe", - "otx", - "pentesttools", - "projectdiscovery", - "rapiddns", - "rocketreach", - "securityTrails", - "sitedossier", - "subdomaincenter", - "subdomainfinderc99", - "threatminer", - "tomba", - "urlscan", - "virustotal", - "yahoo", - "zoomeye", + 'anubis', + 'baidu', + 'bevigil', + 'binaryedge', + 'bing', + 'bingapi', + 'bufferoverun', + 'brave', + 'censys', + 'certspotter', + 'criminalip', + 'crtsh', + 'dnsdumpster', + 'duckduckgo', + 'fullhunt', + 'github-code', + 'hackertarget', + 'hunter', + 'hunterhow', + 'intelx', + 'netlas', + 'onyphe', + 'otx', + 'pentesttools', + 'projectdiscovery', + 'rapiddns', + 'rocketreach', + 'securityTrails', + 'sitedossier', + 'subdomaincenter', + 'subdomainfinderc99', 
+ 'threatminer', + 'tomba', + 'urlscan', + 'virustotal', + 'yahoo', + 'zoomeye', ] return supportedengines @@ -214,58 +206,58 @@ def get_user_agent() -> str: # Lasted updated 7/2/23 # TODO use bs4 to auto parse user agents user_agents = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43", - "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0", - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37", - "Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15", - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15", - "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57", - "Mozilla/5.0 (Windows NT 10.0; 
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0", - "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0", - "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", - "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0', + 'Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0', 
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43', + 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37', + 'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0', + 'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', ] return random.choice(user_agents) @@ -278,129 +270,85 @@ async def post_fetch( cls, url, headers=None, - data: str | dict[str, str] = "", - params: str = "", + data: str | dict[str, str] = '', + params: str = '', json: bool = False, proxy: bool = False, ): if headers is None: headers = {} if len(headers) == 0: - headers = {"User-Agent": Core.get_user_agent()} + headers = {'User-Agent': Core.get_user_agent()} timeout = aiohttp.ClientTimeout(total=720) # By default, timeout is 5 minutes, changed to 12-minutes # results are well worth the wait try: if proxy: proxy = random.choice(cls().proxy_list) - if params != "": - async with aiohttp.ClientSession( - headers=headers, timeout=timeout - ) as session: - async with session.get( - url, params=params, proxy=proxy - ) as response: + if params != '': + async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session: + async with session.get(url, params=params, proxy=proxy) as response: await asyncio.sleep(5) - return ( - await response.text() - if json is False - else await response.json() - ) + return await response.text() if json is False else await response.json() else: - async with aiohttp.ClientSession( - headers=headers, timeout=timeout - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session: async with session.get(url, proxy=proxy) as response: await asyncio.sleep(5) - return ( - await response.text() - if json is False - else await response.json() - ) - elif params == "": + return await response.text() if json is False else await response.json() + elif params == '': if isinstance(data, str): data = json_loader.loads(data) - async with aiohttp.ClientSession( - headers=headers, timeout=timeout - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session: async with session.post(url, data=data) as resp: await asyncio.sleep(3) return await resp.text() if json is False else await resp.json() else: if isinstance(data, str): data = json_loader.loads(data) - async with aiohttp.ClientSession( - headers=headers, timeout=timeout - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session: sslcontext = ssl.create_default_context(cafile=certifi.where()) - async with session.post( - url, data=data, ssl=sslcontext, params=params - ) as resp: + async with session.post(url, data=data, ssl=sslcontext, params=params) as resp: await asyncio.sleep(3) return await 
resp.text() if json is False else await resp.json() except Exception as e: - print(f"An exception has occurred in post_fetch: {e}") - return "" + print(f'An exception has occurred in post_fetch: {e}') + return '' @classmethod - async def fetch( - cls, session, url, params: Sized = "", json: bool = False, proxy: str = "" - ) -> str | dict | list | bool: + async def fetch(cls, session, url, params: Sized = '', json: bool = False, proxy: str = '') -> str | dict | list | bool: # This fetch method solely focuses on get requests try: # Wrap in try except due to 0x89 png/jpg files # This fetch method solely focuses on get requests - if proxy != "": + if proxy != '': proxy = str(random.choice(cls().proxy_list)) if len(params) != 0: sslcontext = ssl.create_default_context(cafile=certifi.where()) - async with session.get( - url, ssl=sslcontext, params=params, proxy=proxy - ) as response: - return ( - await response.text() - if json is False - else await response.json() - ) + async with session.get(url, ssl=sslcontext, params=params, proxy=proxy) as response: + return await response.text() if json is False else await response.json() else: sslcontext = ssl.create_default_context(cafile=certifi.where()) - async with session.get( - url, ssl=sslcontext, proxy=proxy - ) as response: + async with session.get(url, ssl=sslcontext, proxy=proxy) as response: await asyncio.sleep(5) - return ( - await response.text() - if json is False - else await response.json() - ) + return await response.text() if json is False else await response.json() if len(params) != 0: sslcontext = ssl.create_default_context(cafile=certifi.where()) async with session.get(url, ssl=sslcontext, params=params) as response: await asyncio.sleep(5) - return ( - await response.text() - if json is False - else await response.json() - ) + return await response.text() if json is False else await response.json() else: sslcontext = ssl.create_default_context(cafile=certifi.where()) async with session.get(url, ssl=sslcontext) as response: await asyncio.sleep(5) - return ( - await response.text() - if json is False - else await response.json() - ) + return await response.text() if json is False else await response.json() except Exception as e: - print(f"An exception has occurred: {e}") - return "" + print(f'An exception has occurred: {e}') + return '' @staticmethod - async def takeover_fetch( - session, url: str, proxy: str = "" - ) -> tuple[Any, Any] | str: + async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any] | str: # This fetch method solely focuses on get requests try: # Wrap in try except due to 0x89 png/jpg files @@ -408,12 +356,10 @@ async def takeover_fetch( # TODO determine if method for post requests is necessary # url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url # Clean up urls with proper schemas - if proxy != "": - if "https://" in url: + if proxy != '': + if 'https://' in url: sslcontext = ssl.create_default_context(cafile=certifi.where()) - async with session.get( - url, proxy=proxy, ssl=sslcontext - ) as response: + async with session.get(url, proxy=proxy, ssl=sslcontext) as response: await asyncio.sleep(5) return url, await response.text() else: @@ -421,7 +367,7 @@ async def takeover_fetch( await asyncio.sleep(5) return url, await response.text() else: - if "https://" in url: + if 'https://' in url: sslcontext = ssl.create_default_context(cafile=certifi.where()) async with session.get(url, ssl=sslcontext) as response: await asyncio.sleep(5) @@ -431,15 +377,15 @@ async def 
takeover_fetch( await asyncio.sleep(5) return url, await response.text() except Exception as e: - print(f"Takeover check error: {e}") - return url, "" + print(f'Takeover check error: {e}') + return url, '' @classmethod async def fetch_all( cls, urls, headers=None, - params: Sized = "", + params: Sized = '', json: bool = False, takeover: bool = False, proxy: bool = False, @@ -449,29 +395,18 @@ async def fetch_all( headers = {} timeout = aiohttp.ClientTimeout(total=60) if len(headers) == 0: - headers = {"User-Agent": Core.get_user_agent()} + headers = {'User-Agent': Core.get_user_agent()} if takeover: - async with aiohttp.ClientSession( - headers=headers, timeout=aiohttp.ClientTimeout(total=15) - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session: if proxy: return await asyncio.gather( - *[ - AsyncFetcher.takeover_fetch( - session, url, proxy=random.choice(cls().proxy_list) - ) - for url in urls - ] + *[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls] ) else: - return await asyncio.gather( - *[AsyncFetcher.takeover_fetch(session, url) for url in urls] - ) + return await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls]) if len(params) == 0: - async with aiohttp.ClientSession( - headers=headers, timeout=timeout, max_field_size=13000 - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=timeout, max_field_size=13000) as session: if proxy: return await asyncio.gather( *[ @@ -485,14 +420,10 @@ async def fetch_all( ] ) else: - return await asyncio.gather( - *[AsyncFetcher.fetch(session, url, json=json) for url in urls] - ) + return await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls]) else: # Indicates the request has certain params - async with aiohttp.ClientSession( - headers=headers, timeout=timeout - ) as session: + async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session: if proxy: return await asyncio.gather( *[ @@ -507,9 +438,4 @@ async def fetch_all( ] ) else: - return await asyncio.gather( - *[ - AsyncFetcher.fetch(session, url, params, json) - for url in urls - ] - ) + return await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls]) diff --git a/theHarvester/lib/hostchecker.py b/theHarvester/lib/hostchecker.py index bb5f806e..17a6796d 100644 --- a/theHarvester/lib/hostchecker.py +++ b/theHarvester/lib/hostchecker.py @@ -40,13 +40,13 @@ async def resolve_host(host, resolver) -> str: result = await resolver.gethostbyname(host, socket.AF_INET) addresses = result.addresses if addresses == [] or addresses is None or result is None: - return f"{host}:" + return f'{host}:' else: - addresses = ",".join(map(str, list(sorted(set(addresses))))) + addresses = ','.join(map(str, list(sorted(set(addresses))))) # addresses = list(sorted(addresses)) - return f"{host}:{addresses}" + return f'{host}:{addresses}' except Exception: - return f"{host}:" + return f'{host}:' # https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks @staticmethod @@ -57,9 +57,7 @@ def chunks(lst, n): async def query_all(self, resolver, hosts) -> list[Any]: # TODO chunk list into 50 pieces regardless of IPs and subnets - results = await asyncio.gather( - *[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts] - ) + results = await asyncio.gather(*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts]) return 
results async def check(self): @@ -75,9 +73,9 @@ async def check(self): results = await self.query_all(resolver, chunk) all_results.update(results) for pair in results: - host, addresses = pair.split(":") + host, addresses = pair.split(':') self.realhosts.append(host) - self.addresses.update({addr for addr in addresses.split(",")}) + self.addresses.update({addr for addr in addresses.split(',')}) # address may be a list of ips # and do a set comprehension to remove duplicates self.realhosts.sort() diff --git a/theHarvester/lib/stash.py b/theHarvester/lib/stash.py index f2ee9eba..1e12f881 100644 --- a/theHarvester/lib/stash.py +++ b/theHarvester/lib/stash.py @@ -5,7 +5,7 @@ import aiosqlite -db_path = os.path.expanduser("~/.local/share/theHarvester") +db_path = os.path.expanduser('~/.local/share/theHarvester') if not os.path.isdir(db_path): os.makedirs(db_path) @@ -13,9 +13,9 @@ class StashManager: def __init__(self) -> None: - self.db = os.path.join(db_path, "stash.sqlite") - self.results = "" - self.totalresults = "" + self.db = os.path.join(db_path, 'stash.sqlite') + self.results = '' + self.totalresults = '' self.latestscandomain: dict = {} self.domainscanhistory: list = [] self.scanboarddata: dict = {} @@ -26,7 +26,7 @@ def __init__(self) -> None: async def do_init(self) -> None: async with aiosqlite.connect(self.db) as db: await db.execute( - "CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)" + 'CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)' ) await db.commit() @@ -39,7 +39,7 @@ async def store(self, domain, resource, res_type, source) -> None: try: async with aiosqlite.connect(self.db, timeout=30) as db: await db.execute( - "INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)", + 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)', (self.domain, self.resource, self.type, self.date, self.source), ) await db.commit() @@ -52,13 +52,11 @@ async def store_all(self, domain, all, res_type, source) -> None: self.type = res_type self.source = source self.date = datetime.date.today() - master_list = [ - (self.domain, x, self.type, self.date, self.source) for x in self.all - ] + master_list = [(self.domain, x, self.type, self.date, self.source) for x in self.all] async with aiosqlite.connect(self.db, timeout=30) as db: try: await db.executemany( - "INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)", + 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)', master_list, ) await db.commit() @@ -68,43 +66,41 @@ async def store_all(self, domain, all, res_type, source) -> None: async def generatedashboardcode(self, domain): try: # TODO refactor into generic method - self.latestscandomain["domain"] = domain + self.latestscandomain['domain'] = domain async with aiosqlite.connect(self.db, timeout=30) as conn: cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["host"] = data[0] + self.latestscandomain['host'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["email"] = data[0] + self.latestscandomain['email'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? 
AND type="ip"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["ip"] = data[0] + self.latestscandomain['ip'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["vhost"] = data[0] + self.latestscandomain['vhost'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["shodan"] = data[0] - cursor = await conn.execute( - """SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,) - ) + self.latestscandomain['shodan'] = data[0] + cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)) data = await cursor.fetchone() - self.latestscandomain["latestdate"] = data[0] + self.latestscandomain['latestdate'] = data[0] latestdate = data[0] cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', @@ -114,7 +110,7 @@ async def generatedashboardcode(self, domain): ), ) scandetailshost = await cursor.fetchall() - self.latestscandomain["scandetailshost"] = scandetailshost + self.latestscandomain['scandetailshost'] = scandetailshost cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''', ( @@ -123,7 +119,7 @@ async def generatedashboardcode(self, domain): ), ) scandetailsemail = await cursor.fetchall() - self.latestscandomain["scandetailsemail"] = scandetailsemail + self.latestscandomain['scandetailsemail'] = scandetailsemail cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', ( @@ -132,7 +128,7 @@ async def generatedashboardcode(self, domain): ), ) scandetailsip = await cursor.fetchall() - self.latestscandomain["scandetailsip"] = scandetailsip + self.latestscandomain['scandetailsip'] = scandetailsip cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', ( @@ -141,7 +137,7 @@ async def generatedashboardcode(self, domain): ), ) scandetailsvhost = await cursor.fetchall() - self.latestscandomain["scandetailsvhost"] = scandetailsvhost + self.latestscandomain['scandetailsvhost'] = scandetailsvhost cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''', ( @@ -150,14 +146,12 @@ async def generatedashboardcode(self, domain): ), ) scandetailsshodan = await cursor.fetchall() - self.latestscandomain["scandetailsshodan"] = scandetailsshodan + self.latestscandomain['scandetailsshodan'] = scandetailsshodan return self.latestscandomain except Exception as e: print(e) - async def getlatestscanresults( - self, domain, previousday: bool = False - ) -> Iterable[Row | str] | None: + async def getlatestscanresults(self, domain, previousday: bool = False) -> Iterable[Row | str] | None: try: async with aiosqlite.connect(self.db, timeout=30) as conn: if previousday: @@ -170,15 +164,13 @@ async def getlatestscanresults( (domain,), ) previousscandate = await cursor.fetchone() - if ( - not previousscandate - ): # When theHarvester runs first time/day, this query will return. + if not previousscandate: # When theHarvester runs first time/day, this query will return. 
self.previousscanresults = [ - "No results", - "No results", - "No results", - "No results", - "No results", + 'No results', + 'No results', + 'No results', + 'No results', + 'No results', ] else: cursor = await conn.execute( @@ -197,9 +189,7 @@ async def getlatestscanresults( self.previousscanresults = list(results) return self.previousscanresults except Exception as e: - print( - f"Error in getting the previous scan results from the database: {e}" - ) + print(f'Error in getting the previous scan results from the database: {e}') else: try: cursor = await conn.execute( @@ -223,46 +213,32 @@ async def getlatestscanresults( self.latestscanresults = list(results) return self.latestscanresults except Exception as e: - print( - f"Error in getting the latest scan results from the database: {e}" - ) + print(f'Error in getting the latest scan results from the database: {e}') except Exception as e: - print(f"Error connecting to theHarvester database: {e}") + print(f'Error connecting to theHarvester database: {e}') return self.latestscanresults async def getscanboarddata(self): try: async with aiosqlite.connect(self.db, timeout=30) as conn: - cursor = await conn.execute( - '''SELECT COUNT(*) from results WHERE type="host"''' - ) + cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''') data = await cursor.fetchone() - self.scanboarddata["host"] = data[0] - cursor = await conn.execute( - '''SELECT COUNT(*) from results WHERE type="email"''' - ) + self.scanboarddata['host'] = data[0] + cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''') data = await cursor.fetchone() - self.scanboarddata["email"] = data[0] - cursor = await conn.execute( - '''SELECT COUNT(*) from results WHERE type="ip"''' - ) + self.scanboarddata['email'] = data[0] + cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="ip"''') data = await cursor.fetchone() - self.scanboarddata["ip"] = data[0] - cursor = await conn.execute( - '''SELECT COUNT(*) from results WHERE type="vhost"''' - ) + self.scanboarddata['ip'] = data[0] + cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="vhost"''') data = await cursor.fetchone() - self.scanboarddata["vhost"] = data[0] - cursor = await conn.execute( - '''SELECT COUNT(*) from results WHERE type="shodan"''' - ) + self.scanboarddata['vhost'] = data[0] + cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="shodan"''') data = await cursor.fetchone() - self.scanboarddata["shodan"] = data[0] - cursor = await conn.execute( - """SELECT COUNT(DISTINCT(domain)) FROM results """ - ) + self.scanboarddata['shodan'] = data[0] + cursor = await conn.execute("""SELECT COUNT(DISTINCT(domain)) FROM results """) data = await cursor.fetchone() - self.scanboarddata["domains"] = data[0] + self.scanboarddata['domains'] = data[0] return self.scanboarddata except Exception as e: print(e) @@ -302,12 +278,12 @@ async def getscanhistorydomain(self, domain): ) countshodan = await cursor.fetchone() results = { - "date": str(date[0]), - "hosts": str(counthost[0]), - "email": str(countemail[0]), - "ip": str(countip[0]), - "vhost": str(countvhost[0]), - "shodan": str(countshodan[0]), + 'date': str(date[0]), + 'hosts': str(counthost[0]), + 'email': str(countemail[0]), + 'ip': str(countip[0]), + 'vhost': str(countvhost[0]), + 'shodan': str(countshodan[0]), } self.domainscanhistory.append(results) return self.domainscanhistory @@ -333,42 +309,40 @@ async def getpluginscanstatistics(self) -> Iterable[Row] | None: async def 
latestscanchartdata(self, domain): try: async with aiosqlite.connect(self.db, timeout=30) as conn: - self.latestscandomain["domain"] = domain + self.latestscandomain['domain'] = domain cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["host"] = data[0] + self.latestscandomain['host'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["email"] = data[0] + self.latestscandomain['email'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["ip"] = data[0] + self.latestscandomain['ip'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["vhost"] = data[0] + self.latestscandomain['vhost'] = data[0] cursor = await conn.execute( '''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', (domain,), ) data = await cursor.fetchone() - self.latestscandomain["shodan"] = data[0] - cursor = await conn.execute( - """SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,) - ) + self.latestscandomain['shodan'] = data[0] + cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)) data = await cursor.fetchone() - self.latestscandomain["latestdate"] = data[0] + self.latestscandomain['latestdate'] = data[0] latestdate = data[0] cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', @@ -378,7 +352,7 @@ async def latestscanchartdata(self, domain): ), ) scandetailshost = await cursor.fetchall() - self.latestscandomain["scandetailshost"] = scandetailshost + self.latestscandomain['scandetailshost'] = scandetailshost cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''', ( @@ -387,7 +361,7 @@ async def latestscanchartdata(self, domain): ), ) scandetailsemail = await cursor.fetchall() - self.latestscandomain["scandetailsemail"] = scandetailsemail + self.latestscandomain['scandetailsemail'] = scandetailsemail cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', ( @@ -396,7 +370,7 @@ async def latestscanchartdata(self, domain): ), ) scandetailsip = await cursor.fetchall() - self.latestscandomain["scandetailsip"] = scandetailsip + self.latestscandomain['scandetailsip'] = scandetailsip cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', ( @@ -405,7 +379,7 @@ async def latestscanchartdata(self, domain): ), ) scandetailsvhost = await cursor.fetchall() - self.latestscandomain["scandetailsvhost"] = scandetailsvhost + self.latestscandomain['scandetailsvhost'] = scandetailsvhost cursor = await conn.execute( '''SELECT * FROM results WHERE domain=? AND find_date=? 
AND type="shodan"''', ( @@ -414,7 +388,7 @@ async def latestscanchartdata(self, domain): ), ) scandetailsshodan = await cursor.fetchall() - self.latestscandomain["scandetailsshodan"] = scandetailsshodan + self.latestscandomain['scandetailsshodan'] = scandetailsshodan return self.latestscandomain except Exception as e: print(e) diff --git a/theHarvester/lib/version.py b/theHarvester/lib/version.py index 9dd7212a..7fcba431 100644 --- a/theHarvester/lib/version.py +++ b/theHarvester/lib/version.py @@ -1,4 +1,4 @@ -VERSION = "4.6.0" +VERSION = '4.6.0' def version() -> str: diff --git a/theHarvester/parsers/intelxparser.py b/theHarvester/parsers/intelxparser.py index 8dc428be..e47a6166 100644 --- a/theHarvester/parsers/intelxparser.py +++ b/theHarvester/parsers/intelxparser.py @@ -10,17 +10,17 @@ async def parse_dictionaries(self, results: dict) -> tuple: :return: tuple of emails and hosts """ if results is not None: - for dictionary in results["selectors"]: - field = dictionary["selectorvalue"] - if "@" in field: + for dictionary in results['selectors']: + field = dictionary['selectorvalue'] + if '@' in field: self.emails.add(field) else: field = str(field) - if "http" in field or "https" in field: - if field[:5] == "https": + if 'http' in field or 'https' in field: + if field[:5] == 'https': field = field[8:] else: field = field[7:] - self.hosts.add(field.replace(")", "").replace(",", "")) + self.hosts.add(field.replace(')', '').replace(',', '')) return self.emails, self.hosts return None, None diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py index f1dd2da1..ae22c987 100644 --- a/theHarvester/parsers/myparser.py +++ b/theHarvester/parsers/myparser.py @@ -10,59 +10,49 @@ def __init__(self, results, word) -> None: async def genericClean(self) -> None: self.results = ( - self.results.replace("", "") - .replace("", "") - .replace("", "") - .replace("", "") - .replace("%3a", "") - .replace("", "") - .replace("", "") - .replace("", "") - .replace("", "") + self.results.replace('', '') + .replace('', '') + .replace('', '') + .replace('', '') + .replace('%3a', '') + .replace('', '') + .replace('', '') + .replace('', '') + .replace('', '') ) for search in ( - "<", - ">", - ":", - "=", - ";", - "&", - "%3A", - "%3D", - "%3C", - "%2f", - "/", - "\\", + '<', + '>', + ':', + '=', + ';', + '&', + '%3A', + '%3D', + '%3C', + '%2f', + '/', + '\\', ): - self.results = self.results.replace(search, " ") + self.results = self.results.replace(search, ' ') async def urlClean(self) -> None: - self.results = ( - self.results.replace("", "") - .replace("", "") - .replace("%2f", "") - .replace("%3a", "") - ) - for search in ("<", ">", ":", "=", ";", "&", "%3A", "%3D", "%3C"): - self.results = self.results.replace(search, " ") + self.results = self.results.replace('', '').replace('', '').replace('%2f', '').replace('%3a', '') + for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'): + self.results = self.results.replace(search, ' ') async def emails(self): await self.genericClean() # Local part is required, charset is flexible. 
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly) - reg_emails = re.compile( - r"[a-zA-Z0-9.\-_+#~!$&\',;=:]+" - + "@" - + "[a-zA-Z0-9.-]*" - + self.word.replace("www.", "") - ) + reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', '')) self.temp = reg_emails.findall(self.results) emails = await self.unique() true_emails = { ( str(email)[1:].lower().strip() - if len(str(email)) > 1 and str(email)[0] == "." + if len(str(email)) > 1 and str(email)[0] == '.' else len(str(email)) > 1 and str(email).lower().strip() ) for email in emails @@ -76,11 +66,7 @@ async def fileurls(self, file) -> list: self.temp = reg_urls.findall(self.results) allurls = await self.unique() for iteration in allurls: - if ( - iteration.count("webcache") - or iteration.count("google.com") - or iteration.count("search?hl") - ): + if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'): pass else: urls.append(iteration) @@ -90,11 +76,11 @@ async def hostnames(self): # should check both www. and not www. hostnames = [] await self.genericClean() - reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word) + reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word) first_hostnames = reg_hosts.findall(self.results) hostnames.extend(first_hostnames) # TODO determine if necessary below or if only pass through is fine - reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word.replace("www.", "")) + reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', '')) # reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word) # reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?') second_hostnames = reg_hosts.findall(self.results) @@ -102,31 +88,29 @@ async def hostnames(self): return list(set(hostnames)) async def hostnames_all(self): - reg_hosts = re.compile("<cite>(.*?)</cite>") + reg_hosts = re.compile('<cite>(.*?)</cite>') temp = reg_hosts.findall(self.results) for iteration in temp: - if iteration.count(":"): - res = iteration.split(":")[1].split("/")[2] + if iteration.count(':'): + res = iteration.split(':')[1].split('/')[2] else: - res = iteration.split("/")[0] + res = iteration.split('/')[0] self.temp.append(res) hostnames = await self.unique() return hostnames async def set(self): - reg_sets = re.compile(r">[a-zA-Z\d]*</a></font>") + reg_sets = re.compile(r'>[a-zA-Z\d]*</a></font>') self.temp = reg_sets.findall(self.results) sets = [] for iteration in self.temp: - delete = iteration.replace(">", "") - delete = delete.replace("</a</font", "") + delete = iteration.replace('>', '') + delete = delete.replace('</a</font', '') sets.append(delete) return sets async def urls(self) -> Set[str]: - found = re.finditer( - r"(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*", self.results - ) + found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*', self.results) urls = {match.group().strip() for match in found} return urls diff --git a/theHarvester/parsers/securitytrailsparser.py b/theHarvester/parsers/securitytrailsparser.py index 3edc976c..76194c21 100644 --- a/theHarvester/parsers/securitytrailsparser.py +++ b/theHarvester/parsers/securitytrailsparser.py @@ -13,7 +13,7 @@ async def parse_text(self) -> tuple[set, set]: line = self.text[index].strip() if '"ip":' in line: # Extract IP.
- ip = "" + ip = '' for ch in line[7:]: if ch == '"': break @@ -25,17 +25,13 @@ async def parse_text(self) -> tuple[set, set]: sub_domain_flag = 1 continue elif sub_domain_flag > 0: - if "]" in line: + if ']' in line: sub_domain_flag = 0 else: - if "www" in self.word: - self.word = ( - str(self.word).replace("www.", "").replace("www", "") - ) + if 'www' in self.word: + self.word = str(self.word).replace('www.', '').replace('www', '') # Remove www from word if entered - self.hostnames.add( - str(line).replace('"', "").replace(",", "") + "." + self.word - ) + self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word) else: continue return self.ips, self.hostnames diff --git a/theHarvester/restfulHarvest.py b/theHarvester/restfulHarvest.py index 13d8d3c7..90cb3140 100644 --- a/theHarvester/restfulHarvest.py +++ b/theHarvester/restfulHarvest.py @@ -6,35 +6,35 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( - "-H", - "--host", - default="127.0.0.1", - help="IP address to listen on default is 127.0.0.1", + '-H', + '--host', + default='127.0.0.1', + help='IP address to listen on default is 127.0.0.1', ) parser.add_argument( - "-p", - "--port", + '-p', + '--port', default=5000, - help="Port to bind the web server to, default is 5000", + help='Port to bind the web server to, default is 5000', type=int, ) parser.add_argument( - "-l", - "--log-level", - default="info", - help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set", + '-l', + '--log-level', + default='info', + help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set', ) parser.add_argument( - "-r", - "--reload", + '-r', + '--reload', default=False, - help="Enable automatic reload used during development of the api", - action="store_true", + help='Enable automatic reload used during development of the api', + action='store_true', ) args: argparse.Namespace = parser.parse_args() uvicorn.run( - "theHarvester.lib.api.api:app", + 'theHarvester.lib.api.api:app', host=args.host, port=args.port, log_level=args.log_level, @@ -42,5 +42,5 @@ def main(): ) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/theHarvester/screenshot/screenshot.py b/theHarvester/screenshot/screenshot.py index 8db0809f..c48761bc 100644 --- a/theHarvester/screenshot/screenshot.py +++ b/theHarvester/screenshot/screenshot.py @@ -17,27 +17,21 @@ class ScreenShotter: def __init__(self, output) -> None: self.output = output - self.slash = "\\" if "win" in sys.platform else "/" - self.slash = ( - "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash - ) + self.slash = '\\' if 'win' in sys.platform else '/' + self.slash = '' if (self.output[-1] == '\\' or self.output[-1] == '/') else self.slash def verify_path(self) -> bool: try: if not os.path.isdir(self.output): - answer = input( - "[+] The output path you have entered does not exist would you like to create it (y/n): " - ) - if answer.lower() == "yes" or answer.lower() == "y": + answer = input('[+] The output path you have entered does not exist would you like to create it (y/n): ') + if answer.lower() == 'yes' or answer.lower() == 'y': os.makedirs(self.output) return True else: return False return True except Exception as e: - print( - f"An exception has occurred while attempting to verify output path's existence: {e}" - ) + print(f"An exception has occurred while attempting to verify output path's existence: {e}") return False @staticmethod @@ -47,29 
+41,25 @@ async def verify_installation() -> None: async with async_playwright() as p: browser = await p.chromium.launch() await browser.close() - print("Playwright and Chromium are successfully installed.") + print('Playwright and Chromium are successfully installed.') except Exception as e: - print( - f"An exception has occurred while attempting to verify installation: {e}" - ) + print(f'An exception has occurred while attempting to verify installation: {e}') @staticmethod def chunk_list(items: Collection, chunk_size: int) -> list: # Based off of: https://github.com/apache/incubator-sdap-ingester - return [ - list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size) - ] + return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)] @staticmethod async def visit(url: str) -> tuple[str, str]: try: timeout = aiohttp.ClientTimeout(total=35) headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/122.0.0.0 Safari/537.36" + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/122.0.0.0 Safari/537.36' } - url = f"http://{url}" if not url.startswith("http") else url - url = url.replace("www.", "") + url = f'http://{url}' if not url.startswith('http') else url + url = url.replace('www.', '') sslcontext = ssl.create_default_context(cafile=certifi.where()) async with aiohttp.ClientSession( timeout=timeout, @@ -77,16 +67,16 @@ async def visit(url: str) -> tuple[str, str]: connector=aiohttp.TCPConnector(ssl=sslcontext), ) as session: async with session.get(url, verify_ssl=False) as resp: - text = await resp.text("UTF-8") - return f"http://{url}" if not url.startswith("http") else url, text + text = await resp.text('UTF-8') + return f'http://{url}' if not url.startswith('http') else url, text except Exception as e: - print(f"An exception has occurred while attempting to visit {url} : {e}") - return "", "" + print(f'An exception has occurred while attempting to visit {url} : {e}') + return '', '' async def take_screenshot(self, url: str) -> tuple[str, ...]: - url = f"http://{url}" if not url.startswith("http") else url - url = url.replace("www.", "") - print(f"Attempting to take a screenshot of: {url}") + url = f'http://{url}' if not url.startswith('http') else url + url = url.replace('www.', '') + print(f'Attempting to take a screenshot of: {url}') async with async_playwright() as p: browser = await p.chromium.launch(headless=True) # New browser context @@ -100,10 +90,8 @@ async def take_screenshot(self, url: str) -> tuple[str, ...]: await page.goto(url, timeout=35000) await page.screenshot(path=path) except Exception as e: - print( - f"An exception has occurred attempting to screenshot: {url} : {e}" - ) - path = "" + print(f'An exception has occurred attempting to screenshot: {url} : {e}') + path = '' finally: await page.close() await context.close() diff --git a/theHarvester/theHarvester.py b/theHarvester/theHarvester.py index c07c2830..80692f0f 100644 --- a/theHarvester/theHarvester.py +++ b/theHarvester/theHarvester.py @@ -6,7 +6,7 @@ def main(): platform = sys.platform - if platform == "win32": + if platform == 'win32': # Required or things will break if trying to take screenshots import multiprocessing @@ -23,9 +23,9 @@ def main(): uvloop.install() - if "linux" in platform: + if 'linux' in platform: import aiomultiprocess # As we are not using Windows, we can change the spawn method to fork for greater performance - aiomultiprocess.set_context("fork") + 
aiomultiprocess.set_context('fork') asyncio.run(__main__.entry_point())
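
Note (editor's addition, not part of the diff): every hunk above is a quote-style reformat with no intended behavior change, so the touched helpers should work exactly as before. Below is a minimal, hypothetical sketch that exercises two of them, using only signatures visible in the hunks. It assumes theHarvester 4.6.0 is importable and that the class in parsers/myparser.py is named Parser (as it is upstream); the sample input string is invented for illustration.

# sanity_sketch.py - illustrative only, not part of this change
import asyncio

from theHarvester.lib.core import Core
from theHarvester.parsers.myparser import Parser


async def demo() -> None:
    # Static helpers from lib/core.py: no arguments, no API keys needed.
    Core.banner()
    print(Core.get_supportedengines()[:5])

    # Parser(results, word) from parsers/myparser.py scrapes emails and
    # hostnames for the given word (domain) out of raw search output.
    raw = 'reach admin@example.com or browse docs.example.com for details'
    parser = Parser(raw, 'example.com')
    print(await parser.emails())     # e.g. {'admin@example.com'}
    print(await parser.hostnames())  # e.g. ['docs.example.com']


if __name__ == '__main__':
    asyncio.run(demo())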