diff --git a/restfulHarvest.py b/restfulHarvest.py
index de7c4756..e772df6f 100755
--- a/restfulHarvest.py
+++ b/restfulHarvest.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
from theHarvester.restfulHarvest import main
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
diff --git a/theHarvester.py b/theHarvester.py
index 5f3355bc..bc1af5e2 100755
--- a/theHarvester.py
+++ b/theHarvester.py
@@ -5,10 +5,8 @@
from theHarvester.theHarvester import main
if sys.version_info.major < 3 or sys.version_info.minor < 10:
- print(
- "\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m"
- )
+ print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m')
sys.exit(1)
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index ff1173b7..0bd074ef 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -63,93 +63,91 @@
async def start(rest_args: argparse.Namespace | None = None):
"""Main program function"""
parser = argparse.ArgumentParser(
- description="theHarvester is used to gather open source intelligence (OSINT) on a company or domain."
+ description='theHarvester is used to gather open source intelligence (OSINT) on a company or domain.'
)
+ parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True)
parser.add_argument(
- "-d", "--domain", help="Company name or domain to search.", required=True
- )
- parser.add_argument(
- "-l",
- "--limit",
- help="Limit the number of search results, default=500.",
+ '-l',
+ '--limit',
+ help='Limit the number of search results, default=500.',
default=500,
type=int,
)
parser.add_argument(
- "-S",
- "--start",
- help="Start with result number X, default=0.",
+ '-S',
+ '--start',
+ help='Start with result number X, default=0.',
default=0,
type=int,
)
parser.add_argument(
- "-p",
- "--proxies",
- help="Use proxies for requests, enter proxies in proxies.yaml.",
+ '-p',
+ '--proxies',
+ help='Use proxies for requests, enter proxies in proxies.yaml.',
default=False,
- action="store_true",
+ action='store_true',
)
parser.add_argument(
- "-s",
- "--shodan",
- help="Use Shodan to query discovered hosts.",
+ '-s',
+ '--shodan',
+ help='Use Shodan to query discovered hosts.',
default=False,
- action="store_true",
+ action='store_true',
)
parser.add_argument(
- "--screenshot",
- help="Take screenshots of resolved domains specify output directory: --screenshot output_directory",
- default="",
+ '--screenshot',
+ help='Take screenshots of resolved domains specify output directory: --screenshot output_directory',
+ default='',
type=str,
)
parser.add_argument(
- "-v",
- "--virtual-host",
- help="Verify host name via DNS resolution and search for virtual hosts.",
- action="store_const",
- const="basic",
+ '-v',
+ '--virtual-host',
+ help='Verify host name via DNS resolution and search for virtual hosts.',
+ action='store_const',
+ const='basic',
default=False,
)
- parser.add_argument("-e", "--dns-server", help="DNS server to use for lookup.")
+ parser.add_argument('-e', '--dns-server', help='DNS server to use for lookup.')
parser.add_argument(
- "-t",
- "--take-over",
- help="Check for takeovers.",
+ '-t',
+ '--take-over',
+ help='Check for takeovers.',
default=False,
- action="store_true",
+ action='store_true',
)
parser.add_argument(
- "-r",
- "--dns-resolve",
- help="Perform DNS resolution on subdomains with a resolver list or passed in resolvers, default False.",
- default="",
+ '-r',
+ '--dns-resolve',
+ help='Perform DNS resolution on subdomains with a resolver list or passed in resolvers, default False.',
+ default='',
type=str,
- nargs="?",
+ nargs='?',
)
parser.add_argument(
- "-n",
- "--dns-lookup",
- help="Enable DNS server lookup, default False.",
+ '-n',
+ '--dns-lookup',
+ help='Enable DNS server lookup, default False.',
default=False,
- action="store_true",
+ action='store_true',
)
parser.add_argument(
- "-c",
- "--dns-brute",
- help="Perform a DNS brute force on the domain.",
+ '-c',
+ '--dns-brute',
+ help='Perform a DNS brute force on the domain.',
default=False,
- action="store_true",
+ action='store_true',
)
parser.add_argument(
- "-f",
- "--filename",
- help="Save the results to an XML and JSON file.",
- default="",
+ '-f',
+ '--filename',
+ help='Save the results to an XML and JSON file.',
+ default='',
type=str,
)
parser.add_argument(
- "-b",
- "--source",
+ '-b',
+ '--source',
help="""anubis, baidu, bevigil, binaryedge, bing, bingapi, bufferoverun, brave,
censys, certspotter, criminalip, crtsh, dnsdumpster, duckduckgo, fullhunt, github-code,
hackertarget, hunter, hunterhow, intelx, netlas, onyphe, otx, pentesttools, projectdiscovery,
@@ -158,10 +156,10 @@ async def start(rest_args: argparse.Namespace | None = None):
)
# determines if filename is coming from rest api or user
- rest_filename = ""
+ rest_filename = ''
# indicates this from the rest API
if rest_args:
- if rest_args.source and rest_args.source == "getsources":
+ if rest_args.source and rest_args.source == 'getsources':
return list(sorted(Core.get_supportedengines()))
elif rest_args.dns_brute:
args = rest_args
@@ -171,11 +169,7 @@ async def start(rest_args: argparse.Namespace | None = None):
# We need to make sure the filename is random as to not overwrite other files
filename: str = args.filename
alphabet = string.ascii_letters + string.digits
- rest_filename += (
- f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}"
- if len(filename) != 0
- else ""
- )
+ rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" if len(filename) != 0 else ''
else:
args = parser.parse_args()
filename = args.filename
@@ -186,16 +180,14 @@ async def start(rest_args: argparse.Namespace | None = None):
except Exception:
pass
- if len(filename) > 2 and filename[:2] == "~/":
+ if len(filename) > 2 and filename[:2] == '~/':
filename = os.path.expanduser(filename)
all_emails: list = []
all_hosts: list = []
all_ip: list = []
dnslookup = args.dns_lookup
- dnsserver = (
- args.dns_server
- ) # TODO arg is not used anywhere replace with resolvers wordlist arg dnsresolve
+ dnsserver = args.dns_server # TODO arg is not used anywhere replace with resolvers wordlist arg dnsresolve
dnsresolve = args.dns_resolve
final_dns_resolver_list = []
if dnsresolve is not None and len(dnsresolve) > 0:
@@ -204,7 +196,7 @@ async def start(rest_args: argparse.Namespace | None = None):
# 1.1.1.1,8.8.8.8 or 1.1.1.1, 8.8.8.8
# resolvers.txt
if os.path.exists(dnsresolve):
- with open(dnsresolve, encoding="UTF-8") as fp:
+ with open(dnsresolve, encoding='UTF-8') as fp:
for line in fp:
line = line.strip()
try:
@@ -212,16 +204,14 @@ async def start(rest_args: argparse.Namespace | None = None):
_ = netaddr.IPAddress(line)
final_dns_resolver_list.append(line)
except Exception as e:
- print(
- f"An exception has occurred while reading from: {dnsresolve}, {e}"
- )
- print(f"Current line: {line}")
+ print(f'An exception has occurred while reading from: {dnsresolve}, {e}')
+ print(f'Current line: {line}')
return
else:
try:
- if "," in dnsresolve:
- cleaned = dnsresolve.replace(" ", "")
- for item in cleaned.split(","):
+ if ',' in dnsresolve:
+ cleaned = dnsresolve.replace(' ', '')
+ for item in cleaned.split(','):
_ = netaddr.IPAddress(item)
final_dns_resolver_list.append(item)
else:
@@ -229,10 +219,8 @@ async def start(rest_args: argparse.Namespace | None = None):
_ = netaddr.IPAddress(dnsresolve)
final_dns_resolver_list.append(dnsresolve)
except Exception as e:
- print(
- f"Passed in DNS resolvers are invalid double check, got error: {e}"
- )
- print(f"Dumping resolvers passed in: {e}")
+ print(f'Passed in DNS resolvers are invalid double check, got error: {e}')
+ print(f'Dumping resolvers passed in: {e}')
sys.exit(0)
# if for some reason, there are duplicates
@@ -249,7 +237,7 @@ async def start(rest_args: argparse.Namespace | None = None):
all_urls: list = []
vhost: list = []
virtual = args.virtual_host
- word: str = args.domain.rstrip("\n")
+ word: str = args.domain.rstrip('\n')
takeover_status = args.take_over
use_proxy = args.proxies
linkedin_people_list_tracker: list = []
@@ -302,29 +290,17 @@ async def store(
db_stash = stash.StashManager()
if source:
- print(f"\033[94m[*] Searching {source[0].upper() + source[1:]}. ")
+ print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. ')
if store_host:
- host_names = list(
- {
- host
- for host in await search_engine.get_hostnames()
- if f".{word}" in host
- }
- )
+ host_names = list({host for host in await search_engine.get_hostnames() if f'.{word}' in host})
host_names = list(host_names)
- if (
- source != "hackertarget"
- and source != "pentesttools"
- and source != "rapiddns"
- ):
+ if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
# If a source is inside this conditional, it means the hosts returned must be resolved to obtain ip
# This should only be checked if --dns-resolve has a wordlist
if dnsresolve is None or len(final_dns_resolver_list) > 0:
# indicates that -r was passed in if dnsresolve is None
- full_hosts_checker = hostchecker.Checker(
- host_names, final_dns_resolver_list
- )
+ full_hosts_checker = hostchecker.Checker(host_names, final_dns_resolver_list)
# If full, this is only getting resolved hosts
(
resolved_pair,
@@ -339,70 +315,68 @@ async def store(
else:
full.extend(host_names)
all_hosts.extend(host_names)
- await db_stash.store_all(word, all_hosts, "host", source)
+ await db_stash.store_all(word, all_hosts, 'host', source)
if store_emails:
email_list = await search_engine.get_emails()
all_emails.extend(email_list)
- await db_stash.store_all(word, email_list, "email", source)
+ await db_stash.store_all(word, email_list, 'email', source)
if store_ip:
ips_list = await search_engine.get_ips()
all_ip.extend(ips_list)
- await db_stash.store_all(word, all_ip, "ip", source)
+ await db_stash.store_all(word, all_ip, 'ip', source)
if store_results:
email_list, host_names, urls = await search_engine.get_results()
all_emails.extend(email_list)
- host_names = list({host for host in host_names if f".{word}" in host})
+ host_names = list({host for host in host_names if f'.{word}' in host})
all_urls.extend(urls)
all_hosts.extend(host_names)
- await db.store_all(word, all_hosts, "host", source)
- await db.store_all(word, all_emails, "email", source)
+ await db.store_all(word, all_hosts, 'host', source)
+ await db.store_all(word, all_emails, 'email', source)
if store_people:
people_list = await search_engine.get_people()
- await db_stash.store_all(word, people_list, "people", source)
+ await db_stash.store_all(word, people_list, 'people', source)
if store_links:
links = await search_engine.get_links()
linkedin_links_tracker.extend(links)
if len(links) > 0:
- await db.store_all(word, links, "linkedinlinks", engineitem)
+ await db.store_all(word, links, 'linkedinlinks', engineitem)
if store_interestingurls:
iurls = await search_engine.get_interestingurls()
interesting_urls.extend(iurls)
if len(iurls) > 0:
- await db.store_all(word, iurls, "interestingurls", engineitem)
+ await db.store_all(word, iurls, 'interestingurls', engineitem)
if store_asns:
fasns = await search_engine.get_asns()
total_asns.extend(fasns)
if len(fasns) > 0:
- await db.store_all(word, fasns, "asns", engineitem)
+ await db.store_all(word, fasns, 'asns', engineitem)
stor_lst = []
if args.source is not None:
- if args.source.lower() != "all":
- engines = sorted(set(map(str.strip, args.source.split(","))))
+ if args.source.lower() != 'all':
+ engines = sorted(set(map(str.strip, args.source.split(','))))
else:
engines = Core.get_supportedengines()
# Iterate through search engines in order
if set(engines).issubset(Core.get_supportedengines()):
- print(f"\n[*] Target: {word} \n")
+ print(f'\n[*] Target: {word} \n')
for engineitem in engines:
- if engineitem == "anubis":
+ if engineitem == 'anubis':
try:
anubis_search = anubis.SearchAnubis(word)
- stor_lst.append(
- store(anubis_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(anubis_search, engineitem, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "baidu":
+ elif engineitem == 'baidu':
try:
baidu_search = baidusearch.SearchBaidu(word, limit)
stor_lst.append(
@@ -416,7 +390,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "bevigil":
+ elif engineitem == 'bevigil':
try:
bevigil_search = bevigil.SearchBeVigil(word)
stor_lst.append(
@@ -430,29 +404,25 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "binaryedge":
+ elif engineitem == 'binaryedge':
try:
- binaryedge_search = binaryedgesearch.SearchBinaryEdge(
- word, limit
- )
- stor_lst.append(
- store(binaryedge_search, engineitem, store_host=True)
- )
+ binaryedge_search = binaryedgesearch.SearchBinaryEdge(word, limit)
+ stor_lst.append(store(binaryedge_search, engineitem, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "bing" or engineitem == "bingapi":
+ elif engineitem == 'bing' or engineitem == 'bingapi':
try:
bing_search = bingsearch.SearchBing(word, limit, start)
- bingapi = ""
- if engineitem == "bingapi":
- bingapi += "yes"
+ bingapi = ''
+ if engineitem == 'bingapi':
+ bingapi += 'yes'
else:
- bingapi += "no"
+ bingapi += 'no'
stor_lst.append(
store(
bing_search,
- "bing",
+ 'bing',
process_param=bingapi,
store_host=True,
store_emails=True,
@@ -464,7 +434,7 @@ async def store(
else:
print(e)
- elif engineitem == "bufferoverun":
+ elif engineitem == 'bufferoverun':
try:
bufferoverun_search = bufferoverun.SearchBufferover(word)
stor_lst.append(
@@ -478,7 +448,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "brave":
+ elif engineitem == 'brave':
try:
brave_search = bravesearch.SearchBrave(word, limit)
stor_lst.append(
@@ -492,7 +462,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "censys":
+ elif engineitem == 'censys':
try:
censys_search = censysearch.SearchCensys(word, limit)
stor_lst.append(
@@ -507,16 +477,14 @@ async def store(
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "certspotter":
+ elif engineitem == 'certspotter':
try:
certspotter_search = certspottersearch.SearchCertspoter(word)
- stor_lst.append(
- store(certspotter_search, engineitem, None, store_host=True)
- )
+ stor_lst.append(store(certspotter_search, engineitem, None, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "criminalip":
+ elif engineitem == 'criminalip':
try:
criminalip_search = criminalip.SearchCriminalIP(word)
stor_lst.append(
@@ -532,18 +500,16 @@ async def store(
if isinstance(e, MissingKey):
print(e)
else:
- print(f"An excepion has occurred in criminalip: {e}")
+ print(f'An exception has occurred in criminalip: {e}')
- elif engineitem == "crtsh":
+ elif engineitem == 'crtsh':
try:
crtsh_search = crtsh.SearchCrtsh(word)
- stor_lst.append(store(crtsh_search, "CRTsh", store_host=True))
+ stor_lst.append(store(crtsh_search, 'CRTsh', store_host=True))
except Exception as e:
- print(
- f"[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}"
- )
+ print(f'[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}')
- elif engineitem == "dnsdumpster":
+ elif engineitem == 'dnsdumpster':
try:
dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word)
stor_lst.append(
@@ -555,9 +521,9 @@ async def store(
)
)
except Exception as e:
- print(f"[!] An error occurred with dnsdumpster: {e}")
+ print(f'[!] An error occurred with dnsdumpster: {e}')
- elif engineitem == "duckduckgo":
+ elif engineitem == 'duckduckgo':
duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(word, limit)
stor_lst.append(
store(
@@ -568,17 +534,15 @@ async def store(
)
)
- elif engineitem == "fullhunt":
+ elif engineitem == 'fullhunt':
try:
fullhunt_search = fullhuntsearch.SearchFullHunt(word)
- stor_lst.append(
- store(fullhunt_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(fullhunt_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "github-code":
+ elif engineitem == 'github-code':
try:
github_search = githubcode.SearchGithubCode(word, limit)
stor_lst.append(
@@ -592,13 +556,11 @@ async def store(
except MissingKey as ex:
print(ex)
- elif engineitem == "hackertarget":
+ elif engineitem == 'hackertarget':
hackertarget_search = hackertarget.SearchHackerTarget(word)
- stor_lst.append(
- store(hackertarget_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(hackertarget_search, engineitem, store_host=True))
- elif engineitem == "hunter":
+ elif engineitem == 'hunter':
try:
hunter_search = huntersearch.SearchHunter(word, limit, start)
stor_lst.append(
@@ -613,19 +575,17 @@ async def store(
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "hunterhow":
+ elif engineitem == 'hunterhow':
try:
hunterhow_search = searchhunterhow.SearchHunterHow(word)
- stor_lst.append(
- store(hunterhow_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(hunterhow_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
else:
- print(f"An exception has occurred in hunterhow search: {e}")
+ print(f'An exception has occurred in hunterhow search: {e}')
- elif engineitem == "intelx":
+ elif engineitem == 'intelx':
try:
intelx_search = intelxsearch.SearchIntelx(word)
stor_lst.append(
@@ -640,9 +600,9 @@ async def store(
if isinstance(e, MissingKey):
print(e)
else:
- print(f"An exception has occurred in Intelx search: {e}")
+ print(f'An exception has occurred in Intelx search: {e}')
- elif engineitem == "netlas":
+ elif engineitem == 'netlas':
try:
netlas_search = netlas.SearchNetlas(word)
stor_lst.append(
@@ -657,7 +617,7 @@ async def store(
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "onyphe":
+ elif engineitem == 'onyphe':
try:
onyphe_search = onyphe.SearchOnyphe(word)
stor_lst.append(
@@ -672,7 +632,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "otx":
+ elif engineitem == 'otx':
try:
otxsearch_search = otxsearch.SearchOtx(word)
stor_lst.append(
@@ -686,67 +646,53 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "pentesttools":
+ elif engineitem == 'pentesttools':
try:
pentesttools_search = pentesttools.SearchPentestTools(word)
- stor_lst.append(
- store(pentesttools_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(pentesttools_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
else:
- print(
- f"An exception has occurred in PentestTools search: {e}"
- )
+ print(f'An exception has occurred in PentestTools search: {e}')
- elif engineitem == "projectdiscovery":
+ elif engineitem == 'projectdiscovery':
try:
projectdiscovery_search = projectdiscovery.SearchDiscovery(word)
- stor_lst.append(
- store(projectdiscovery_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(projectdiscovery_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
else:
- print("An exception has occurred in ProjectDiscovery")
+ print('An exception has occurred in ProjectDiscovery')
- elif engineitem == "rapiddns":
+ elif engineitem == 'rapiddns':
try:
rapiddns_search = rapiddns.SearchRapidDns(word)
- stor_lst.append(
- store(rapiddns_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(rapiddns_search, engineitem, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "rocketreach":
+ elif engineitem == 'rocketreach':
try:
rocketreach_search = rocketreach.SearchRocketReach(word, limit)
- stor_lst.append(
- store(rocketreach_search, engineitem, store_links=True)
- )
+ stor_lst.append(store(rocketreach_search, engineitem, store_links=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
else:
- print(f"An exception has occurred in RocketReach: {e}")
+ print(f'An exception has occurred in RocketReach: {e}')
- elif engineitem == "subdomaincenter":
+ elif engineitem == 'subdomaincenter':
try:
subdomaincenter_search = subdomaincenter.SubdomainCenter(word)
- stor_lst.append(
- store(subdomaincenter_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(subdomaincenter_search, engineitem, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "securityTrails":
+ elif engineitem == 'securityTrails':
try:
- securitytrails_search = (
- securitytrailssearch.SearchSecuritytrail(word)
- )
+ securitytrails_search = securitytrailssearch.SearchSecuritytrail(word)
stor_lst.append(
store(
securitytrails_search,
@@ -759,34 +705,24 @@ async def store(
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "sitedossier":
+ elif engineitem == 'sitedossier':
try:
sitedossier_search = sitedossier.SearchSitedossier(word)
- stor_lst.append(
- store(sitedossier_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(sitedossier_search, engineitem, store_host=True))
except Exception as e:
print(e)
- elif engineitem == "subdomainfinderc99":
+ elif engineitem == 'subdomainfinderc99':
try:
- subdomainfinderc99_search = (
- subdomainfinderc99.SearchSubdomainfinderc99(word)
- )
- stor_lst.append(
- store(
- subdomainfinderc99_search, engineitem, store_host=True
- )
- )
+ subdomainfinderc99_search = subdomainfinderc99.SearchSubdomainfinderc99(word)
+ stor_lst.append(store(subdomainfinderc99_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
else:
- print(
- f"An exception has occurred in Subdomainfinderc99 search: {e}"
- )
+ print(f'An exception has occurred in Subdomainfinderc99 search: {e}')
- elif engineitem == "threatminer":
+ elif engineitem == 'threatminer':
try:
threatminer_search = threatminer.SearchThreatminer(word)
stor_lst.append(
@@ -800,7 +736,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "tomba":
+ elif engineitem == 'tomba':
try:
tomba_search = tombasearch.SearchTomba(word, limit, start)
stor_lst.append(
@@ -815,7 +751,7 @@ async def store(
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "urlscan":
+ elif engineitem == 'urlscan':
try:
urlscan_search = urlscan.SearchUrlscan(word)
stor_lst.append(
@@ -831,17 +767,15 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "virustotal":
+ elif engineitem == 'virustotal':
try:
virustotal_search = virustotal.SearchVirustotal(word)
- stor_lst.append(
- store(virustotal_search, engineitem, store_host=True)
- )
+ stor_lst.append(store(virustotal_search, engineitem, store_host=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
- elif engineitem == "yahoo":
+ elif engineitem == 'yahoo':
try:
yahoo_search = yahoosearch.SearchYahoo(word, limit)
stor_lst.append(
@@ -855,7 +789,7 @@ async def store(
except Exception as e:
print(e)
- elif engineitem == "zoomeye":
+ elif engineitem == 'zoomeye':
try:
zoomeye_search = zoomeyesearch.SearchZoomEye(word, limit)
stor_lst.append(
@@ -877,10 +811,10 @@ async def store(
try:
rest_args.dns_brute
except Exception:
- print("\n[!] Invalid source.\n")
+ print('\n[!] Invalid source.\n')
sys.exit(1)
else:
- print("\n[!] Invalid source.\n")
+ print('\n[!] Invalid source.\n')
sys.exit(1)
async def worker(queue):
@@ -916,25 +850,12 @@ async def handler(lst):
await handler(lst=stor_lst)
return_ips: list = []
- if (
- rest_args is not None
- and len(rest_filename) == 0
- and rest_args.dns_brute is False
- ):
+ if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False:
# Indicates user is using REST api but not wanting output to be saved to a file
# cast to string so Rest API can understand the type
- return_ips.extend(
- [
- str(ip)
- for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])
- ]
- )
+ return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
# return list(set(all_emails)), return_ips, full, '', ''
- all_hosts = [
- host.replace("www.", "")
- for host in all_hosts
- if host.replace("www.", "") in all_hosts
- ]
+ all_hosts = [host.replace('www.', '') for host in all_hosts if host.replace('www.', '') in all_hosts]
all_hosts = list(sorted(set(all_hosts)))
return (
total_asns,
@@ -951,152 +872,140 @@ async def handler(lst):
try:
all_emails
except NameError:
- print("\n\n[!] No emails found because all_emails is not defined.\n\n ")
+ print('\n\n[!] No emails found because all_emails is not defined.\n\n ')
sys.exit(1)
try:
all_hosts
except NameError:
- print("\n\n[!] No hosts found because all_hosts is not defined.\n\n ")
+ print('\n\n[!] No hosts found because all_hosts is not defined.\n\n ')
sys.exit(1)
# Results
if len(total_asns) > 0:
- print(f"\n[*] ASNS found: {len(total_asns)}")
- print("--------------------")
+ print(f'\n[*] ASNS found: {len(total_asns)}')
+ print('--------------------')
total_asns = list(sorted(set(total_asns)))
for asn in total_asns:
print(asn)
if len(interesting_urls) > 0:
- print(f"\n[*] Interesting Urls found: {len(interesting_urls)}")
- print("--------------------")
+ print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
+ print('--------------------')
interesting_urls = list(sorted(set(interesting_urls)))
for iurl in interesting_urls:
print(iurl)
- if len(twitter_people_list_tracker) == 0 and "twitter" in engines:
- print("\n[*] No Twitter users found.\n\n")
+ if len(twitter_people_list_tracker) == 0 and 'twitter' in engines:
+ print('\n[*] No Twitter users found.\n\n')
else:
if len(twitter_people_list_tracker) >= 1:
- print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker)))
- print("---------------------")
+ print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker)))
+ print('---------------------')
twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker)))
for usr in twitter_people_list_tracker:
print(usr)
- if len(linkedin_people_list_tracker) == 0 and "linkedin" in engines:
- print("\n[*] No LinkedIn users found.\n\n")
+ if len(linkedin_people_list_tracker) == 0 and 'linkedin' in engines:
+ print('\n[*] No LinkedIn users found.\n\n')
else:
if len(linkedin_people_list_tracker) >= 1:
- print(
- "\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker))
- )
- print("---------------------")
- linkedin_people_list_tracker = list(
- sorted(set(linkedin_people_list_tracker))
- )
+ print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker)))
+ print('---------------------')
+ linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker)))
for usr in linkedin_people_list_tracker:
print(usr)
- if len(linkedin_links_tracker) == 0 and (
- "linkedin" in engines or "rocketreach" in engines
- ):
- print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}")
+ if len(linkedin_links_tracker) == 0 and ('linkedin' in engines or 'rocketreach' in engines):
+ print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
- print("---------------------")
+ print('---------------------')
for link in linkedin_people_list_tracker:
print(link)
length_urls = len(all_urls)
if length_urls == 0:
- if len(engines) >= 1 and "trello" in engines:
- print("\n[*] No Trello URLs found.")
+ if len(engines) >= 1 and 'trello' in engines:
+ print('\n[*] No Trello URLs found.')
else:
total = length_urls
- print("\n[*] Trello URLs found: " + str(total))
- print("--------------------")
+ print('\n[*] Trello URLs found: ' + str(total))
+ print('--------------------')
all_urls = list(sorted(set(all_urls)))
for url in sorted(all_urls):
print(url)
if len(all_ip) == 0:
- print("\n[*] No IPs found.")
+ print('\n[*] No IPs found.')
else:
- print("\n[*] IPs found: " + str(len(all_ip)))
- print("-------------------")
+ print('\n[*] IPs found: ' + str(len(all_ip)))
+ print('-------------------')
# use netaddr as the list may contain ipv4 and ipv6 addresses
ip_list = []
for ip in set(all_ip):
try:
ip = ip.strip()
if len(ip) > 0:
- if "/" in ip:
+ if '/' in ip:
ip_list.append(str(netaddr.IPNetwork(ip)))
else:
ip_list.append(str(netaddr.IPAddress(ip)))
except Exception as e:
- print(f"An exception has occurred while adding: {ip} to ip_list: {e}")
+ print(f'An exception has occurred while adding: {ip} to ip_list: {e}')
continue
ip_list = list(sorted(ip_list))
- print("\n".join(map(str, ip_list)))
+ print('\n'.join(map(str, ip_list)))
if len(all_emails) == 0:
- print("\n[*] No emails found.")
+ print('\n[*] No emails found.')
else:
- print("\n[*] Emails found: " + str(len(all_emails)))
- print("----------------------")
+ print('\n[*] Emails found: ' + str(len(all_emails)))
+ print('----------------------')
all_emails = sorted(list(set(all_emails)))
- print("\n".join(all_emails))
+ print('\n'.join(all_emails))
if len(all_hosts) == 0:
- print("\n[*] No hosts found.\n\n")
+ print('\n[*] No hosts found.\n\n')
else:
db = stash.StashManager()
if dnsresolve is None or len(final_dns_resolver_list) > 0:
temp = set()
for host in full:
- if ":" in host:
+ if ':' in host:
# TODO parse addresses and sort them as they are IPs
- subdomain, addr = host.split(":", 1)
+ subdomain, addr = host.split(':', 1)
if subdomain.endswith(word):
- temp.add(subdomain + ":" + addr)
+ temp.add(subdomain + ':' + addr)
continue
if host.endswith(word):
- if host[:4] == "www.":
+ if host[:4] == 'www.':
if host[4:] in all_hosts or host[4:] in full:
temp.add(host[4:])
continue
temp.add(host)
full = list(sorted(temp))
- full.sort(key=lambda el: el.split(":")[0])
- print("\n[*] Hosts found: " + str(len(full)))
- print("---------------------")
+ full.sort(key=lambda el: el.split(':')[0])
+ print('\n[*] Hosts found: ' + str(len(full)))
+ print('---------------------')
for host in full:
print(host)
try:
- if ":" in host:
- _, addr = host.split(":", 1)
- await db.store(word, addr, "ip", "DNS-resolver")
+ if ':' in host:
+ _, addr = host.split(':', 1)
+ await db.store(word, addr, 'ip', 'DNS-resolver')
except Exception as e:
- print(
- f"An exception has occurred while attempting to insert: {host} IP into DB: {e}"
- )
+ print(f'An exception has occurred while attempting to insert: {host} IP into DB: {e}')
continue
else:
- all_hosts = [
- host.replace("www.", "")
- for host in all_hosts
- if host.replace("www.", "") in all_hosts
- ]
+ all_hosts = [host.replace('www.', '') for host in all_hosts if host.replace('www.', '') in all_hosts]
all_hosts = list(sorted(set(all_hosts)))
- print("\n[*] Hosts found: " + str(len(all_hosts)))
- print("---------------------")
+ print('\n[*] Hosts found: ' + str(len(all_hosts)))
+ print('---------------------')
for host in all_hosts:
print(host)
# DNS brute force
if dnsbrute and dnsbrute[0] is True:
- print("\n[*] Starting DNS brute force.")
+ print('\n[*] Starting DNS brute force.')
dns_force = dnssearch.DnsForce(word, final_dns_resolver_list, verbose=True)
resolved_pair, hosts, ips = await dns_force.run()
# Check if Rest API is being used if so return found hosts
@@ -1105,19 +1014,19 @@ async def handler(lst):
db = stash.StashManager()
temp = set()
for host in resolved_pair:
- if ":" in host:
+ if ':' in host:
# TODO parse addresses and sort them as they are IPs
- subdomain, addr = host.split(":", 1)
+ subdomain, addr = host.split(':', 1)
if subdomain.endswith(word):
# Append to full, so it's within JSON/XML at the end if output file is requested
if host not in full:
full.append(host)
- temp.add(subdomain + ":" + addr)
+ temp.add(subdomain + ':' + addr)
if host not in all_hosts:
all_hosts.append(host)
continue
if host.endswith(word):
- if host[:4] == "www.":
+ if host[:4] == 'www.':
if host[4:] in all_hosts or host[4:] in full:
continue
if host not in full:
@@ -1125,16 +1034,16 @@ async def handler(lst):
temp.add(host)
if host not in all_hosts:
all_hosts.append(host)
- print("\n[*] Hosts found after DNS brute force:")
+ print('\n[*] Hosts found after DNS brute force:')
for sub in temp:
print(sub)
- await db.store_all(word, list(sorted(temp)), "host", "dns_bruteforce")
+ await db.store_all(word, list(sorted(temp)), 'host', 'dns_bruteforce')
takeover_results = dict()
# TakeOver Checking
if takeover_status:
- print("\n[*] Performing subdomain takeover check")
- print("\n[*] Subdomain Takeover checking IS ACTIVE RECON")
+ print('\n[*] Performing subdomain takeover check')
+ print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
search_take = takeover.TakeOver(all_hosts)
await search_take.populate_fingerprints()
await search_take.process(proxy=use_proxy)
@@ -1143,25 +1052,21 @@ async def handler(lst):
dnsrev: list = []
# print(f'DNSlookup: {dnslookup}')
if dnslookup is True:
- print("\n[*] Starting active queries for DNSLookup.")
+ print('\n[*] Starting active queries for DNSLookup.')
# reverse each iprange in a separate task
__reverse_dns_tasks: dict = {}
for entry in host_ip:
- __ip_range = dnssearch.serialize_ip_range(ip=entry, netmask="24")
+ __ip_range = dnssearch.serialize_ip_range(ip=entry, netmask='24')
if __ip_range and __ip_range not in set(__reverse_dns_tasks.keys()):
- print("\n[*] Performing reverse lookup on " + __ip_range)
+ print('\n[*] Performing reverse lookup on ' + __ip_range)
__reverse_dns_tasks[__ip_range] = asyncio.create_task(
dnssearch.reverse_all_ips_in_range(
iprange=__ip_range,
callback=dnssearch.generate_postprocessing_callback(
target=word, local_results=dnsrev, overall_results=full
),
- nameservers=(
- final_dns_resolver_list
- if len(final_dns_resolver_list) > 0
- else None
- ),
+ nameservers=(final_dns_resolver_list if len(final_dns_resolver_list) > 0 else None),
)
)
# nameservers=list(map(str, dnsserver.split(','))) if dnsserver else None))
@@ -1169,26 +1074,26 @@ async def handler(lst):
# run all the reversing tasks concurrently
await asyncio.gather(*__reverse_dns_tasks.values())
# Display the newly found hosts
- print("\n[*] Hosts found after reverse lookup (in target domain):")
- print("--------------------------------------------------------")
+ print('\n[*] Hosts found after reverse lookup (in target domain):')
+ print('--------------------------------------------------------')
for xh in dnsrev:
print(xh)
# Virtual hosts search
- if virtual == "basic":
- print("\n[*] Virtual hosts:")
- print("------------------")
+ if virtual == 'basic':
+ print('\n[*] Virtual hosts:')
+ print('------------------')
for data in host_ip:
basic_search = bingsearch.SearchBing(data, limit, start)
await basic_search.process_vhost()
results = await basic_search.get_allhostnames()
for result in results:
- result = re.sub(r"[[?]*\w*>]*", "", result)
- result = re.sub("<", "", result)
- result = re.sub(">", "", result)
- print(data + "\t" + result)
- vhost.append(data + ":" + result)
- full.append(data + ":" + result)
+ result = re.sub(r'[[?]*\w*>]*', '', result)
+ result = re.sub('<', '', result)
+ result = re.sub('>', '', result)
+ print(data + '\t' + result)
+ vhost.append(data + ':' + result)
+ full.append(data + ':' + result)
vhost = sorted(set(vhost))
else:
pass
@@ -1201,187 +1106,153 @@ async def handler(lst):
# Verify the path exists, if not create it or if user does not create it skips screenshot
if path_exists:
await screen_shotter.verify_installation()
- print(
- f"\nScreenshots can be found in: {screen_shotter.output}{screen_shotter.slash}"
- )
+ print(f'\nScreenshots can be found in: {screen_shotter.output}{screen_shotter.slash}')
start_time = time.perf_counter()
- print("Filtering domains for ones we can reach")
+ print('Filtering domains for ones we can reach')
if dnsresolve is None or len(final_dns_resolver_list) > 0:
- unique_resolved_domains = {
- url.split(":")[0]
- for url in full
- if ":" in url and "www." not in url
- }
+ unique_resolved_domains = {url.split(':')[0] for url in full if ':' in url and 'www.' not in url}
else:
# Technically not resolved in this case, which is not ideal
# You should always use dns resolve when doing screenshotting
- print(
- "NOTE for future use cases you should only use screenshotting in tandem with DNS resolving"
- )
+ print('NOTE for future use cases you should only use screenshotting in tandem with DNS resolving')
unique_resolved_domains = set(all_hosts)
if len(unique_resolved_domains) > 0:
# First filter out ones that didn't resolve
- print(
- "Attempting to visit unique resolved domains, this is ACTIVE RECON"
- )
+ print('Attempting to visit unique resolved domains, this is ACTIVE RECON')
async with Pool(10) as pool:
- results = await pool.map(
- screen_shotter.visit, list(unique_resolved_domains)
- )
+ results = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
# Filter out domains that we couldn't connect to
- unique_resolved_domains_list = list(
- sorted({tup[0] for tup in results if len(tup[1]) > 0})
- )
+ unique_resolved_domains_list = list(sorted({tup[0] for tup in results if len(tup[1]) > 0}))
async with Pool(3) as pool:
- print(
- f"Length of unique resolved domains: {len(unique_resolved_domains_list)} chunking now!\n"
- )
+ print(f'Length of unique resolved domains: {len(unique_resolved_domains_list)} chunking now!\n')
# If you have the resources, you could make the function faster by increasing the chunk number
chunk_number = 14
- for chunk in screen_shotter.chunk_list(
- unique_resolved_domains_list, chunk_number
- ):
+ for chunk in screen_shotter.chunk_list(unique_resolved_domains_list, chunk_number):
try:
- screenshot_tups.extend(
- await pool.map(screen_shotter.take_screenshot, chunk)
- )
+ screenshot_tups.extend(await pool.map(screen_shotter.take_screenshot, chunk))
except Exception as ee:
- print(f"An exception has occurred while mapping: {ee}")
+ print(f'An exception has occurred while mapping: {ee}')
end = time.perf_counter()
# There is probably an easier way to do this
total = int(end - start_time)
mon, sec = divmod(total, 60)
hr, mon = divmod(mon, 60)
- total_time = "%02d:%02d" % (mon, sec)
- print(f"Finished taking screenshots in {total_time} seconds")
- print(
- "[+] Note there may be leftover chrome processes you may have to kill manually\n"
- )
+ total_time = '%02d:%02d' % (mon, sec)
+ print(f'Finished taking screenshots in {total_time} seconds')
+ print('[+] Note there may be leftover chrome processes you may have to kill manually\n')
# Shodan
shodanres = []
if shodan is True:
- print("\033[94m[*] Searching Shodan. ")
+ print('\033[94m[*] Searching Shodan. ')
try:
for ip in host_ip:
# TODO fix shodan
- print("\tSearching for " + ip)
+ print('\tSearching for ' + ip)
shodan = shodansearch.SearchShodan()
shodandict = await shodan.search_ip(ip)
await asyncio.sleep(5)
rowdata = []
for key, value in shodandict[ip].items():
- if str(
- value
- ) == "Not in Shodan" or "Error occurred in the Shodan IP search module" in str(
- value
- ):
+ if str(value) == 'Not in Shodan' or 'Error occurred in the Shodan IP search module' in str(value):
break
if isinstance(value, int):
value = str(value)
if isinstance(value, list):
- value = ", ".join(map(str, value))
+ value = ', '.join(map(str, value))
rowdata.append(value)
shodanres.append(rowdata)
print(ujson.dumps(shodandict[ip], indent=4, sort_keys=True))
- print("\n")
+ print('\n')
except Exception as e:
- print(f"[!] An error occurred with Shodan: {e} ")
+ print(f'[!] An error occurred with Shodan: {e} ')
else:
pass
- if filename != "":
- print("\n[*] Reporting started.")
+ if filename != '':
+ print('\n[*] Reporting started.')
try:
if len(rest_filename) == 0:
- filename = filename.rsplit(".", 1)[0] + ".xml"
+ filename = filename.rsplit('.', 1)[0] + '.xml'
else:
- filename = (
- "theHarvester/app/static/"
- + rest_filename.rsplit(".", 1)[0]
- + ".xml"
- )
+ filename = 'theHarvester/app/static/' + rest_filename.rsplit('.', 1)[0] + '.xml'
# TODO use aiofiles if user is using rest api
# XML REPORT SECTION
- with open(filename, "w+") as file:
+ with open(filename, 'w+') as file:
file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
for x in all_emails:
- file.write("<email>" + x + "</email>")
+ file.write('<email>' + x + '</email>')
for x in full:
- host, ip = x.split(":", 1) if ":" in x else (x, "")
+ host, ip = x.split(':', 1) if ':' in x else (x, '')
if ip and len(ip) > 3:
- file.write(
- f"<host><ip>{ip}</ip><hostname>{host}</hostname></host>"
- )
+ file.write(f'<host><ip>{ip}</ip><hostname>{host}</hostname></host>')
else:
- file.write(f"<host>{host}</host>")
+ file.write(f'<host>{host}</host>')
for x in vhost:
- host, ip = x.split(":", 1) if ":" in x else (x, "")
+ host, ip = x.split(':', 1) if ':' in x else (x, '')
if ip and len(ip) > 3:
- file.write(
- f"<vhost><ip>{ip}</ip> <hostname>{host}</hostname></vhost>"
- )
+ file.write(f'<vhost><ip>{ip}</ip> <hostname>{host}</hostname></vhost>')
else:
- file.write(f"<vhost>{host}</vhost>")
+ file.write(f'<vhost>{host}</vhost>')
# TODO add Shodan output into XML report
- file.write("</theHarvester>")
- print("[*] XML File saved.")
+ file.write('</theHarvester>')
+ print('[*] XML File saved.')
except Exception as error:
- print(f"[!] An error occurred while saving the XML file: {error}")
+ print(f'[!] An error occurred while saving the XML file: {error}')
try:
# JSON REPORT SECTION
- filename = filename.rsplit(".", 1)[0] + ".json"
+ filename = filename.rsplit('.', 1)[0] + '.json'
# create dict with values for json output
json_dict: dict = dict()
# determine if a variable exists
# it should but just a validation check
- if "ip_list" in locals():
+ if 'ip_list' in locals():
if all_ip and len(all_ip) >= 1 and ip_list and len(ip_list) > 0:
- json_dict["ips"] = ip_list
+ json_dict['ips'] = ip_list
if len(all_emails) > 0:
- json_dict["emails"] = all_emails
+ json_dict['emails'] = all_emails
if dnsresolve is None or len(final_dns_resolver_list) > 0 and len(full) > 0:
- json_dict["hosts"] = full
+ json_dict['hosts'] = full
elif len(all_hosts) > 0:
- json_dict["hosts"] = all_hosts
+ json_dict['hosts'] = all_hosts
else:
- json_dict["hosts"] = []
+ json_dict['hosts'] = []
if vhost and len(vhost) > 0:
- json_dict["vhosts"] = vhost
+ json_dict['vhosts'] = vhost
if len(interesting_urls) > 0:
- json_dict["interesting_urls"] = interesting_urls
+ json_dict['interesting_urls'] = interesting_urls
if len(all_urls) > 0:
- json_dict["trello_urls"] = all_urls
+ json_dict['trello_urls'] = all_urls
if len(total_asns) > 0:
- json_dict["asns"] = total_asns
+ json_dict['asns'] = total_asns
if len(twitter_people_list_tracker) > 0:
- json_dict["twitter_people"] = twitter_people_list_tracker
+ json_dict['twitter_people'] = twitter_people_list_tracker
if len(linkedin_people_list_tracker) > 0:
- json_dict["linkedin_people"] = linkedin_people_list_tracker
+ json_dict['linkedin_people'] = linkedin_people_list_tracker
if len(linkedin_links_tracker) > 0:
- json_dict["linkedin_links"] = linkedin_links_tracker
+ json_dict['linkedin_links'] = linkedin_links_tracker
if takeover_status and len(takeover_results) > 0:
- json_dict["takeover_results"] = takeover_results
+ json_dict['takeover_results'] = takeover_results
- json_dict["shodan"] = shodanres
- with open(filename, "w+") as fp:
+ json_dict['shodan'] = shodanres
+ with open(filename, 'w+') as fp:
dumped_json = ujson.dumps(json_dict, sort_keys=True)
fp.write(dumped_json)
- print("[*] JSON File saved.")
+ print('[*] JSON File saved.')
except Exception as er:
- print(f"[!] An error occurred while saving the JSON file: {er} ")
- print("\n\n")
+ print(f'[!] An error occurred while saving the JSON file: {er} ')
+ print('\n\n')
sys.exit(0)
@@ -1390,7 +1261,7 @@ async def entry_point() -> None:
Core.banner()
await start()
except KeyboardInterrupt:
- print("\n\n[!] ctrl+c detected from user, quitting.\n\n ")
+ print('\n\n[!] ctrl+c detected from user, quitting.\n\n ')
except Exception as error_entry_point:
print(error_entry_point)
sys.exit(1)
diff --git a/theHarvester/discovery/anubis.py b/theHarvester/discovery/anubis.py
index 5d8a9fc0..d3a1e8ca 100644
--- a/theHarvester/discovery/anubis.py
+++ b/theHarvester/discovery/anubis.py
@@ -8,7 +8,7 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
- url = f"https://jldc.me/anubis/subdomains/{self.word}"
+ url = f'https://jldc.me/anubis/subdomains/{self.word}'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts = response[0]
diff --git a/theHarvester/discovery/baidusearch.py b/theHarvester/discovery/baidusearch.py
index 687a8721..e640bc11 100644
--- a/theHarvester/discovery/baidusearch.py
+++ b/theHarvester/discovery/baidusearch.py
@@ -5,23 +5,17 @@
class SearchBaidu:
def __init__(self, word, limit) -> None:
self.word = word
- self.total_results = ""
- self.server = "www.baidu.com"
- self.hostname = "www.baidu.com"
+ self.total_results = ''
+ self.server = 'www.baidu.com'
+ self.hostname = 'www.baidu.com'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
- headers = {"Host": self.hostname, "User-agent": Core.get_user_agent()}
- base_url = f"https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}"
- urls = [
- base_url.replace("xx", str(num))
- for num in range(0, self.limit, 10)
- if num <= self.limit
- ]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ headers = {'Host': self.hostname, 'User-agent': Core.get_user_agent()}
+ base_url = f'https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}'
+ urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
diff --git a/theHarvester/discovery/bevigil.py b/theHarvester/discovery/bevigil.py
index cc17c0e8..09f5ed25 100644
--- a/theHarvester/discovery/bevigil.py
+++ b/theHarvester/discovery/bevigil.py
@@ -9,27 +9,23 @@ def __init__(self, word) -> None:
self.interestingurls: set = set()
self.key = Core.bevigil_key()
if self.key is None:
- self.key = ""
- raise MissingKey("bevigil")
+ self.key = ''
+ raise MissingKey('bevigil')
self.proxy = False
async def do_search(self) -> None:
- subdomain_endpoint = f"https://osint.bevigil.com/api/{self.word}/subdomains/"
- url_endpoint = f"https://osint.bevigil.com/api/{self.word}/urls/"
- headers = {"X-Access-Token": self.key}
+ subdomain_endpoint = f'https://osint.bevigil.com/api/{self.word}/subdomains/'
+ url_endpoint = f'https://osint.bevigil.com/api/{self.word}/urls/'
+ headers = {'X-Access-Token': self.key}
- responses = await AsyncFetcher.fetch_all(
- [subdomain_endpoint], json=True, proxy=self.proxy, headers=headers
- )
+ responses = await AsyncFetcher.fetch_all([subdomain_endpoint], json=True, proxy=self.proxy, headers=headers)
response = responses[0]
- for subdomain in response["subdomains"]:
+ for subdomain in response['subdomains']:
self.totalhosts.add(subdomain)
- responses = await AsyncFetcher.fetch_all(
- [url_endpoint], json=True, proxy=self.proxy, headers=headers
- )
+ responses = await AsyncFetcher.fetch_all([url_endpoint], json=True, proxy=self.proxy, headers=headers)
response = responses[0]
- for url in response["urls"]:
+ for url in response['urls']:
self.interestingurls.add(url)
async def get_hostnames(self) -> set:
diff --git a/theHarvester/discovery/binaryedgesearch.py b/theHarvester/discovery/binaryedgesearch.py
index 77e04923..895e0cd4 100644
--- a/theHarvester/discovery/binaryedgesearch.py
+++ b/theHarvester/discovery/binaryedgesearch.py
@@ -13,29 +13,25 @@ def __init__(self, word, limit) -> None:
self.limit = 501 if limit >= 501 else limit
self.limit = 2 if self.limit == 1 else self.limit
if self.key is None:
- raise MissingKey("binaryedge")
+ raise MissingKey('binaryedge')
async def do_search(self) -> None:
- base_url = f"https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}"
- headers = {"X-KEY": self.key, "User-Agent": Core.get_user_agent()}
+ base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
+ headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
for page in range(1, self.limit):
- params = {"page": page}
- response = await AsyncFetcher.fetch_all(
- [base_url], json=True, proxy=self.proxy, params=params, headers=headers
- )
+ params = {'page': page}
+ response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers)
responses = response[0]
dct = responses
- if ("status" in dct.keys() and "message" in dct.keys()) and (
- dct["status"] == 400
- or "Bad Parameter" in dct["message"]
- or "Error" in dct["message"]
+ if ('status' in dct.keys() and 'message' in dct.keys()) and (
+ dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']
):
# 400 status code means no more results
break
- if "events" in dct.keys():
- if len(dct["events"]) == 0:
+ if 'events' in dct.keys():
+ if len(dct['events']) == 0:
break
- self.totalhosts.update({host for host in dct["events"]})
+ self.totalhosts.update({host for host in dct['events']})
await asyncio.sleep(get_delay())
async def get_hostnames(self) -> set:
diff --git a/theHarvester/discovery/bingsearch.py b/theHarvester/discovery/bingsearch.py
index f7cf25ae..ebfc06fb 100644
--- a/theHarvester/discovery/bingsearch.py
+++ b/theHarvester/discovery/bingsearch.py
@@ -7,12 +7,12 @@
class SearchBing:
def __init__(self, word, limit, start) -> None:
- self.word = word.replace(" ", "%20")
+ self.word = word.replace(' ', '%20')
self.results: list[Any] = []
- self.total_results = ""
- self.server = "www.bing.com"
- self.apiserver = "api.search.live.net"
- self.hostname = "www.bing.com"
+ self.total_results = ''
+ self.server = 'www.bing.com'
+ self.apiserver = 'api.search.live.net'
+ self.hostname = 'www.bing.com'
self.limit = int(limit)
self.bingApi = Core.bing_key()
self.counter = start
@@ -20,58 +20,44 @@ def __init__(self, word, limit, start) -> None:
async def do_search(self) -> None:
headers = {
- "Host": self.hostname,
- "Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50",
- "Accept-Language": "en-us,en",
- "User-agent": Core.get_user_agent(),
+ 'Host': self.hostname,
+ 'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
+ 'Accept-Language': 'en-us,en',
+ 'User-agent': Core.get_user_agent(),
}
base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
- urls = [
- base_url.replace("xx", str(num))
- for num in range(0, self.limit, 50)
- if num <= self.limit
- ]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
async def do_search_api(self) -> None:
- url = "https://api.bing.microsoft.com/v7.0/search?"
+ url = 'https://api.bing.microsoft.com/v7.0/search?'
params = {
- "q": self.word,
- "count": str(self.limit),
- "offset": "0",
- "mkt": "en-us",
- "safesearch": "Off",
+ 'q': self.word,
+ 'count': str(self.limit),
+ 'offset': '0',
+ 'mkt': 'en-us',
+ 'safesearch': 'Off',
}
headers = {
- "User-Agent": Core.get_user_agent(),
- "Ocp-Apim-Subscription-Key": self.bingApi,
+ 'User-Agent': Core.get_user_agent(),
+ 'Ocp-Apim-Subscription-Key': self.bingApi,
}
- self.results = await AsyncFetcher.fetch_all(
- [url], headers=headers, params=params, proxy=self.proxy
- )
+ self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy)
for res in self.results:
self.total_results += res
async def do_search_vhost(self) -> None:
headers = {
- "Host": self.hostname,
- "Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50",
- "Accept-Language": "en-us,en",
- "User-agent": Core.get_user_agent(),
+ 'Host': self.hostname,
+ 'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
+ 'Accept-Language': 'en-us,en',
+ 'User-agent': Core.get_user_agent(),
}
- base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx"
- urls = [
- base_url.replace("xx", str(num))
- for num in range(0, self.limit, 50)
- if num <= self.limit
- ]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
+ urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
@@ -89,13 +75,13 @@ async def get_allhostnames(self):
async def process(self, api, proxy: bool = False) -> None:
self.proxy = proxy
- if api == "yes":
+ if api == 'yes':
if self.bingApi is None:
- raise MissingKey("BingAPI")
+ raise MissingKey('BingAPI')
await self.do_search_api()
else:
await self.do_search()
- print(f"\tSearching {self.counter} results.")
+ print(f'\tSearching {self.counter} results.')
async def process_vhost(self) -> None:
await self.do_search_vhost()
diff --git a/theHarvester/discovery/bravesearch.py b/theHarvester/discovery/bravesearch.py
index 5a174b77..39eef9d8 100644
--- a/theHarvester/discovery/bravesearch.py
+++ b/theHarvester/discovery/bravesearch.py
@@ -8,37 +8,34 @@
class SearchBrave:
def __init__(self, word, limit):
self.word = word
- self.results = ""
- self.totalresults = ""
- self.server = "https://search.brave.com/search?q="
+ self.results = ''
+ self.totalresults = ''
+ self.server = 'https://search.brave.com/search?q='
self.limit = limit
self.proxy = False
async def do_search(self):
- headers = {"User-Agent": Core.get_user_agent()}
- for query in [f'"{self.word}"', f"site:{self.word}"]:
+ headers = {'User-Agent': Core.get_user_agent()}
+ for query in [f'"{self.word}"', f'site:{self.word}']:
try:
for offset in range(0, 50):
# To reduce the total number of requests, only two queries are made "self.word" and site:self.word
- current_url = f"{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0"
- resp = await AsyncFetcher.fetch_all(
- [current_url], headers=headers, proxy=self.proxy
- )
+ current_url = f'{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0'
+ resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy)
self.results = resp[0]
self.totalresults += self.results
# if 'Results from Microsoft Bing.' in resp[0] \
if (
- "Not many great matches came back for your search" in resp[0]
- or "Your request has been flagged as being suspicious and Brave Search"
- in resp[0]
- or "Prove" in resp[0]
- and "robot" in resp[0]
- or "Robot" in resp[0]
+ 'Not many great matches came back for your search' in resp[0]
+ or 'Your request has been flagged as being suspicious and Brave Search' in resp[0]
+ or 'Prove' in resp[0]
+ and 'robot' in resp[0]
+ or 'Robot' in resp[0]
):
break
await asyncio.sleep(get_delay() + 15)
except Exception as e:
- print(f"An exception has occurred in bravesearch: {e}")
+ print(f'An exception has occurred in bravesearch: {e}')
await asyncio.sleep(get_delay() + 80)
continue
diff --git a/theHarvester/discovery/bufferoverun.py b/theHarvester/discovery/bufferoverun.py
index 843b2ca6..4e731f65 100644
--- a/theHarvester/discovery/bufferoverun.py
+++ b/theHarvester/discovery/bufferoverun.py
@@ -11,33 +11,30 @@ def __init__(self, word) -> None:
self.totalips: set = set()
self.key = Core.bufferoverun_key()
if self.key is None:
- raise MissingKey("bufferoverun")
+ raise MissingKey('bufferoverun')
self.proxy = False
async def do_search(self) -> None:
- url = f"https://tls.bufferover.run/dns?q={self.word}"
+ url = f'https://tls.bufferover.run/dns?q={self.word}'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
- headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
+ headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
dct = response[0]
- if dct["Results"]:
+ if dct['Results']:
self.totalhosts = {
(
- host.split(",")
- if "," in host
- and self.word.replace("www.", "") in host.split(",")[0] in host
- else host.split(",")[4]
+ host.split(',')
+ if ',' in host and self.word.replace('www.', '') in host.split(',')[0] in host
+ else host.split(',')[4]
)
- for host in dct["Results"]
+ for host in dct['Results']
}
self.totalips = {
- ip.split(",")[0]
- for ip in dct["Results"]
- if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip.split(",")[0])
+ ip.split(',')[0] for ip in dct['Results'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip.split(',')[0])
}
async def get_hostnames(self) -> set:
diff --git a/theHarvester/discovery/censysearch.py b/theHarvester/discovery/censysearch.py
index e303e469..ff771402 100644
--- a/theHarvester/discovery/censysearch.py
+++ b/theHarvester/discovery/censysearch.py
@@ -15,7 +15,7 @@ def __init__(self, domain, limit: int = 500) -> None:
self.word = domain
self.key = Core.censys_key()
if self.key[0] is None or self.key[1] is None:
- raise MissingKey("Censys ID and/or Secret")
+ raise MissingKey('Censys ID and/or Secret')
self.totalhosts: set = set()
self.emails: set = set()
self.limit = limit
@@ -26,26 +26,24 @@ async def do_search(self) -> None:
cert_search = CensysCerts(
api_id=self.key[0],
api_secret=self.key[1],
- user_agent=f"censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)",
+ user_agent=f'censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)',
)
except CensysUnauthorizedException:
- raise MissingKey("Censys ID and/or Secret")
+ raise MissingKey('Censys ID and/or Secret')
- query = f"names: {self.word}"
+ query = f'names: {self.word}'
try:
response = cert_search.search(
query=query,
- fields=["names", "parsed.subject.email_address"],
+ fields=['names', 'parsed.subject.email_address'],
max_records=self.limit,
)
for cert in response():
- self.totalhosts.update(cert.get("names", []))
- email_address = (
- cert.get("parsed", {}).get("subject", {}).get("email_address", [])
- )
+ self.totalhosts.update(cert.get('names', []))
+ email_address = cert.get('parsed', {}).get('subject', {}).get('email_address', [])
self.emails.update(email_address)
except CensysRateLimitExceededException:
- print("Censys rate limit exceeded")
+ print('Censys rate limit exceeded')
async def get_hostnames(self) -> set:
return self.totalhosts
diff --git a/theHarvester/discovery/certspottersearch.py b/theHarvester/discovery/certspottersearch.py
index 4783cd4a..56ae3e3e 100644
--- a/theHarvester/discovery/certspottersearch.py
+++ b/theHarvester/discovery/certspottersearch.py
@@ -8,21 +8,19 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
- base_url = f"https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names"
+ base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names'
try:
- response = await AsyncFetcher.fetch_all(
- [base_url], json=True, proxy=self.proxy
- )
+ response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy)
response = response[0]
if isinstance(response, list):
for dct in response:
for key, value in dct.items():
- if key == "dns_names":
+ if key == 'dns_names':
self.totalhosts.update({name for name in value if name})
elif isinstance(response, dict):
- self.totalhosts.update({response["dns_names"] if "dns_names" in response.keys() else ""}) # type: ignore
+ self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''}) # type: ignore
else:
- self.totalhosts.update({""})
+ self.totalhosts.update({''})
except Exception as e:
print(e)
@@ -32,4 +30,4 @@ async def get_hostnames(self) -> set:
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()
- print("\tSearching results.")
+ print('\tSearching results.')
diff --git a/theHarvester/discovery/constants.py b/theHarvester/discovery/constants.py
index cf9fc28a..6ec47430 100644
--- a/theHarvester/discovery/constants.py
+++ b/theHarvester/discovery/constants.py
@@ -14,10 +14,10 @@ async def splitter(links):
unique_list = []
name_check = []
for url in links:
- tail = url.split("/")[-1]
- if len(tail) == 2 or tail == "zh-cn":
- tail = url.split("/")[-2]
- name = tail.split("-")
+ tail = url.split('/')[-1]
+ if len(tail) == 2 or tail == 'zh-cn':
+ tail = url.split('/')[-2]
+ name = tail.split('-')
if len(name) > 1:
joined_name = name[0] + name[1]
else:
@@ -41,12 +41,8 @@ def filter(lst):
new_lst = []
for item in lst:
item = str(item)
- if (
- (item[0].isalpha() or item[0].isdigit())
- and ("xxx" not in item)
- and (".." not in item)
- ):
- item = item.replace("252f", "").replace("2F", "").replace("2f", "")
+ if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' not in item):
+ item = item.replace('252f', '').replace('2F', '').replace('2f', '')
new_lst.append(item.lower())
return new_lst
@@ -63,10 +59,9 @@ async def search(text: str) -> bool:
"""
for line in text.strip().splitlines():
if (
- "This page appears when Google automatically detects requests coming from your computer network"
- in line
- or "http://www.google.com/sorry/index" in line
- or "https://www.google.com/sorry/index" in line
+ 'This page appears when Google automatically detects requests coming from your computer network' in line
+ or 'http://www.google.com/sorry/index' in line
+ or 'https://www.google.com/sorry/index' in line
):
# print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
return True
@@ -79,47 +74,37 @@ async def google_workaround(visit_url: str) -> bool | str:
:param visit_url: Url to scrape
:return: Correct html that can be parsed by BS4
"""
- url = "https://websniffer.cc/"
+ url = 'https://websniffer.cc/'
data = {
- "Cookie": "",
- "url": visit_url,
- "submit": "Submit",
- "type": "GET&http=1.1",
- "uak": str(random.randint(4, 8)), # select random UA to send to Google
+ 'Cookie': '',
+ 'url': visit_url,
+ 'submit': 'Submit',
+ 'type': 'GET&http=1.1',
+ 'uak': str(random.randint(4, 8)), # select random UA to send to Google
}
- returned_html = await AsyncFetcher.post_fetch(
- url, headers={"User-Agent": Core.get_user_agent()}, data=data
- )
+ returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
returned_html = (
- "This page appears when Google automatically detects requests coming from your computer network"
- if returned_html == ""
+ 'This page appears when Google automatically detects requests coming from your computer network'
+ if returned_html == ''
else returned_html[0]
)
- returned_html = (
- "" if "Please Wait... | Cloudflare" in returned_html else returned_html
- )
+ returned_html = '' if 'Please Wait... | Cloudflare' in returned_html else returned_html
- if (
- len(returned_html) == 0
- or await search(returned_html)
- or "<html" not in returned_html
- ):
+ if len(returned_html) == 0 or await search(returned_html) or '<html' not in returned_html:
# indicates that google is serving workaround a captcha
# That means we will try out second option which will utilize proxies
return True
# the html we get is malformed for BS4 as there are no greater than or less than signs
- if "<html>" in returned_html:
- start_index = returned_html.index("<html>")
+ if '<html>' in returned_html:
+ start_index = returned_html.index('<html>')
else:
- start_index = returned_html.index("<html")
+ start_index = returned_html.index('<html')
- end_index = returned_html.index("</html>") + 1
+ end_index = returned_html.index('</html>') + 1
correct_html = returned_html[start_index:end_index]
# Slice list to get the response's html
- correct_html = "".join(
- [ch.strip().replace("<", "<").replace(">", ">") for ch in correct_html]
- )
+ correct_html = ''.join([ch.strip().replace('<', '<').replace('>', '>') for ch in correct_html])
return correct_html
@@ -130,9 +115,9 @@ class MissingKey(Exception):
def __init__(self, source: str | None) -> None:
if source:
- self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
+ self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
else:
- self.message = "\n\033[93m[!] Missing CSE id. \033[0m"
+ self.message = '\n\033[93m[!] Missing CSE id. \033[0m'
def __str__(self) -> str:
return self.message
diff --git a/theHarvester/discovery/criminalip.py b/theHarvester/discovery/criminalip.py
index 194f741e..b1260026 100644
--- a/theHarvester/discovery/criminalip.py
+++ b/theHarvester/discovery/criminalip.py
@@ -13,64 +13,56 @@ def __init__(self, word) -> None:
self.asns: set = set()
self.key = Core.criminalip_key()
if self.key is None:
- raise MissingKey("criminalip")
+ raise MissingKey('criminalip')
self.proxy = False
async def do_search(self) -> None:
# https://www.criminalip.io/developer/api/post-domain-scan
# https://www.criminalip.io/developer/api/get-domain-status-id
# https://www.criminalip.io/developer/api/get-domain-report-id
- url = "https://api.criminalip.io/v1/domain/scan"
+ url = 'https://api.criminalip.io/v1/domain/scan'
data = f'{{"query": "{self.word}"}}'
# print(f'Current key: {self.key}')
user_agent = Core.get_user_agent()
response = await AsyncFetcher.post_fetch(
url,
json=True,
- headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+ headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
data=data,
proxy=self.proxy,
)
# print(f'My response: {response}')
# Expected response format:
# {'data': {'scan_id': scan_id}, 'message': 'api success', 'status': 200}
- if "status" in response.keys():
- status = response["status"]
+ if 'status' in response.keys():
+ status = response['status']
if status != 200:
- print(
- f"An error has occurred searching criminalip dumping response: {response}"
- )
+ print(f'An error has occurred searching criminalip dumping response: {response}')
else:
- scan_id = response["data"]["scan_id"]
+ scan_id = response['data']['scan_id']
scan_percentage = 0
counter = 0
while scan_percentage != 100:
- status_url = f"https://api.criminalip.io/v1/domain/status/{scan_id}"
+ status_url = f'https://api.criminalip.io/v1/domain/status/{scan_id}'
status_response = await AsyncFetcher.fetch_all(
[status_url],
json=True,
- headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+ headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
status = status_response[0]
# print(f'Status response: {status}')
# Expected format:
# {"data": {"scan_percentage": 100}, "message": "api success", "status": 200}
- scan_percentage = status["data"]["scan_percentage"]
+ scan_percentage = status['data']['scan_percentage']
if scan_percentage == 100:
break
if scan_percentage == -2:
- print(
- f"CriminalIP failed to scan: {self.word} does not exist, verify manually"
- )
- print(
- f"Dumping data: scan_response: {response} status_response: {status}"
- )
+ print(f'CriminalIP failed to scan: {self.word} does not exist, verify manually')
+ print(f'Dumping data: scan_response: {response} status_response: {status}')
return
if scan_percentage == -1:
- print(
- f"CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}"
- )
+ print(f'CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}')
return
# Wait for scan to finish
if counter >= 5:
@@ -80,18 +72,18 @@ async def do_search(self) -> None:
counter += 1
if counter == 10:
print(
- "Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop."
+ 'Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop.'
)
print(
- f"Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}"
+ f'Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}'
)
return
- report_url = f"https://api.criminalip.io/v1/domain/report/{scan_id}"
+ report_url = f'https://api.criminalip.io/v1/domain/report/{scan_id}'
scan_response = await AsyncFetcher.fetch_all(
[report_url],
json=True,
- headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+ headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
scan = scan_response[0]
@@ -100,125 +92,113 @@ async def do_search(self) -> None:
try:
await self.parser(scan)
except Exception as e:
- print(f"An exception occurred while parsing criminalip result: {e}")
- print("Dumping json: ")
+ print(f'An exception occurred while parsing criminalip result: {e}')
+ print('Dumping json: ')
print(scan)
async def parser(self, jlines):
# TODO when new scope field is added to parse lines for potential new scope!
# TODO map as_name to asn for asn data
# TODO determine if worth storing interesting urls
- if "data" not in jlines.keys():
- print(f"Error with criminalip data, dumping: {jlines}")
+ if 'data' not in jlines.keys():
+ print(f'Error with criminalip data, dumping: {jlines}')
return
- data = jlines["data"]
- for cert in data["certificates"]:
+ data = jlines['data']
+ for cert in data['certificates']:
# print(f'Current cert: {cert}')
- if cert["subject"].endswith("." + self.word):
- self.totalhosts.add(cert["subject"])
+ if cert['subject'].endswith('.' + self.word):
+ self.totalhosts.add(cert['subject'])
- for connected_domain in data["connected_domain_subdomain"]:
+ for connected_domain in data['connected_domain_subdomain']:
try:
- main_domain = connected_domain["main_domain"]["domain"]
- subdomains = [sub["domain"] for sub in connected_domain["subdomains"]]
- if main_domain.endswith("." + self.word):
+ main_domain = connected_domain['main_domain']['domain']
+ subdomains = [sub['domain'] for sub in connected_domain['subdomains']]
+ if main_domain.endswith('.' + self.word):
self.totalhosts.add(main_domain)
for sub in subdomains:
# print(f'Current sub: {sub}')
- if sub.endswith("." + self.word):
+ if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
except Exception as e:
- print(f"An exception has occurred: {e}")
- print(f"Main line: {connected_domain}")
+ print(f'An exception has occurred: {e}')
+ print(f'Main line: {connected_domain}')
- for ip_info in data["connected_ip_info"]:
- self.asns.add(str(ip_info["asn"]))
- domains = [sub["domain"] for sub in ip_info["domain_list"]]
+ for ip_info in data['connected_ip_info']:
+ self.asns.add(str(ip_info['asn']))
+ domains = [sub['domain'] for sub in ip_info['domain_list']]
for sub in domains:
- if sub.endswith("." + self.word):
+ if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
- self.totalips.add(ip_info["ip"])
+ self.totalips.add(ip_info['ip'])
- for cookie in data["cookies"]:
- if cookie["domain"] != "." + self.word and cookie["domain"].endswith(
- "." + self.word
- ):
- self.totalhosts.add(cookie["domain"])
+ for cookie in data['cookies']:
+ if cookie['domain'] != '.' + self.word and cookie['domain'].endswith('.' + self.word):
+ self.totalhosts.add(cookie['domain'])
- for country in data["country"]:
- if country["domain"].endswith("." + self.word):
- self.totalhosts.add(country["domain"])
- for ip in country["mapped_ips"]:
- self.totalips.add(ip["ip"])
+ for country in data['country']:
+ if country['domain'].endswith('.' + self.word):
+ self.totalhosts.add(country['domain'])
+ for ip in country['mapped_ips']:
+ self.totalips.add(ip['ip'])
- for k, v in data["dns_record"].items():
- if k == "dns_record_type_a":
- for ip in data["dns_record"][k]["ipv4"]:
- self.totalips.add(ip["ip"])
+ for k, v in data['dns_record'].items():
+ if k == 'dns_record_type_a':
+ for ip in data['dns_record'][k]['ipv4']:
+ self.totalips.add(ip['ip'])
else:
if isinstance(v, list):
for item in v:
if isinstance(item, list):
for subitem in item:
- if subitem.endswith("." + self.word):
+ if subitem.endswith('.' + self.word):
self.totalhosts.add(subitem)
else:
- if item.endswith("." + self.word):
+ if item.endswith('.' + self.word):
self.totalhosts.add(item)
- for domain_list in data["domain_list"]:
- self.asns.add(str(domain_list["asn"]))
- domains = [sub["domain"] for sub in domain_list["domain_list"]]
+ for domain_list in data['domain_list']:
+ self.asns.add(str(domain_list['asn']))
+ domains = [sub['domain'] for sub in domain_list['domain_list']]
for sub in domains:
- if sub.endswith("." + self.word):
+ if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
- self.totalips.add(domain_list["ip"])
+ self.totalips.add(domain_list['ip'])
- for html_page_links in data["html_page_link_domains"]:
- domain = html_page_links["domain"]
- if domain.endswith("." + self.word):
+ for html_page_links in data['html_page_link_domains']:
+ domain = html_page_links['domain']
+ if domain.endswith('.' + self.word):
self.totalhosts.add(domain)
- for ip in html_page_links["mapped_ips"]:
- self.totalips.add(ip["ip"])
+ for ip in html_page_links['mapped_ips']:
+ self.totalips.add(ip['ip'])
# TODO combine data['links'] and data['network_logs'] urls into one list for one run through
- for link in data["links"]:
- url = link["url"]
+ for link in data['links']:
+ url = link['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
- if (
- ":" in netloc and netloc.split(":")[0].endswith(self.word)
- ) or netloc.endswith(self.word):
+ if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
- for log in data["network_logs"]:
- url = log["url"]
+ for log in data['network_logs']:
+ url = log['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
- if (
- ":" in netloc and netloc.split(":")[0].endswith(self.word)
- ) or netloc.endswith(self.word):
+ if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
- self.asns.add(str(log["as_number"]))
+ self.asns.add(str(log['as_number']))
- for redirects in data["page_redirections"]:
+ for redirects in data['page_redirections']:
for redirect in redirects:
- url = redirect["url"]
+ url = redirect['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
- if (
- ":" in netloc and netloc.split(":")[0].endswith(self.word)
- ) or netloc.endswith(self.word):
+ if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
- self.totalhosts = {
- host.replace("www.", "")
- for host in self.totalhosts
- if "*." + self.word != host
- }
+ self.totalhosts = {host.replace('www.', '') for host in self.totalhosts if '*.' + self.word != host}
# print(f'hostnames: {self.totalhosts}')
# print(f'asns: {self.asns}')
diff --git a/theHarvester/discovery/crtsh.py b/theHarvester/discovery/crtsh.py
index 9d03630e..3ba662f5 100644
--- a/theHarvester/discovery/crtsh.py
+++ b/theHarvester/discovery/crtsh.py
@@ -10,24 +10,11 @@ def __init__(self, word) -> None:
async def do_search(self) -> list:
data: set = set()
try:
- url = f"https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json"
+ url = f'https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
response = response[0]
- data = set(
- [
- (
- dct["name_value"][2:]
- if "*." == dct["name_value"][:2]
- else dct["name_value"]
- )
- for dct in response
- ]
- )
- data = {
- domain
- for domain in data
- if (domain[0] != "*" and str(domain[0:4]).isnumeric() is False)
- }
+ data = set([(dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value']) for dct in response])
+ data = {domain for domain in data if (domain[0] != '*' and str(domain[0:4]).isnumeric() is False)}
except Exception as e:
print(e)
clean: list = []
diff --git a/theHarvester/discovery/dnsdumpster.py b/theHarvester/discovery/dnsdumpster.py
index 3e8c4c11..effa606e 100644
--- a/theHarvester/discovery/dnsdumpster.py
+++ b/theHarvester/discovery/dnsdumpster.py
@@ -8,51 +8,49 @@
class SearchDnsDumpster:
def __init__(self, word) -> None:
- self.word = word.replace(" ", "%20")
- self.results = ""
- self.totalresults = ""
- self.server = "dnsdumpster.com"
+ self.word = word.replace(' ', '%20')
+ self.results = ''
+ self.totalresults = ''
+ self.server = 'dnsdumpster.com'
self.proxy = False
async def do_search(self) -> None:
try:
agent = Core.get_user_agent()
- headers = {"User-Agent": agent}
+ headers = {'User-Agent': agent}
session = aiohttp.ClientSession(headers=headers)
# create a session to properly verify
- url = f"https://{self.server}"
- csrftoken = ""
+ url = f'https://{self.server}'
+ csrftoken = ''
if self.proxy is False:
async with session.get(url, headers=headers) as resp:
resp_cookies = str(resp.cookies)
- cookies = resp_cookies.split("csrftoken=")
- csrftoken += cookies[1][: cookies[1].find(";")]
+ cookies = resp_cookies.split('csrftoken=')
+ csrftoken += cookies[1][: cookies[1].find(';')]
else:
async with session.get(url, headers=headers, proxy=self.proxy) as resp:
resp_cookies = str(resp.cookies)
- cookies = resp_cookies.split("csrftoken=")
- csrftoken += cookies[1][: cookies[1].find(";")]
+ cookies = resp_cookies.split('csrftoken=')
+ csrftoken += cookies[1][: cookies[1].find(';')]
await asyncio.sleep(5)
# extract csrftoken from cookies
data = {
- "Cookie": f"csfrtoken={csrftoken}",
- "csrfmiddlewaretoken": csrftoken,
- "targetip": self.word,
- "user": "free",
+ 'Cookie': f'csfrtoken={csrftoken}',
+ 'csrfmiddlewaretoken': csrftoken,
+ 'targetip': self.word,
+ 'user': 'free',
}
- headers["Referer"] = url
+ headers['Referer'] = url
if self.proxy is False:
async with session.post(url, headers=headers, data=data) as resp:
self.results = await resp.text()
else:
- async with session.post(
- url, headers=headers, data=data, proxy=self.proxy
- ) as resp:
+ async with session.post(url, headers=headers, data=data, proxy=self.proxy) as resp:
self.results = await resp.text()
await session.close()
except Exception as e:
- print(f"An exception occurred: {e}")
+ print(f'An exception occurred: {e}')
self.totalresults += self.results
async def get_hostnames(self):
diff --git a/theHarvester/discovery/dnssearch.py b/theHarvester/discovery/dnssearch.py
index 263d19c5..c7ce3e8f 100644
--- a/theHarvester/discovery/dnssearch.py
+++ b/theHarvester/discovery/dnssearch.py
@@ -21,7 +21,7 @@
# DNS FORCE
#####################################################################
-DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"
+DNS_NAMES = DATA_DIR / 'wordlists' / 'dns-names.txt'
class DnsForce:
@@ -32,13 +32,13 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
# self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
# self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
self.dnsserver = dnsserver
- with DNS_NAMES.open("r") as file:
+ with DNS_NAMES.open('r') as file:
self.list = file.readlines()
- self.domain = domain.replace("www.", "")
- self.list = [f"{word.strip()}.{self.domain}" for word in self.list]
+ self.domain = domain.replace('www.', '')
+ self.list = [f'{word.strip()}.{self.domain}' for word in self.list]
async def run(self):
- print(f"Starting DNS brute forcing with {len(self.list)} words")
+ print(f'Starting DNS brute forcing with {len(self.list)} words')
checker = hostchecker.Checker(self.list, nameserver=self.dnsserver)
resolved_pair, hosts, ips = await checker.check()
return resolved_pair, hosts, ips
@@ -49,13 +49,13 @@ async def run(self):
#####################################################################
-IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
-PORT_REGEX = r"\d{1,5}"
-NETMASK_REGEX: str = r"\d{1,2}|" + IP_REGEX
-NETWORK_REGEX: str = rf"\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b"
+IP_REGEX = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
+PORT_REGEX = r'\d{1,5}'
+NETMASK_REGEX: str = r'\d{1,2}|' + IP_REGEX
+NETWORK_REGEX: str = rf'\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b'
-def serialize_ip_range(ip: str, netmask: str = "24") -> str:
+def serialize_ip_range(ip: str, netmask: str = '24') -> str:
"""
Serialize a network range in a constant format, 'x.x.x.x/y'.
@@ -78,12 +78,12 @@ def serialize_ip_range(ip: str, netmask: str = "24") -> str:
__ip = __ip_matches.group(1)
__netmask = netmask if netmask else __ip_matches.group(3)
if __ip and __netmask:
- return str(IPv4Network(f"{__ip}/{__netmask}", strict=False))
+ return str(IPv4Network(f'{__ip}/{__netmask}', strict=False))
elif __ip:
- return str(IPv4Network("{}/{}".format(__ip, "24"), strict=False))
+ return str(IPv4Network('{}/{}'.format(__ip, '24'), strict=False))
# invalid input ip
- return ""
+ return ''
def list_ips_in_network_range(iprange: str) -> list[str]:
@@ -122,14 +122,12 @@ async def reverse_single_ip(ip: str, resolver: DNSResolver) -> str:
"""
try:
__host = await resolver.gethostbyaddr(ip)
- return __host.name if __host else ""
+ return __host.name if __host else ''
except Exception:
- return ""
+ return ''
-async def reverse_all_ips_in_range(
- iprange: str, callback: Callable, nameservers: list[str] | None = None
-) -> None:
+async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers: list[str] | None = None) -> None:
"""
Reverse all the IPs stored in a network range.
All the queries are made concurrently.
@@ -176,8 +174,8 @@ def log_query(ip: str) -> None:
-------
out: None.
"""
- sys.stdout.write(chr(27) + "[2K" + chr(27) + "[G")
- sys.stdout.write("\r" + ip + " - ")
+ sys.stdout.write(chr(27) + '[2K' + chr(27) + '[G')
+ sys.stdout.write('\r' + ip + ' - ')
sys.stdout.flush()
diff --git a/theHarvester/discovery/duckduckgosearch.py b/theHarvester/discovery/duckduckgosearch.py
index 196ecc96..5ed4512f 100644
--- a/theHarvester/discovery/duckduckgosearch.py
+++ b/theHarvester/discovery/duckduckgosearch.py
@@ -7,29 +7,27 @@
class SearchDuckDuckGo:
def __init__(self, word, limit) -> None:
self.word = word
- self.results = ""
- self.totalresults = ""
+ self.results = ''
+ self.totalresults = ''
self.dorks: list = []
self.links: list = []
- self.database = "https://duckduckgo.com/?q="
- self.api = "https://api.duckduckgo.com/?q=x&format=json&pretty=1" # Currently using API.
- self.quantity = "100"
+ self.database = 'https://duckduckgo.com/?q='
+ self.api = 'https://api.duckduckgo.com/?q=x&format=json&pretty=1' # Currently using API.
+ self.quantity = '100'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
# Do normal scraping.
- url = self.api.replace("x", self.word)
- headers = {"User-Agent": Core.get_user_agent()}
- first_resp = await AsyncFetcher.fetch_all(
- [url], headers=headers, proxy=self.proxy
- )
+ url = self.api.replace('x', self.word)
+ headers = {'User-Agent': Core.get_user_agent()}
+ first_resp = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
self.results = first_resp[0]
self.totalresults += self.results
urls = await self.crawl(self.results)
urls = {url for url in urls if len(url) > 5}
all_resps = await AsyncFetcher.fetch_all(urls)
- self.totalresults += "".join(all_resps)
+ self.totalresults += ''.join(all_resps)
async def crawl(self, text):
"""
@@ -54,39 +52,27 @@ async def crawl(self, text):
if isinstance(val, dict): # Validation check.
for key in val.keys():
value = val.get(key)
- if (
- isinstance(value, str)
- and value != ""
- and "https://" in value
- or "http://" in value
- ):
+ if isinstance(value, str) and value != '' and 'https://' in value or 'http://' in value:
urls.add(value)
- if (
- isinstance(val, str)
- and val != ""
- and "https://" in val
- or "http://" in val
- ):
+ if isinstance(val, str) and val != '' and 'https://' in val or 'http://' in val:
urls.add(val)
tmp = set()
for url in urls:
- if (
- "<" in url and "href=" in url
- ): # Format is <a href="...">
- equal_index = url.index("=")
- true_url = ""
+ if '<' in url and 'href=' in url: # Format is <a href="...">
+ equal_index = url.index('=')
+ true_url = ''
for ch in url[equal_index + 1 :]:
if ch == '"':
tmp.add(true_url)
break
true_url += ch
else:
- if url != "":
+ if url != '':
tmp.add(url)
return tmp
except Exception as e:
- print(f"Exception occurred: {e}")
+ print(f'Exception occurred: {e}')
return []
async def get_emails(self):
diff --git a/theHarvester/discovery/fullhuntsearch.py b/theHarvester/discovery/fullhuntsearch.py
index de7b7daf..1eee55f8 100644
--- a/theHarvester/discovery/fullhuntsearch.py
+++ b/theHarvester/discovery/fullhuntsearch.py
@@ -7,19 +7,19 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.fullhunt_key()
if self.key is None:
- raise MissingKey("fullhunt")
+ raise MissingKey('fullhunt')
self.total_results = None
self.proxy = False
async def do_search(self) -> None:
- url = f"https://fullhunt.io/api/v1/domain/{self.word}/subdomains"
+ url = f'https://fullhunt.io/api/v1/domain/{self.word}/subdomains'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
- headers={"User-Agent": Core.get_user_agent(), "X-API-KEY": self.key},
+ headers={'User-Agent': Core.get_user_agent(), 'X-API-KEY': self.key},
proxy=self.proxy,
)
- self.total_results = response[0]["hosts"]
+ self.total_results = response[0]['hosts']
async def get_hostnames(self):
return self.total_results
diff --git a/theHarvester/discovery/githubcode.py b/theHarvester/discovery/githubcode.py
index 0cf469ab..64aa31aa 100644
--- a/theHarvester/discovery/githubcode.py
+++ b/theHarvester/discovery/githubcode.py
@@ -28,8 +28,8 @@ class ErrorResult(NamedTuple):
class SearchGithubCode:
def __init__(self, word, limit) -> None:
self.word = word
- self.total_results = ""
- self.server = "api.github.com"
+ self.total_results = ''
+ self.server = 'api.github.com'
self.limit = limit
self.counter: int = 0
self.page: int | None = 1
@@ -38,17 +38,17 @@ def __init__(self, word, limit) -> None:
# rate limits you more severely
# https://developer.github.com/v3/search/#rate-limit
if self.key is None:
- raise MissingKey("Github")
+ raise MissingKey('Github')
self.proxy = False
@staticmethod
async def fragments_from_response(json_data: dict) -> list[str]:
- items: list[dict[str, Any]] = json_data.get("items") or list()
+ items: list[dict[str, Any]] = json_data.get('items') or list()
fragments: list[str] = list()
for item in items:
- matches = item.get("text_matches") or list()
+ matches = item.get('text_matches') or list()
for match in matches:
- fragments.append(match.get("fragment"))
+ fragments.append(match.get('fragment'))
return [fragment for fragment in fragments if fragment is not None]
@@ -56,22 +56,20 @@ async def fragments_from_response(json_data: dict) -> list[str]:
async def page_from_response(page: str, links) -> int | None:
page_link = links.get(page)
if page_link:
- parsed = urlparse.urlparse(str(page_link.get("url")))
+ parsed = urlparse.urlparse(str(page_link.get('url')))
params = urlparse.parse_qs(parsed.query)
- pages: list[Any] = params.get("page", [None])
+ pages: list[Any] = params.get('page', [None])
page_number = pages[0] and int(pages[0])
return page_number
else:
return None
- async def handle_response(
- self, response: tuple[str, dict, int, Any]
- ) -> ErrorResult | RetryResult | SuccessResult:
+ async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
text, json_data, status, links = response
if status == 200:
results = await self.fragments_from_response(json_data)
- next_page = await self.page_from_response("next", links)
- last_page = await self.page_from_response("last", links)
+ next_page = await self.page_from_response('next', links)
+ last_page = await self.page_from_response('last', links)
return SuccessResult(results, next_page, last_page)
elif status == 429 or status == 403:
return RetryResult(60)
@@ -87,17 +85,15 @@ async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
else:
url = f'https://{self.server}/search/code?q="{self.word}"&page={page}'
headers = {
- "Host": self.server,
- "User-agent": Core.get_user_agent(),
- "Accept": "application/vnd.github.v3.text-match+json",
- "Authorization": f"token {self.key}",
+ 'Host': self.server,
+ 'User-agent': Core.get_user_agent(),
+ 'Accept': 'application/vnd.github.v3.text-match+json',
+ 'Authorization': f'token {self.key}',
}
async with aiohttp.ClientSession(headers=headers) as sess:
if self.proxy:
- async with sess.get(
- url, proxy=random.choice(Core.proxy_list())
- ) as resp:
+ async with sess.get(url, proxy=random.choice(Core.proxy_list())) as resp:
return await resp.text(), await resp.json(), resp.status, resp.links
else:
async with sess.get(url) as resp:
@@ -117,7 +113,7 @@ async def process(self, proxy: bool = False) -> None:
api_response = await self.do_search(self.page)
result = await self.handle_response(api_response)
if isinstance(result, SuccessResult):
- print(f"\tSearching {self.counter} results.")
+ print(f'\tSearching {self.counter} results.')
for fragment in result.fragments:
self.total_results += fragment
self.counter = self.counter + 1
@@ -125,16 +121,14 @@ async def process(self, proxy: bool = False) -> None:
await asyncio.sleep(get_delay())
elif isinstance(result, RetryResult):
sleepy_time = get_delay() + result.time
- print(f"\tRetrying page in {sleepy_time} seconds...")
+ print(f'\tRetrying page in {sleepy_time} seconds...')
await asyncio.sleep(sleepy_time)
elif isinstance(result, ErrorResult):
- raise Exception(
- f"\tException occurred: status_code: {result.status_code} reason: {result.body}"
- )
+ raise Exception(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
else:
- raise Exception("\tUnknown exception occurred")
+ raise Exception('\tUnknown exception occurred')
except Exception as e:
- print(f"An exception has occurred: {e}")
+ print(f'An exception has occurred: {e}')
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
diff --git a/theHarvester/discovery/hackertarget.py b/theHarvester/discovery/hackertarget.py
index 4980ee5c..b4403928 100644
--- a/theHarvester/discovery/hackertarget.py
+++ b/theHarvester/discovery/hackertarget.py
@@ -8,30 +8,24 @@ class SearchHackerTarget:
def __init__(self, word) -> None:
self.word = word
- self.total_results = ""
- self.hostname = "https://api.hackertarget.com"
+ self.total_results = ''
+ self.hostname = 'https://api.hackertarget.com'
self.proxy = False
self.results = None
async def do_search(self) -> None:
- headers = {"User-agent": Core.get_user_agent()}
+ headers = {'User-agent': Core.get_user_agent()}
urls = [
- f"{self.hostname}/hostsearch/?q={self.word}",
- f"{self.hostname}/reversedns/?q={self.word}",
+ f'{self.hostname}/hostsearch/?q={self.word}',
+ f'{self.hostname}/reversedns/?q={self.word}',
]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
- self.total_results += response.replace(",", ":")
+ self.total_results += response.replace(',', ':')
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()
async def get_hostnames(self) -> list:
- return [
- result
- for result in self.total_results.splitlines()
- if "No PTR records found" not in result
- ]
+ return [result for result in self.total_results.splitlines() if 'No PTR records found' not in result]
diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py
index d7554ce6..575798bb 100644
--- a/theHarvester/discovery/huntersearch.py
+++ b/theHarvester/discovery/huntersearch.py
@@ -12,10 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start
self.key = Core.hunter_key()
if self.key is None:
- raise MissingKey("Hunter")
- self.total_results = ""
+ raise MissingKey('Hunter')
+ self.total_results = ''
self.counter = start
- self.database = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10"
+ self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10'
self.proxy = False
self.hostnames: list = []
self.emails: list = []
@@ -23,76 +23,56 @@ def __init__(self, word, limit, start) -> None:
async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free
is_free = True
- headers = {"User-Agent": Core.get_user_agent()}
- acc_info_url = f"https://api.hunter.io/v2/account?api_key={self.key}"
- response = await AsyncFetcher.fetch_all(
- [acc_info_url], headers=headers, json=True
- )
+ headers = {'User-Agent': Core.get_user_agent()}
+ acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}'
+ response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
is_free = (
- is_free
- if "plan_name" in response[0]["data"].keys()
- and response[0]["data"]["plan_name"].lower() == "free"
- else False
+ is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() == 'free' else False
)
# Extract the total number of requests that are available for an account
total_requests_avail = (
- response[0]["data"]["requests"]["searches"]["available"]
- - response[0]["data"]["requests"]["searches"]["used"]
+ response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used']
)
if is_free:
- response = await AsyncFetcher.fetch_all(
- [self.database], headers=headers, proxy=self.proxy, json=True
- )
+ response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else:
# Determine the total number of emails that are available
# As the most emails you can get within one query are 100
# This is only done where paid accounts are in play
- hunter_dinfo_url = (
- f"https://api.hunter.io/v2/email-count?domain={self.word}"
- )
- response = await AsyncFetcher.fetch_all(
- [hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True
- )
- total_number_reqs = response[0]["data"]["total"] // 100
+ hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}'
+ response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True)
+ total_number_reqs = response[0]['data']['total'] // 100
# Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs:
+ print('WARNING: account does not have enough requests to gather all emails')
print(
- "WARNING: account does not have enough requests to gather all emails"
- )
- print(
- f"Total requests available: {total_requests_avail}, total requests "
- f"needed to be made: {total_number_reqs}"
- )
- print(
- "RETURNING current results, if you would still like to "
- "run this module comment out the if request"
+ f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
)
+ print('RETURNING current results, if you would still like to ' 'run this module comment out the if request')
return
self.limit = 100
# max number of emails you can get per request is 100
# increments of 100 with offset determining where to start
# See docs for more details: https://hunter.io/api-documentation/v2#domain-search
for offset in range(0, 100 * total_number_reqs, 100):
- req_url = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}"
- response = await AsyncFetcher.fetch_all(
- [req_url], headers=headers, proxy=self.proxy, json=True
- )
+ req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}'
+ response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1)
async def parse_resp(self, json_resp):
- emails = list(sorted({email["value"] for email in json_resp["data"]["emails"]}))
+ emails = list(sorted({email['value'] for email in json_resp['data']['emails']}))
domains = list(
sorted(
{
- source["domain"]
- for email in json_resp["data"]["emails"]
- for source in email["sources"]
- if self.word in source["domain"]
+ source['domain']
+ for email in json_resp['data']['emails']
+ for source in email['sources']
+ if self.word in source['domain']
}
)
)
diff --git a/theHarvester/discovery/intelxsearch.py b/theHarvester/discovery/intelxsearch.py
index f2f382bc..272c26a7 100644
--- a/theHarvester/discovery/intelxsearch.py
+++ b/theHarvester/discovery/intelxsearch.py
@@ -14,8 +14,8 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.intelx_key()
if self.key is None:
- raise MissingKey("Intelx")
- self.database = "https://2.intelx.io"
+ raise MissingKey('Intelx')
+ self.database = 'https://2.intelx.io'
self.results: Any = None
self.info: tuple[Any, ...] = ()
self.limit: int = 10000
@@ -28,34 +28,30 @@ async def do_search(self) -> None:
# API requests self identification
# https://intelx.io/integrations
headers = {
- "x-key": self.key,
- "User-Agent": f"{Core.get_user_agent()}-theHarvester",
+ 'x-key': self.key,
+ 'User-Agent': f'{Core.get_user_agent()}-theHarvester',
}
data = {
- "term": self.word,
- "buckets": [],
- "lookuplevel": 0,
- "maxresults": self.limit,
- "timeout": 5,
- "datefrom": "",
- "dateto": "",
- "sort": 2,
- "media": 0,
- "terminate": [],
- "target": 0,
+ 'term': self.word,
+ 'buckets': [],
+ 'lookuplevel': 0,
+ 'maxresults': self.limit,
+ 'timeout': 5,
+ 'datefrom': '',
+ 'dateto': '',
+ 'sort': 2,
+ 'media': 0,
+ 'terminate': [],
+ 'target': 0,
}
- total_resp = requests.post(
- f"{self.database}/phonebook/search", headers=headers, json=data
- )
- phonebook_id = ujson.loads(total_resp.text)["id"]
+ total_resp = requests.post(f'{self.database}/phonebook/search', headers=headers, json=data)
+ phonebook_id = ujson.loads(total_resp.text)['id']
await asyncio.sleep(5)
# Fetch results from phonebook based on ID
resp = await AsyncFetcher.fetch_all(
- [
- f"{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}"
- ],
+ [f'{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}'],
headers=headers,
json=True,
proxy=self.proxy,
@@ -63,7 +59,7 @@ async def do_search(self) -> None:
resp = resp[0]
self.results = resp # TODO: give self.results more appropriate typing
except Exception as e:
- print(f"An exception has occurred in Intelx: {e}")
+ print(f'An exception has occurred in Intelx: {e}')
async def process(self, proxy: bool = False):
self.proxy = proxy
diff --git a/theHarvester/discovery/netlas.py b/theHarvester/discovery/netlas.py
index b5151aec..05d3798e 100644
--- a/theHarvester/discovery/netlas.py
+++ b/theHarvester/discovery/netlas.py
@@ -9,17 +9,15 @@ def __init__(self, word) -> None:
self.totalips: list = []
self.key = Core.netlas_key()
if self.key is None:
- raise MissingKey("netlas")
+ raise MissingKey('netlas')
self.proxy = False
async def do_search(self) -> None:
- api = f"https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*"
- headers = {"X-API-Key": self.key}
- response = await AsyncFetcher.fetch_all(
- [api], json=True, headers=headers, proxy=self.proxy
- )
- for domain in response[0]["items"]:
- self.totalhosts.append(domain["data"]["domain"])
+ api = f'https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*'
+ headers = {'X-API-Key': self.key}
+ response = await AsyncFetcher.fetch_all([api], json=True, headers=headers, proxy=self.proxy)
+ for domain in response[0]['items']:
+ self.totalhosts.append(domain['data']['domain'])
async def get_hostnames(self) -> list:
return self.totalhosts
diff --git a/theHarvester/discovery/onyphe.py b/theHarvester/discovery/onyphe.py
index 7f0cef86..e807a687 100644
--- a/theHarvester/discovery/onyphe.py
+++ b/theHarvester/discovery/onyphe.py
@@ -9,28 +9,26 @@
class SearchOnyphe:
def __init__(self, word) -> None:
self.word = word
- self.response = ""
+ self.response = ''
self.totalhosts: set = set()
self.totalips: set = set()
self.asns: set = set()
self.key = Core.onyphe_key()
if self.key is None:
- raise MissingKey("onyphe")
+ raise MissingKey('onyphe')
self.proxy = False
async def do_search(self) -> None:
# https://www.onyphe.io/docs/apis/search
# https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT
# base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}'
- base_url = f"https://www.onyphe.io/api/v2/search/?q=domain:{self.word}"
+ base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:{self.word}'
headers = {
- "User-Agent": Core.get_user_agent(),
- "Content-Type": "application/json",
- "Authorization": f"bearer {self.key}",
+ 'User-Agent': Core.get_user_agent(),
+ 'Content-Type': 'application/json',
+ 'Authorization': f'bearer {self.key}',
}
- response = await AsyncFetcher.fetch_all(
- [base_url], json=True, headers=headers, proxy=self.proxy
- )
+ response = await AsyncFetcher.fetch_all([base_url], json=True, headers=headers, proxy=self.proxy)
self.response = response[0]
await self.parse_onyphe_resp_json()
@@ -38,74 +36,52 @@ async def parse_onyphe_resp_json(self):
if isinstance(self.response, list):
self.response = self.response[0]
if not isinstance(self.response, dict):
- raise Exception(f"An exception has occurred {self.response} is not a dict")
- if "Success" == self.response["text"]:
- if "results" in self.response.keys():
- for result in self.response["results"]:
+ raise Exception(f'An exception has occurred {self.response} is not a dict')
+ if 'Success' == self.response['text']:
+ if 'results' in self.response.keys():
+ for result in self.response['results']:
try:
- if "alternativeip" in result.keys():
- self.totalips.update(
- {altip for altip in result["alternativeip"]}
- )
- if "url" in result.keys() and isinstance(result["url"], list):
+ if 'alternativeip' in result.keys():
+ self.totalips.update({altip for altip in result['alternativeip']})
+ if 'url' in result.keys() and isinstance(result['url'], list):
self.totalhosts.update(
- urlparse(url).netloc
- for url in result["url"]
- if urlparse(url).netloc.endswith(self.word)
+ urlparse(url).netloc for url in result['url'] if urlparse(url).netloc.endswith(self.word)
)
- self.asns.add(result["asn"])
- self.asns.add(result["geolocus"]["asn"])
- self.totalips.add(result["geolocus"]["subnet"])
- self.totalips.add(result["ip"])
- self.totalips.add(result["subnet"])
+ self.asns.add(result['asn'])
+ self.asns.add(result['geolocus']['asn'])
+ self.totalips.add(result['geolocus']['subnet'])
+ self.totalips.add(result['ip'])
+ self.totalips.add(result['subnet'])
# Shouldn't be needed as API autoparses urls from html raw data
# rawres = myparser.Parser(result['data'], self.word)
# if await rawres.hostnames():
# self.totalhosts.update(set(await rawres.hostnames()))
for subdomain_key in [
- "domain",
- "hostname",
- "subdomains",
- "subject",
- "reverse",
- "geolocus",
+ 'domain',
+ 'hostname',
+ 'subdomains',
+ 'subject',
+ 'reverse',
+ 'geolocus',
]:
if subdomain_key in result.keys():
- if subdomain_key == "subject":
+ if subdomain_key == 'subject':
self.totalhosts.update(
- {
- domain
- for domain in result[subdomain_key][
- "altname"
- ]
- if domain.endswith(self.word)
- }
+ {domain for domain in result[subdomain_key]['altname'] if domain.endswith(self.word)}
)
- elif subdomain_key == "geolocus":
+ elif subdomain_key == 'geolocus':
self.totalhosts.update(
- {
- domain
- for domain in result[subdomain_key][
- "domain"
- ]
- if domain.endswith(self.word)
- }
+ {domain for domain in result[subdomain_key]['domain'] if domain.endswith(self.word)}
)
else:
self.totalhosts.update(
- {
- domain
- for domain in result[subdomain_key]
- if domain.endswith(self.word)
- }
+ {domain for domain in result[subdomain_key] if domain.endswith(self.word)}
)
except Exception as e:
- print(f"An exception has occurred on result: {result}: {e}")
+ print(f'An exception has occurred on result: {result}: {e}')
continue
else:
- print(
- f"Onhyphe API query did not succeed dumping current response: {self.response}"
- )
+ print(f'Onhyphe API query did not succeed dumping current response: {self.response}')
async def get_asns(self) -> set:
return self.asns
diff --git a/theHarvester/discovery/otxsearch.py b/theHarvester/discovery/otxsearch.py
index f87a1bc3..afbd01e5 100644
--- a/theHarvester/discovery/otxsearch.py
+++ b/theHarvester/discovery/otxsearch.py
@@ -11,16 +11,14 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
- url = f"https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns"
+ url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
responses = response[0]
dct = responses
- self.totalhosts = {host["hostname"] for host in dct["passive_dns"]}
+ self.totalhosts = {host['hostname'] for host in dct['passive_dns']}
# filter out ips that are just called NXDOMAIN
self.totalips = {
- ip["address"]
- for ip in dct["passive_dns"]
- if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip["address"])
+ ip['address'] for ip in dct['passive_dns'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip['address'])
}
async def get_hostnames(self) -> set:
diff --git a/theHarvester/discovery/pentesttools.py b/theHarvester/discovery/pentesttools.py
index d351df17..a39a6620 100644
--- a/theHarvester/discovery/pentesttools.py
+++ b/theHarvester/discovery/pentesttools.py
@@ -12,54 +12,41 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.pentest_tools_key()
if self.key is None:
- raise MissingKey("PentestTools")
+ raise MissingKey('PentestTools')
self.total_results: list = []
- self.api = f"https://pentest-tools.com/api?key={self.key}"
+ self.api = f'https://pentest-tools.com/api?key={self.key}'
self.proxy = False
async def poll(self, scan_id):
while True:
time.sleep(3)
# Get the status of our scan
- scan_status_data = {"op": "get_scan_status", "scan_id": scan_id}
- responses = await AsyncFetcher.post_fetch(
- url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy
- )
+ scan_status_data = {'op': 'get_scan_status', 'scan_id': scan_id}
+ responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy)
res_json = ujson.loads(responses.strip())
- if res_json["op_status"] == "success":
- if (
- res_json["scan_status"] != "waiting"
- and res_json["scan_status"] != "running"
- ):
+ if res_json['op_status'] == 'success':
+ if res_json['scan_status'] != 'waiting' and res_json['scan_status'] != 'running':
getoutput_data = {
- "op": "get_output",
- "scan_id": scan_id,
- "output_format": "json",
+ 'op': 'get_output',
+ 'scan_id': scan_id,
+ 'output_format': 'json',
}
- responses = await AsyncFetcher.post_fetch(
- url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy
- )
+ responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy)
- res_json = ujson.loads(responses.strip("\n"))
+ res_json = ujson.loads(responses.strip('\n'))
self.total_results = await self.parse_json(res_json)
break
else:
- print(
- f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}"
- )
+ print(f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}")
break
@staticmethod
async def parse_json(json_results):
- status = json_results["op_status"]
- if status == "success":
- scan_tests = json_results["scan_output"]["output_json"]
- output_data = scan_tests[0]["output_data"]
- host_to_ip = [
- f"{subdomain[0]}:{subdomain[1]}"
- for subdomain in output_data
- if len(subdomain) > 0
- ]
+ status = json_results['op_status']
+ if status == 'success':
+ scan_tests = json_results['scan_output']['output_json']
+ output_data = scan_tests[0]['output_data']
+ host_to_ip = [f'{subdomain[0]}:{subdomain[1]}' for subdomain in output_data if len(subdomain) > 0]
return host_to_ip
return []
@@ -68,20 +55,18 @@ async def get_hostnames(self) -> list:
async def do_search(self) -> None:
subdomain_payload = {
- "op": "start_scan",
- "tool_id": 20,
- "tool_params": {
- "target": f"{self.word}",
- "web_details": "off",
- "do_smart_search": "off",
+ 'op': 'start_scan',
+ 'tool_id': 20,
+ 'tool_params': {
+ 'target': f'{self.word}',
+ 'web_details': 'off',
+ 'do_smart_search': 'off',
},
}
- responses = await AsyncFetcher.post_fetch(
- url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy
- )
+ responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy)
res_json = ujson.loads(responses.strip())
- if res_json["op_status"] == "success":
- scan_id = res_json["scan_id"]
+ if res_json['op_status'] == 'success':
+ scan_id = res_json['scan_id']
await self.poll(scan_id)
async def process(self, proxy: bool = False) -> None:
diff --git a/theHarvester/discovery/projectdiscovery.py b/theHarvester/discovery/projectdiscovery.py
index 3f1bde9c..39963cea 100644
--- a/theHarvester/discovery/projectdiscovery.py
+++ b/theHarvester/discovery/projectdiscovery.py
@@ -7,21 +7,19 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.projectdiscovery_key()
if self.key is None:
- raise MissingKey("ProjectDiscovery")
+ raise MissingKey('ProjectDiscovery')
self.total_results = None
self.proxy = False
async def do_search(self):
- url = f"https://dns.projectdiscovery.io/dns/{self.word}/subdomains"
+ url = f'https://dns.projectdiscovery.io/dns/{self.word}/subdomains'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
- headers={"User-Agent": Core.get_user_agent(), "Authorization": self.key},
+ headers={'User-Agent': Core.get_user_agent(), 'Authorization': self.key},
proxy=self.proxy,
)
- self.total_results = [
- f"{domains}.{self.word}" for domains in response[0]["subdomains"]
- ]
+ self.total_results = [f'{domains}.{self.word}' for domains in response[0]['subdomains']]
async def get_hostnames(self):
return self.total_results
diff --git a/theHarvester/discovery/rapiddns.py b/theHarvester/discovery/rapiddns.py
index 01bcd13b..92fbaf9e 100644
--- a/theHarvester/discovery/rapiddns.py
+++ b/theHarvester/discovery/rapiddns.py
@@ -11,33 +11,29 @@ def __init__(self, word) -> None:
async def do_search(self):
try:
- headers = {"User-agent": Core.get_user_agent()}
+ headers = {'User-agent': Core.get_user_agent()}
# TODO see if it's worth adding sameip searches
# f'{self.hostname}/sameip/{self.word}?full=1#result'
- urls = [f"https://rapiddns.io/subdomain/{self.word}?full=1#result"]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ urls = [f'https://rapiddns.io/subdomain/{self.word}?full=1#result']
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
if len(responses[0]) <= 1:
return self.total_results
- soup = BeautifulSoup(responses[0], "html.parser")
- rows = soup.find("table").find("tbody").find_all("tr")
+ soup = BeautifulSoup(responses[0], 'html.parser')
+ rows = soup.find('table').find('tbody').find_all('tr')
if rows:
# Validation check
for row in rows:
- cells = row.find_all("td")
+ cells = row.find_all('td')
if len(cells) > 0:
# sanity check
subdomain = str(cells[0].get_text())
- if cells[-1].get_text() == "CNAME":
- self.total_results.append(f"{subdomain}")
+ if cells[-1].get_text() == 'CNAME':
+ self.total_results.append(f'{subdomain}')
else:
- self.total_results.append(
- f"{subdomain}:{str(cells[1].get_text()).strip()}"
- )
+ self.total_results.append(f'{subdomain}:{str(cells[1].get_text()).strip()}')
self.total_results = list({domain for domain in self.total_results})
except Exception as e:
- print(f"An exception has occurred: {str(e)}")
+ print(f'An exception has occurred: {str(e)}')
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
diff --git a/theHarvester/discovery/rocketreach.py b/theHarvester/discovery/rocketreach.py
index f41c2697..c5b7d233 100644
--- a/theHarvester/discovery/rocketreach.py
+++ b/theHarvester/discovery/rocketreach.py
@@ -10,59 +10,50 @@ def __init__(self, word, limit) -> None:
self.word = word
self.key = Core.rocketreach_key()
if self.key is None:
- raise MissingKey("RocketReach")
+ raise MissingKey('RocketReach')
self.hosts: set = set()
self.proxy = False
- self.baseurl = "https://rocketreach.co/api/v2/person/search"
+ self.baseurl = 'https://rocketreach.co/api/v2/person/search'
self.links: set = set()
self.limit = limit
async def do_search(self) -> None:
try:
headers = {
- "Api-Key": self.key,
- "Content-Type": "application/json",
- "User-Agent": Core.get_user_agent(),
+ 'Api-Key': self.key,
+ 'Content-Type': 'application/json',
+ 'User-Agent': Core.get_user_agent(),
}
next_page = 1 # track pagination
for count in range(1, self.limit):
data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}'
- result = await AsyncFetcher.post_fetch(
- self.baseurl, headers=headers, data=data, json=True
- )
- if (
- "detail" in result.keys()
- and "error" in result.keys()
- and "Subscribe to a plan to access" in result["detail"]
- ):
+ result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
+ if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']:
# No more results can be fetched
break
- if (
- "detail" in result.keys()
- and "Request was throttled." in result["detail"]
- ):
+ if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
# Rate limit has been triggered need to sleep extra
print(
f"RocketReach requests have been throttled; "
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}'
)
break
- if "profiles" in dict(result).keys():
- if len(result["profiles"]) == 0:
+ if 'profiles' in dict(result).keys():
+ if len(result['profiles']) == 0:
break
- for profile in result["profiles"]:
- if "linkedin_url" in dict(profile).keys():
- self.links.add(profile["linkedin_url"])
- if "pagination" in dict(result).keys():
- next_page = int(result["pagination"]["next"])
- if next_page > int(result["pagination"]["total"]):
+ for profile in result['profiles']:
+ if 'linkedin_url' in dict(profile).keys():
+ self.links.add(profile['linkedin_url'])
+ if 'pagination' in dict(result).keys():
+ next_page = int(result['pagination']['next'])
+ if next_page > int(result['pagination']['total']):
break
await asyncio.sleep(get_delay() + 5)
except Exception as e:
- print(f"An exception has occurred: {e}")
+ print(f'An exception has occurred: {e}')
async def get_links(self):
return self.links
diff --git a/theHarvester/discovery/searchhunterhow.py b/theHarvester/discovery/searchhunterhow.py
index ff7b1707..bb1b24ee 100644
--- a/theHarvester/discovery/searchhunterhow.py
+++ b/theHarvester/discovery/searchhunterhow.py
@@ -13,55 +13,52 @@ def __init__(self, word) -> None:
self.total_hostnames: set = set()
self.key = Core.hunterhow_key()
if self.key is None:
- raise MissingKey("hunterhow")
+ raise MissingKey('hunterhow')
self.proxy = False
async def do_search(self) -> None:
# https://hunter.how/search-api
query = f'domain.suffix="{self.word}"'
# second_query = f'domain="{self.word}"'
- encoded_query = base64.urlsafe_b64encode(query.encode("utf-8")).decode("ascii")
+ encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii')
page = 1
page_size = 100 # can be either: 10,20,50,100)
# The interval between the start time and the end time cannot exceed one year
# Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice
today = datetime.today()
one_year_ago = today - relativedelta(days=364)
- start_time = one_year_ago.strftime("%Y-%m-%d")
- end_time = today.strftime("%Y-%m-%d")
+ start_time = one_year_ago.strftime('%Y-%m-%d')
+ end_time = today.strftime('%Y-%m-%d')
# two_years_ago = one_year_ago - relativedelta(days=364)
# start_time = two_years_ago.strftime('%Y-%m-%d')
# end_time = one_year_ago.strftime('%Y-%m-%d')
- url = (
- "https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s"
- % (
- # self.key, encoded_query, page, page_size, start_time, end_time
- self.key,
- encoded_query,
- page,
- page_size,
- start_time,
- end_time,
- )
+ url = 'https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s' % (
+ # self.key, encoded_query, page, page_size, start_time, end_time
+ self.key,
+ encoded_query,
+ page,
+ page_size,
+ start_time,
+ end_time,
)
# print(f'Sending url: {url}')
response = await AsyncFetcher.fetch_all(
[url],
json=True,
- headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
+ headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
dct = response[0]
# print(f'json response: ')
# print(dct)
- if "code" in dct.keys():
- if dct["code"] == 40001:
+ if 'code' in dct.keys():
+ if dct['code'] == 40001:
print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}')
return
# total = dct['data']['total']
# TODO determine if total is ever 100 how to get more subdomains?
- for sub in dct["data"]["list"]:
- self.total_hostnames.add(sub["domain"])
+ for sub in dct['data']['list']:
+ self.total_hostnames.add(sub['domain'])
async def get_hostnames(self) -> set:
return self.total_hostnames
diff --git a/theHarvester/discovery/securitytrailssearch.py b/theHarvester/discovery/securitytrailssearch.py
index 552b598c..4f972c26 100644
--- a/theHarvester/discovery/securitytrailssearch.py
+++ b/theHarvester/discovery/securitytrailssearch.py
@@ -10,41 +10,33 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.security_trails_key()
if self.key is None:
- raise MissingKey("Securitytrail")
- self.results = ""
- self.totalresults = ""
- self.api = "https://api.securitytrails.com/v1/"
+ raise MissingKey('Securitytrail')
+ self.results = ''
+ self.totalresults = ''
+ self.api = 'https://api.securitytrails.com/v1/'
self.info: tuple[set, set] = (set(), set())
self.proxy = False
async def authenticate(self) -> None:
# Method to authenticate API key before sending requests.
- headers = {"APIKEY": self.key}
- url = f"{self.api}ping"
- auth_responses = await AsyncFetcher.fetch_all(
- [url], headers=headers, proxy=self.proxy
- )
+ headers = {'APIKEY': self.key}
+ url = f'{self.api}ping'
+ auth_responses = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
auth_responses = auth_responses[0]
- if "False" in auth_responses or "Invalid authentication" in auth_responses:
- print("\tKey could not be authenticated exiting program.")
+ if 'False' in auth_responses or 'Invalid authentication' in auth_responses:
+ print('\tKey could not be authenticated, exiting program.')
await asyncio.sleep(5)
async def do_search(self) -> None:
# https://api.securitytrails.com/v1/domain/domain.com
- url = f"{self.api}domain/{self.word}"
- headers = {"APIKEY": self.key}
- response = await AsyncFetcher.fetch_all(
- [url], headers=headers, proxy=self.proxy
- )
- await asyncio.sleep(
- 5
- ) # Not random delay because 2 seconds is required due to rate limit.
+ url = f'{self.api}domain/{self.word}'
+ headers = {'APIKEY': self.key}
+ response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
+ await asyncio.sleep(5) # Not random delay because 2 seconds is required due to rate limit.
self.results = response[0]
self.totalresults += self.results
- url += "/subdomains" # Get subdomains now.
- subdomain_response = await AsyncFetcher.fetch_all(
- [url], headers=headers, proxy=self.proxy
- )
+ url += '/subdomains' # Get subdomains now.
+ subdomain_response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
await asyncio.sleep(5)
self.results = subdomain_response[0]
self.totalresults += self.results
@@ -56,7 +48,7 @@ async def process(self, proxy: bool = False) -> None:
parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults)
self.info = await parser.parse_text()
# Create parser and set self.info to tuple returned from parsing text.
- print("\tDone Searching Results")
+ print('\tDone Searching Results')
async def get_ips(self) -> set:
return self.info[0]
diff --git a/theHarvester/discovery/shodansearch.py b/theHarvester/discovery/shodansearch.py
index a767f8fa..26ea5ca6 100644
--- a/theHarvester/discovery/shodansearch.py
+++ b/theHarvester/discovery/shodansearch.py
@@ -10,7 +10,7 @@ class SearchShodan:
def __init__(self) -> None:
self.key = Core.shodan_key()
if self.key is None:
- raise MissingKey("Shodan")
+ raise MissingKey('Shodan')
self.api = Shodan(self.key)
self.hostdatarow: list = []
self.tracker: OrderedDict = OrderedDict()
@@ -19,81 +19,81 @@ async def search_ip(self, ip) -> OrderedDict:
try:
ipaddress = ip
results = self.api.host(ipaddress)
- asn = ""
+ asn = ''
domains: list = list()
hostnames: list = list()
- ip_str = ""
- isp = ""
- org = ""
+ ip_str = ''
+ isp = ''
+ org = ''
ports: list = list()
- title = ""
- server = ""
- product = ""
+ title = ''
+ server = ''
+ product = ''
technologies: list = list()
- data_first_dict = dict(results["data"][0])
+ data_first_dict = dict(results['data'][0])
- if "ip_str" in data_first_dict.keys():
- ip_str += data_first_dict["ip_str"]
+ if 'ip_str' in data_first_dict.keys():
+ ip_str += data_first_dict['ip_str']
- if "http" in data_first_dict.keys():
- http_results_dict = dict(data_first_dict["http"])
- if "title" in http_results_dict.keys():
- title_val = str(http_results_dict["title"]).strip()
- if title_val != "None":
+ if 'http' in data_first_dict.keys():
+ http_results_dict = dict(data_first_dict['http'])
+ if 'title' in http_results_dict.keys():
+ title_val = str(http_results_dict['title']).strip()
+ if title_val != 'None':
title += title_val
- if "components" in http_results_dict.keys():
- for key in http_results_dict["components"].keys():
+ if 'components' in http_results_dict.keys():
+ for key in http_results_dict['components'].keys():
technologies.append(key)
- if "server" in http_results_dict.keys():
- server_val = str(http_results_dict["server"]).strip()
- if server_val != "None":
+ if 'server' in http_results_dict.keys():
+ server_val = str(http_results_dict['server']).strip()
+ if server_val != 'None':
server += server_val
for key, value in results.items():
- if key == "asn":
+ if key == 'asn':
asn += value
- if key == "domains":
+ if key == 'domains':
value = list(value)
value.sort()
domains.extend(value)
- if key == "hostnames":
+ if key == 'hostnames':
value = [host.strip() for host in list(value)]
value.sort()
hostnames.extend(value)
- if key == "isp":
+ if key == 'isp':
isp += value
- if key == "org":
+ if key == 'org':
org += str(value)
- if key == "ports":
+ if key == 'ports':
value = list(value)
value.sort()
ports.extend(value)
- if key == "product":
+ if key == 'product':
product += value
technologies = list(set(technologies))
self.tracker[ip] = {
- "asn": asn.strip(),
- "domains": domains,
- "hostnames": hostnames,
- "ip_str": ip_str.strip(),
- "isp": isp.strip(),
- "org": org.strip(),
- "ports": ports,
- "product": product.strip(),
- "server": server.strip(),
- "technologies": technologies,
- "title": title.strip(),
+ 'asn': asn.strip(),
+ 'domains': domains,
+ 'hostnames': hostnames,
+ 'ip_str': ip_str.strip(),
+ 'isp': isp.strip(),
+ 'org': org.strip(),
+ 'ports': ports,
+ 'product': product.strip(),
+ 'server': server.strip(),
+ 'technologies': technologies,
+ 'title': title.strip(),
}
return self.tracker
except exception.APIError:
- print(f"{ip}: Not in Shodan")
- self.tracker[ip] = "Not in Shodan"
+ print(f'{ip}: Not in Shodan')
+ self.tracker[ip] = 'Not in Shodan'
except Exception as e:
# print(f'Error occurred in the Shodan IP search module: {e}')
- self.tracker[ip] = f"Error occurred in the Shodan IP search module: {e}"
+ self.tracker[ip] = f'Error occurred in the Shodan IP search module: {e}'
finally:
return self.tracker
diff --git a/theHarvester/discovery/sitedossier.py b/theHarvester/discovery/sitedossier.py
index 4dbcedf7..3e9259a0 100644
--- a/theHarvester/discovery/sitedossier.py
+++ b/theHarvester/discovery/sitedossier.py
@@ -10,7 +10,7 @@ class SearchSitedossier:
def __init__(self, word):
self.word = word
self.totalhosts = set()
- self.server = "www.sitedossier.com"
+ self.server = 'www.sitedossier.com'
self.proxy = False
async def do_search(self):
@@ -18,92 +18,75 @@ async def do_search(self):
# This site seems to yield a lot of results but is a bit annoying to scrape
# Hence the need for delays after each request to get the most results
# Feel free to tweak the delays as needed
- url = f"http://{self.server}/parentdomain/{self.word}"
- headers = {"User-Agent": Core.get_user_agent()}
- response = await AsyncFetcher.fetch_all(
- [url], headers=headers, proxy=self.proxy
- )
+ url = f'http://{self.server}/parentdomain/{self.word}'
+ headers = {'User-Agent': Core.get_user_agent()}
+ response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
base_response = response[0]
- soup = BeautifulSoup(base_response, "html.parser")
+ soup = BeautifulSoup(base_response, 'html.parser')
# iter_counter = 1
# iterations_needed = total_number // 100
# iterations_needed += 1
flagged_counter = 0
- stop_conditions = ["End of list.", "No data currently available."]
+ stop_conditions = ['End of list.', 'No data currently available.']
bot_string = (
- "Our web servers have detected unusual or excessive requests "
+ 'Our web servers have detected unusual or excessive requests '
'from your computer or network. Please enter the unique "word"'
- " below to confirm that you are a human interactively using this site."
+ ' below to confirm that you are a human interactively using this site.'
)
if (
- stop_conditions[0] not in base_response
- and stop_conditions[1] not in base_response
+ stop_conditions[0] not in base_response and stop_conditions[1] not in base_response
) and bot_string not in base_response:
- total_number = soup.find("i")
- total_number = int(
- total_number.text.strip().split(" ")[-1].replace(",", "")
- )
- hrefs = soup.find_all("a", href=True)
+ total_number = soup.find('i')
+ total_number = int(total_number.text.strip().split(' ')[-1].replace(',', ''))
+ hrefs = soup.find_all('a', href=True)
for a in hrefs:
- unparsed = a["href"]
- if "/site/" in unparsed:
- subdomain = str(unparsed.split("/")[-1]).lower()
+ unparsed = a['href']
+ if '/site/' in unparsed:
+ subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay())
for i in range(101, total_number, 100):
- headers = {"User-Agent": Core.get_user_agent()}
- iter_url = f"http://{self.server}/parentdomain/{self.word}/{i}"
- print(f"My current iter_url: {iter_url}")
- response = await AsyncFetcher.fetch_all(
- [iter_url], headers=headers, proxy=self.proxy
- )
+ headers = {'User-Agent': Core.get_user_agent()}
+ iter_url = f'http://{self.server}/parentdomain/{self.word}/{i}'
+ print(f'My current iter_url: {iter_url}')
+ response = await AsyncFetcher.fetch_all([iter_url], headers=headers, proxy=self.proxy)
response = response[0]
- if (
- stop_conditions[0] in response
- or stop_conditions[1] in response
- or flagged_counter >= 3
- ):
+ if stop_conditions[0] in response or stop_conditions[1] in response or flagged_counter >= 3:
break
if bot_string in response:
new_sleep_time = get_delay() * 30
- print(
- f"Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds"
- )
+ print(f'Triggered a captcha for sitedossier, sleeping for: {new_sleep_time} seconds')
flagged_counter += 1
await asyncio.sleep(new_sleep_time)
response = await AsyncFetcher.fetch_all(
[iter_url],
- headers={"User-Agent": Core.get_user_agent()},
+ headers={'User-Agent': Core.get_user_agent()},
proxy=self.proxy,
)
response = response[0]
if bot_string in response:
new_sleep_time = get_delay() * 30 * get_delay()
print(
- f"Still triggering a captcha, sleeping longer for: {new_sleep_time}"
- f" and skipping this batch: {iter_url}"
+ f'Still triggering a captcha, sleeping longer for: {new_sleep_time}'
+ f' and skipping this batch: {iter_url}'
)
await asyncio.sleep(new_sleep_time)
flagged_counter += 1
if flagged_counter >= 3:
break
- soup = BeautifulSoup(response, "html.parser")
- hrefs = soup.find_all("a", href=True)
+ soup = BeautifulSoup(response, 'html.parser')
+ hrefs = soup.find_all('a', href=True)
for a in hrefs:
- unparsed = a["href"]
- if "/site/" in unparsed:
- subdomain = str(unparsed.split("/")[-1]).lower()
+ unparsed = a['href']
+ if '/site/' in unparsed:
+ subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay())
- print(f"In total found: {len(self.totalhosts)}")
+ print(f'In total found: {len(self.totalhosts)}')
print(self.totalhosts)
else:
- print(
- "Sitedossier module has triggered a captcha on first iteration, no results can be found."
- )
- print(
- "Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module"
- )
+ print('Sitedossier module has triggered a captcha on first iteration, no results can be found.')
+ print('Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module')
async def get_hostnames(self):
return self.totalhosts
diff --git a/theHarvester/discovery/subdomaincenter.py b/theHarvester/discovery/subdomaincenter.py
index cfa09a5c..7785cf1f 100644
--- a/theHarvester/discovery/subdomaincenter.py
+++ b/theHarvester/discovery/subdomaincenter.py
@@ -5,23 +5,18 @@ class SubdomainCenter:
def __init__(self, word):
self.word = word
self.results = set()
- self.server = "https://api.subdomain.center/?domain="
+ self.server = 'https://api.subdomain.center/?domain='
self.proxy = False
async def do_search(self):
- headers = {"User-Agent": Core.get_user_agent()}
+ headers = {'User-Agent': Core.get_user_agent()}
try:
- current_url = f"{self.server}{self.word}"
- resp = await AsyncFetcher.fetch_all(
- [current_url], headers=headers, proxy=self.proxy, json=True
- )
+ current_url = f'{self.server}{self.word}'
+ resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy, json=True)
self.results = resp[0]
- self.results = {
- sub[4:] if sub[:4] == "www." and sub[4:] else sub
- for sub in self.results
- }
+ self.results = {sub[4:] if sub[:4] == 'www.' and sub[4:] else sub for sub in self.results}
except Exception as e:
- print(f"An exception has occurred in SubdomainCenter on : {e}")
+ print(f'An exception has occurred in SubdomainCenter: {e}')
async def get_hostnames(self):
return self.results
diff --git a/theHarvester/discovery/subdomainfinderc99.py b/theHarvester/discovery/subdomainfinderc99.py
index ee086617..2b897cd7 100644
--- a/theHarvester/discovery/subdomainfinderc99.py
+++ b/theHarvester/discovery/subdomainfinderc99.py
@@ -14,24 +14,20 @@ def __init__(self, word) -> None:
self.total_results: set = set()
self.proxy = False
# TODO add api support
- self.server = "https://subdomainfinder.c99.nl/"
- self.totalresults = ""
+ self.server = 'https://subdomainfinder.c99.nl/'
+ self.totalresults = ''
async def do_search(self) -> None:
# Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962
- headers = {"User-Agent": Core.get_user_agent()}
- resp = await AsyncFetcher.fetch_all(
- [self.server], headers=headers, proxy=self.proxy
- )
+ headers = {'User-Agent': Core.get_user_agent()}
+ resp = await AsyncFetcher.fetch_all([self.server], headers=headers, proxy=self.proxy)
data = await self.get_csrf_params(resp[0])
- data["scan_subdomains"] = ""
- data["domain"] = self.word
- data["privatequery"] = "on"
+ data['scan_subdomains'] = ''
+ data['domain'] = self.word
+ data['privatequery'] = 'on'
await asyncio.sleep(get_delay())
- second_resp = await AsyncFetcher.post_fetch(
- self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data)
- )
+ second_resp = await AsyncFetcher.post_fetch(self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data))
# print(second_resp)
self.totalresults += second_resp
@@ -55,10 +51,10 @@ async def process(self, proxy: bool = False) -> None:
@staticmethod
async def get_csrf_params(data):
csrf_params = {}
- html = BeautifulSoup(data, "html.parser").find("div", {"class": "input-group"})
- for c in html.find_all("input"):
+ html = BeautifulSoup(data, 'html.parser').find('div', {'class': 'input-group'})
+ for c in html.find_all('input'):
try:
- csrf_params[c.get("name")] = c.get("value")
+ csrf_params[c.get('name')] = c.get('value')
except Exception:
continue
diff --git a/theHarvester/discovery/takeover.py b/theHarvester/discovery/takeover.py
index d7d7d732..a1a0f302 100644
--- a/theHarvester/discovery/takeover.py
+++ b/theHarvester/discovery/takeover.py
@@ -18,70 +18,59 @@ def __init__(self, hosts) -> None:
async def populate_fingerprints(self):
# Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints
- populate_url = "https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json"
- headers = {"User-Agent": Core.get_user_agent()}
+ populate_url = 'https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json'
+ headers = {'User-Agent': Core.get_user_agent()}
response = await AsyncFetcher.fetch_all([populate_url], headers=headers)
try:
resp = response[0]
unparsed_json = ujson.loads(resp)
for unparsed_fingerprint in unparsed_json:
- if unparsed_fingerprint["service"] in ["Smugsmug"]:
+ if unparsed_fingerprint['service'] in ['Smugsmug']:
# Subdomain must be in format domain.smugsmug.com
# This will never happen as subdomains are parsed and filtered to be in format of *.word.com
continue
- if (
- unparsed_fingerprint["status"] == "Vulnerable"
- or unparsed_fingerprint["status"] == "Edge case"
- ):
- self.fingerprints[unparsed_fingerprint["fingerprint"]] = (
- unparsed_fingerprint["service"]
- )
+ if unparsed_fingerprint['status'] == 'Vulnerable' or unparsed_fingerprint['status'] == 'Edge case':
+ self.fingerprints[unparsed_fingerprint['fingerprint']] = unparsed_fingerprint['service']
except Exception as e:
- print(
- f"An exception has occurred populating takeover fingerprints: {e}, defaulting to static list"
- )
+ print(f'An exception has occurred populating takeover fingerprints: {e}, defaulting to static list')
self.fingerprints = {
- "'Trying to access your account?'": "Campaign Monitor",
- "404 Not Found": "Fly.io",
- "404 error unknown site!": "Pantheon",
- "Do you want to register *.wordpress.com?": "Wordpress",
- "Domain uses DO name serves with no records in DO.": "Digital Ocean",
- "It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": "LaunchRock",
- "No Site For Domain": "Kinsta",
- "No settings were found for this company:": "Help Scout",
- "Project doesnt exist... yet!": "Readme.io",
- "Repository not found": "Bitbucket",
- "The feed has not been found.": "Feedpress",
- "No such app": "Heroku",
- "The specified bucket does not exist": "AWS/S3",
- "The thing you were looking for is no longer here, or never was": "Ghost",
- "There isn't a Github Pages site here.": "Github",
- "This UserVoice subdomain is currently available!": "UserVoice",
- "Uh oh. That page doesn't exist.": "Intercom",
- "We could not find what you're looking for.": "Help Juice",
- "Whatever you were looking for doesn't currently exist at this address": "Tumblr",
- "is not a registered InCloud YouTrack": "JetBrains",
- "page not found": "Uptimerobot",
- "project not found": "Surge.sh",
+ "'Trying to access your account?'": 'Campaign Monitor',
+ '404 Not Found': 'Fly.io',
+ '404 error unknown site!': 'Pantheon',
+ 'Do you want to register *.wordpress.com?': 'Wordpress',
+ 'Domain uses DO name serves with no records in DO.': 'Digital Ocean',
+ "It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": 'LaunchRock',
+ 'No Site For Domain': 'Kinsta',
+ 'No settings were found for this company:': 'Help Scout',
+ 'Project doesnt exist... yet!': 'Readme.io',
+ 'Repository not found': 'Bitbucket',
+ 'The feed has not been found.': 'Feedpress',
+ 'No such app': 'Heroku',
+ 'The specified bucket does not exist': 'AWS/S3',
+ 'The thing you were looking for is no longer here, or never was': 'Ghost',
+ "There isn't a Github Pages site here.": 'Github',
+ 'This UserVoice subdomain is currently available!': 'UserVoice',
+ "Uh oh. That page doesn't exist.": 'Intercom',
+ "We could not find what you're looking for.": 'Help Juice',
+ "Whatever you were looking for doesn't currently exist at this address": 'Tumblr',
+ 'is not a registered InCloud YouTrack': 'JetBrains',
+ 'page not found': 'Uptimerobot',
+ 'project not found': 'Surge.sh',
}
async def check(self, url, resp) -> None:
# Simple function that takes response and checks if any fingerprints exist
# If a fingerprint exists figures out which one and prints it out
- regex = re.compile(
- "(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))"
- )
+ regex = re.compile('(?=(' + '|'.join(map(re.escape, list(self.fingerprints.keys()))) + '))')
# Sanitize fingerprints
matches = re.findall(regex, resp)
matches = list(set(matches))
for match in matches:
- print(f"\t\033[91m Takeover detected: {url}\033[1;32;40m")
+ print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m')
if match in self.fingerprints.keys():
# Validation check as to not error out
service = self.fingerprints[match]
- print(
- f"\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m"
- )
+ print(f'\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m')
self.results[url].append({match: service})
async def do_take(self) -> None:
@@ -89,13 +78,11 @@ async def do_take(self) -> None:
if len(self.hosts) > 0:
# Returns a list of tuples in this format: (url, response)
# Filter out responses whose responses are empty strings (indicates errored)
- https_hosts = [f"https://{host}" for host in self.hosts]
- http_hosts = [f"http://{host}" for host in self.hosts]
+ https_hosts = [f'https://{host}' for host in self.hosts]
+ http_hosts = [f'http://{host}' for host in self.hosts]
all_hosts = https_hosts + http_hosts
shuffle(all_hosts)
- resps: list = await AsyncFetcher.fetch_all(
- all_hosts, takeover=True, proxy=self.proxy
- )
+ resps: list = await AsyncFetcher.fetch_all(all_hosts, takeover=True, proxy=self.proxy)
for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1):
await self.check(url, resp)
else:
diff --git a/theHarvester/discovery/threatminer.py b/theHarvester/discovery/threatminer.py
index fd8acc0c..944ff327 100644
--- a/theHarvester/discovery/threatminer.py
+++ b/theHarvester/discovery/threatminer.py
@@ -9,15 +9,13 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
- url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5"
+ url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
- self.totalhosts = {host for host in response[0]["results"]}
- second_url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2"
- secondresp = await AsyncFetcher.fetch_all(
- [second_url], json=True, proxy=self.proxy
- )
+ self.totalhosts = {host for host in response[0]['results']}
+ second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
+ secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy)
try:
- self.totalips = {resp["ip"] for resp in secondresp[0]["results"]}
+ self.totalips = {resp['ip'] for resp in secondresp[0]['results']}
except TypeError:
pass
diff --git a/theHarvester/discovery/tombasearch.py b/theHarvester/discovery/tombasearch.py
index cdc49442..7897243c 100644
--- a/theHarvester/discovery/tombasearch.py
+++ b/theHarvester/discovery/tombasearch.py
@@ -12,12 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start
self.key = Core.tomba_key()
if self.key[0] is None or self.key[1] is None:
- raise MissingKey("Tomba Key and/or Secret")
- self.total_results = ""
+ raise MissingKey('Tomba Key and/or Secret')
+ self.total_results = ''
self.counter = start
- self.database = (
- f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10"
- )
+ self.database = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10'
self.proxy = False
self.hostnames: list = []
self.emails: list = []
@@ -26,49 +24,38 @@ async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free
is_free = True
headers = {
- "User-Agent": Core.get_user_agent(),
- "X-Tomba-Key": self.key[0],
- "X-Tomba-Secret": self.key[1],
+ 'User-Agent': Core.get_user_agent(),
+ 'X-Tomba-Key': self.key[0],
+ 'X-Tomba-Secret': self.key[1],
}
- acc_info_url = "https://api.tomba.io/v1/me"
- response = await AsyncFetcher.fetch_all(
- [acc_info_url], headers=headers, json=True
- )
+ acc_info_url = 'https://api.tomba.io/v1/me'
+ response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
is_free = (
is_free
- if "name" in response[0]["data"]["pricing"].keys()
- and response[0]["data"]["pricing"]["name"].lower() == "free"
+ if 'name' in response[0]['data']['pricing'].keys() and response[0]['data']['pricing']['name'].lower() == 'free'
else False
)
# Extract the total number of requests that are available for an account
total_requests_avail = (
- response[0]["data"]["requests"]["domains"]["available"]
- - response[0]["data"]["requests"]["domains"]["used"]
+ response[0]['data']['requests']['domains']['available'] - response[0]['data']['requests']['domains']['used']
)
if is_free:
- response = await AsyncFetcher.fetch_all(
- [self.database], headers=headers, proxy=self.proxy, json=True
- )
+ response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else:
# Determine the total number of emails that are available
# As the most emails you can get within one query are 100
# This is only done where paid accounts are in play
- tomba_counter = f"https://api.tomba.io/v1/email-count?domain={self.word}"
- response = await AsyncFetcher.fetch_all(
- [tomba_counter], headers=headers, proxy=self.proxy, json=True
- )
- total_number_reqs = response[0]["data"]["total"] // 100
+ tomba_counter = f'https://api.tomba.io/v1/email-count?domain={self.word}'
+ response = await AsyncFetcher.fetch_all([tomba_counter], headers=headers, proxy=self.proxy, json=True)
+ total_number_reqs = response[0]['data']['total'] // 100
# Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs:
+ print('WARNING: The account does not have enough requests to gather all the emails.')
print(
- "WARNING: The account does not have enough requests to gather all the emails."
- )
- print(
- f"Total requests available: {total_requests_avail}, total requests "
- f"needed to be made: {total_number_reqs}"
+ f'Total requests available: {total_requests_avail}, total requests needed to be made: {total_number_reqs}'
)
print(
'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.'
@@ -79,24 +66,22 @@ async def do_search(self) -> None:
# increments of max number with page determining where to start
# See docs for more details: https://developer.tomba.io/#domain-search
for page in range(0, total_number_reqs + 1):
- req_url = f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}"
- response = await AsyncFetcher.fetch_all(
- [req_url], headers=headers, proxy=self.proxy, json=True
- )
+ req_url = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}'
+ response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1)
async def parse_resp(self, json_resp):
- emails = list(sorted({email["email"] for email in json_resp["data"]["emails"]}))
+ emails = list(sorted({email['email'] for email in json_resp['data']['emails']}))
domains = list(
sorted(
{
- source["website_url"]
- for email in json_resp["data"]["emails"]
- for source in email["sources"]
- if self.word in source["website_url"]
+ source['website_url']
+ for email in json_resp['data']['emails']
+ for source in email['sources']
+ if self.word in source['website_url']
}
)
)
diff --git a/theHarvester/discovery/urlscan.py b/theHarvester/discovery/urlscan.py
index 062ea525..74975fcf 100644
--- a/theHarvester/discovery/urlscan.py
+++ b/theHarvester/discovery/urlscan.py
@@ -11,25 +11,17 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
- url = f"https://urlscan.io/api/v1/search/?q=domain:{self.word}"
+ url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
resp = response[0]
- self.totalhosts = {f"{page['page']['domain']}" for page in resp["results"]}
- self.totalips = {
- f"{page['page']['ip']}"
- for page in resp["results"]
- if "ip" in page["page"].keys()
- }
+ self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
+ self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
self.interestingurls = {
f"{page['page']['url']}"
- for page in resp["results"]
- if self.word in page["page"]["url"] and "url" in page["page"].keys()
- }
- self.totalasns = {
- f"{page['page']['asn']}"
- for page in resp["results"]
- if "asn" in page["page"].keys()
+ for page in resp['results']
+ if self.word in page['page']['url'] and 'url' in page['page'].keys()
}
+ self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
async def get_hostnames(self) -> set:
return self.totalhosts
diff --git a/theHarvester/discovery/virustotal.py b/theHarvester/discovery/virustotal.py
index 9e511070..1fe47c53 100644
--- a/theHarvester/discovery/virustotal.py
+++ b/theHarvester/discovery/virustotal.py
@@ -8,7 +8,7 @@ class SearchVirustotal:
def __init__(self, word) -> None:
self.key = Core.virustotal_key()
if self.key is None:
- raise MissingKey("virustotal")
+ raise MissingKey('virustotal')
self.word = word
self.proxy = False
self.hostnames: list = []
@@ -18,14 +18,12 @@ async def do_search(self) -> None:
# based on: https://developers.virustotal.com/reference/domains-relationships
# base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
headers = {
- "User-Agent": Core.get_user_agent(),
- "Accept": "application/json",
- "x-apikey": self.key,
+ 'User-Agent': Core.get_user_agent(),
+ 'Accept': 'application/json',
+ 'x-apikey': self.key,
}
- base_url = (
- f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
- )
- cursor = ""
+ base_url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
+ cursor = ''
count = 0
fail_counter = 0
counter = 0
@@ -37,42 +35,29 @@ async def do_search(self) -> None:
# TODO add timer logic if proven to be needed
# in the meantime sleeping 16 seconds should eliminate hitting the rate limit
# in case rate limit is hit, fail counter exists and sleep for 65 seconds
- send_url = (
- base_url + "&cursor=" + cursor
- if cursor != "" and len(cursor) > 2
- else base_url
- )
- responses = await AsyncFetcher.fetch_all(
- [send_url], headers=headers, proxy=self.proxy, json=True
- )
+ send_url = base_url + '&cursor=' + cursor if cursor != '' and len(cursor) > 2 else base_url
+ responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
jdata = responses[0]
- if "data" not in jdata.keys():
+ if 'data' not in jdata.keys():
await asyncio.sleep(60 + 5)
fail_counter += 1
- if "meta" in jdata.keys():
- cursor = (
- jdata["meta"]["cursor"] if "cursor" in jdata["meta"].keys() else ""
- )
- if len(cursor) == 0 and "data" in jdata.keys():
+ if 'meta' in jdata.keys():
+ cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
+ if len(cursor) == 0 and 'data' in jdata.keys():
# if cursor no longer is within the meta field have hit last entry
breakcon = True
- count += jdata["meta"]["count"]
+ count += jdata['meta']['count']
if count == 0 or fail_counter >= 2:
break
- if "data" in jdata.keys():
- data = jdata["data"]
+ if 'data' in jdata.keys():
+ data = jdata['data']
self.hostnames.extend(await self.parse_hostnames(data, self.word))
counter += 1
await asyncio.sleep(16)
self.hostnames = list(sorted(set(self.hostnames)))
# verify domains such as x.x.com.multicdn.x.com are parsed properly
self.hostnames = [
- host
- for host in self.hostnames
- if (
- (len(host.split(".")) >= 3)
- and host.split(".")[-2] == self.word.split(".")[-2]
- )
+ host for host in self.hostnames if ((len(host.split('.')) >= 3) and host.split('.')[-2] == self.word.split('.')[-2])
]
async def get_hostnames(self) -> list:
@@ -82,22 +67,20 @@ async def get_hostnames(self) -> list:
async def parse_hostnames(data, word):
total_subdomains = set()
for attribute in data:
- total_subdomains.add(attribute["id"].replace('"', "").replace("www.", ""))
- attributes = attribute["attributes"]
+ total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
+ attributes = attribute['attributes']
total_subdomains.update(
{
- value["value"].replace('"', "").replace("www.", "")
- for value in attributes["last_dns_records"]
- if word in value["value"]
+ value['value'].replace('"', '').replace('www.', '')
+ for value in attributes['last_dns_records']
+ if word in value['value']
}
)
- if "last_https_certificate" in attributes.keys():
+ if 'last_https_certificate' in attributes.keys():
total_subdomains.update(
{
- value.replace('"', "").replace("www.", "")
- for value in attributes["last_https_certificate"]["extensions"][
- "subject_alternative_name"
- ]
+ value.replace('"', '').replace('www.', '')
+ for value in attributes['last_https_certificate']['extensions']['subject_alternative_name']
if word in value
}
)
@@ -108,9 +91,7 @@ async def parse_hostnames(data, word):
total_subdomains = [
x
for x in total_subdomains
- if "edgekey.net" not in str(x)
- and "akadns.net" not in str(x)
- and "include:_spf" not in str(x)
+ if 'edgekey.net' not in str(x) and 'akadns.net' not in str(x) and 'include:_spf' not in str(x)
]
total_subdomains.sort()
return total_subdomains
diff --git a/theHarvester/discovery/yahoosearch.py b/theHarvester/discovery/yahoosearch.py
index 7ac9b42d..ea7f5b93 100644
--- a/theHarvester/discovery/yahoosearch.py
+++ b/theHarvester/discovery/yahoosearch.py
@@ -5,22 +5,16 @@
class SearchYahoo:
def __init__(self, word, limit) -> None:
self.word = word
- self.total_results = ""
- self.server = "search.yahoo.com"
+ self.total_results = ''
+ self.server = 'search.yahoo.com'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
- base_url = f"https://{self.server}/search?p=%40{self.word}&b=xx&pz=10"
- headers = {"Host": self.server, "User-agent": Core.get_user_agent()}
- urls = [
- base_url.replace("xx", str(num))
- for num in range(0, self.limit, 10)
- if num <= self.limit
- ]
- responses = await AsyncFetcher.fetch_all(
- urls, headers=headers, proxy=self.proxy
- )
+ base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10'
+ headers = {'Host': self.server, 'User-agent': Core.get_user_agent()}
+ urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+ responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
@@ -35,8 +29,8 @@ async def get_emails(self):
# strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld
for email in toparse_emails:
email = str(email)
- if "-" in email and email[0].isdigit() and email.index("-") <= 9:
- while email[0] == "-" or email[0].isdigit():
+ if '-' in email and email[0].isdigit() and email.index('-') <= 9:
+ while email[0] == '-' or email[0].isdigit():
email = email[1:]
emails.add(email)
return list(emails)
diff --git a/theHarvester/discovery/zoomeyesearch.py b/theHarvester/discovery/zoomeyesearch.py
index a7a1bfbe..e693868b 100644
--- a/theHarvester/discovery/zoomeyesearch.py
+++ b/theHarvester/discovery/zoomeyesearch.py
@@ -16,8 +16,8 @@ def __init__(self, word, limit) -> None:
# If you wish to extract as many subdomains as possible visit the fetch_subdomains
# To see how
if self.key is None:
- raise MissingKey("zoomeye")
- self.baseurl = "https://api.zoomeye.org/host/search"
+ raise MissingKey('zoomeye')
+ self.baseurl = 'https://api.zoomeye.org/host/search'
self.proxy = False
self.totalasns: list = list()
self.totalhosts: list = list()
@@ -58,40 +58,38 @@ def __init__(self, word, limit) -> None:
async def fetch_subdomains(self) -> None:
# Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip
- headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
+ headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
- subdomain_search_endpoint = (
- f"https://api.zoomeye.org/domain/search?q={self.word}&type=0&"
- )
+ subdomain_search_endpoint = f'https://api.zoomeye.org/domain/search?q={self.word}&type=0&'
response = await AsyncFetcher.fetch_all(
- [subdomain_search_endpoint + "page=1"],
+ [subdomain_search_endpoint + 'page=1'],
json=True,
proxy=self.proxy,
headers=headers,
)
# Make initial request to determine total number of subdomains
resp = response[0]
- if resp["status"] != 200:
+ if resp['status'] != 200:
return
- total = resp["total"]
+ total = resp['total']
# max number of results per request seems to be 30
# NOTE: If you wish to get as many subdomains as possible
# Change the line below to:
# self.limit = (total // 30) + 1
self.limit = self.limit if total > self.limit else (total // 30) + 1
- self.totalhosts.extend([item["name"] for item in resp["list"]])
+ self.totalhosts.extend([item['name'] for item in resp['list']])
for i in range(2, self.limit):
response = await AsyncFetcher.fetch_all(
- [subdomain_search_endpoint + f"page={i}"],
+ [subdomain_search_endpoint + f'page={i}'],
json=True,
proxy=self.proxy,
headers=headers,
)
resp = response[0]
- if resp["status"] != 200:
+ if resp['status'] != 200:
return
- found_subdomains = [item["name"] for item in resp["list"]]
+ found_subdomains = [item['name'] for item in resp['list']]
if len(found_subdomains) == 0:
break
self.totalhosts.extend(found_subdomains)
@@ -99,19 +97,17 @@ async def fetch_subdomains(self) -> None:
await asyncio.sleep(get_delay() + 1)
async def do_search(self) -> None:
- headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
+ headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
# Fetch subdomains first
await self.fetch_subdomains()
params = (
- ("query", f"site:{self.word}"),
- ("page", "1"),
- )
- response = await AsyncFetcher.fetch_all(
- [self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params
+ ('query', f'site:{self.word}'),
+ ('page', '1'),
)
+ response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params)
# The First request determines how many pages there in total
resp = response[0]
- total_pages = int(resp["available"])
+ total_pages = int(resp['available'])
self.limit = self.limit if total_pages > self.limit else total_pages
self.limit = 3 if self.limit == 2 else self.limit
cur_page = 2 if self.limit >= 2 else -1
@@ -121,21 +117,17 @@ async def do_search(self) -> None:
# cur_page = -1
if cur_page == -1:
# No need to do loop just parse and leave
- if "matches" in resp.keys():
- hostnames, emails, ips, asns, iurls = await self.parse_matches(
- resp["matches"]
- )
+ if 'matches' in resp.keys():
+ hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
self.totalasns.extend(asns)
self.interestingurls.extend(iurls)
else:
- if "matches" in resp.keys():
+ if 'matches' in resp.keys():
# Parse out initial results and then continue to loop
- hostnames, emails, ips, asns, iurls = await self.parse_matches(
- resp["matches"]
- )
+ hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
@@ -145,8 +137,8 @@ async def do_search(self) -> None:
for num in range(2, self.limit):
# print(f'Currently on page: {num}')
params = (
- ("query", f"site:{self.word}"),
- ("page", f"{num}"),
+ ('query', f'site:{self.word}'),
+ ('page', f'{num}'),
)
response = await AsyncFetcher.fetch_all(
[self.baseurl],
@@ -156,22 +148,14 @@ async def do_search(self) -> None:
params=params,
)
resp = response[0]
- if "matches" not in resp.keys():
- print(f"Your resp: {resp}")
- print("Match not found in keys")
+ if 'matches' not in resp.keys():
+ print(f'Your resp: {resp}')
+ print('No matches key found in response')
break
- hostnames, emails, ips, asns, iurls = await self.parse_matches(
- resp["matches"]
- )
+ hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
- if (
- len(hostnames) == 0
- and len(emails) == 0
- and len(ips) == 0
- and len(asns) == 0
- and len(iurls) == 0
- ):
+ if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 and len(asns) == 0 and len(iurls) == 0:
nomatches_counter += 1
if nomatches_counter >= 5:
@@ -196,48 +180,42 @@ async def parse_matches(self, matches):
emails = set()
for match in matches:
try:
- ips.add(match["ip"])
+ ips.add(match['ip'])
- if "geoinfo" in match.keys():
+ if 'geoinfo' in match.keys():
asns.add(f"AS{match['geoinfo']['asn']}")
- if "rdns_new" in match.keys():
- rdns_new = match["rdns_new"]
+ if 'rdns_new' in match.keys():
+ rdns_new = match['rdns_new']
- if "," in rdns_new:
- parts = str(rdns_new).split(",")
+ if ',' in rdns_new:
+ parts = str(rdns_new).split(',')
rdns_new = parts[0]
if len(parts) == 2:
hostnames.add(parts[1])
- rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
+ rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
else:
- rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
+ rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
- if "rdns" in match.keys():
- rdns = match["rdns"]
- rdns = rdns[:-1] if rdns[-1] == "." else rdns
+ if 'rdns' in match.keys():
+ rdns = match['rdns']
+ rdns = rdns[:-1] if rdns[-1] == '.' else rdns
hostnames.add(rdns)
- if "portinfo" in match.keys():
+ if 'portinfo' in match.keys():
# re.
- temp_emails = set(
- await self.parse_emails(match["portinfo"]["banner"])
- )
+ temp_emails = set(await self.parse_emails(match['portinfo']['banner']))
emails.update(temp_emails)
- hostnames.update(
- set(await self.parse_hostnames(match["portinfo"]["banner"]))
- )
+ hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner'])))
iurls = {
- str(iurl.group(1)).replace('"', "")
- for iurl in re.finditer(
- self.iurl_regex, match["portinfo"]["banner"]
- )
+ str(iurl.group(1)).replace('"', '')
+ for iurl in re.finditer(self.iurl_regex, match['portinfo']['banner'])
if self.word in str(iurl.group(1))
}
except Exception as e:
- print(f"An exception has occurred: {e}")
+ print(f'An exception has occurred: {e}')
return hostnames, emails, ips, asns, iurls
async def process(self, proxy: bool = False) -> None:
diff --git a/theHarvester/lib/__init__.py b/theHarvester/lib/__init__.py
index 7145285d..8fc0aea3 100644
--- a/theHarvester/lib/__init__.py
+++ b/theHarvester/lib/__init__.py
@@ -1 +1 @@
-__all__ = ["hostchecker"]
+__all__ = ['hostchecker']
diff --git a/theHarvester/lib/api/api.py b/theHarvester/lib/api/api.py
index e29983f6..53d903a7 100644
--- a/theHarvester/lib/api/api.py
+++ b/theHarvester/lib/api/api.py
@@ -12,36 +12,32 @@
limiter = Limiter(key_func=get_remote_address)
app = FastAPI(
- title="Restful Harvest",
- description="Rest API for theHarvester powered by FastAPI",
- version="0.0.2",
+ title='Restful Harvest',
+ description='Rest API for theHarvester powered by FastAPI',
+ version='0.0.2',
)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore
# This is where we will host files that arise if the user specifies a filename
try:
- app.mount(
- "/static", StaticFiles(directory="theHarvester/lib/api/static/"), name="static"
- )
+ app.mount('/static', StaticFiles(directory='theHarvester/lib/api/static/'), name='static')
except RuntimeError:
- static_path = os.path.expanduser("~/.local/share/theHarvester/static/")
+ static_path = os.path.expanduser('~/.local/share/theHarvester/static/')
if not os.path.isdir(static_path):
os.makedirs(static_path)
app.mount(
- "/static",
+ '/static',
StaticFiles(directory=static_path),
- name="static",
+ name='static',
)
-@app.get("/")
+@app.get('/')
async def root(*, user_agent: str = Header(None)) -> Response:
# very basic user agent filtering
- if user_agent and (
- "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
- ):
- response = RedirectResponse(app.url_path_for("bot"))
+ if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
+ response = RedirectResponse(app.url_path_for('bot'))
return response
return HTMLResponse(
@@ -70,36 +66,34 @@ async def root(*, user_agent: str = Header(None)) -> Response:
)
-@app.get("/nicebot")
+@app.get('/nicebot')
async def bot() -> dict[str, str]:
# nice bot
- string = {"bot": "These are not the droids you are looking for"}
+ string = {'bot': 'These are not the droids you are looking for'}
return string
-@app.get("/sources", response_class=UJSONResponse)
-@limiter.limit("5/minute")
+@app.get('/sources', response_class=UJSONResponse)
+@limiter.limit('5/minute')
async def getsources(request: Request):
# Endpoint for user to query for available sources theHarvester supports
# Rate limit of 5 requests per minute
sources = __main__.Core.get_supportedengines()
- return {"sources": sources}
+ return {'sources': sources}
-@app.get("/dnsbrute")
-@limiter.limit("5/minute")
+@app.get('/dnsbrute')
+@limiter.limit('5/minute')
async def dnsbrute(
request: Request,
user_agent: str = Header(None),
- domain: str = Query(..., description="Domain to be brute forced"),
+ domain: str = Query(..., description='Domain to be brute forced'),
) -> Response:
# Endpoint for user to signal to do DNS brute forcing
# Rate limit of 5 requests per minute
# basic user agent filtering
- if user_agent and (
- "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
- ):
- response = RedirectResponse(app.url_path_for("bot"))
+ if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
+ response = RedirectResponse(app.url_path_for('bot'))
return response
dns_bruteforce = await __main__.start(
argparse.Namespace(
@@ -108,49 +102,45 @@ async def dnsbrute(
dns_server=False,
dns_tld=False,
domain=domain,
- filename="",
+ filename='',
google_dork=False,
limit=500,
proxies=False,
shodan=False,
- source=",".join([]),
+ source=','.join([]),
start=0,
take_over=False,
virtual_host=False,
)
)
- return UJSONResponse({"dns_bruteforce": dns_bruteforce})
+ return UJSONResponse({'dns_bruteforce': dns_bruteforce})
-@app.get("/query")
-@limiter.limit("2/minute")
+@app.get('/query')
+@limiter.limit('2/minute')
async def query(
request: Request,
- dns_server: str = Query(""),
+ dns_server: str = Query(''),
user_agent: str = Header(None),
dns_brute: bool = Query(False),
dns_lookup: bool = Query(False),
dns_tld: bool = Query(False),
- filename: str = Query(""),
+ filename: str = Query(''),
google_dork: bool = Query(False),
proxies: bool = Query(False),
shodan: bool = Query(False),
take_over: bool = Query(False),
virtual_host: bool = Query(False),
- source: list[str] = Query(
- ..., description="Data sources to query comma separated with no space"
- ),
+ source: list[str] = Query(..., description='Data sources to query, comma-separated with no spaces'),
limit: int = Query(500),
start: int = Query(0),
- domain: str = Query(..., description="Domain to be harvested"),
+ domain: str = Query(..., description='Domain to be harvested'),
) -> Response:
# Query function that allows user to query theHarvester rest API
# Rate limit of 2 requests per minute
# basic user agent filtering
- if user_agent and (
- "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
- ):
- response = RedirectResponse(app.url_path_for("bot"))
+ if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
+ response = RedirectResponse(app.url_path_for('bot'))
return response
try:
(
@@ -175,7 +165,7 @@ async def query(
limit=limit,
proxies=proxies,
shodan=shodan,
- source=",".join(source),
+ source=','.join(source),
start=start,
take_over=take_over,
virtual_host=virtual_host,
@@ -184,18 +174,16 @@ async def query(
return UJSONResponse(
{
- "asns": asns,
- "interesting_urls": iurls,
- "twitter_people": twitter_people_list,
- "linkedin_people": linkedin_people_list,
- "linkedin_links": linkedin_links,
- "trello_urls": aurls,
- "ips": aips,
- "emails": aemails,
- "hosts": ahosts,
+ 'asns': asns,
+ 'interesting_urls': iurls,
+ 'twitter_people': twitter_people_list,
+ 'linkedin_people': linkedin_people_list,
+ 'linkedin_links': linkedin_links,
+ 'trello_urls': aurls,
+ 'ips': aips,
+ 'emails': aemails,
+ 'hosts': ahosts,
}
)
except Exception:
- return UJSONResponse(
- {"exception": "Please contact the server administrator to check the issue"}
- )
+ return UJSONResponse({'exception': 'Please contact the server administrator to check the issue'})
diff --git a/theHarvester/lib/api/api_example.py b/theHarvester/lib/api/api_example.py
index 9174cfd3..af7b5cdf 100644
--- a/theHarvester/lib/api/api_example.py
+++ b/theHarvester/lib/api/api_example.py
@@ -23,100 +23,94 @@ async def main() -> None:
Just a simple example of how to interact with the rest api
you can easily use requests instead of aiohttp or whatever you best see fit
"""
- url = "http://127.0.0.1:5000"
- domain = "netflix.com"
- query_url = (
- f"{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}"
- )
+ url = 'http://127.0.0.1:5000'
+ domain = 'netflix.com'
+ query_url = f'{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}'
async with aiohttp.ClientSession() as session:
fetched_json = await fetch_json(session, query_url)
- total_asns = fetched_json["asns"]
- interesting_urls = fetched_json["interesting_urls"]
- twitter_people_list_tracker = fetched_json["twitter_people"]
- linkedin_people_list_tracker = fetched_json["linkedin_people"]
- linkedin_links_tracker = fetched_json["linkedin_links"]
- trello_urls = fetched_json["trello_urls"]
- ips = fetched_json["ips"]
- emails = fetched_json["emails"]
- hosts = fetched_json["hosts"]
+ total_asns = fetched_json['asns']
+ interesting_urls = fetched_json['interesting_urls']
+ twitter_people_list_tracker = fetched_json['twitter_people']
+ linkedin_people_list_tracker = fetched_json['linkedin_people']
+ linkedin_links_tracker = fetched_json['linkedin_links']
+ trello_urls = fetched_json['trello_urls']
+ ips = fetched_json['ips']
+ emails = fetched_json['emails']
+ hosts = fetched_json['hosts']
if len(total_asns) > 0:
- print(f"\n[*] ASNS found: {len(total_asns)}")
- print("--------------------")
+ print(f'\n[*] ASNS found: {len(total_asns)}')
+ print('--------------------')
total_asns = list(sorted(set(total_asns)))
for asn in total_asns:
print(asn)
if len(interesting_urls) > 0:
- print(f"\n[*] Interesting Urls found: {len(interesting_urls)}")
- print("--------------------")
+ print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
+ print('--------------------')
interesting_urls = list(sorted(set(interesting_urls)))
for iurl in interesting_urls:
print(iurl)
if len(twitter_people_list_tracker) == 0:
- print("\n[*] No Twitter users found.\n\n")
+ print('\n[*] No Twitter users found.\n\n')
else:
if len(twitter_people_list_tracker) >= 1:
- print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker)))
- print("---------------------")
+ print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker)))
+ print('---------------------')
twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker)))
for usr in twitter_people_list_tracker:
print(usr)
if len(linkedin_people_list_tracker) == 0:
- print("\n[*] No LinkedIn users found.\n\n")
+ print('\n[*] No LinkedIn users found.\n\n')
else:
if len(linkedin_people_list_tracker) >= 1:
- print(
- "\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker))
- )
- print("---------------------")
- linkedin_people_list_tracker = list(
- sorted(set(linkedin_people_list_tracker))
- )
+ print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker)))
+ print('---------------------')
+ linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker)))
for usr in linkedin_people_list_tracker:
print(usr)
if len(linkedin_links_tracker) == 0:
- print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}")
+ print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
- print("---------------------")
+ print('---------------------')
for link in linkedin_links_tracker:
print(link)
length_urls = len(trello_urls)
total = length_urls
- print("\n[*] Trello URLs found: " + str(total))
- print("--------------------")
+ print('\n[*] Trello URLs found: ' + str(total))
+ print('--------------------')
all_urls = list(sorted(set(trello_urls)))
for url in sorted(all_urls):
print(url)
if len(ips) == 0:
- print("\n[*] No IPs found.")
+ print('\n[*] No IPs found.')
else:
- print("\n[*] IPs found: " + str(len(ips)))
- print("-------------------")
+ print('\n[*] IPs found: ' + str(len(ips)))
+ print('-------------------')
# use netaddr as the list may contain ipv4 and ipv6 addresses
ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)])
- print("\n".join(map(str, ip_list)))
+ print('\n'.join(map(str, ip_list)))
if len(emails) == 0:
- print("\n[*] No emails found.")
+ print('\n[*] No emails found.')
else:
- print("\n[*] Emails found: " + str(len(emails)))
- print("----------------------")
+ print('\n[*] Emails found: ' + str(len(emails)))
+ print('----------------------')
all_emails = sorted(list(set(emails)))
- print("\n".join(all_emails))
+ print('\n'.join(all_emails))
if len(hosts) == 0:
- print("\n[*] No hosts found.\n\n")
+ print('\n[*] No hosts found.\n\n')
else:
- print("\n[*] Hosts found: " + str(len(hosts)))
- print("---------------------")
- print("\n".join(hosts))
+ print('\n[*] Hosts found: ' + str(len(hosts)))
+ print('---------------------')
+ print('\n'.join(hosts))
-if __name__ == "__main__":
+if __name__ == '__main__':
asyncio.run(main())
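The summary code above sorts collected addresses through netaddr.IPAddress rather than as plain strings, since the list may mix IPv4 and IPv6 and lexicographic ordering misplaces dotted quads. A minimal standalone sketch of the difference, assuming the netaddr package is installed and using made-up addresses:

    import netaddr

    ips = ['10.0.0.10', '10.0.0.2', '2001:db8::1']          # made-up addresses
    print(sorted(ips))                                       # string sort puts '10.0.0.10' before '10.0.0.2'
    ip_list = sorted(netaddr.IPAddress(ip) for ip in ips)    # numeric sort, IPv4 and IPv6 together
    print('\n'.join(map(str, ip_list)))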
diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py
index 5897b5a1..84a656d9 100644
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@@ -19,11 +19,11 @@
if TYPE_CHECKING:
from collections.abc import Sized
-DATA_DIR = Path(__file__).parents[1] / "data"
+DATA_DIR = Path(__file__).parents[1] / 'data'
CONFIG_DIRS = [
- Path("/etc/theHarvester/"),
- Path("/usr/local/etc/theHarvester/"),
- Path("~/.theHarvester"),
+ Path('/etc/theHarvester/'),
+ Path('/usr/local/etc/theHarvester/'),
+ Path('~/.theHarvester'),
]
@@ -35,7 +35,7 @@ def _read_config(filename: str) -> str:
with contextlib.suppress(FileNotFoundError):
file = path.expanduser() / filename
config = file.read_text()
- print(f"Read {filename} from {file}")
+ print(f'Read {filename} from {file}')
return config
# Fallback to creating default in user's home dir
@@ -43,168 +43,160 @@ def _read_config(filename: str) -> str:
dest = CONFIG_DIRS[-1].expanduser() / filename
dest.parent.mkdir(exist_ok=True)
dest.write_text(default)
- print(f"Created default {filename} at {dest}")
+ print(f'Created default {filename} at {dest}')
return default
@staticmethod
def api_keys() -> dict:
- keys = yaml.safe_load(Core._read_config("api-keys.yaml"))
- return keys["apikeys"]
+ keys = yaml.safe_load(Core._read_config('api-keys.yaml'))
+ return keys['apikeys']
@staticmethod
def bevigil_key() -> str:
- return Core.api_keys()["bevigil"]["key"]
+ return Core.api_keys()['bevigil']['key']
@staticmethod
def binaryedge_key() -> str:
- return Core.api_keys()["binaryedge"]["key"]
+ return Core.api_keys()['binaryedge']['key']
@staticmethod
def bing_key() -> str:
- return Core.api_keys()["bing"]["key"]
+ return Core.api_keys()['bing']['key']
@staticmethod
def bufferoverun_key() -> str:
- return Core.api_keys()["bufferoverun"]["key"]
+ return Core.api_keys()['bufferoverun']['key']
@staticmethod
def censys_key() -> tuple:
- return Core.api_keys()["censys"]["id"], Core.api_keys()["censys"]["secret"]
+ return Core.api_keys()['censys']['id'], Core.api_keys()['censys']['secret']
@staticmethod
def criminalip_key() -> str:
- return Core.api_keys()["criminalip"]["key"]
+ return Core.api_keys()['criminalip']['key']
@staticmethod
def fullhunt_key() -> str:
- return Core.api_keys()["fullhunt"]["key"]
+ return Core.api_keys()['fullhunt']['key']
@staticmethod
def github_key() -> str:
- return Core.api_keys()["github"]["key"]
+ return Core.api_keys()['github']['key']
@staticmethod
def hunter_key() -> str:
- return Core.api_keys()["hunter"]["key"]
+ return Core.api_keys()['hunter']['key']
@staticmethod
def hunterhow_key() -> str:
- return Core.api_keys()["hunterhow"]["key"]
+ return Core.api_keys()['hunterhow']['key']
@staticmethod
def intelx_key() -> str:
- return Core.api_keys()["intelx"]["key"]
+ return Core.api_keys()['intelx']['key']
@staticmethod
def netlas_key() -> str:
- return Core.api_keys()["netlas"]["key"]
+ return Core.api_keys()['netlas']['key']
@staticmethod
def pentest_tools_key() -> str:
- return Core.api_keys()["pentestTools"]["key"]
+ return Core.api_keys()['pentestTools']['key']
@staticmethod
def onyphe_key() -> str:
- return Core.api_keys()["onyphe"]["key"]
+ return Core.api_keys()['onyphe']['key']
@staticmethod
def projectdiscovery_key() -> str:
- return Core.api_keys()["projectDiscovery"]["key"]
+ return Core.api_keys()['projectDiscovery']['key']
@staticmethod
def rocketreach_key() -> str:
- return Core.api_keys()["rocketreach"]["key"]
+ return Core.api_keys()['rocketreach']['key']
@staticmethod
def security_trails_key() -> str:
- return Core.api_keys()["securityTrails"]["key"]
+ return Core.api_keys()['securityTrails']['key']
@staticmethod
def shodan_key() -> str:
- return Core.api_keys()["shodan"]["key"]
+ return Core.api_keys()['shodan']['key']
@staticmethod
def zoomeye_key() -> str:
- return Core.api_keys()["zoomeye"]["key"]
+ return Core.api_keys()['zoomeye']['key']
@staticmethod
def tomba_key() -> tuple[str, str]:
- return Core.api_keys()["tomba"]["key"], Core.api_keys()["tomba"]["secret"]
+ return Core.api_keys()['tomba']['key'], Core.api_keys()['tomba']['secret']
@staticmethod
def virustotal_key() -> str:
- return Core.api_keys()["virustotal"]["key"]
+ return Core.api_keys()['virustotal']['key']
@staticmethod
def proxy_list() -> list:
- keys = yaml.safe_load(Core._read_config("proxies.yaml"))
- http_list = (
- [f"http://{proxy}" for proxy in keys["http"]]
- if keys["http"] is not None
- else []
- )
+ keys = yaml.safe_load(Core._read_config('proxies.yaml'))
+ http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
return http_list
@staticmethod
def banner() -> None:
- print("*******************************************************************")
- print("* _ _ _ *")
- print(r"* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *")
+ print('*******************************************************************')
+ print('* _ _ _ *')
+ print(r'* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *')
print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *")
- print(r"* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *")
- print(r"* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *")
- print("* *")
- print(
- "* theHarvester {version}{filler}*".format(
- version=version(), filler=" " * (51 - len(version()))
- )
- )
- print("* Coded by Christian Martorella *")
- print("* Edge-Security Research *")
- print("* cmartorella@edge-security.com *")
- print("* *")
- print("*******************************************************************")
+ print(r'* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *')
+ print(r'* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *')
+ print('* *')
+ print('* theHarvester {version}{filler}*'.format(version=version(), filler=' ' * (51 - len(version()))))
+ print('* Coded by Christian Martorella *')
+ print('* Edge-Security Research *')
+ print('* cmartorella@edge-security.com *')
+ print('* *')
+ print('*******************************************************************')
@staticmethod
def get_supportedengines() -> list[str | Any]:
supportedengines = [
- "anubis",
- "baidu",
- "bevigil",
- "binaryedge",
- "bing",
- "bingapi",
- "bufferoverun",
- "brave",
- "censys",
- "certspotter",
- "criminalip",
- "crtsh",
- "dnsdumpster",
- "duckduckgo",
- "fullhunt",
- "github-code",
- "hackertarget",
- "hunter",
- "hunterhow",
- "intelx",
- "netlas",
- "onyphe",
- "otx",
- "pentesttools",
- "projectdiscovery",
- "rapiddns",
- "rocketreach",
- "securityTrails",
- "sitedossier",
- "subdomaincenter",
- "subdomainfinderc99",
- "threatminer",
- "tomba",
- "urlscan",
- "virustotal",
- "yahoo",
- "zoomeye",
+ 'anubis',
+ 'baidu',
+ 'bevigil',
+ 'binaryedge',
+ 'bing',
+ 'bingapi',
+ 'bufferoverun',
+ 'brave',
+ 'censys',
+ 'certspotter',
+ 'criminalip',
+ 'crtsh',
+ 'dnsdumpster',
+ 'duckduckgo',
+ 'fullhunt',
+ 'github-code',
+ 'hackertarget',
+ 'hunter',
+ 'hunterhow',
+ 'intelx',
+ 'netlas',
+ 'onyphe',
+ 'otx',
+ 'pentesttools',
+ 'projectdiscovery',
+ 'rapiddns',
+ 'rocketreach',
+ 'securityTrails',
+ 'sitedossier',
+ 'subdomaincenter',
+ 'subdomainfinderc99',
+ 'threatminer',
+ 'tomba',
+ 'urlscan',
+ 'virustotal',
+ 'yahoo',
+ 'zoomeye',
]
return supportedengines
@@ -214,58 +206,58 @@ def get_user_agent() -> str:
    # Last updated 7/2/23
# TODO use bs4 to auto parse user agents
user_agents = [
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43",
- "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0",
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37",
- "Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15",
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
- "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
- "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43',
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0',
+ 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37',
+ 'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15',
+ 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0',
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
+ 'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
]
return random.choice(user_agents)
@@ -278,129 +270,85 @@ async def post_fetch(
cls,
url,
headers=None,
- data: str | dict[str, str] = "",
- params: str = "",
+ data: str | dict[str, str] = '',
+ params: str = '',
json: bool = False,
proxy: bool = False,
):
if headers is None:
headers = {}
if len(headers) == 0:
- headers = {"User-Agent": Core.get_user_agent()}
+ headers = {'User-Agent': Core.get_user_agent()}
timeout = aiohttp.ClientTimeout(total=720)
        # By default, timeout is 5 minutes; changed to 12 minutes as
# results are well worth the wait
try:
if proxy:
proxy = random.choice(cls().proxy_list)
- if params != "":
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout
- ) as session:
- async with session.get(
- url, params=params, proxy=proxy
- ) as response:
+ if params != '':
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+ async with session.get(url, params=params, proxy=proxy) as response:
await asyncio.sleep(5)
- return (
- await response.text()
- if json is False
- else await response.json()
- )
+ return await response.text() if json is False else await response.json()
else:
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
async with session.get(url, proxy=proxy) as response:
await asyncio.sleep(5)
- return (
- await response.text()
- if json is False
- else await response.json()
- )
- elif params == "":
+ return await response.text() if json is False else await response.json()
+ elif params == '':
if isinstance(data, str):
data = json_loader.loads(data)
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
async with session.post(url, data=data) as resp:
await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json()
else:
if isinstance(data, str):
data = json_loader.loads(data)
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
sslcontext = ssl.create_default_context(cafile=certifi.where())
- async with session.post(
- url, data=data, ssl=sslcontext, params=params
- ) as resp:
+ async with session.post(url, data=data, ssl=sslcontext, params=params) as resp:
await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json()
except Exception as e:
- print(f"An exception has occurred in post_fetch: {e}")
- return ""
+ print(f'An exception has occurred in post_fetch: {e}')
+ return ''
@classmethod
- async def fetch(
- cls, session, url, params: Sized = "", json: bool = False, proxy: str = ""
- ) -> str | dict | list | bool:
+ async def fetch(cls, session, url, params: Sized = '', json: bool = False, proxy: str = '') -> str | dict | list | bool:
# This fetch method solely focuses on get requests
try:
# Wrap in try except due to 0x89 png/jpg files
# This fetch method solely focuses on get requests
- if proxy != "":
+ if proxy != '':
proxy = str(random.choice(cls().proxy_list))
if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where())
- async with session.get(
- url, ssl=sslcontext, params=params, proxy=proxy
- ) as response:
- return (
- await response.text()
- if json is False
- else await response.json()
- )
+ async with session.get(url, ssl=sslcontext, params=params, proxy=proxy) as response:
+ return await response.text() if json is False else await response.json()
else:
sslcontext = ssl.create_default_context(cafile=certifi.where())
- async with session.get(
- url, ssl=sslcontext, proxy=proxy
- ) as response:
+ async with session.get(url, ssl=sslcontext, proxy=proxy) as response:
await asyncio.sleep(5)
- return (
- await response.text()
- if json is False
- else await response.json()
- )
+ return await response.text() if json is False else await response.json()
if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext, params=params) as response:
await asyncio.sleep(5)
- return (
- await response.text()
- if json is False
- else await response.json()
- )
+ return await response.text() if json is False else await response.json()
else:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5)
- return (
- await response.text()
- if json is False
- else await response.json()
- )
+ return await response.text() if json is False else await response.json()
except Exception as e:
- print(f"An exception has occurred: {e}")
- return ""
+ print(f'An exception has occurred: {e}')
+ return ''
@staticmethod
- async def takeover_fetch(
- session, url: str, proxy: str = ""
- ) -> tuple[Any, Any] | str:
+ async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any] | str:
# This fetch method solely focuses on get requests
try:
# Wrap in try except due to 0x89 png/jpg files
@@ -408,12 +356,10 @@ async def takeover_fetch(
# TODO determine if method for post requests is necessary
# url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
# Clean up urls with proper schemas
- if proxy != "":
- if "https://" in url:
+ if proxy != '':
+ if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where())
- async with session.get(
- url, proxy=proxy, ssl=sslcontext
- ) as response:
+ async with session.get(url, proxy=proxy, ssl=sslcontext) as response:
await asyncio.sleep(5)
return url, await response.text()
else:
@@ -421,7 +367,7 @@ async def takeover_fetch(
await asyncio.sleep(5)
return url, await response.text()
else:
- if "https://" in url:
+ if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5)
@@ -431,15 +377,15 @@ async def takeover_fetch(
await asyncio.sleep(5)
return url, await response.text()
except Exception as e:
- print(f"Takeover check error: {e}")
- return url, ""
+ print(f'Takeover check error: {e}')
+ return url, ''
@classmethod
async def fetch_all(
cls,
urls,
headers=None,
- params: Sized = "",
+ params: Sized = '',
json: bool = False,
takeover: bool = False,
proxy: bool = False,
@@ -449,29 +395,18 @@ async def fetch_all(
headers = {}
timeout = aiohttp.ClientTimeout(total=60)
if len(headers) == 0:
- headers = {"User-Agent": Core.get_user_agent()}
+ headers = {'User-Agent': Core.get_user_agent()}
if takeover:
- async with aiohttp.ClientSession(
- headers=headers, timeout=aiohttp.ClientTimeout(total=15)
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
if proxy:
return await asyncio.gather(
- *[
- AsyncFetcher.takeover_fetch(
- session, url, proxy=random.choice(cls().proxy_list)
- )
- for url in urls
- ]
+ *[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls]
)
else:
- return await asyncio.gather(
- *[AsyncFetcher.takeover_fetch(session, url) for url in urls]
- )
+ return await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
if len(params) == 0:
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout, max_field_size=13000
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout, max_field_size=13000) as session:
if proxy:
return await asyncio.gather(
*[
@@ -485,14 +420,10 @@ async def fetch_all(
]
)
else:
- return await asyncio.gather(
- *[AsyncFetcher.fetch(session, url, json=json) for url in urls]
- )
+ return await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
else:
# Indicates the request has certain params
- async with aiohttp.ClientSession(
- headers=headers, timeout=timeout
- ) as session:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
if proxy:
return await asyncio.gather(
*[
@@ -507,9 +438,4 @@ async def fetch_all(
]
)
else:
- return await asyncio.gather(
- *[
- AsyncFetcher.fetch(session, url, params, json)
- for url in urls
- ]
- )
+ return await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
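AsyncFetcher.fetch_all above fans a list of URLs out over one aiohttp session and gathers the response bodies (text by default, parsed JSON with json=True), defaulting the User-Agent header to a random choice from Core.get_user_agent(). A minimal usage sketch, assuming theHarvester is importable and with example.com as a stand-in target:

    import asyncio

    from theHarvester.lib.core import AsyncFetcher

    async def demo() -> None:
        urls = ['https://example.com', 'https://www.example.com']   # stand-in targets
        # With no headers given, fetch_all falls back to a random User-Agent from Core.get_user_agent()
        bodies = await AsyncFetcher.fetch_all(urls)
        for body in bodies:
            print(len(body) if isinstance(body, str) else body)

    asyncio.run(demo())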
diff --git a/theHarvester/lib/hostchecker.py b/theHarvester/lib/hostchecker.py
index bb5f806e..17a6796d 100644
--- a/theHarvester/lib/hostchecker.py
+++ b/theHarvester/lib/hostchecker.py
@@ -40,13 +40,13 @@ async def resolve_host(host, resolver) -> str:
result = await resolver.gethostbyname(host, socket.AF_INET)
addresses = result.addresses
if addresses == [] or addresses is None or result is None:
- return f"{host}:"
+ return f'{host}:'
else:
- addresses = ",".join(map(str, list(sorted(set(addresses)))))
+ addresses = ','.join(map(str, list(sorted(set(addresses)))))
# addresses = list(sorted(addresses))
- return f"{host}:{addresses}"
+ return f'{host}:{addresses}'
except Exception:
- return f"{host}:"
+ return f'{host}:'
# https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
@staticmethod
@@ -57,9 +57,7 @@ def chunks(lst, n):
async def query_all(self, resolver, hosts) -> list[Any]:
# TODO chunk list into 50 pieces regardless of IPs and subnets
- results = await asyncio.gather(
- *[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts]
- )
+ results = await asyncio.gather(*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts])
return results
async def check(self):
@@ -75,9 +73,9 @@ async def check(self):
results = await self.query_all(resolver, chunk)
all_results.update(results)
for pair in results:
- host, addresses = pair.split(":")
+ host, addresses = pair.split(':')
self.realhosts.append(host)
- self.addresses.update({addr for addr in addresses.split(",")})
+ self.addresses.update({addr for addr in addresses.split(',')})
# address may be a list of ips
# and do a set comprehension to remove duplicates
self.realhosts.sort()
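resolve_host above packs each answer into a single 'host:addr1,addr2' string (or 'host:' when resolution fails), and check() splits those pairs back apart. A small sketch of that round trip on a made-up pair:

    pair = 'www.example.com:93.184.216.34,93.184.215.14'    # shape produced by resolve_host; addresses made up
    host, addresses = pair.split(':')
    ips = {addr for addr in addresses.split(',') if addr}   # 'if addr' drops the empty entry an unresolved host leaves
    print(host, sorted(ips))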
diff --git a/theHarvester/lib/stash.py b/theHarvester/lib/stash.py
index f2ee9eba..1e12f881 100644
--- a/theHarvester/lib/stash.py
+++ b/theHarvester/lib/stash.py
@@ -5,7 +5,7 @@
import aiosqlite
-db_path = os.path.expanduser("~/.local/share/theHarvester")
+db_path = os.path.expanduser('~/.local/share/theHarvester')
if not os.path.isdir(db_path):
os.makedirs(db_path)
@@ -13,9 +13,9 @@
class StashManager:
def __init__(self) -> None:
- self.db = os.path.join(db_path, "stash.sqlite")
- self.results = ""
- self.totalresults = ""
+ self.db = os.path.join(db_path, 'stash.sqlite')
+ self.results = ''
+ self.totalresults = ''
self.latestscandomain: dict = {}
self.domainscanhistory: list = []
self.scanboarddata: dict = {}
@@ -26,7 +26,7 @@ def __init__(self) -> None:
async def do_init(self) -> None:
async with aiosqlite.connect(self.db) as db:
await db.execute(
- "CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)"
+ 'CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)'
)
await db.commit()
@@ -39,7 +39,7 @@ async def store(self, domain, resource, res_type, source) -> None:
try:
async with aiosqlite.connect(self.db, timeout=30) as db:
await db.execute(
- "INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
+ 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
(self.domain, self.resource, self.type, self.date, self.source),
)
await db.commit()
@@ -52,13 +52,11 @@ async def store_all(self, domain, all, res_type, source) -> None:
self.type = res_type
self.source = source
self.date = datetime.date.today()
- master_list = [
- (self.domain, x, self.type, self.date, self.source) for x in self.all
- ]
+ master_list = [(self.domain, x, self.type, self.date, self.source) for x in self.all]
async with aiosqlite.connect(self.db, timeout=30) as db:
try:
await db.executemany(
- "INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
+ 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
master_list,
)
await db.commit()
@@ -68,43 +66,41 @@ async def store_all(self, domain, all, res_type, source) -> None:
async def generatedashboardcode(self, domain):
try:
# TODO refactor into generic method
- self.latestscandomain["domain"] = domain
+ self.latestscandomain['domain'] = domain
async with aiosqlite.connect(self.db, timeout=30) as conn:
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["host"] = data[0]
+ self.latestscandomain['host'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["email"] = data[0]
+ self.latestscandomain['email'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["ip"] = data[0]
+ self.latestscandomain['ip'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["vhost"] = data[0]
+ self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["shodan"] = data[0]
- cursor = await conn.execute(
- """SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
- )
+ self.latestscandomain['shodan'] = data[0]
+ cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
data = await cursor.fetchone()
- self.latestscandomain["latestdate"] = data[0]
+ self.latestscandomain['latestdate'] = data[0]
latestdate = data[0]
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@@ -114,7 +110,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailshost = await cursor.fetchall()
- self.latestscandomain["scandetailshost"] = scandetailshost
+ self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
(
@@ -123,7 +119,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsemail = await cursor.fetchall()
- self.latestscandomain["scandetailsemail"] = scandetailsemail
+ self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
(
@@ -132,7 +128,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsip = await cursor.fetchall()
- self.latestscandomain["scandetailsip"] = scandetailsip
+ self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
(
@@ -141,7 +137,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsvhost = await cursor.fetchall()
- self.latestscandomain["scandetailsvhost"] = scandetailsvhost
+ self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
(
@@ -150,14 +146,12 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsshodan = await cursor.fetchall()
- self.latestscandomain["scandetailsshodan"] = scandetailsshodan
+ self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain
except Exception as e:
print(e)
- async def getlatestscanresults(
- self, domain, previousday: bool = False
- ) -> Iterable[Row | str] | None:
+ async def getlatestscanresults(self, domain, previousday: bool = False) -> Iterable[Row | str] | None:
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
if previousday:
@@ -170,15 +164,13 @@ async def getlatestscanresults(
(domain,),
)
previousscandate = await cursor.fetchone()
- if (
- not previousscandate
- ): # When theHarvester runs first time/day, this query will return.
+ if not previousscandate: # When theHarvester runs first time/day, this query will return.
self.previousscanresults = [
- "No results",
- "No results",
- "No results",
- "No results",
- "No results",
+ 'No results',
+ 'No results',
+ 'No results',
+ 'No results',
+ 'No results',
]
else:
cursor = await conn.execute(
@@ -197,9 +189,7 @@ async def getlatestscanresults(
self.previousscanresults = list(results)
return self.previousscanresults
except Exception as e:
- print(
- f"Error in getting the previous scan results from the database: {e}"
- )
+ print(f'Error in getting the previous scan results from the database: {e}')
else:
try:
cursor = await conn.execute(
@@ -223,46 +213,32 @@ async def getlatestscanresults(
self.latestscanresults = list(results)
return self.latestscanresults
except Exception as e:
- print(
- f"Error in getting the latest scan results from the database: {e}"
- )
+ print(f'Error in getting the latest scan results from the database: {e}')
except Exception as e:
- print(f"Error connecting to theHarvester database: {e}")
+ print(f'Error connecting to theHarvester database: {e}')
return self.latestscanresults
async def getscanboarddata(self):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
- cursor = await conn.execute(
- '''SELECT COUNT(*) from results WHERE type="host"'''
- )
+ cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
data = await cursor.fetchone()
- self.scanboarddata["host"] = data[0]
- cursor = await conn.execute(
- '''SELECT COUNT(*) from results WHERE type="email"'''
- )
+ self.scanboarddata['host'] = data[0]
+ cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
data = await cursor.fetchone()
- self.scanboarddata["email"] = data[0]
- cursor = await conn.execute(
- '''SELECT COUNT(*) from results WHERE type="ip"'''
- )
+ self.scanboarddata['email'] = data[0]
+ cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="ip"''')
data = await cursor.fetchone()
- self.scanboarddata["ip"] = data[0]
- cursor = await conn.execute(
- '''SELECT COUNT(*) from results WHERE type="vhost"'''
- )
+ self.scanboarddata['ip'] = data[0]
+ cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="vhost"''')
data = await cursor.fetchone()
- self.scanboarddata["vhost"] = data[0]
- cursor = await conn.execute(
- '''SELECT COUNT(*) from results WHERE type="shodan"'''
- )
+ self.scanboarddata['vhost'] = data[0]
+ cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="shodan"''')
data = await cursor.fetchone()
- self.scanboarddata["shodan"] = data[0]
- cursor = await conn.execute(
- """SELECT COUNT(DISTINCT(domain)) FROM results """
- )
+ self.scanboarddata['shodan'] = data[0]
+ cursor = await conn.execute("""SELECT COUNT(DISTINCT(domain)) FROM results """)
data = await cursor.fetchone()
- self.scanboarddata["domains"] = data[0]
+ self.scanboarddata['domains'] = data[0]
return self.scanboarddata
except Exception as e:
print(e)
@@ -302,12 +278,12 @@ async def getscanhistorydomain(self, domain):
)
countshodan = await cursor.fetchone()
results = {
- "date": str(date[0]),
- "hosts": str(counthost[0]),
- "email": str(countemail[0]),
- "ip": str(countip[0]),
- "vhost": str(countvhost[0]),
- "shodan": str(countshodan[0]),
+ 'date': str(date[0]),
+ 'hosts': str(counthost[0]),
+ 'email': str(countemail[0]),
+ 'ip': str(countip[0]),
+ 'vhost': str(countvhost[0]),
+ 'shodan': str(countshodan[0]),
}
self.domainscanhistory.append(results)
return self.domainscanhistory
@@ -333,42 +309,40 @@ async def getpluginscanstatistics(self) -> Iterable[Row] | None:
async def latestscanchartdata(self, domain):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
- self.latestscandomain["domain"] = domain
+ self.latestscandomain['domain'] = domain
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["host"] = data[0]
+ self.latestscandomain['host'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["email"] = data[0]
+ self.latestscandomain['email'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["ip"] = data[0]
+ self.latestscandomain['ip'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["vhost"] = data[0]
+ self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,),
)
data = await cursor.fetchone()
- self.latestscandomain["shodan"] = data[0]
- cursor = await conn.execute(
- """SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
- )
+ self.latestscandomain['shodan'] = data[0]
+ cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
data = await cursor.fetchone()
- self.latestscandomain["latestdate"] = data[0]
+ self.latestscandomain['latestdate'] = data[0]
latestdate = data[0]
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@@ -378,7 +352,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailshost = await cursor.fetchall()
- self.latestscandomain["scandetailshost"] = scandetailshost
+ self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
(
@@ -387,7 +361,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsemail = await cursor.fetchall()
- self.latestscandomain["scandetailsemail"] = scandetailsemail
+ self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
(
@@ -396,7 +370,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsip = await cursor.fetchall()
- self.latestscandomain["scandetailsip"] = scandetailsip
+ self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
(
@@ -405,7 +379,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsvhost = await cursor.fetchall()
- self.latestscandomain["scandetailsvhost"] = scandetailsvhost
+ self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
(
@@ -414,7 +388,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsshodan = await cursor.fetchall()
- self.latestscandomain["scandetailsshodan"] = scandetailsshodan
+ self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain
except Exception as e:
print(e)
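StashManager persists every finding as one row of the results table (domain, resource, type, find_date, source), and the dashboard helpers above are per-type COUNT queries over it. A minimal sketch of the write path, assuming theHarvester is installed; note it writes to the real stash.sqlite under ~/.local/share/theHarvester, and the domain, hostnames, and source label are made up:

    import asyncio

    from theHarvester.lib.stash import StashManager

    async def demo() -> None:
        stash = StashManager()
        await stash.do_init()      # CREATE TABLE IF NOT EXISTS results (...)
        await stash.store_all('example.com', ['www.example.com', 'mail.example.com'], 'host', 'demo')
        print(await stash.getscanboarddata())   # per-type counts across the whole database

    asyncio.run(demo())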
diff --git a/theHarvester/lib/version.py b/theHarvester/lib/version.py
index 9dd7212a..7fcba431 100644
--- a/theHarvester/lib/version.py
+++ b/theHarvester/lib/version.py
@@ -1,4 +1,4 @@
-VERSION = "4.6.0"
+VERSION = '4.6.0'
def version() -> str:
diff --git a/theHarvester/parsers/intelxparser.py b/theHarvester/parsers/intelxparser.py
index 8dc428be..e47a6166 100644
--- a/theHarvester/parsers/intelxparser.py
+++ b/theHarvester/parsers/intelxparser.py
@@ -10,17 +10,17 @@ async def parse_dictionaries(self, results: dict) -> tuple:
:return: tuple of emails and hosts
"""
if results is not None:
- for dictionary in results["selectors"]:
- field = dictionary["selectorvalue"]
- if "@" in field:
+ for dictionary in results['selectors']:
+ field = dictionary['selectorvalue']
+ if '@' in field:
self.emails.add(field)
else:
field = str(field)
- if "http" in field or "https" in field:
- if field[:5] == "https":
+ if 'http' in field or 'https' in field:
+ if field[:5] == 'https':
field = field[8:]
else:
field = field[7:]
- self.hosts.add(field.replace(")", "").replace(",", ""))
+ self.hosts.add(field.replace(')', '').replace(',', ''))
return self.emails, self.hosts
return None, None
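The host branch above strips the URL scheme by slicing eight characters for https and seven for http, then removes stray ')' and ',' characters before recording the host. The same few lines in isolation, on a made-up selector value:

    field = 'https://portal.example.com/login,'                  # made-up selectorvalue
    field = field[8:] if field[:5] == 'https' else field[7:]     # drop 'https://' or 'http://'
    print(field.replace(')', '').replace(',', ''))               # portal.example.com/login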
diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py
index f1dd2da1..ae22c987 100644
--- a/theHarvester/parsers/myparser.py
+++ b/theHarvester/parsers/myparser.py
@@ -10,59 +10,49 @@ def __init__(self, results, word) -> None:
async def genericClean(self) -> None:
self.results = (
- self.results.replace("", "")
- .replace("", "")
- .replace("", "")
- .replace("", "")
- .replace("%3a", "")
- .replace("", "")
- .replace("", "")
- .replace("", "")
- .replace("", "")
+ self.results.replace('', '')
+ .replace('', '')
+ .replace('', '')
+ .replace('', '')
+ .replace('%3a', '')
+ .replace('', '')
+ .replace('', '')
+ .replace('', '')
+ .replace('', '')
)
for search in (
- "<",
- ">",
- ":",
- "=",
- ";",
- "&",
- "%3A",
- "%3D",
- "%3C",
- "%2f",
- "/",
- "\\",
+ '<',
+ '>',
+ ':',
+ '=',
+ ';',
+ '&',
+ '%3A',
+ '%3D',
+ '%3C',
+ '%2f',
+ '/',
+ '\\',
):
- self.results = self.results.replace(search, " ")
+ self.results = self.results.replace(search, ' ')
async def urlClean(self) -> None:
- self.results = (
- self.results.replace("", "")
- .replace("", "")
- .replace("%2f", "")
- .replace("%3a", "")
- )
- for search in ("<", ">", ":", "=", ";", "&", "%3A", "%3D", "%3C"):
- self.results = self.results.replace(search, " ")
+ self.results = self.results.replace('', '').replace('', '').replace('%2f', '').replace('%3a', '')
+ for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
+ self.results = self.results.replace(search, ' ')
async def emails(self):
await self.genericClean()
# Local part is required, charset is flexible.
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
- reg_emails = re.compile(
- r"[a-zA-Z0-9.\-_+#~!$&\',;=:]+"
- + "@"
- + "[a-zA-Z0-9.-]*"
- + self.word.replace("www.", "")
- )
+ reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', ''))
self.temp = reg_emails.findall(self.results)
emails = await self.unique()
true_emails = {
(
str(email)[1:].lower().strip()
- if len(str(email)) > 1 and str(email)[0] == "."
+ if len(str(email)) > 1 and str(email)[0] == '.'
else len(str(email)) > 1 and str(email).lower().strip()
)
for email in emails
@@ -76,11 +66,7 @@ async def fileurls(self, file) -> list:
self.temp = reg_urls.findall(self.results)
allurls = await self.unique()
for iteration in allurls:
- if (
- iteration.count("webcache")
- or iteration.count("google.com")
- or iteration.count("search?hl")
- ):
+ if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
pass
else:
urls.append(iteration)
@@ -90,11 +76,11 @@ async def hostnames(self):
# should check both www. and not www.
hostnames = []
await self.genericClean()
- reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word)
+ reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word)
first_hostnames = reg_hosts.findall(self.results)
hostnames.extend(first_hostnames)
# TODO determine if necessary below or if only pass through is fine
- reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word.replace("www.", ""))
+ reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', ''))
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word)
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?')
second_hostnames = reg_hosts.findall(self.results)
@@ -102,31 +88,29 @@ async def hostnames(self):
return list(set(hostnames))
async def hostnames_all(self):
- reg_hosts = re.compile("(.*?)")
+ reg_hosts = re.compile('(.*?)')
temp = reg_hosts.findall(self.results)
for iteration in temp:
- if iteration.count(":"):
- res = iteration.split(":")[1].split("/")[2]
+ if iteration.count(':'):
+ res = iteration.split(':')[1].split('/')[2]
else:
- res = iteration.split("/")[0]
+ res = iteration.split('/')[0]
self.temp.append(res)
hostnames = await self.unique()
return hostnames
async def set(self):
- reg_sets = re.compile(r">[a-zA-Z\d]*")
+ reg_sets = re.compile(r'>[a-zA-Z\d]*')
self.temp = reg_sets.findall(self.results)
sets = []
for iteration in self.temp:
- delete = iteration.replace(">", "")
- delete = delete.replace("', '')
+ delete = delete.replace(' Set[str]:
- found = re.finditer(
- r"(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*", self.results
- )
+ found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*', self.results)
urls = {match.group().strip() for match in found}
return urls
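The email regex above requires the target domain (with any leading 'www.' removed) to appear after the '@', so addresses on unrelated domains are dropped. A standalone sketch of the same pattern against a made-up sample string:

    import re

    word = 'example.com'   # the target domain handed to the parser
    reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + word.replace('www.', ''))
    sample = 'contact alice@example.com or bob@mail.example.com, not carol@other.org'
    print(reg_emails.findall(sample))   # ['alice@example.com', 'bob@mail.example.com']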
diff --git a/theHarvester/parsers/securitytrailsparser.py b/theHarvester/parsers/securitytrailsparser.py
index 3edc976c..76194c21 100644
--- a/theHarvester/parsers/securitytrailsparser.py
+++ b/theHarvester/parsers/securitytrailsparser.py
@@ -13,7 +13,7 @@ async def parse_text(self) -> tuple[set, set]:
line = self.text[index].strip()
if '"ip":' in line:
# Extract IP.
- ip = ""
+ ip = ''
for ch in line[7:]:
if ch == '"':
break
@@ -25,17 +25,13 @@ async def parse_text(self) -> tuple[set, set]:
sub_domain_flag = 1
continue
elif sub_domain_flag > 0:
- if "]" in line:
+ if ']' in line:
sub_domain_flag = 0
else:
- if "www" in self.word:
- self.word = (
- str(self.word).replace("www.", "").replace("www", "")
- )
+ if 'www' in self.word:
+ self.word = str(self.word).replace('www.', '').replace('www', '')
# Remove www from word if entered
- self.hostnames.add(
- str(line).replace('"', "").replace(",", "") + "." + self.word
- )
+ self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word)
else:
continue
return self.ips, self.hostnames
diff --git a/theHarvester/restfulHarvest.py b/theHarvester/restfulHarvest.py
index 13d8d3c7..90cb3140 100644
--- a/theHarvester/restfulHarvest.py
+++ b/theHarvester/restfulHarvest.py
@@ -6,35 +6,35 @@
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
- "-H",
- "--host",
- default="127.0.0.1",
- help="IP address to listen on default is 127.0.0.1",
+ '-H',
+ '--host',
+ default='127.0.0.1',
+ help='IP address to listen on default is 127.0.0.1',
)
parser.add_argument(
- "-p",
- "--port",
+ '-p',
+ '--port',
default=5000,
- help="Port to bind the web server to, default is 5000",
+ help='Port to bind the web server to, default is 5000',
type=int,
)
parser.add_argument(
- "-l",
- "--log-level",
- default="info",
- help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
+ '-l',
+ '--log-level',
+ default='info',
+ help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set',
)
parser.add_argument(
- "-r",
- "--reload",
+ '-r',
+ '--reload',
default=False,
- help="Enable automatic reload used during development of the api",
- action="store_true",
+ help='Enable automatic reload used during development of the api',
+ action='store_true',
)
args: argparse.Namespace = parser.parse_args()
uvicorn.run(
- "theHarvester.lib.api.api:app",
+ 'theHarvester.lib.api.api:app',
host=args.host,
port=args.port,
log_level=args.log_level,
@@ -42,5 +42,5 @@ def main():
)
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
diff --git a/theHarvester/screenshot/screenshot.py b/theHarvester/screenshot/screenshot.py
index 8db0809f..c48761bc 100644
--- a/theHarvester/screenshot/screenshot.py
+++ b/theHarvester/screenshot/screenshot.py
@@ -17,27 +17,21 @@
class ScreenShotter:
def __init__(self, output) -> None:
self.output = output
- self.slash = "\\" if "win" in sys.platform else "/"
- self.slash = (
- "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
- )
+ self.slash = '\\' if 'win' in sys.platform else '/'
+ self.slash = '' if (self.output[-1] == '\\' or self.output[-1] == '/') else self.slash
def verify_path(self) -> bool:
try:
if not os.path.isdir(self.output):
- answer = input(
- "[+] The output path you have entered does not exist would you like to create it (y/n): "
- )
- if answer.lower() == "yes" or answer.lower() == "y":
+ answer = input('[+] The output path you have entered does not exist would you like to create it (y/n): ')
+ if answer.lower() == 'yes' or answer.lower() == 'y':
os.makedirs(self.output)
return True
else:
return False
return True
except Exception as e:
- print(
- f"An exception has occurred while attempting to verify output path's existence: {e}"
- )
+ print(f"An exception has occurred while attempting to verify output path's existence: {e}")
return False
@staticmethod
@@ -47,29 +41,25 @@ async def verify_installation() -> None:
async with async_playwright() as p:
browser = await p.chromium.launch()
await browser.close()
- print("Playwright and Chromium are successfully installed.")
+ print('Playwright and Chromium are successfully installed.')
except Exception as e:
- print(
- f"An exception has occurred while attempting to verify installation: {e}"
- )
+ print(f'An exception has occurred while attempting to verify installation: {e}')
@staticmethod
def chunk_list(items: Collection, chunk_size: int) -> list:
# Based off of: https://github.com/apache/incubator-sdap-ingester
- return [
- list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)
- ]
+ return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)]
@staticmethod
async def visit(url: str) -> tuple[str, str]:
try:
timeout = aiohttp.ClientTimeout(total=35)
headers = {
- "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
- "Chrome/122.0.0.0 Safari/537.36"
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/122.0.0.0 Safari/537.36'
}
- url = f"http://{url}" if not url.startswith("http") else url
- url = url.replace("www.", "")
+ url = f'http://{url}' if not url.startswith('http') else url
+ url = url.replace('www.', '')
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with aiohttp.ClientSession(
timeout=timeout,
@@ -77,16 +67,16 @@ async def visit(url: str) -> tuple[str, str]:
connector=aiohttp.TCPConnector(ssl=sslcontext),
) as session:
async with session.get(url, verify_ssl=False) as resp:
- text = await resp.text("UTF-8")
- return f"http://{url}" if not url.startswith("http") else url, text
+ text = await resp.text('UTF-8')
+ return f'http://{url}' if not url.startswith('http') else url, text
except Exception as e:
- print(f"An exception has occurred while attempting to visit {url} : {e}")
- return "", ""
+ print(f'An exception has occurred while attempting to visit {url} : {e}')
+ return '', ''
async def take_screenshot(self, url: str) -> tuple[str, ...]:
- url = f"http://{url}" if not url.startswith("http") else url
- url = url.replace("www.", "")
- print(f"Attempting to take a screenshot of: {url}")
+ url = f'http://{url}' if not url.startswith('http') else url
+ url = url.replace('www.', '')
+ print(f'Attempting to take a screenshot of: {url}')
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
# New browser context
@@ -100,10 +90,8 @@ async def take_screenshot(self, url: str) -> tuple[str, ...]:
await page.goto(url, timeout=35000)
await page.screenshot(path=path)
except Exception as e:
- print(
- f"An exception has occurred attempting to screenshot: {url} : {e}"
- )
- path = ""
+ print(f'An exception has occurred attempting to screenshot: {url} : {e}')
+ path = ''
finally:
await page.close()
await context.close()
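chunk_list above splits a collection into fixed-size slices so the screenshot targets can be processed in batches. The same expression in isolation:

    from collections.abc import Collection

    def chunk_list(items: Collection, chunk_size: int) -> list:
        # identical expression to ScreenShotter.chunk_list
        return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)]

    print(chunk_list(['a', 'b', 'c', 'd', 'e'], 2))   # [['a', 'b'], ['c', 'd'], ['e']]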
diff --git a/theHarvester/theHarvester.py b/theHarvester/theHarvester.py
index c07c2830..80692f0f 100644
--- a/theHarvester/theHarvester.py
+++ b/theHarvester/theHarvester.py
@@ -6,7 +6,7 @@
def main():
platform = sys.platform
- if platform == "win32":
+ if platform == 'win32':
# Required or things will break if trying to take screenshots
import multiprocessing
@@ -23,9 +23,9 @@ def main():
uvloop.install()
- if "linux" in platform:
+ if 'linux' in platform:
import aiomultiprocess
# As we are not using Windows, we can change the spawn method to fork for greater performance
- aiomultiprocess.set_context("fork")
+ aiomultiprocess.set_context('fork')
asyncio.run(__main__.entry_point())