mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-20 07:16:31 +08:00
Add new modules and tweaks everywhere
This commit is contained in:
parent
bfc0897cfa
commit
79d5eaef75
|
@ -1,4 +1,7 @@
|
|||
apikeys:
|
||||
binaryedge:
|
||||
key:
|
||||
|
||||
bing:
|
||||
key:
|
||||
|
||||
|
@ -7,7 +10,7 @@ apikeys:
|
|||
secret:
|
||||
|
||||
github:
|
||||
key:
|
||||
key:
|
||||
|
||||
hunter:
|
||||
key:
|
||||
|
@ -32,3 +35,6 @@ apikeys:
|
|||
|
||||
spyse:
|
||||
key:
|
||||
|
||||
zoomeye:
|
||||
key:
|
||||
|
|
|
@ -1,23 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from typing import Dict, List
|
||||
from theHarvester.discovery import *
|
||||
from theHarvester.discovery import dnssearch, takeover, shodansearch
|
||||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib import hostchecker
|
||||
from theHarvester.lib import reportgraph
|
||||
from theHarvester.lib import stash
|
||||
from theHarvester.lib import statichtmlgenerator
|
||||
from theHarvester.lib.core import *
|
||||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
import aiofiles
|
||||
import json
|
||||
import netaddr
|
||||
import re
|
||||
import sys
|
||||
import string
|
||||
import secrets
|
||||
|
||||
|
||||
async def start():
|
||||
async def start(rest_args=None):
|
||||
"""Main program function"""
|
||||
parser = argparse.ArgumentParser(description='theHarvester is used to gather open source intelligence (OSINT) on a company or domain.')
|
||||
parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True)
|
||||
|
@ -33,36 +34,54 @@ async def start():
|
|||
parser.add_argument('-r', '--take-over', help='Check for takeovers.', default=False, action='store_true')
|
||||
parser.add_argument('-n', '--dns-lookup', help='Enable DNS server lookup, default False.', default=False, action='store_true')
|
||||
parser.add_argument('-c', '--dns-brute', help='Perform a DNS brute force on the domain.', default=False, action='store_true')
|
||||
parser.add_argument('-f', '--filename', help='Save the results to an HTML,XML and JSON file.', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, censys, certspotter, crtsh,
|
||||
parser.add_argument('-f', '--filename', help='Save the results to an XML and JSON file.', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, binaryedge, bingapi, bufferoverun, censys, certspotter, crtsh,
|
||||
dnsdumpster, duckduckgo, exalead, github-code, google,
|
||||
hackertarget, hunter, intelx, linkedin, linkedin_links,
|
||||
netcraft, omnisint, otx, pentesttools, projectdiscovery,
|
||||
qwant, rapiddns, rocketreach, securityTrails, spyse, sublist3r, threatcrowd, threatminer,
|
||||
trello, twitter, urlscan, virustotal, yahoo''')
|
||||
trello, twitter, urlscan, virustotal, yahoo, zoomeye''')
|
||||
|
||||
args = parser.parse_args()
|
||||
filename: str = args.filename
|
||||
dnsbrute = (args.dns_brute, False)
|
||||
# determines if filename is coming from rest api or user
|
||||
rest_filename = ''
|
||||
# indicates this from the rest API
|
||||
if rest_args:
|
||||
if rest_args.source and rest_args.source == "getsources":
|
||||
return list(sorted(Core.get_supportedengines()))
|
||||
elif rest_args.dns_brute:
|
||||
args = rest_args
|
||||
dnsbrute = (rest_args.dns_brute, True)
|
||||
else:
|
||||
args = rest_args
|
||||
# We need to make sure the filename is random as to not overwrite other files
|
||||
filename: str = args.filename
|
||||
alphabet = string.ascii_letters + string.digits
|
||||
rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \
|
||||
if len(filename) != 0 else ""
|
||||
|
||||
else:
|
||||
args = parser.parse_args()
|
||||
filename: str = args.filename
|
||||
dnsbrute = (args.dns_brute, False)
|
||||
try:
|
||||
db = stash.StashManager()
|
||||
await db.do_init()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
all_emails: list = []
|
||||
all_hosts: list = []
|
||||
all_ip: list = []
|
||||
all_emails: List = []
|
||||
all_hosts: List = []
|
||||
all_ip: List = []
|
||||
dnslookup = args.dns_lookup
|
||||
dnsserver = args.dns_server
|
||||
dnstld = args.dns_tld
|
||||
engines = []
|
||||
engines: List = []
|
||||
# If the user specifies
|
||||
|
||||
full: list = []
|
||||
ips: list = []
|
||||
full: List = []
|
||||
ips: List = []
|
||||
google_dorking = args.google_dork
|
||||
host_ip: list = []
|
||||
host_ip: List = []
|
||||
limit: int = args.limit
|
||||
shodan = args.shodan
|
||||
start: int = args.start
|
||||
|
@ -72,13 +91,16 @@ async def start():
|
|||
word: str = args.domain
|
||||
takeover_status = args.take_over
|
||||
use_proxy = args.proxies
|
||||
linkedin_people_list_tracker: list = []
|
||||
linkedin_links_tracker: list = []
|
||||
twitter_people_list_tracker: list = []
|
||||
linkedin_people_list_tracker: List = []
|
||||
linkedin_links_tracker: List = []
|
||||
twitter_people_list_tracker: List = []
|
||||
interesting_urls: list = []
|
||||
total_asns: list = []
|
||||
|
||||
async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False,
|
||||
store_emails: bool = False, store_ip: bool = False, store_people: bool = False,
|
||||
store_links: bool = False, store_results: bool = False) -> None:
|
||||
store_links: bool = False, store_results: bool = False,
|
||||
store_interestingurls: bool = False, store_asns: bool = False) -> None:
|
||||
"""
|
||||
Persist details into the database.
|
||||
The details to be stored is controlled by the parameters passed to the method.
|
||||
|
@ -92,6 +114,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
:param store_people: whether to store user details
|
||||
:param store_links: whether to store links
|
||||
:param store_results: whether to fetch details from get_results() and persist
|
||||
:param store_interestingurls: whether to store interesting urls
|
||||
:param store_asns: whether to store asns
|
||||
"""
|
||||
await search_engine.process(use_proxy) if process_param is None else await \
|
||||
search_engine.process(process_param, use_proxy)
|
||||
|
@ -128,24 +152,28 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
await db.store_all(word, all_emails, 'email', source)
|
||||
if store_people:
|
||||
people_list = await search_engine.get_people()
|
||||
if source == 'twitter':
|
||||
twitter_people_list_tracker.extend(people_list)
|
||||
if source == 'linkedin':
|
||||
linkedin_people_list_tracker.extend(people_list)
|
||||
await db_stash.store_all(word, people_list, 'people', source)
|
||||
if len(people_list) == 0:
|
||||
print('\n[*] No users found.\n\n')
|
||||
else:
|
||||
print('\n[*] Users found: ' + str(len(people_list)))
|
||||
print('---------------------')
|
||||
for usr in sorted(list(set(people_list))):
|
||||
print(usr)
|
||||
|
||||
if store_links:
|
||||
links = await search_engine.get_links()
|
||||
await db.store_all(word, links, 'name', engineitem)
|
||||
if len(links) == 0:
|
||||
print('\n[*] No links found.\n\n')
|
||||
else:
|
||||
print(f'\n[*] Links found: {len(links)}')
|
||||
print('---------------------')
|
||||
for link in sorted(list(set(links))):
|
||||
print(link)
|
||||
linkedin_links_tracker.extend(links)
|
||||
if len(links) > 0:
|
||||
await db.store_all(word, links, 'linkedinlinks', engineitem)
|
||||
|
||||
if store_interestingurls:
|
||||
iurls = await search_engine.get_interestingurls()
|
||||
interesting_urls.extend(iurls)
|
||||
if len(iurls) > 0:
|
||||
await db.store_all(word, iurls, 'interestingurl', engineitem)
|
||||
if store_asns:
|
||||
fasns = await search_engine.get_asns()
|
||||
total_asns.extend(fasns)
|
||||
if len(fasns) > 0:
|
||||
await db.store_all(word, fasns, 'asns', engineitem)
|
||||
|
||||
stor_lst = []
|
||||
if args.source is not None:
|
||||
|
@ -163,8 +191,16 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
try:
|
||||
baidu_search = baidusearch.SearchBaidu(word, limit)
|
||||
stor_lst.append(store(baidu_search, engineitem, store_host=True, store_emails=True))
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
elif engineitem == 'binaryedge':
|
||||
from theHarvester.discovery import binaryedgesearch
|
||||
try:
|
||||
binaryedge_search = binaryedgesearch.SearchBinaryEdge(word, limit)
|
||||
stor_lst.append(store(binaryedge_search, engineitem, store_host=True))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
elif engineitem == 'bing' or engineitem == 'bingapi':
|
||||
from theHarvester.discovery import bingsearch
|
||||
|
@ -220,7 +256,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
try:
|
||||
from theHarvester.discovery import dnsdumpster
|
||||
dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word)
|
||||
stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True))
|
||||
stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True, store_ip=True))
|
||||
except Exception as e:
|
||||
print(f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m')
|
||||
|
||||
|
@ -272,7 +308,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
# Import locally or won't work.
|
||||
try:
|
||||
intelx_search = intelxsearch.SearchIntelx(word)
|
||||
stor_lst.append(store(intelx_search, engineitem, store_host=True, store_emails=True))
|
||||
stor_lst.append(store(intelx_search, engineitem, store_interestingurls=True, store_emails=True))
|
||||
except Exception as e:
|
||||
if isinstance(e, MissingKey):
|
||||
print(e)
|
||||
|
@ -387,7 +423,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
from theHarvester.discovery import threatcrowd
|
||||
try:
|
||||
threatcrowd_search = threatcrowd.SearchThreatcrowd(word)
|
||||
stor_lst.append(store(threatcrowd_search, engineitem, store_host=True))
|
||||
stor_lst.append(store(threatcrowd_search, engineitem, store_host=True, store_ip=True))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
@ -395,7 +431,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
from theHarvester.discovery import threatminer
|
||||
try:
|
||||
threatminer_search = threatminer.SearchThreatminer(word)
|
||||
stor_lst.append(store(threatminer_search, engineitem, store_host=True))
|
||||
stor_lst.append(store(threatminer_search, engineitem, store_host=True, store_ip=True))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
@ -414,7 +450,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
from theHarvester.discovery import urlscan
|
||||
try:
|
||||
urlscan_search = urlscan.SearchUrlscan(word)
|
||||
stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True))
|
||||
stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True,
|
||||
store_interestingurls=True, store_asns=True))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
@ -424,13 +461,22 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
stor_lst.append(store(virustotal_search, engineitem, store_host=True))
|
||||
|
||||
elif engineitem == 'yahoo':
|
||||
|
||||
from theHarvester.discovery import yahoosearch
|
||||
yahoo_search = yahoosearch.SearchYahoo(word, limit)
|
||||
stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
|
||||
|
||||
elif engineitem == 'zoomeye':
|
||||
from theHarvester.discovery import zoomeyesearch
|
||||
zoomeye_search = zoomeyesearch.SearchZoomEye(word, limit)
|
||||
stor_lst.append(store(zoomeye_search, engineitem, store_host=True, store_emails=True,
|
||||
store_ip=True, store_interestingurls=True, store_asns=True))
|
||||
else:
|
||||
print('\033[93m[!] Invalid source.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
try:
|
||||
# Check if dns_brute is defined
|
||||
rest_args.dns_brute
|
||||
except Exception:
|
||||
print('\033[93m[!] Invalid source.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
|
||||
async def worker(queue):
|
||||
while True:
|
||||
|
@ -465,6 +511,15 @@ async def handler(lst):
|
|||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
await handler(lst=stor_lst)
|
||||
return_ips: List = []
|
||||
if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False:
|
||||
# Indicates user is using rest api but not wanting output to be saved to a file
|
||||
full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full]
|
||||
full = list({host for host in full if host})
|
||||
full.sort()
|
||||
# cast to string so Rest API can understand type
|
||||
return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
|
||||
return list(set(all_emails)), return_ips, full, '', ''
|
||||
# Sanity check to see if all_emails and all_hosts are defined.
|
||||
try:
|
||||
all_emails
|
||||
|
@ -526,6 +581,9 @@ async def handler(lst):
|
|||
hosts, ips = await dns_force.run()
|
||||
hosts = list({host for host in hosts if ':' in host})
|
||||
hosts.sort(key=lambda el: el.split(':')[0])
|
||||
# Check if Rest API is being used if so return found hosts
|
||||
if dnsbrute[1]:
|
||||
return hosts
|
||||
print('\n[*] Hosts found after DNS brute force:')
|
||||
db = stash.StashManager()
|
||||
for host in hosts:
|
||||
|
@ -677,59 +735,24 @@ async def handler(lst):
|
|||
if args.dns_tld is not False:
|
||||
counter = 0
|
||||
for word in vhost:
|
||||
search = googlesearch.SearchGoogle(word, limit, counter)
|
||||
await search.process(google_dorking)
|
||||
emails = await search.get_emails()
|
||||
hosts = await search.get_hostnames()
|
||||
search_google = googlesearch.SearchGoogle(word, limit, counter)
|
||||
await search_google.process(google_dorking)
|
||||
emails = await search_google.get_emails()
|
||||
hosts = await search_google.get_hostnames()
|
||||
print(emails)
|
||||
print(hosts)
|
||||
else:
|
||||
pass
|
||||
|
||||
# Reporting
|
||||
if filename != "":
|
||||
if filename != '':
|
||||
print('\n[*] Reporting started.')
|
||||
try:
|
||||
print('\n[*] Reporting started.')
|
||||
db = stash.StashManager()
|
||||
scanboarddata = await db.getscanboarddata()
|
||||
latestscanresults = await db.getlatestscanresults(word)
|
||||
previousscanresults = await db.getlatestscanresults(word, previousday=True)
|
||||
latestscanchartdata = await db.latestscanchartdata(word)
|
||||
scanhistorydomain = await db.getscanhistorydomain(word)
|
||||
pluginscanstatistics = await db.getpluginscanstatistics()
|
||||
generator = statichtmlgenerator.HtmlGenerator(word)
|
||||
html_code = await generator.beginhtml()
|
||||
html_code += await generator.generatedashboardcode(scanboarddata)
|
||||
html_code += await generator.generatelatestscanresults(latestscanresults)
|
||||
if len(screenshot_tups) > 0:
|
||||
html_code += await generator.generatescreenshots(screenshot_tups)
|
||||
html_code += await generator.generatepreviousscanresults(previousscanresults)
|
||||
graph = reportgraph.GraphGenerator(word)
|
||||
await graph.init_db()
|
||||
html_code += await graph.drawlatestscangraph(word, latestscanchartdata)
|
||||
html_code += await graph.drawscattergraphscanhistory(word, scanhistorydomain)
|
||||
html_code += await generator.generatepluginscanstatistics(pluginscanstatistics)
|
||||
html_code += '<p><span style="color: #000000;">Report generated on ' + str(
|
||||
datetime.datetime.now()) + '</span></p>'
|
||||
html_code += '''
|
||||
</body>
|
||||
</html>
|
||||
'''
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
|
||||
html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
|
||||
html_file.write(html_code)
|
||||
html_file.close()
|
||||
print('[*] Reporting finished.')
|
||||
print('[*] Saving files.')
|
||||
|
||||
try:
|
||||
# XML REPORT SECTION
|
||||
filename = filename.rsplit('.', 1)[0] + '.xml'
|
||||
|
||||
if len(rest_filename) == 0:
|
||||
filename = filename.rsplit('.', 1)[0] + '.xml'
|
||||
else:
|
||||
filename = 'theHarvester/app/static/' + rest_filename.rsplit('.', 1)[0] + '.xml'
|
||||
# TODO use aiofiles if user is using rest api
|
||||
with open(filename, 'w+') as file:
|
||||
file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
|
||||
for x in all_emails:
|
||||
|
@ -767,16 +790,16 @@ async def handler(lst):
|
|||
file.write('</servers>')
|
||||
|
||||
file.write('</theHarvester>')
|
||||
print('[*] XML File saved.')
|
||||
except Exception as er:
|
||||
print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
|
||||
print('[*] XML File saved.')
|
||||
except Exception as error:
|
||||
print(f'\033[93m[!] An error occurred while saving the XML file: {error} \033[0m')
|
||||
|
||||
try:
|
||||
# JSON REPORT SECTION
|
||||
filename = filename.rsplit('.', 1)[0] + '.json'
|
||||
|
||||
# create dict with values for json output
|
||||
json_dict = dict()
|
||||
json_dict: Dict = dict()
|
||||
|
||||
json_dict["emails"] = [email for email in all_emails]
|
||||
json_dict["hosts"] = [host for host in full]
|
||||
|
@ -791,9 +814,9 @@ async def handler(lst):
|
|||
if len(linkedin_links_tracker) > 0:
|
||||
json_dict["linkedin_links"] = [link for link in list(sorted(set(linkedin_links_tracker)))]
|
||||
|
||||
shodan_dict = dict()
|
||||
shodan_dict: Dict = dict()
|
||||
if shodanres != []:
|
||||
shodanalysis = []
|
||||
shodanalysis: List = []
|
||||
for x in shodanres:
|
||||
res = x.split('SAPO')
|
||||
shodan_dict[res[0]] = [res[2], [res[1]]]
|
||||
|
|
40
theHarvester/discovery/binaryedgesearch.py
Normal file
40
theHarvester/discovery/binaryedgesearch.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
from theHarvester.discovery.constants import *
|
||||
import asyncio
|
||||
|
||||
|
||||
class SearchBinaryEdge:
    """Collect subdomains of a domain from the BinaryEdge subdomain endpoint."""

    def __init__(self, word, limit):
        """Store the target and page limit and load the BinaryEdge API key.

        :param word: domain whose subdomains are requested
        :param limit: upper bound (exclusive) on the page numbers requested
        :raises MissingKey: when no BinaryEdge key is configured
        """
        self.word = word
        self.totalhosts = set()
        self.proxy = False
        self.key = Core.binaryedge_key()
        # Never ask for more than pages 1..500.
        capped = 501 if limit >= 501 else limit
        # range(1, 1) would be empty, so a limit of 1 is bumped to fetch page 1.
        if capped == 1:
            capped = 2
        self.limit = capped
        if self.key is None:
            raise MissingKey('binaryedge')

    async def do_search(self):
        """Request result pages one by one and accumulate the returned events."""
        endpoint = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
        request_headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
        page_number = 1
        while page_number < self.limit:
            fetched = await AsyncFetcher.fetch_all([endpoint], json=True, proxy=self.proxy,
                                                   params={'page': page_number}, headers=request_headers)
            payload = fetched[0]
            if 'status' in payload.keys() and 'message' in payload.keys():
                if payload['status'] == 400 or 'Bad Parameter' in payload['message'] \
                        or 'Error' in payload['message']:
                    # 400 status code means no more results
                    break
            if 'events' in payload.keys():
                events = payload['events']
                if len(events) == 0:
                    break
                self.totalhosts.update(events)
            await asyncio.sleep(get_delay())
            page_number += 1

    async def get_hostnames(self) -> set:
        """Return the set of hosts gathered so far."""
        return self.totalhosts

    async def process(self, proxy=False):
        """Record the proxy setting, then run the search."""
        self.proxy = proxy
        await self.do_search()
|
|
@ -25,8 +25,8 @@ async def do_search(self):
|
|||
# Based on: https://github.com/IntelligenceX/SDK/blob/master/Python/intelxapi.py
|
||||
# API requests self identification
|
||||
# https://intelx.io/integrations
|
||||
headers: dict = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'}
|
||||
data: dict = {
|
||||
headers = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'}
|
||||
data = {
|
||||
"term": self.word,
|
||||
"buckets": [],
|
||||
"lookuplevel": 0,
|
||||
|
@ -59,8 +59,8 @@ async def process(self, proxy=False):
|
|||
intelx_parser = intelxparser.Parser()
|
||||
self.info = await intelx_parser.parse_dictionaries(self.results)
|
||||
|
||||
async def get_emails(self) -> Set:
|
||||
async def get_emails(self):
|
||||
return self.info[0]
|
||||
|
||||
async def get_hostnames(self) -> Set:
|
||||
async def get_interestingurls(self):
|
||||
return self.info[1]
|
||||
|
|
|
@ -1,31 +1,61 @@
|
|||
from theHarvester.discovery.constants import MissingKey
|
||||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib.core import *
|
||||
import rocketreach
|
||||
import asyncio
|
||||
|
||||
|
||||
class SearchRocketreach:
|
||||
class SearchRocketReach:
|
||||
|
||||
def __init__(self, word):
|
||||
def __init__(self, word, limit):
|
||||
self.ips = set()
|
||||
self.word = word
|
||||
self.key = Core.rocketreach_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('Rocketreach')
|
||||
self.total_results = ""
|
||||
raise MissingKey('RocketReach')
|
||||
self.hosts = set()
|
||||
self.proxy = False
|
||||
self.baseurl = 'https://api.rocketreach.co/v2/api/search'
|
||||
self.links = set()
|
||||
self.limit = limit
|
||||
|
||||
async def do_search(self):
|
||||
rr = rocketreach.Gateway(rocketreach.GatewayConfig(self.key))
|
||||
s = rr.person.search().filter(current_employer=self.word)
|
||||
result = s.execute()
|
||||
if result.is_success:
|
||||
lookup = rr.person.lookup(result.people[0].id)
|
||||
if lookup.is_success:
|
||||
print(repr(lookup.person))
|
||||
try:
|
||||
headers = {
|
||||
'Api-Key': self.key,
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': Core.get_user_agent()
|
||||
}
|
||||
|
||||
import pprint as pp
|
||||
|
||||
# linkedin_urls = set()
|
||||
for page in range(1, self.limit):
|
||||
data = f'{{"query":{{"company_website_url": ["{self.word}"]}}, "start": {page}}}'
|
||||
result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
|
||||
|
||||
if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result[
|
||||
'detail']:
|
||||
# No more results can be fetched
|
||||
break
|
||||
if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
|
||||
# Rate limit has been triggered need to sleep extra
|
||||
print(f'RocketReach requests have been throttled; '
|
||||
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}')
|
||||
break
|
||||
if 'profiles' in dict(result).keys():
|
||||
if len(result['profiles']) == 0:
|
||||
break
|
||||
for profile in result['profiles']:
|
||||
if 'linkedin_url' in dict(profile).keys():
|
||||
self.links.add(profile['linkedin_url'])
|
||||
|
||||
await asyncio.sleep(get_delay() + 2)
|
||||
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {e}')
|
||||
|
||||
async def get_links(self):
|
||||
return self.links
|
||||
|
||||
async def process(self, proxy=False):
|
||||
self.proxy = proxy
|
||||
await self.do_search() # Only need to do it once.
|
||||
|
||||
# async def get_emails(self):
|
||||
# rawres = myparser.Parser(self.total_results, self.word)
|
||||
# return await rawres.emails()
|
||||
await self.do_search()
|
||||
|
|
|
@ -1,28 +1,31 @@
|
|||
from typing import Coroutine
|
||||
from typing import List
|
||||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import myparser
|
||||
|
||||
|
||||
class SearchThreatcrowd:
|
||||
|
||||
def __init__(self, word):
|
||||
self.word = word.replace(' ', '%20')
|
||||
self.results: str = ""
|
||||
self.totalresults: str = ""
|
||||
self.hostnames = list()
|
||||
self.ips = list()
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self):
|
||||
base_url = f'https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={self.word}'
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
try:
|
||||
responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy)
|
||||
self.results = responses[0]
|
||||
responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy, json=True)
|
||||
resp = responses[0]
|
||||
self.ips = {ip['ip_address'] for ip in resp['resolutions'] if len(ip['ip_address']) > 4}
|
||||
self.hostnames = set(list(resp['subdomains']))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
self.totalresults += self.results
|
||||
|
||||
async def get_hostnames(self) -> Coroutine:
|
||||
return await myparser.Parser(self.results, self.word).hostnames()
|
||||
async def get_ips(self) -> List:
|
||||
return self.ips
|
||||
|
||||
async def get_hostnames(self) -> List:
|
||||
return self.hostnames
|
||||
|
||||
async def process(self, proxy=False):
|
||||
self.proxy = proxy
|
||||
|
|
|
@ -7,16 +7,23 @@ class SearchThreatminer:
|
|||
def __init__(self, word):
|
||||
self.word = word
|
||||
self.totalhosts = list
|
||||
self.totalips = list
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self):
    """Query the two threatminer domain reports and store their results.

    The ``rt=5`` report fills ``self.totalhosts`` with its ``results``
    entries; the ``rt=2`` report fills ``self.totalips`` with the ``ip``
    field of each of its ``results`` entries.
    """
    hosts_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
    hosts_reply = await AsyncFetcher.fetch_all([hosts_url], json=True, proxy=self.proxy)
    self.totalhosts = set(hosts_reply[0]['results'])

    ips_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
    ips_reply = await AsyncFetcher.fetch_all([ips_url], json=True, proxy=self.proxy)
    self.totalips = {entry['ip'] for entry in ips_reply[0]['results']}
|
||||
|
||||
async def get_hostnames(self) -> set:
    """Return the hosts stored in ``self.totalhosts``.

    do_search() assigns a set to that attribute, hence the ``set``
    annotation (the previous ``Type[list]`` described the ``list`` class
    object, not a collection of hosts).
    """
    return self.totalhosts
|
||||
|
||||
async def get_ips(self) -> set:
    """Return the addresses stored in ``self.totalips``.

    do_search() assigns a set to that attribute, hence the ``set``
    annotation (the previous ``Type[list]`` described the ``list`` class
    object, not a collection of addresses).
    """
    return self.totalips
|
||||
|
||||
async def process(self, proxy=False):
    """Record the proxy setting and then run do_search().

    :param proxy: value stored on ``self.proxy`` before the search starts
    """
    # The assignment must come first: do_search() reads self.proxy.
    self.proxy = proxy
    await self.do_search()
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
from typing import Type
|
||||
from typing import List
|
||||
from theHarvester.lib.core import *
|
||||
|
||||
|
||||
class SearchUrlscan:
|
||||
def __init__(self, word):
|
||||
self.word = word
|
||||
self.totalhosts = list
|
||||
self.totalips = list
|
||||
self.totalhosts = list()
|
||||
self.totalips = list()
|
||||
self.interestingurls = list()
|
||||
self.totalasns = list()
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self):
|
||||
|
@ -15,13 +17,22 @@ async def do_search(self):
|
|||
resp = response[0]
|
||||
self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
|
||||
self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
|
||||
self.interestingurls = {f"{page['page']['url']}" for page in resp['results'] if self.word in page['page']['url']
|
||||
and 'url' in page['page'].keys()}
|
||||
self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
|
||||
|
||||
async def get_hostnames(self) -> Type[list]:
|
||||
async def get_hostnames(self) -> List:
|
||||
return self.totalhosts
|
||||
|
||||
async def get_ips(self) -> Type[list]:
|
||||
async def get_ips(self) -> List:
|
||||
return self.totalips
|
||||
|
||||
async def get_interestingurls(self) -> List:
|
||||
return self.interestingurls
|
||||
|
||||
async def get_asns(self) -> List:
|
||||
return self.totalasns
|
||||
|
||||
async def process(self, proxy=False):
|
||||
self.proxy = proxy
|
||||
await self.do_search()
|
||||
|
|
199
theHarvester/discovery/zoomeyesearch.py
Normal file
199
theHarvester/discovery/zoomeyesearch.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import myparser
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
|
||||
class SearchZoomEye:
|
||||
|
||||
def __init__(self, word, limit):
    """Store the query, load the ZoomEye key and prepare result containers.

    :param word: domain used in the ``site:`` query and to filter URLs
    :param limit: upper bound used by do_search() when paging
    :raises MissingKey: when no ZoomEye key is configured
    """
    self.word = word
    self.limit = limit
    self.key = Core.zoomeye_key()
    if self.key is None:
        raise MissingKey('zoomeye')
    self.baseurl = 'https://api.zoomeye.org/host/search'
    self.proxy = False
    self.totalasns = []
    self.totalhosts = []
    self.interestingurls = []
    self.totalips = []
    self.totalemails = []
    # Regex used is directly from: https://github.com/GerbenJavado/LinkFinder/blob/master/linkfinder.py#L29
    # Maybe one day it will be a pip package
    # Regardless LinkFinder is an amazing tool!
    # Compiled in verbose mode, so the layout and trailing comments inside
    # the pattern are ignored by the regex engine.
    self.iurl_regex = re.compile(r"""
(?:"|') # Start newline delimiter
(
((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or //
[^"'/]{1,}\. # Match a domainname (any character + dot)
[a-zA-Z]{2,}[^"']{0,}) # The domainextension and/or path
|
((?:/|\.\./|\./) # Start with /,../,./
[^"'><,;| *()(%%$^/\\\[\]] # Next character can't be...
[^"'><,;|()]{1,}) # Rest of the characters can't be
|
([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with /
[a-zA-Z0-9_\-/]{1,} # Resource name
\.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action)
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
|
([a-zA-Z0-9_\-/]{1,}/ # REST API (no extension) with /
[a-zA-Z0-9_\-/]{3,} # Proper REST endpoints usually have 3+ chars
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
|
([a-zA-Z0-9_\-]{1,} # filename
\.(?:php|asp|aspx|jsp|json|
action|html|js|txt|xml) # . + extension
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
)
(?:"|') # End newline delimiter
""", re.VERBOSE)
|
||||
|
||||
async def do_search(self):
    """Page through ZoomEye host-search results and accumulate the findings.

    The first response's ``available`` value caps ``self.limit``; pages
    2 .. ``self.limit - 1`` are then requested. Paging stops early when a
    response has no ``matches`` key or once five pages parsed to nothing.
    """
    request_headers = {
        'API-KEY': self.key,
        'User-Agent': Core.get_user_agent()
    }

    def absorb(hostnames, emails, ips, asns, iurls):
        # Append one page's parsed findings to the running totals.
        self.totalhosts.extend(hostnames)
        self.totalemails.extend(emails)
        self.totalips.extend(ips)
        self.totalasns.extend(asns)
        self.interestingurls.extend(iurls)

    # TODO add: https://www.zoomeye.org/profile/domain to fetch subdomains more easily
    # once api endpoint is created
    first_params = (
        ('query', f'site:{self.word}'),
        ('page', '1'),
    )
    first_batch = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy,
                                               headers=request_headers, params=first_params)
    # First request determines the upper bound for paging
    resp = first_batch[0]
    total_pages = int(resp['available'])
    if total_pages <= self.limit:
        self.limit = total_pages
    # range(2, 2) would be empty, so a limit of 2 is bumped to fetch page 2
    if self.limit == 2:
        self.limit = 3

    # The first page is parsed whether or not more pages follow
    if 'matches' in resp.keys():
        hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches'])
        absorb(hostnames, emails, ips, asns, iurls)

    # With a limit below 2 this range is empty and only page 1 is used
    empty_pages = 0
    for page_number in range(2, self.limit):
        print(f'Currently on page: {page_number}')
        page_params = (
            ('query', f'site:{self.word}'),
            ('page', f'{page_number}'),
        )
        batch = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy,
                                             headers=request_headers, params=page_params)
        resp = batch[0]
        if 'matches' not in resp.keys():
            print(f'Your resp: {resp}')
            print('Match not found in keys')
            break

        hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches'])

        if not (hostnames or emails or ips or asns or iurls):
            empty_pages += 1

        if empty_pages >= 5:
            break

        absorb(hostnames, emails, ips, asns, iurls)

        await asyncio.sleep(get_delay() + 2)
|
||||
|
||||
async def parse_matchs(self, matches):
    """Collect hostnames, emails, IPs, ASNs and interesting URLs from result matches.

    :param matches: iterable of match dicts (the ``'matches'`` list of a response).
        ``'ip'`` is read unconditionally; ``'geoinfo'``, ``'rdns_new'``, ``'rdns'``
        and ``'portinfo'`` are only read when present.
    :return: tuple of sets ``(hostnames, emails, ips, asns, iurls)`` aggregated
        over *all* matches.

    Parsing is best-effort: an exception raised while handling one match is
    printed and the loop carries on with the next match.
    """

    def strip_trailing_dot(name):
        # Reverse-DNS names may end with a dot; drop exactly one.
        # endswith() is safe on an empty string, unlike name[-1].
        return name[:-1] if name.endswith('.') else name

    ips = set()
    iurls = set()
    hostnames = set()
    asns = set()
    emails = set()
    for match in matches:
        try:
            ips.add(match['ip'])

            if 'geoinfo' in match:
                asns.add(int(match['geoinfo']['asn']))

            if 'rdns_new' in match:
                rdns_new = match['rdns_new']
                if ',' in rdns_new:
                    # Comma separated value: the first entry is always kept, the
                    # second one only when there are exactly two entries.
                    parts = str(rdns_new).split(',')
                    rdns_new = parts[0]
                    if len(parts) == 2 and parts[1]:
                        hostnames.add(parts[1])
                rdns_new = strip_trailing_dot(rdns_new)
                if rdns_new:
                    hostnames.add(rdns_new)

            if 'rdns' in match:
                rdns = strip_trailing_dot(match['rdns'])
                if rdns:
                    hostnames.add(rdns)

            if 'portinfo' in match:
                banner = match['portinfo']['banner']
                emails.update(await self.parse_emails(banner))
                hostnames.update(await self.parse_hostnames(banner))
                # Accumulate instead of rebinding, otherwise URLs found in earlier
                # matches are thrown away by every later match.
                iurls.update(
                    str(found.group(1)).replace('"', '')
                    for found in re.finditer(self.iurl_regex, banner)
                    if self.word in str(found.group(1))
                )
        except Exception as e:
            # Deliberately broad: one malformed match must not abort the whole parse.
            print(f'An exception has occurred: {e}')
    return hostnames, emails, ips, asns, iurls
|
||||
|
||||
async def process(self, proxy=False):
    """Store the proxy flag on the instance and run the search.

    :param proxy: value saved as ``self.proxy`` before searching.
    """
    self.proxy = proxy
    # A single do_search() call is all that is needed here.
    await self.do_search()
|
||||
|
||||
async def parse_emails(self, content):
    """Build a ``myparser.Parser`` over ``content`` and ``self.word`` and return its ``emails()`` result."""
    parser = myparser.Parser(content, self.word)
    found = await parser.emails()
    return found
|
||||
|
||||
async def parse_hostnames(self, content):
    """Build a ``myparser.Parser`` over ``content`` and ``self.word`` and return its ``hostnames()`` result."""
    parser = myparser.Parser(content, self.word)
    found = await parser.hostnames()
    return found
|
||||
|
||||
async def get_hostnames(self):
    """Return the entries of ``self.totalhosts`` de-duplicated as a set."""
    return {*self.totalhosts}
|
||||
|
||||
async def get_emails(self):
    """Return the entries of ``self.totalemails`` de-duplicated as a set."""
    return {*self.totalemails}
|
||||
|
||||
async def get_ips(self):
    """Return the entries of ``self.totalips`` de-duplicated as a set."""
    return {*self.totalips}
|
||||
|
||||
async def get_asns(self):
    """Return the entries of ``self.totalasns`` de-duplicated as a set."""
    return {*self.totalasns}
|
||||
|
||||
async def get_interestingurls(self):
    """Return the entries of ``self.interestingurls`` de-duplicated as a set."""
    return {*self.interestingurls}
|
|
@ -78,20 +78,20 @@ async def dnsbrute(request: Request, user_agent: str = Header(None),
|
|||
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
|
||||
response = RedirectResponse(app.url_path_for('bot'))
|
||||
return response
|
||||
dns_bruteforce = await __main__.entry_point(argparse.Namespace(dns_brute=True,
|
||||
dns_lookup=False,
|
||||
dns_server=False,
|
||||
dns_tld=False,
|
||||
domain=domain,
|
||||
filename='',
|
||||
google_dork=False,
|
||||
limit=500,
|
||||
proxies=False,
|
||||
shodan=False,
|
||||
source=','.join([]),
|
||||
start=0,
|
||||
take_over=False,
|
||||
virtual_host=False))
|
||||
dns_bruteforce = await __main__.start(argparse.Namespace(dns_brute=True,
|
||||
dns_lookup=False,
|
||||
dns_server=False,
|
||||
dns_tld=False,
|
||||
domain=domain,
|
||||
filename='',
|
||||
google_dork=False,
|
||||
limit=500,
|
||||
proxies=False,
|
||||
shodan=False,
|
||||
source=','.join([]),
|
||||
start=0,
|
||||
take_over=False,
|
||||
virtual_host=False))
|
||||
return {'dns_bruteforce': dns_bruteforce}
|
||||
|
||||
|
||||
|
@ -115,7 +115,7 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str =
|
|||
response = RedirectResponse(app.url_path_for('bot'))
|
||||
return response
|
||||
try:
|
||||
emails, ips, urls, html_filename, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute,
|
||||
emails, ips, urls, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute,
|
||||
dns_lookup=dns_lookup,
|
||||
dns_server=dns_server,
|
||||
dns_tld=dns_tld,
|
||||
|
@ -130,7 +130,6 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str =
|
|||
take_over=take_over,
|
||||
virtual_host=virtual_host))
|
||||
|
||||
return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'html_file': f'{html_filename}',
|
||||
'xml_file': f'{xml_filename}'}
|
||||
return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'xml_file': f'{xml_filename}'}
|
||||
except Exception as e:
|
||||
return {'exception': f'{e}'}
|
||||
|
|
|
@ -28,6 +28,10 @@ def api_keys() -> dict:
|
|||
keys = yaml.safe_load(api_keys)
|
||||
return keys['apikeys']
|
||||
|
||||
@staticmethod
def binaryedge_key() -> str:
    """Return the ``key`` entry of the ``binaryedge`` section of ``Core.api_keys()``."""
    section = Core.api_keys()['binaryedge']
    return section['key']
|
||||
|
||||
@staticmethod
def bing_key() -> str:
    """Return the ``key`` entry of the ``bing`` section of ``Core.api_keys()``."""
    section = Core.api_keys()['bing']
    return section['key']
|
||||
|
@ -72,6 +76,10 @@ def shodan_key() -> str:
|
|||
def spyse_key() -> str:
    """Return the ``key`` entry of the ``spyse`` section of ``Core.api_keys()``."""
    section = Core.api_keys()['spyse']
    return section['key']
|
||||
|
||||
@staticmethod
def zoomeye_key() -> str:
    """Return the ``key`` entry of the ``zoomeye`` section of ``Core.api_keys()``.

    The section name is all lowercase, matching the ``zoomeye:`` entry of the
    api-keys YAML file; YAML mapping keys are case-sensitive, so looking up
    ``'zoomEye'`` raised ``KeyError``.
    """
    return Core.api_keys()['zoomeye']['key']
|
||||
|
||||
@staticmethod
|
||||
def proxy_list() -> List:
|
||||
try:
|
||||
|
@ -106,6 +114,7 @@ def banner() -> None:
|
|||
@staticmethod
|
||||
def get_supportedengines() -> Set[Union[str, Any]]:
|
||||
supportedengines = {'baidu',
|
||||
'binaryedge',
|
||||
'bing',
|
||||
'bingapi',
|
||||
'bufferoverun',
|
||||
|
@ -140,6 +149,7 @@ def get_supportedengines() -> Set[Union[str, Any]]:
|
|||
'urlscan',
|
||||
'virustotal',
|
||||
'yahoo',
|
||||
'zoomeye'
|
||||
}
|
||||
return supportedengines
|
||||
|
||||
|
|
Loading…
Reference in a new issue