mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 16:26:34 +08:00
Removed remnants of API, added function to detect if chromium is installed, and cleaned up verbose output.
This commit is contained in:
parent
5138a82cd9
commit
c1b8985276
|
@ -15,7 +15,7 @@
|
|||
import sys
|
||||
|
||||
|
||||
async def start(rest_args=None):
|
||||
async def start():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='theHarvester is used to gather open source intelligence (OSINT) on a\n'
|
||||
'company or domain.')
|
||||
|
@ -47,29 +47,10 @@ async def start(rest_args=None):
|
|||
rapiddns, securityTrails, spyse, sublist3r, suip, threatcrowd, threatminer,
|
||||
trello, twitter, urlscan, virustotal, yahoo, all''')
|
||||
|
||||
# determines if filename is coming from rest api or user
|
||||
rest_filename = ""
|
||||
# indicates this from the rest API
|
||||
if rest_args:
|
||||
if rest_args.source and rest_args.source == "getsources":
|
||||
return list(sorted(Core.get_supportedengines()))
|
||||
elif rest_args.dns_brute:
|
||||
args = rest_args
|
||||
dnsbrute = (rest_args.dns_brute, True)
|
||||
else:
|
||||
args = rest_args
|
||||
# We need to make sure the filename is random as to not overwrite other files
|
||||
filename: str = args.filename
|
||||
import string
|
||||
import secrets
|
||||
alphabet = string.ascii_letters + string.digits
|
||||
rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \
|
||||
if len(filename) != 0 else ""
|
||||
|
||||
else:
|
||||
args = parser.parse_args()
|
||||
filename: str = args.filename
|
||||
dnsbrute = (args.dns_brute, False)
|
||||
args = parser.parse_args()
|
||||
filename: str = args.filename
|
||||
dnsbrute = (args.dns_brute, False)
|
||||
try:
|
||||
db = stash.StashManager()
|
||||
await db.do_init()
|
||||
|
@ -126,7 +107,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
else:
|
||||
print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m')
|
||||
if store_host:
|
||||
#host_names = filter(await search_engine.get_hostnames())
|
||||
host_names = [host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host]
|
||||
if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
|
||||
# If source is inside this conditional it means the hosts returned must be resolved to obtain ip
|
||||
|
@ -429,12 +409,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
yahoo_search = yahoosearch.SearchYahoo(word, limit)
|
||||
stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
|
||||
else:
|
||||
try:
|
||||
# Check if dns_brute is defined
|
||||
rest_args.dns_brute
|
||||
except:
|
||||
print('\033[93m[!] Invalid source.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
print('\033[93m[!] Invalid source.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
|
||||
async def worker(queue):
|
||||
while True:
|
||||
|
@ -470,14 +446,7 @@ async def handler(lst):
|
|||
|
||||
await handler(lst=stor_lst)
|
||||
return_ips = []
|
||||
if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False:
|
||||
# Indicates user is using rest api but not wanting output to be saved to a file
|
||||
full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full]
|
||||
full = list({host for host in full if host})
|
||||
full.sort()
|
||||
# cast to string so Rest API can understand type
|
||||
return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
|
||||
return list(set(all_emails)), return_ips, full, "", ""
|
||||
|
||||
# Sanity check to see if all_emails and all_hosts are defined.
|
||||
try:
|
||||
all_emails
|
||||
|
@ -533,20 +502,17 @@ async def handler(lst):
|
|||
print(url)
|
||||
|
||||
# DNS brute force
|
||||
|
||||
if dnsbrute and dnsbrute[0] is True:
|
||||
print('\n[*] Starting DNS brute force.')
|
||||
dns_force = dnssearch.DnsForce(word, dnsserver, verbose=True)
|
||||
hosts, ips = await dns_force.run()
|
||||
hosts = list({host for host in hosts if ':' in host})
|
||||
hosts.sort(key=lambda el: el.split(':')[0])
|
||||
# Check if Rest API is being used if so return found hosts
|
||||
if dnsbrute[1]:
|
||||
return hosts
|
||||
print('\n[*] Hosts found after DNS brute force:')
|
||||
db = stash.StashManager()
|
||||
for host in hosts:
|
||||
print(host)
|
||||
full.append(host)
|
||||
await db.store_all(word, hosts, 'host', 'dns_bruteforce')
|
||||
|
||||
# TakeOver Checking
|
||||
|
@ -623,52 +589,36 @@ async def handler(lst):
|
|||
else:
|
||||
pass
|
||||
|
||||
MAX_QUEUE_SIZE = 2 ** 15 - 1
|
||||
print(f'max queue size: {MAX_QUEUE_SIZE}')
|
||||
import time
|
||||
from aiomultiprocess import Pool
|
||||
|
||||
# Screenshots
|
||||
if len(args.screenshot) > 0:
|
||||
# screenshot_handler
|
||||
#from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
|
||||
import time
|
||||
from aiomultiprocess import Pool
|
||||
from theHarvester.screenshot.screenshot import ScreenShotter
|
||||
# AsyncFetcher.fetch_all([])
|
||||
screen_shotter = ScreenShotter(args.screenshot)
|
||||
await screen_shotter.verify_installation()
|
||||
print(f'Screenshots can be found: {screen_shotter.output}{screen_shotter.slash}')
|
||||
start = time.perf_counter()
|
||||
print('Filtering domains for ones we can reach')
|
||||
#from theHarvester.screenshot import take_screenshot
|
||||
unique_resolved_domains = {url.split(':')[0]for url in full if ':' in url and 'www.' not in url}
|
||||
# First filter out ones that didn't resolve
|
||||
#unique_resolved_domains = list(sorted([x for x in unique_resolved_domains
|
||||
# if len(await screen_shotter.visit(x)) > 0]))
|
||||
# Second filter out ones where we can't reach them with an http request
|
||||
# Grab resolved subdomains
|
||||
# coroutines = [take_screenshot(url) for url in unique_resolved_domains]
|
||||
#await screenshot_handler(coroutines)
|
||||
async with Pool(15) as pool:
|
||||
print('Created pool')
|
||||
print('mapping for unique resolved domains')
|
||||
y = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
|
||||
unique_resolved_domains = list(sorted({x[0] for x in y if len(x[1]) > 0}))
|
||||
print(unique_resolved_domains)
|
||||
async with Pool(3) as pool:
|
||||
#serialized_tiles = [take_screenshot(url) for url in unique_resolved_domains]
|
||||
#print(f'Length of serialized_tiles: {len(serialized_tiles)} ')
|
||||
print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
|
||||
for chunk in screen_shotter._chunk_list(unique_resolved_domains, 25):
|
||||
print(f'Chunk: {chunk} and length: {len(chunk)}')
|
||||
try:
|
||||
#resultsss = await pool.map(visit, unique_resolved_domains)
|
||||
temp = await pool.map(screen_shotter.take_screenshot, chunk)
|
||||
#resultsss = await pool.map(take_screenshot, unique_resolved_domains)
|
||||
#await pool.map(screenshot_handler, chunk)
|
||||
except Exception as ee:
|
||||
print(f'An excpeption has occurred while mapping: {ee}')
|
||||
#continue
|
||||
end = time.perf_counter()
|
||||
print("Pipeline finished in {} seconds".format(end - start))
|
||||
|
||||
if len(unique_resolved_domains) > 0:
|
||||
# First filter out ones that didn't resolve
|
||||
print('Attempting to visit unique resolved domains, this is ACTIVE RECON')
|
||||
async with Pool(15) as pool:
|
||||
results = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
|
||||
# Filter out domains that we couldn't connect to
|
||||
unique_resolved_domains = list(sorted({tup[0] for tup in results if len(tup[1]) > 0}))
|
||||
async with Pool(3) as pool:
|
||||
print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
|
||||
# If you have the resources you could make the function faster by increasing the chunk number
|
||||
chunk_number = 25
|
||||
for chunk in screen_shotter.chunk_list(unique_resolved_domains, chunk_number):
|
||||
try:
|
||||
await pool.map(screen_shotter.take_screenshot, chunk)
|
||||
except Exception as ee:
|
||||
print(f'An exception has occurred while mapping: {ee}')
|
||||
end = time.perf_counter()
|
||||
print(f"Finished taking screenshots in {end - start} seconds")
|
||||
|
||||
# Shodan
|
||||
shodanres = []
|
||||
|
@ -715,20 +665,12 @@ async def handler(lst):
|
|||
try:
|
||||
print('\n[*] Reporting started.')
|
||||
db = stash.StashManager()
|
||||
if rest_args and rest_args.domain is not None and len(rest_args.domain) > 1:
|
||||
# If using rest API filter by domain
|
||||
scanboarddata = await db.getscanboarddata(domain=rest_args.domain)
|
||||
else:
|
||||
scanboarddata = await db.getscanboarddata()
|
||||
scanboarddata = await db.getscanboarddata()
|
||||
latestscanresults = await db.getlatestscanresults(word)
|
||||
previousscanresults = await db.getlatestscanresults(word, previousday=True)
|
||||
latestscanchartdata = await db.latestscanchartdata(word)
|
||||
scanhistorydomain = await db.getscanhistorydomain(word)
|
||||
if rest_args and rest_args.domain is not None and len(rest_args.domain) > 1:
|
||||
# If using rest API filter by domain
|
||||
pluginscanstatistics = await db.getpluginscanstatistics(domain=rest_args.domain)
|
||||
else:
|
||||
pluginscanstatistics = await db.getpluginscanstatistics()
|
||||
pluginscanstatistics = await db.getpluginscanstatistics()
|
||||
generator = statichtmlgenerator.HtmlGenerator(word)
|
||||
HTMLcode = await generator.beginhtml()
|
||||
HTMLcode += await generator.generatedashboardcode(scanboarddata)
|
||||
|
@ -745,40 +687,23 @@ async def handler(lst):
|
|||
</body>
|
||||
</html>
|
||||
'''
|
||||
if len(rest_filename) == 0:
|
||||
Html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
|
||||
Html_file.write(HTMLcode)
|
||||
Html_file.close()
|
||||
print('[*] Reporting finished.')
|
||||
print('[*] Saving files.')
|
||||
else:
|
||||
# indicates the rest api is being used in that case we asynchronously write the file to our static directory
|
||||
try:
|
||||
import aiofiles
|
||||
async with aiofiles.open(
|
||||
f'theHarvester/app/static/{rest_filename}.html' if '.html' not in rest_filename
|
||||
else f'theHarvester/app/static/{rest_filename}', 'w+') as Html_file:
|
||||
await Html_file.write(HTMLcode)
|
||||
except Exception as ex:
|
||||
print(f"An excpetion has occurred: {ex}")
|
||||
return list(set(all_emails)), return_ips, full, f'{ex}', ""
|
||||
# Html_file = async with aiofiles.open(f'{filename}.html' if '.html' not in filename else filename, 'w')
|
||||
# Html_file.write(HTMLcode)
|
||||
# Html_file.close()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
|
||||
sys.exit(1)
|
||||
|
||||
Html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
|
||||
Html_file.write(HTMLcode)
|
||||
Html_file.close()
|
||||
print('[*] Reporting finished.')
|
||||
print('[*] Saving files.')
|
||||
|
||||
|
||||
try:
|
||||
# filename = filename.rsplit('.', 1)[0] + '.xml'
|
||||
# file = open(filename, 'w')
|
||||
if len(rest_filename) == 0:
|
||||
filename = filename.rsplit('.', 1)[0] + '.xml'
|
||||
else:
|
||||
filename = 'theHarvester/app/static/' \
|
||||
+ rest_filename.rsplit('.', 1)[0] + '.xml'
|
||||
# TODO use aiofiles if user is using rest api
|
||||
filename = filename.rsplit('.', 1)[0] + '.xml'
|
||||
|
||||
with open(filename, 'w+') as file:
|
||||
file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
|
||||
for x in all_emails:
|
||||
|
@ -816,14 +741,10 @@ async def handler(lst):
|
|||
file.write('</servers>')
|
||||
|
||||
file.write('</theHarvester>')
|
||||
if len(rest_filename) > 0:
|
||||
return list(set(all_emails)), return_ips, full, f'/static/{rest_filename}.html', \
|
||||
f'/static/{filename[filename.find("/static/") + 8:]}' if '/static/' in filename \
|
||||
else f'/static/{filename} '
|
||||
|
||||
print('[*] Files saved.')
|
||||
except Exception as er:
|
||||
print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
|
||||
return list(set(all_emails)), return_ips, full, f'/static/{rest_filename}.html', f'{er}'
|
||||
print('\n\n')
|
||||
sys.exit(0)
|
||||
|
||||
|
@ -837,11 +758,3 @@ async def entry_point():
|
|||
except Exception as error_entry_point:
|
||||
print(error_entry_point)
|
||||
sys.exit(1)
|
||||
|
||||
"""
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(wow())
|
||||
#import multiprocessing
|
||||
#multiprocessing.freeze_support()
|
||||
#asyncio.run(main=entry_point())
|
||||
"""
|
|
@ -146,14 +146,11 @@ async def getlatestscanresults(self, domain, previousday=False):
|
|||
except Exception as e:
|
||||
print(f'Error connecting to theHarvester database: {e}')
|
||||
|
||||
async def getscanboarddata(self, domain=""):
|
||||
async def getscanboarddata(self):
|
||||
try:
|
||||
async with aiosqlite.connect(self.db, timeout=30) as conn:
|
||||
if len(domain) != 0:
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host" and domain=?''',
|
||||
(domain,))
|
||||
else:
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
|
||||
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata["host"] = data[0]
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
|
||||
|
@ -214,26 +211,16 @@ async def getscanhistorydomain(self, domain):
|
|||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
async def getpluginscanstatistics(self, domain=""):
|
||||
async def getpluginscanstatistics(self):
    """Return result counts grouped by domain, find date, type and source.

    The fetched rows are also stored on ``self.scanstats``. Any exception is
    printed, in which case the coroutine falls through and returns ``None``.
    """
    grouped_counts_query = '''
    SELECT domain,find_date, type, source, count(*)
    FROM results
    GROUP BY domain, find_date, type, source
    '''
    try:
        async with aiosqlite.connect(self.db, timeout=30) as conn:
            cursor = await conn.execute(grouped_counts_query)
            self.scanstats = await cursor.fetchall()
        return self.scanstats
    except Exception as e:
        print(e)
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
import aiohttp
|
||||
import sys
|
||||
|
||||
|
||||
class ScreenShotter():
|
||||
|
||||
def __init__(self, output):
|
||||
|
@ -15,14 +16,22 @@ def __init__(self, output):
|
|||
self.slash = "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
|
||||
|
||||
@staticmethod
|
||||
def _chunk_list(items, chunk_size):
|
||||
async def verify_installation():
    """Check that pyppeteer & chromium are installed by starting a throwaway browser.

    A headless browser is launched and closed again straight away. As noted by the
    original author, pyppeteer will prompt the user to install chromium if it is missing.
    """
    launch_options = {'headless': True, 'ignoreHTTPSErrors': True, 'args': ["--no-sandbox"]}
    probe_browser = await launch(**launch_options)
    await probe_browser.close()
|
||||
|
||||
@staticmethod
def chunk_list(items, chunk_size):
    """Split ``items`` into consecutive slices of at most ``chunk_size`` elements.

    Based off of: https://github.com/apache/incubator-sdap-ingester

    :param items: sliceable sequence (list, tuple, str, ...) to split.
    :param chunk_size: maximum length of each chunk; must be a positive integer.
    :return: list of slices in their original order; the last one may be shorter.
        An empty ``items`` gives ``[]``.
    :raises ValueError: if ``chunk_size`` is zero or negative.
    """
    # range() raises an opaque error for a zero step and yields nothing for a
    # negative step (silently dropping every item), so reject both up front.
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be a positive integer, got {chunk_size!r}')
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
|
||||
|
||||
@staticmethod
|
||||
async def visit(url):
|
||||
try:
|
||||
print(f'attempting to visit: {url}')
|
||||
timeout = aiohttp.ClientTimeout(total=45)
|
||||
# print(f'attempting to visit: {url}')
|
||||
timeout = aiohttp.ClientTimeout(total=35)
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
|
||||
'Chrome/83.0.4103.106 Safari/537.36'}
|
||||
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
|
||||
|
@ -30,44 +39,34 @@ async def visit(url):
|
|||
async with aiohttp.ClientSession(timeout=timeout, headers=headers,
|
||||
connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
|
||||
async with session.get(url) as resp:
|
||||
# TODO fix with origin url I think it's there somewhere
|
||||
#return str(resp.url.origin()), await resp.text()
|
||||
# TODO fix with origin url, should be there somewhere
|
||||
text = await resp.text("UTF-8")
|
||||
print(text)
|
||||
print('\n\n\n\n')
|
||||
return f'http://{url}' if ('http' not in url and 'https' not in url) else url, text
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred while attempting to visit: {e}')
|
||||
print(f'An exception has occurred while attempting to screenshot {url}: {e}')
|
||||
return "", ""
|
||||
|
||||
async def take_screenshot(self, url):
    """Open ``url`` in headless chromium and save a PNG screenshot of the page.

    The image is written to ``{self.output}{self.slash}<url without scheme>.png``.
    Errors raised while preparing the page, navigating or capturing are printed
    rather than raised; a failure to launch chromium itself still propagates.

    :param url: host name or URL. ``http://`` is prepended when the value does not
        already start with a scheme, and every ``www.`` occurrence is removed.
    """
    # Test the prefix, not a substring: a host such as "httpbin.org" contains
    # "http" but still needs a scheme before chromium can navigate to it.
    if not url.startswith(('http://', 'https://')):
        url = f'http://{url}'
    url = url.replace('www.', '')
    print(f'Attempting to take a screenshot of: {url}')
    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
    context = None
    page = None
    try:
        context = await browser.createIncognitoBrowserContext()
        # Open the page inside the incognito context; browser.newPage() would
        # create it in the default context and leave this one unused.
        page = await context.newPage()
        # change default timeout from 30 to 35 seconds
        page.setDefaultNavigationTimeout(35000)
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                                'Chrome/83.0.4103.106 Safari/537.36')
        await page.goto(url)
        await page.screenshot(
            {'path': f'{self.output}{self.slash}{url.replace("http://", "").replace("https://", "")}.png'})
    except Exception as e:
        print(f'Exception occurred: {e} for: {url} ')
    finally:
        # Clean up everything whether screenshot is taken or not. The browser is
        # closed in its own finally so chromium is terminated even if closing
        # the page or the context fails.
        try:
            if page is not None:
                await page.close()
            if context is not None:
                await context.close()
        finally:
            await browser.close()
|
||||
|
|
Loading…
Reference in a new issue