Removed remnants of the REST API, added a function to detect whether Chromium is installed, and cleaned up verbose output.

NotoriousRebel 2020-07-03 21:44:40 -04:00
parent 5138a82cd9
commit c1b8985276
3 changed files with 72 additions and 173 deletions
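The Chromium check this commit adds (verify_installation in the screenshot module, shown in the last file below) simply launches and closes a headless browser through pyppeteer, which fetches a Chromium build on first use. A minimal standalone sketch of that pattern; the chromium_available helper name is illustrative and not part of the commit:

import asyncio
from pyppeteer import launch

async def chromium_available() -> bool:
    # Launch and immediately close a headless browser; pyppeteer downloads
    # Chromium on first launch if none is installed yet.
    try:
        browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
        await browser.close()
        return True
    except Exception as err:
        print(f'Chromium/pyppeteer check failed: {err}')
        return False

if __name__ == '__main__':
    print(asyncio.run(chromium_available()))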

View file

@@ -15,7 +15,7 @@
import sys
async def start(rest_args=None):
async def start():
parser = argparse.ArgumentParser(
description='theHarvester is used to gather open source intelligence (OSINT) on a\n'
'company or domain.')
@@ -47,29 +47,10 @@ async def start(rest_args=None):
rapiddns, securityTrails, spyse, sublist3r, suip, threatcrowd, threatminer,
trello, twitter, urlscan, virustotal, yahoo, all''')
# determines if filename is coming from rest api or user
rest_filename = ""
# indicates this from the rest API
if rest_args:
if rest_args.source and rest_args.source == "getsources":
return list(sorted(Core.get_supportedengines()))
elif rest_args.dns_brute:
args = rest_args
dnsbrute = (rest_args.dns_brute, True)
else:
args = rest_args
# We need to make sure the filename is random as to not overwrite other files
filename: str = args.filename
import string
import secrets
alphabet = string.ascii_letters + string.digits
rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \
if len(filename) != 0 else ""
else:
args = parser.parse_args()
filename: str = args.filename
dnsbrute = (args.dns_brute, False)
args = parser.parse_args()
filename: str = args.filename
dnsbrute = (args.dns_brute, False)
try:
db = stash.StashManager()
await db.do_init()
@@ -126,7 +107,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
else:
print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m')
if store_host:
#host_names = filter(await search_engine.get_hostnames())
host_names = [host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host]
if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
# If source is inside this conditional it means the hosts returned must be resolved to obtain ip
@@ -429,12 +409,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
yahoo_search = yahoosearch.SearchYahoo(word, limit)
stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
else:
try:
# Check if dns_brute is defined
rest_args.dns_brute
except:
print('\033[93m[!] Invalid source.\n\n \033[0m')
sys.exit(1)
print('\033[93m[!] Invalid source.\n\n \033[0m')
sys.exit(1)
async def worker(queue):
while True:
@@ -470,14 +446,7 @@ async def handler(lst):
await handler(lst=stor_lst)
return_ips = []
if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False:
# Indicates user is using rest api but not wanting output to be saved to a file
full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full]
full = list({host for host in full if host})
full.sort()
# cast to string so Rest API can understand type
return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
return list(set(all_emails)), return_ips, full, "", ""
# Sanity check to see if all_emails and all_hosts are defined.
try:
all_emails
@@ -533,20 +502,17 @@ async def handler(lst):
print(url)
# DNS brute force
if dnsbrute and dnsbrute[0] is True:
print('\n[*] Starting DNS brute force.')
dns_force = dnssearch.DnsForce(word, dnsserver, verbose=True)
hosts, ips = await dns_force.run()
hosts = list({host for host in hosts if ':' in host})
hosts.sort(key=lambda el: el.split(':')[0])
# Check if Rest API is being used if so return found hosts
if dnsbrute[1]:
return hosts
print('\n[*] Hosts found after DNS brute force:')
db = stash.StashManager()
for host in hosts:
print(host)
full.append(host)
await db.store_all(word, hosts, 'host', 'dns_bruteforce')
# TakeOver Checking
@@ -623,52 +589,36 @@ async def handler(lst):
else:
pass
MAX_QUEUE_SIZE = 2 ** 15 - 1
print(f'max queue size: {MAX_QUEUE_SIZE}')
import time
from aiomultiprocess import Pool
# Screenshots
if len(args.screenshot) > 0:
# screenshot_handler
#from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
import time
from aiomultiprocess import Pool
from theHarvester.screenshot.screenshot import ScreenShotter
# AsyncFetcher.fetch_all([])
screen_shotter = ScreenShotter(args.screenshot)
await screen_shotter.verify_installation()
print(f'Screenshots can be found: {screen_shotter.output}{screen_shotter.slash}')
start = time.perf_counter()
print('Filtering domains for ones we can reach')
#from theHarvester.screenshot import take_screenshot
unique_resolved_domains = {url.split(':')[0] for url in full if ':' in url and 'www.' not in url}
# First filter out ones that didn't resolve
#unique_resolved_domains = list(sorted([x for x in unique_resolved_domains
# if len(await screen_shotter.visit(x)) > 0]))
# Second filter out ones where we can't reach them with an http request
# Grab resolved subdomains
# coroutines = [take_screenshot(url) for url in unique_resolved_domains]
#await screenshot_handler(coroutines)
async with Pool(15) as pool:
print('Created pool')
print('mapping for unique resolved domains')
y = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
unique_resolved_domains = list(sorted({x[0] for x in y if len(x[1]) > 0}))
print(unique_resolved_domains)
async with Pool(3) as pool:
#serialized_tiles = [take_screenshot(url) for url in unique_resolved_domains]
#print(f'Length of serialized_tiles: {len(serialized_tiles)} ')
print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
for chunk in screen_shotter._chunk_list(unique_resolved_domains, 25):
print(f'Chunk: {chunk} and length: {len(chunk)}')
try:
#resultsss = await pool.map(visit, unique_resolved_domains)
temp = await pool.map(screen_shotter.take_screenshot, chunk)
#resultsss = await pool.map(take_screenshot, unique_resolved_domains)
#await pool.map(screenshot_handler, chunk)
except Exception as ee:
print(f'An excpeption has occurred while mapping: {ee}')
#continue
end = time.perf_counter()
print("Pipeline finished in {} seconds".format(end - start))
if len(unique_resolved_domains) > 0:
# First filter out ones that didn't resolve
print('Attempting to visit unique resolved domains, this is ACTIVE RECON')
async with Pool(15) as pool:
results = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
# Filter out domains that we couldn't connect to
unique_resolved_domains = list(sorted({tup[0] for tup in results if len(tup[1]) > 0}))
async with Pool(3) as pool:
print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
# If you have the resources you could make the function faster by increasing the chunk number
chunk_number = 25
for chunk in screen_shotter.chunk_list(unique_resolved_domains, chunk_number):
try:
await pool.map(screen_shotter.take_screenshot, chunk)
except Exception as ee:
print(f'An exception has occurred while mapping: {ee}')
end = time.perf_counter()
print(f"Finished taking screenshots in {end - start} seconds")
# Shodan
shodanres = []
@@ -715,20 +665,12 @@ async def handler(lst):
try:
print('\n[*] Reporting started.')
db = stash.StashManager()
if rest_args and rest_args.domain is not None and len(rest_args.domain) > 1:
# If using rest API filter by domain
scanboarddata = await db.getscanboarddata(domain=rest_args.domain)
else:
scanboarddata = await db.getscanboarddata()
scanboarddata = await db.getscanboarddata()
latestscanresults = await db.getlatestscanresults(word)
previousscanresults = await db.getlatestscanresults(word, previousday=True)
latestscanchartdata = await db.latestscanchartdata(word)
scanhistorydomain = await db.getscanhistorydomain(word)
if rest_args and rest_args.domain is not None and len(rest_args.domain) > 1:
# If using rest API filter by domain
pluginscanstatistics = await db.getpluginscanstatistics(domain=rest_args.domain)
else:
pluginscanstatistics = await db.getpluginscanstatistics()
pluginscanstatistics = await db.getpluginscanstatistics()
generator = statichtmlgenerator.HtmlGenerator(word)
HTMLcode = await generator.beginhtml()
HTMLcode += await generator.generatedashboardcode(scanboarddata)
@@ -745,40 +687,23 @@ async def handler(lst):
</body>
</html>
'''
if len(rest_filename) == 0:
Html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
Html_file.write(HTMLcode)
Html_file.close()
print('[*] Reporting finished.')
print('[*] Saving files.')
else:
# indicates the rest api is being used in that case we asynchronously write the file to our static directory
try:
import aiofiles
async with aiofiles.open(
f'theHarvester/app/static/{rest_filename}.html' if '.html' not in rest_filename
else f'theHarvester/app/static/{rest_filename}', 'w+') as Html_file:
await Html_file.write(HTMLcode)
except Exception as ex:
print(f"An excpetion has occurred: {ex}")
return list(set(all_emails)), return_ips, full, f'{ex}', ""
# Html_file = async with aiofiles.open(f'{filename}.html' if '.html' not in filename else filename, 'w')
# Html_file.write(HTMLcode)
# Html_file.close()
except Exception as e:
print(e)
print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
sys.exit(1)
Html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
Html_file.write(HTMLcode)
Html_file.close()
print('[*] Reporting finished.')
print('[*] Saving files.')
try:
# filename = filename.rsplit('.', 1)[0] + '.xml'
# file = open(filename, 'w')
if len(rest_filename) == 0:
filename = filename.rsplit('.', 1)[0] + '.xml'
else:
filename = 'theHarvester/app/static/' \
+ rest_filename.rsplit('.', 1)[0] + '.xml'
# TODO use aiofiles if user is using rest api
filename = filename.rsplit('.', 1)[0] + '.xml'
with open(filename, 'w+') as file:
file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
for x in all_emails:
@@ -816,14 +741,10 @@ async def handler(lst):
file.write('</servers>')
file.write('</theHarvester>')
if len(rest_filename) > 0:
return list(set(all_emails)), return_ips, full, f'/static/{rest_filename}.html', \
f'/static/{filename[filename.find("/static/") + 8:]}' if '/static/' in filename \
else f'/static/{filename} '
print('[*] Files saved.')
except Exception as er:
print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
return list(set(all_emails)), return_ips, full, f'/static/{rest_filename}.html', f'{er}'
print('\n\n')
sys.exit(0)
@@ -837,11 +758,3 @@ async def entry_point():
except Exception as error_entry_point:
print(error_entry_point)
sys.exit(1)
"""
if __name__ == '__main__':
asyncio.run(wow())
#import multiprocessing
#multiprocessing.freeze_support()
#asyncio.run(main=entry_point())
"""

View file

@@ -146,14 +146,11 @@ async def getlatestscanresults(self, domain, previousday=False):
except Exception as e:
print(f'Error connecting to theHarvester database: {e}')
async def getscanboarddata(self, domain=""):
async def getscanboarddata(self):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
if len(domain) != 0:
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host" and domain=?''',
(domain,))
else:
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
data = await cursor.fetchone()
self.scanboarddata["host"] = data[0]
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
@@ -214,26 +211,16 @@ async def getscanhistorydomain(self, domain):
except Exception as e:
print(e)
async def getpluginscanstatistics(self, domain=""):
async def getpluginscanstatistics(self):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
if len(domain) == 0:
cursor = await conn.execute('''
SELECT domain,find_date, type, source, count(*)
FROM results
GROUP BY domain, find_date, type, source
''')
results = await cursor.fetchall()
self.scanstats = results
else:
cursor = await conn.execute('''
SELECT domain,find_date, type, source, count(*)
FROM results WHERE domain=?
GROUP BY domain, find_date, type, source
''', (domain,))
results = await cursor.fetchall()
self.scanstats = results
cursor = await conn.execute('''
SELECT domain,find_date, type, source, count(*)
FROM results
GROUP BY domain, find_date, type, source
''')
results = await cursor.fetchall()
self.scanstats = results
return self.scanstats
except Exception as e:
print(e)
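With the REST API gone, getscanboarddata and getpluginscanstatistics above always aggregate over the whole results table instead of filtering by domain. A minimal sketch of the underlying aiosqlite pattern; the stash.sqlite path and the count_hosts name are illustrative assumptions:

import asyncio
import aiosqlite

async def count_hosts(db_path: str = 'stash.sqlite') -> int:
    # Same shape as getscanboarddata: connect, run an aggregate query, read one row.
    async with aiosqlite.connect(db_path, timeout=30) as conn:
        cursor = await conn.execute('SELECT COUNT(*) FROM results WHERE type="host"')
        (host_count,) = await cursor.fetchone()
        return host_count

# print(asyncio.run(count_hosts()))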

View file

@@ -7,6 +7,7 @@
import aiohttp
import sys
class ScreenShotter():
def __init__(self, output):
@@ -15,14 +16,22 @@ def __init__(self, output):
self.slash = "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
@staticmethod
def _chunk_list(items, chunk_size):
async def verify_installation():
# Helper function that verifies pyppeteer & chromium are installed
# If chromium is not installed pyppeteer will prompt user to install it
browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
await browser.close()
@staticmethod
def chunk_list(items, chunk_size):
# Based off of: https://github.com/apache/incubator-sdap-ingester
return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
@staticmethod
async def visit(url):
try:
print(f'attempting to visit: {url}')
timeout = aiohttp.ClientTimeout(total=45)
# print(f'attempting to visit: {url}')
timeout = aiohttp.ClientTimeout(total=35)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/83.0.4103.106 Safari/537.36'}
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
@@ -30,44 +39,34 @@ async def visit(url):
async with aiohttp.ClientSession(timeout=timeout, headers=headers,
connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
async with session.get(url) as resp:
# TODO fix with origin url I think it's there somewhere
#return str(resp.url.origin()), await resp.text()
# TODO fix with origin url, should be there somewhere
text = await resp.text("UTF-8")
print(text)
print('\n\n\n\n')
return f'http://{url}' if ('http' not in url and 'https' not in url) else url, text
except Exception as e:
print(f'An exception has occurred while attempting to visit: {e}')
print(f'An exception has occurred while attempting to screenshot {url}: {e}')
return "", ""
async def take_screenshot(self, url):
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
# url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
url = url.replace('www.', '')
print(f'Taking a screenshot of: {url}')
print(f'Attempting to take a screenshot of: {url}')
browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
context = await browser.createIncognitoBrowserContext()
page = await browser.newPage()
try:
# change default timeout from 30 to 35 seconds
page.setDefaultNavigationTimeout(35000)
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/83.0.4103.106 Safari/537.36')
#await page.goto(url, waitUntil='networkidle0')
# await page.goto(url, waitUntil='networkidle0')
await page.goto(url)
await page.screenshot({'path': f'{self.output}{self.slash}{url.replace("http://", "").replace("https://", "")}.png'})
#print('inside try and page has been closed')
#await page.close()
# await browser.close()
# return True
await page.screenshot(
{'path': f'{self.output}{self.slash}{url.replace("http://", "").replace("https://", "")}.png'})
except Exception as e:
print(f'Exception occurred: {e} for: {url} ')
finally:
# Clean up everything whether screenshot is taken or not
await page.close()
#await page.close()
#print('page is closed')
await context.close()
#print('context is closed')
await browser.close()
print('everything is closed!')
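chunk_list, added earlier in this file, is plain list slicing; a quick illustration of its behaviour, assuming the module path stays theHarvester.screenshot.screenshot:

from theHarvester.screenshot.screenshot import ScreenShotter

# Splits a list into fixed-size chunks; the final chunk holds whatever remains.
print(ScreenShotter.chunk_list(['a', 'b', 'c', 'd', 'e'], 2))
# -> [['a', 'b'], ['c', 'd'], ['e']]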