Screenshot functionality is ready for takeoff 🚀

This commit is contained in:
NotoriousRebel 2020-06-30 18:47:06 -04:00
parent ab39c3402f
commit d3def10167
3 changed files with 90 additions and 84 deletions

@@ -21,9 +21,10 @@
else:
import uvloop
uvloop.install()
import aiomultiprocess
# As we are not using Windows we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
if platform == "linux":
import aiomultiprocess
# As we are not using Windows we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
asyncio.run(__main__.entry_point())
# __main__

@@ -14,8 +14,6 @@
import re
import sys
Core.banner()
async def start(rest_args=None):
parser = argparse.ArgumentParser(
@@ -128,7 +126,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
else:
print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m')
if store_host:
host_names = filter(await search_engine.get_hostnames())
#host_names = filter(await search_engine.get_hostnames())
host_names = [host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host]
if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
# If source is inside this conditional it means the hosts returned must be resolved to obtain ip
full_hosts_checker = hostchecker.Checker(host_names)
@@ -150,7 +149,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
if store_results:
email_list, host_names, urls = await search_engine.get_results()
all_emails.extend(email_list)
host_names = filter(host_names)
host_names = [host for host in filter(host_names) if f'.{word}' in host]
all_urls.extend(filter(urls))
all_hosts.extend(host_names)
await db.store_all(word, all_hosts, 'host', source)
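# Illustrative example (hypothetical values, not from this commit): the new comprehensions above
# keep only hostnames that fall under the target domain. With word = 'example.com',
#     hosts = ['mail.example.com', 'cdn.example.com', 'tracker.unrelated.net']
#     [host for host in hosts if f'.{word}' in host]  # -> ['mail.example.com', 'cdn.example.com']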
@@ -631,31 +630,45 @@ async def handler(lst):
# Screenshots
if len(args.screenshot) > 0:
print(f'Screenshots can be found: {args.screenshot}')
# screenshot_handler,
from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
# screenshot_handler
#from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
from theHarvester.screenshot.screenshot import ScreenShotter
# AsyncFetcher.fetch_all([])
screen_shotter = ScreenShotter(args.screenshot)
print(f'Screenshots can be found: {screen_shotter.output}{screen_shotter.slash}')
start = time.perf_counter()
print('Filtering domains for ones we can reach')
#from theHarvester.screenshot import take_screenshot
unique_resolved_domains = list(sorted({url.split(':')[0] for url in full if ':' in url and 'wwws' not in url}))
unique_resolved_domains = {url.split(':')[0] for url in full if ':' in url and 'www.' not in url}
# First filter out ones that didn't resolve
#unique_resolved_domains = list(sorted([x for x in unique_resolved_domains
# if len(await screen_shotter.visit(x)) > 0]))
# Second filter out ones where we can't reach them with an http request
# Grab resolved subdomains
# coroutines = [take_screenshot(url) for url in unique_resolved_domains]
#await screenshot_handler(coroutines)
async with Pool() as pool:
async with Pool(15) as pool:
print('Created pool')
print('mapping for unique resolved domains')
y = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
unique_resolved_domains = list(sorted({x[0] for x in y if len(x[1]) > 0}))
print(unique_resolved_domains)
async with Pool(3) as pool:
#serialized_tiles = [take_screenshot(url) for url in unique_resolved_domains]
#print(f'Length of serialized_tiles: {len(serialized_tiles)} ')
for chunk in _chunk_list(unique_resolved_domains, 20):
print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
for chunk in screen_shotter._chunk_list(unique_resolved_domains, 25):
print(f'Chunk: {chunk} and length: {len(chunk)}')
try:
#resultsss = await pool.map(visit, unique_resolved_domains)
temp = await pool.map(take_screenshot, chunk)
temp = await pool.map(screen_shotter.take_screenshot, chunk)
#resultsss = await pool.map(take_screenshot, unique_resolved_domains)
#await pool.map(screenshot_handler, chunk)
except Exception as ee:
print(f'An exception has occurred while mapping: {ee}')
#continue
end = time.perf_counter()
print("Pipeline finished in {} seconds".format(end - start))
end = time.perf_counter()
print("Pipeline finished in {} seconds".format(end - start))
# Shodan
shodanres = []
@@ -817,6 +830,7 @@ async def handler(lst):
async def entry_point():
try:
Core.banner()
await start()
except KeyboardInterrupt:
print('\n\n\033[93m[!] ctrl+c detected from user, quitting.\n\n \033[0m')

@@ -2,81 +2,72 @@
Screenshot module that utilizes pyppeteer in async fashion
to break urls into a list and assign them to workers in a queue
"""
import asyncio
from pyppeteer import launch
import aiohttp
import sys
class ScreenShotter():
def _chunk_list(items, chunk_size):
return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
def __init__(self, output):
self.output = output
self.slash = "\\" if 'win' in sys.platform else '/'
self.slash = "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
@staticmethod
def _chunk_list(items, chunk_size):
return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
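# Illustrative example (not from this commit): _chunk_list slices a list into fixed-size pieces,
# e.g. ScreenShotter._chunk_list([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]].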
async def worker(queue):
while True:
# Get a "work item" out of the queue.
stor = await queue.get()
@staticmethod
async def visit(url):
try:
await stor
queue.task_done()
# Notify the queue that the "work item" has been processed.
except Exception:
queue.task_done()
print(f'attempting to visit: {url}')
timeout = aiohttp.ClientTimeout(total=45)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/83.0.4103.106 Safari/537.36'}
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
url = url.replace('www.', '')
async with aiohttp.ClientSession(timeout=timeout, headers=headers,
connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
async with session.get(url) as resp:
# TODO fix with origin url I think it's there somewhere
#return str(resp.url.origin()), await resp.text()
text = await resp.text("UTF-8")
print(text)
print('\n\n\n\n')
return f'http://{url}' if ('http' not in url and 'https' not in url) else url, text
except Exception as e:
print(f'An exception has occurred while attempting to visit: {e}')
return "", ""
async def screenshot_handler(lst):
print('Created screenshot handler')
queue = asyncio.Queue()
for stor_method in lst:
# enqueue the coroutines
queue.put_nowait(stor_method)
# Create ten worker tasks to process the queue concurrently.
tasks = []
for i in range(10):
task = asyncio.create_task(worker(queue))
tasks.append(task)
# Wait until the queue is fully processed.
await queue.join()
# Cancel our worker tasks.
for task in tasks:
task.cancel()
# Wait until all worker tasks are cancelled.
await asyncio.gather(*tasks, return_exceptions=True)
async def receive(lst):
for url in lst:
await take_screenshot(url)
async def visit(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
return await resp.text()
async def take_screenshot(url):
#url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
url.replace('www.', '')
print(f'Taking a screenshot of: {url}')
try:
async def take_screenshot(self, url):
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
# url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
url = url.replace('www.', '')
print(f'Taking a screenshot of: {url}')
browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
context = await browser.createIncognitoBrowserContext()
page = await browser.newPage()
# 50 second timeout
page.setDefaultNavigationTimeout(50000)
await page.setUserAgent(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
# default timeout of 30 seconds
#await page.setDefaultNavigationTimeout(20000)
await page.goto(url)
await page.screenshot({'path': f'D:\\repos\\theHarvester\\theHarvester\\screenshot\\{url.replace("https://", "").replace("http://", "")}.png'})
await page.close()
await browser.close()
# return True
except Exception as e:
print(f'Exception occurred: {e} for: {url} ')
# No matter what happens make sure browser and page are closed
return False
try:
# change default timeout from 30 to 35 seconds
page.setDefaultNavigationTimeout(35000)
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/83.0.4103.106 Safari/537.36')
#await page.goto(url, waitUntil='networkidle0')
await page.goto(url)
await page.screenshot({'path': f'{self.output}{self.slash}{url.replace("http://", "").replace("https://", "")}.png'})
#print('inside try and page has been closed')
#await page.close()
# await browser.close()
# return True
except Exception as e:
print(f'Exception occurred: {e} for: {url} ')
finally:
await page.close()
#await page.close()
#print('page is closed')
await context.close()
#print('context is closed')
await browser.close()
print('everything is closed!')
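
Taken together, the new flow is: construct a ScreenShotter, use a wide aiomultiprocess pool to probe which hosts answer over HTTP via visit(), then map take_screenshot() over small chunks in a narrower pool so only a few headless browsers run at once. A minimal, self-contained sketch of that usage, with hypothetical hostnames and output directory (not taken from this commit):

import asyncio
from aiomultiprocess import Pool
from theHarvester.screenshot.screenshot import ScreenShotter

async def demo():
    shooter = ScreenShotter('screens')                     # hypothetical output directory
    hosts = ['mail.example.com', 'cdn.example.com']        # hypothetical, already-resolved hosts
    async with Pool(15) as pool:                           # wide pool: cheap HTTP reachability checks
        results = await pool.map(shooter.visit, hosts)
    reachable = sorted({host for host, body in results if len(body) > 0})
    async with Pool(3) as pool:                            # narrow pool: each task launches a browser
        for chunk in ScreenShotter._chunk_list(reachable, 25):
            await pool.map(shooter.take_screenshot, chunk)

if __name__ == '__main__':
    asyncio.run(demo())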