mirror of https://github.com/laramies/theHarvester.git
synced 2025-02-24 14:32:57 +08:00
Screenshot functionality is ready for takeoff 🚀
This commit is contained in:
parent ab39c3402f
commit d3def10167
3 changed files with 90 additions and 84 deletions
@@ -21,9 +21,10 @@
     else:
         import uvloop
         uvloop.install()
-        import aiomultiprocess
-        # As we are not using Windows we can change the spawn method to fork for greater performance
-        aiomultiprocess.set_context("fork")
+        if platform == "linux":
+            import aiomultiprocess
+            # As we are not using Windows we can change the spawn method to fork for greater performance
+            aiomultiprocess.set_context("fork")
     asyncio.run(__main__.entry_point())
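For context on the hunk above: uvloop only swaps in libuv's event loop on non-Windows platforms, and aiomultiprocess is switched to the cheaper fork start method only where fork actually exists, i.e. on Linux. A minimal, self-contained sketch of that startup pattern (the entry() coroutine and the script layout are illustrative, not theHarvester's actual launcher):

import asyncio
import sys


async def entry():
    # stand-in for the real async entry point
    print('running on', sys.platform)

if __name__ == '__main__':
    if sys.platform != 'win32':
        import uvloop
        uvloop.install()  # replace asyncio's default event loop with libuv's
        if sys.platform == 'linux':
            import aiomultiprocess
            # fork starts workers faster than spawn, but is only a safe default on Linux
            aiomultiprocess.set_context("fork")
    asyncio.run(entry())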
# __main__

@@ -14,8 +14,6 @@
 import re
 import sys
 
-Core.banner()
-
 
 async def start(rest_args=None):
     parser = argparse.ArgumentParser(
@@ -128,7 +126,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
     else:
         print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m')
     if store_host:
-        host_names = filter(await search_engine.get_hostnames())
+        #host_names = filter(await search_engine.get_hostnames())
+        host_names = [host for host in filter(await search_engine.get_hostnames()) if f'.{word}' in host]
         if source != 'hackertarget' and source != 'pentesttools' and source != 'rapiddns':
             # If source is inside this conditional it means the hosts returned must be resolved to obtain ip
             full_hosts_checker = hostchecker.Checker(host_names)
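The new list comprehension in this hunk (and the matching one in the next hunk) scopes results to the searched domain: a hostname is only kept if '.{word}' occurs somewhere in it. A rough sketch of that idea with a hypothetical helper (theHarvester's own filter() wrapper is not reproduced here):

def keep_in_scope(hostnames, word):
    # hypothetical helper: drop empties, deduplicate, then keep hosts containing '.<word>'
    unique = {h.strip().lower() for h in hostnames if h}
    return sorted(h for h in unique if f'.{word}' in h)


print(keep_in_scope(['mail.example.com', 'cdn.other.org', 'www.example.com.evil.net'], 'example.com'))
# ['mail.example.com', 'www.example.com.evil.net']

Note that the substring test is loose: a look-alike such as www.example.com.evil.net still passes, so out-of-scope hosts are not fully excluded.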
@@ -150,7 +149,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
     if store_results:
         email_list, host_names, urls = await search_engine.get_results()
         all_emails.extend(email_list)
-        host_names = filter(host_names)
+        host_names = [host for host in filter(host_names) if f'.{word}' in host]
         all_urls.extend(filter(urls))
         all_hosts.extend(host_names)
         await db.store_all(word, all_hosts, 'host', source)
@@ -631,31 +630,45 @@ async def handler(lst):
 
     # Screenshots
    if len(args.screenshot) > 0:
-        print(f'Screenshots can be found: {args.screenshot}')
-        # screenshot_handler,
-        from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
+        # screenshot_handler
+        #from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
+        from theHarvester.screenshot.screenshot import ScreenShotter
+        # AsyncFetcher.fetch_all([])
+        screen_shotter = ScreenShotter(args.screenshot)
+        print(f'Screenshots can be found: {screen_shotter.output}{screen_shotter.slash}')
         start = time.perf_counter()
+        print('Filtering domains for ones we can reach')
-        #from theHarvester.screenshot import take_screenshot
-        unique_resolved_domains = list(sorted({url.split(':')[0]for url in full if ':' in url and 'wwws' not in url}))
+        unique_resolved_domains = {url.split(':')[0]for url in full if ':' in url and 'www.' not in url}
+        # First filter out ones that didn't resolve
+        #unique_resolved_domains = list(sorted([x for x in unique_resolved_domains
+        # if len(await screen_shotter.visit(x)) > 0]))
+        # Second filter out ones where we can't reach them with an http request
-        # Grab resolved subdomains
-        # coroutines = [take_screenshot(url) for url in unique_resolved_domains]
-        #await screenshot_handler(coroutines)
-        async with Pool() as pool:
+        async with Pool(15) as pool:
+            print('Created pool')
+            print('mapping for unique resolved domains')
+            y = await pool.map(screen_shotter.visit, list(unique_resolved_domains))
+            unique_resolved_domains = list(sorted({x[0] for x in y if len(x[1]) > 0}))
+            print(unique_resolved_domains)
+        async with Pool(3) as pool:
             #serialized_tiles = [take_screenshot(url) for url in unique_resolved_domains]
             #print(f'Length of serialized_tiles: {len(serialized_tiles)} ')
-            for chunk in _chunk_list(unique_resolved_domains, 20):
+            print(f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!')
+            for chunk in screen_shotter._chunk_list(unique_resolved_domains, 25):
+                print(f'Chunk: {chunk} and length: {len(chunk)}')
                 try:
                     #resultsss = await pool.map(visit, unique_resolved_domains)
-                    temp = await pool.map(take_screenshot, chunk)
+                    temp = await pool.map(screen_shotter.take_screenshot, chunk)
                     #resultsss = await pool.map(take_screenshot, unique_resolved_domains)
                     #await pool.map(screenshot_handler, chunk)
                 except Exception as ee:
                     print(f'An excpeption has occurred while mapping: {ee}')
                     #continue
-            end = time.perf_counter()
-            print("Pipeline finished in {} seconds".format(end - start))
+        end = time.perf_counter()
+        print("Pipeline finished in {} seconds".format(end - start))
 
     # Shodan
     shodanres = []
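The reworked block above is a two-stage pipeline: one aiomultiprocess pool probes every candidate host over HTTP and keeps only those that answered, then a second, smaller pool takes the screenshots in fixed-size chunks. A self-contained sketch of that shape, assuming aiomultiprocess and aiohttp are installed; probe(), shoot(), chunk() and pipeline() are illustrative stand-ins for ScreenShotter.visit, ScreenShotter.take_screenshot, ScreenShotter._chunk_list and the block in __main__, not the project's code:

import asyncio

import aiohttp
from aiomultiprocess import Pool


async def probe(host):
    # return (host, body); an empty body marks the host as unreachable
    try:
        timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(f'http://{host}') as resp:
                return host, await resp.text()
    except Exception:
        return host, ''


async def shoot(host):
    print(f'would screenshot {host}')  # stand-in for the pyppeteer call


def chunk(items, size):
    return [items[i:i + size] for i in range(0, len(items), size)]


async def pipeline(hosts):
    async with Pool(4) as pool:    # stage 1: reachability filter
        results = await pool.map(probe, hosts)
    reachable = sorted(host for host, body in results if body)
    async with Pool(2) as pool:    # stage 2: screenshots, chunked to bound the work in flight
        for batch in chunk(reachable, 25):
            await pool.map(shoot, batch)


if __name__ == '__main__':
    asyncio.run(pipeline(['example.com', 'doesnotexist.invalid']))

Mapping a coroutine over a Pool runs it in worker processes, so the mapped callable (here probe/shoot, in the diff the ScreenShotter methods) has to be picklable.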
@@ -817,6 +830,7 @@ async def handler(lst):
 
 async def entry_point():
     try:
+        Core.banner()
         await start()
     except KeyboardInterrupt:
         print('\n\n\033[93m[!] ctrl+c detected from user, quitting.\n\n \033[0m')
@@ -2,81 +2,72 @@
 Screenshot module that utilizes pyppeteer in async fashion
 to break urls into list and assign them to workers in a queue
 """
 import asyncio
 
 from pyppeteer import launch
 import aiohttp
+import sys
 
+class ScreenShotter():
 
-def _chunk_list(items, chunk_size):
-    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
+    def __init__(self, output):
+        self.output = output
+        self.slash = "\\" if 'win' in sys.platform else '/'
+        self.slash = "" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
 
+    @staticmethod
+    def _chunk_list(items, chunk_size):
+        return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
 
-async def worker(queue):
-    while True:
-        # Get a "work item" out of the queue.
-        stor = await queue.get()
+    @staticmethod
+    async def visit(url):
         try:
-            await stor
-            queue.task_done()
-            # Notify the queue that the "work item" has been processed.
-        except Exception:
-            queue.task_done()
+            print(f'attempting to visit: {url}')
+            timeout = aiohttp.ClientTimeout(total=45)
+            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                                     'Chrome/83.0.4103.106 Safari/537.36'}
+            url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
+            url = url.replace('www.', '')
+            async with aiohttp.ClientSession(timeout=timeout, headers=headers,
+                                             connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
+                async with session.get(url) as resp:
+                    # TODO fix with origin url I think it's there somewhere
+                    #return str(resp.url.origin()), await resp.text()
+                    text = await resp.text("UTF-8")
+                    print(text)
+                    print('\n\n\n\n')
+                    return f'http://{url}' if ('http' not in url and 'https' not in url) else url, text
+        except Exception as e:
+            print(f'An exception has occurred while attempting to visit: {e}')
+            return "", ""
 
 
-async def screenshot_handler(lst):
-    print('Created screenshot handler')
-    queue = asyncio.Queue()
-
-    for stor_method in lst:
-        # enqueue the coroutines
-        queue.put_nowait(stor_method)
-    # Create ten worker tasks to process the queue concurrently.
-    tasks = []
-    for i in range(10):
-        task = asyncio.create_task(worker(queue))
-        tasks.append(task)
-
-    # Wait until the queue is fully processed.
-    await queue.join()
-
-    # Cancel our worker tasks.
-    for task in tasks:
-        task.cancel()
-    # Wait until all worker tasks are cancelled.
-    await asyncio.gather(*tasks, return_exceptions=True)
-
-
-async def receive(lst):
-    for url in lst:
-        await take_screenshot(url)
-
-
-async def visit(url):
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url) as resp:
-            return await resp.text()
-
-
-async def take_screenshot(url):
-    #url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
-    url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
-    url.replace('www.', '')
-    print(f'Taking a screenshot of: {url}')
-    try:
+    async def take_screenshot(self, url):
+        url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
+        # url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
+        url = url.replace('www.', '')
+        print(f'Taking a screenshot of: {url}')
         browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
+        context = await browser.createIncognitoBrowserContext()
         page = await browser.newPage()
-        # 50 second timeout
-        page.setDefaultNavigationTimeout(50000)
-        await page.setUserAgent(
-            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
-        # default timeout of 30 seconds
-        #await page.setDefaultNavigationTimeout(20000)
-        await page.goto(url)
-        await page.screenshot({'path': f'D:\\repos\\theHarvester\\theHarvester\\screenshot\\{url.replace("https://", "").replace("http://", "")}.png'})
-        await page.close()
-        await browser.close()
-        # return True
-    except Exception as e:
-        print(f'Exception occurred: {e} for: {url} ')
-        # No matter what happens make sure browser and page are closed
-        return False
+        try:
+
+            # change default timeout from 30 to 35 seconds
+            page.setDefaultNavigationTimeout(35000)
+            await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                                    'Chrome/83.0.4103.106 Safari/537.36')
+            #await page.goto(url, waitUntil='networkidle0')
+            await page.goto(url)
+            await page.screenshot({'path': f'{self.output}{self.slash}{url.replace("http://", "").replace("https://", "")}.png'})
+            #print('inside try and page has been closed')
+            #await page.close()
+            # await browser.close()
+            # return True
+        except Exception as e:
+            print(f'Exception occurred: {e} for: {url} ')
+        finally:
+            await page.close()
+            #await page.close()
+            #print('page is closed')
+            await context.close()
+            #print('context is closed')
+            await browser.close()
+            print('everything is closed!')
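For reference, the pyppeteer calls the new take_screenshot() leans on, shown in isolation: launch a headless browser, open a page inside an incognito context, navigate, write the PNG, and tear everything down in a finally block so a failed navigation cannot leak the browser process. This is a minimal sketch, not theHarvester's code; the URL and output path are placeholders:

import asyncio

from pyppeteer import launch


async def snap(url, out_path):
    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=['--no-sandbox'])
    context = await browser.createIncognitoBrowserContext()
    page = await context.newPage()
    page.setDefaultNavigationTimeout(35000)  # milliseconds
    try:
        await page.goto(url)
        await page.screenshot({'path': out_path})
    finally:
        # close page, context and browser no matter what happened above
        await page.close()
        await context.close()
        await browser.close()


asyncio.run(snap('http://example.com', 'example.com.png'))

One small difference from the diff: the sketch opens the page with context.newPage() so the page actually lives inside the incognito context, whereas the committed code calls browser.newPage() after creating the context.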