diff --git a/theHarvester.py b/theHarvester.py
index cb30ceb8..608d17dd 100755
--- a/theHarvester.py
+++ b/theHarvester.py
@@ -11,10 +11,19 @@ from theHarvester import __main__
-if sys.platform == 'win32':
-    asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
-else:
-    import uvloop
-    uvloop.install()
+if __name__ == '__main__':
+    platform = sys.platform
+    if platform == 'win32':
+        # Required on Windows, otherwise taking screenshots will break
+        import multiprocessing
+        multiprocessing.freeze_support()
+        asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
+    else:
+        import uvloop
+        uvloop.install()
+        import aiomultiprocess
+        # As we are not on Windows we can switch the start method to fork for greater performance
+        aiomultiprocess.set_context("fork")
+    asyncio.run(__main__.entry_point())
-asyncio.run(__main__.entry_point())
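Review note: this hunk mixes three platform-specific requirements. On Windows, multiprocessing.freeze_support() has to run inside the if __name__ == '__main__' guard, because spawn-based worker processes re-import the main module; the selector event loop policy is forced there as well. On POSIX the patch installs uvloop and switches aiomultiprocess to the cheaper fork start method. Below is a minimal, self-contained sketch of the same setup; the entry_point coroutine here is a stand-in, not part of the patch, and it uses the documented asyncio.set_event_loop_policy() call where the patch keeps the project's existing trick of reassigning asyncio.DefaultEventLoopPolicy:

    import asyncio
    import sys

    async def entry_point() -> None:
        # stand-in for theHarvester's real entry coroutine
        print(f'running on {sys.platform}')

    if __name__ == '__main__':
        if sys.platform == 'win32':
            import multiprocessing
            multiprocessing.freeze_support()   # must run before any worker process is spawned
            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        else:
            import uvloop                      # POSIX-only accelerated event loop
            uvloop.install()
            import aiomultiprocess
            aiomultiprocess.set_context("fork")  # fork avoids re-importing __main__ in workers
        asyncio.run(entry_point())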
diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index a2573840..4bcdbe7c 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -30,8 +30,8 @@ async def start(rest_args=None):
                         default=False, action='store_true')
     parser.add_argument('-s', '--shodan', help='Use Shodan to query discovered hosts.',
                         default=False, action='store_true')
-    parser.add_argument('--screenshot', help='Take screenshots of resolved domains', default=False,
-                        action='store_true')
+    parser.add_argument('--screenshot', help='Take screenshots of resolved domains; specify an output'
+                                             ' directory: --screenshot output_directory', default="", type=str)
     parser.add_argument('-v', '--virtual-host',
                         help='Verify host name via DNS resolution and search for virtual hosts.',
                         action='store_const', const='basic', default=False)
@@ -431,7 +431,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
             stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
     else:
         try:
-            print('Checking if dns brute is defined')
             # Check if dns_brute is defined
             rest_args.dns_brute
         except:
@@ -625,23 +624,38 @@ async def handler(lst):
         else:
             pass
 
+    import time
+    from aiomultiprocess import Pool
     # Screenshots
-    if args.screenshot is True:
-        from theHarvester.screenshot.screenshot import screenshot_handler, take_screenshot
+    if len(args.screenshot) > 0:
+        print(f'Screenshots can be found in: {args.screenshot}')
+        from theHarvester.screenshot.screenshot import take_screenshot, _chunk_list
+        start = time.perf_counter()
-        """for host in full:
-            if ':' in host:
-                try:
-                    # Did host resolve?
-                    domain = host.split(':')[0]
-                    await take_screenshot(domain)
-                    # break
-                except Exception as e:
-                    print(f'Was unable to take a screenshot for: {host}, exception: {e}')"""
-        coroutines = [take_screenshot(url.split(':')[0]) for url in full if ':' in url]
-        await screenshot_handler(coroutines)
+        # Grab the resolved subdomains, deduplicated and sorted
+        unique_resolved_domains = sorted({url.split(':')[0] for url in full if ':' in url and 'wwws' not in url})
+        async with Pool() as pool:
+            # Map in fixed-size chunks so one bad batch does not abort the whole run
+            for chunk in _chunk_list(unique_resolved_domains, 20):
+                try:
+                    await pool.map(take_screenshot, chunk)
+                except Exception as ee:
+                    print(f'An exception has occurred while mapping: {ee}')
+        end = time.perf_counter()
+        print(f'Screenshot pipeline finished in {end - start} seconds')
 
     # Shodan
     shodanres = []
@@ -810,6 +824,10 @@ async def entry_point():
         print(error_entry_point)
         sys.exit(1)
 
-
+# theHarvester.py is now the entry script, so the inline runner below stays disabled
+"""
 if __name__ == '__main__':
-    asyncio.run(main=entry_point())
+    asyncio.run(main=entry_point())
+"""
\ No newline at end of file
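Review note: the new pipeline fans the screenshot work out to a process pool via aiomultiprocess, in chunks of 20, so an exception only costs the current chunk rather than the whole run. A runnable sketch of the same shape, with a stand-in job in place of take_screenshot (the names probe and hosts are illustrative only):

    import asyncio
    from aiomultiprocess import Pool

    def _chunk_list(items, chunk_size):
        # same helper the patch adds to screenshot.py
        return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]

    async def probe(host: str) -> str:
        await asyncio.sleep(0.1)   # stand-in for take_screenshot(host)
        return host

    async def main() -> None:
        hosts = [f'host{i}.example.com' for i in range(50)]
        async with Pool() as pool:
            for chunk in _chunk_list(hosts, 20):
                try:
                    done = await pool.map(probe, chunk)   # runs in worker processes
                    print(f'finished a chunk of {len(done)} hosts')
                except Exception as e:
                    print(f'chunk failed: {e}')

    if __name__ == '__main__':
        asyncio.run(main())

Pool.map requires the mapped function to be importable by the worker processes, which is one reason take_screenshot lives at module level in screenshot.py.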
diff --git a/theHarvester/screenshot/screenshot.py b/theHarvester/screenshot/screenshot.py
index cac5d287..d2373b46 100644
--- a/theHarvester/screenshot/screenshot.py
+++ b/theHarvester/screenshot/screenshot.py
@@ -4,6 +4,11 @@
 """
 import asyncio
 from pyppeteer import launch
+import aiohttp
+
+
+def _chunk_list(items, chunk_size):
+    # Split items into chunk_size-sized slices for batched processing
+    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
 
 
 async def worker(queue):
@@ -19,14 +24,15 @@ async def worker(queue):
 
 
 async def screenshot_handler(lst):
     queue = asyncio.Queue()
     for stor_method in lst:
         # enqueue the coroutines
         queue.put_nowait(stor_method)
-    # Create five worker tasks to process the queue concurrently.
+    # Create ten worker tasks to process the queue concurrently.
     tasks = []
-    for i in range(5):
+    for i in range(10):
         task = asyncio.create_task(worker(queue))
         tasks.append(task)
 
@@ -40,19 +46,37 @@ async def screenshot_handler(lst):
     await asyncio.gather(*tasks, return_exceptions=True)
 
 
+async def receive(lst):
+    # Sequential fallback: screenshot the URLs one at a time
+    for url in lst:
+        await take_screenshot(url)
+
+
+async def visit(url):
+    # Fetch the page body over HTTP; useful for checking that a host actually responds
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as resp:
+            return await resp.text()
+
+
 async def take_screenshot(url):
-    url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
-    url.replace('www.', '')
+    # 'http' not in url already rules out 'https' too, so one check suffices
+    url = f'https://{url}' if 'http' not in url else url
+    # str.replace returns a new string, so the result must be reassigned
+    url = url.replace('www.', '')
     print(f'Taking a screenshot of: {url}')
-    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
-    page = await browser.newPage()
+    browser = None
     try:
+        browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
+        page = await browser.newPage()
+        # 40 second navigation timeout
+        page.setDefaultNavigationTimeout(40000)
         await page.setUserAgent(
             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
-        # default wait time of 30 seconds
         await page.goto(url)
+        # TODO: write to the directory passed via --screenshot instead of this hard-coded path
         await page.screenshot({'path': f'D:\\repos\\theHarvester\\theHarvester\\screenshot\\{url.replace("https://", "").replace("http://", "")}.png'})
     except Exception as e:
-        print(f'Exception occurred: {e} ')
+        print(f'Exception occurred: {e} for: {url}')
+        return False
+    finally:
         # No matter what happens make sure browser is closed
-        await browser.close()
+        if browser is not None:
+            await browser.close()
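Review note: worker() itself is untouched, so its body never appears in this diff. For context, a consumer matching screenshot_handler's contract, where each queued item is an already-created coroutine object, would look like the sketch below; this is the standard asyncio queue/worker pattern from the library docs, not the file's actual body:

    import asyncio

    async def worker(queue: asyncio.Queue) -> None:
        while True:
            coro = await queue.get()     # each item is a coroutine object
            try:
                await coro               # run the queued screenshot job
            except Exception as e:
                print(f'queued task failed: {e}')
            finally:
                queue.task_done()        # lets queue.join() unblock

    async def main() -> None:
        queue: asyncio.Queue = asyncio.Queue()
        for _ in range(3):
            queue.put_nowait(asyncio.sleep(0.1))   # stand-ins for take_screenshot(...)
        tasks = [asyncio.create_task(worker(queue)) for _ in range(10)]
        await queue.join()               # wait until every queued job has finished
        for task in tasks:               # workers loop forever, so cancel them
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

    asyncio.run(main())

One caveat worth flagging on the PR: queueing live coroutine objects means any items left unconsumed when the handler exits early are never awaited, which Python reports as "coroutine was never awaited" warnings.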