mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-24 14:32:57 +08:00
Aiomultiprocess is now working with proper chunking of the list; a few tweaks are still needed before it is ready.
This commit is contained in:
parent
03adc7e79e
commit
75271e3ded
3 changed files with 84 additions and 33 deletions
|
@ -11,10 +11,19 @@
|
|||
|
||||
from theHarvester import __main__
|
||||
|
||||
if sys.platform == 'win32':
|
||||
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
|
||||
else:
|
||||
import uvloop
|
||||
uvloop.install()
|
||||
if __name__ == '__main__':
|
||||
platform = sys.platform
|
||||
if platform == 'win32':
|
||||
# Required or things will break if trying to take screenshots
|
||||
import multiprocessing
|
||||
multiprocessing.freeze_support()
|
||||
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
|
||||
else:
|
||||
import uvloop
|
||||
uvloop.install()
|
||||
import aiomultiprocess
|
||||
# As we are not using Windows we can change the spawn method to fork for greater performance
|
||||
aiomultiprocess.set_context("fork")
|
||||
asyncio.run(__main__.entry_point())
|
||||
|
||||
asyncio.run(__main__.entry_point())
|
||||
# __main__
|
||||
|
|
|
@ -30,8 +30,8 @@ async def start(rest_args=None):
|
|||
default=False, action='store_true')
|
||||
parser.add_argument('-s', '--shodan', help='Use Shodan to query discovered hosts.', default=False,
|
||||
action='store_true')
|
||||
parser.add_argument('--screenshot', help='Take screenshots of resolved domains', default=False,
|
||||
action='store_true')
|
||||
parser.add_argument('--screenshot', help='Take screenshots of resolved domains specify output'
|
||||
' directory: --screenshot output_directory', default="", type=str)
|
||||
parser.add_argument('-v', '--virtual-host',
|
||||
help='Verify host name via DNS resolution and search for virtual hosts.', action='store_const',
|
||||
const='basic', default=False)
|
||||
|
@ -431,7 +431,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
|
|||
stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
|
||||
else:
|
||||
try:
|
||||
print('Checking if dns brute is defined')
|
||||
# Check if dns_brute is defined
|
||||
rest_args.dns_brute
|
||||
except:
|
||||
|
@ -625,23 +624,38 @@ async def handler(lst):
|
|||
else:
|
||||
pass
|
||||
|
||||
MAX_QUEUE_SIZE = 2 ** 15 - 1
|
||||
print(f'max queue size: {MAX_QUEUE_SIZE}')
|
||||
import time
|
||||
from aiomultiprocess import Pool
|
||||
|
||||
# Screenshots
|
||||
if args.screenshot is True:
|
||||
from theHarvester.screenshot.screenshot import screenshot_handler, take_screenshot
|
||||
if len(args.screenshot) > 0:
|
||||
print(f'Screenshots can be found: {args.screenshot}')
|
||||
# screenshot_handler,
|
||||
from theHarvester.screenshot.screenshot import take_screenshot, screenshot_handler, _chunk_list, receive, visit
|
||||
start = time.perf_counter()
|
||||
#from theHarvester.screenshot import take_screenshot
|
||||
"""for host in full:
|
||||
if ':' in host:
|
||||
try:
|
||||
# Did host resolve?
|
||||
domain = host.split(':')[0]
|
||||
await take_screenshot(domain)
|
||||
# break
|
||||
except Exception as e:
|
||||
print(f'Was unable to take a screenshot for: {host}, exception: {e}')"""
|
||||
unique_resolved_domains = list(sorted({url.split(':')[0]for url in full if ':' in url and 'wwws' not in url}))
|
||||
# Grab resolved subdomains
|
||||
coroutines = [take_screenshot(url.split(':')[0]) for url in full if ':' in url]
|
||||
await screenshot_handler(coroutines)
|
||||
# coroutines = [take_screenshot(url) for url in unique_resolved_domains]
|
||||
#await screenshot_handler(coroutines)
|
||||
async with Pool() as pool:
|
||||
print('Created pool')
|
||||
#serialized_tiles = [take_screenshot(url) for url in unique_resolved_domains]
|
||||
#print(f'Length of serialized_tiles: {len(serialized_tiles)} ')
|
||||
for chunk in _chunk_list(unique_resolved_domains, 20):
|
||||
print(f'Chunk: {chunk} and length: {len(chunk)}')
|
||||
try:
|
||||
#resultsss = await pool.map(visit, unique_resolved_domains)
|
||||
temp = await pool.map(take_screenshot, chunk)
|
||||
#resultsss = await pool.map(take_screenshot, unique_resolved_domains)
|
||||
#await pool.map(screenshot_handler, chunk)
|
||||
except Exception as ee:
|
||||
print(f'An excpeption has occurred while mapping: {ee}')
|
||||
#continue
|
||||
end = time.perf_counter()
|
||||
print("Pipeline finished in {} seconds".format(end - start))
|
||||
|
||||
# Shodan
|
||||
shodanres = []
|
||||
|
@ -810,6 +824,10 @@ async def entry_point():
|
|||
print(error_entry_point)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
"""
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main=entry_point())
|
||||
asyncio.run(wow())
|
||||
#import multiprocessing
|
||||
#multiprocessing.freeze_support()
|
||||
#asyncio.run(main=entry_point())
|
||||
"""
|
|
@ -4,6 +4,11 @@
|
|||
"""
|
||||
import asyncio
|
||||
from pyppeteer import launch
|
||||
import aiohttp
|
||||
|
||||
|
||||
def _chunk_list(items, chunk_size):
|
||||
return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
|
||||
|
||||
|
||||
async def worker(queue):
|
||||
|
@ -19,14 +24,15 @@ async def worker(queue):
|
|||
|
||||
|
||||
async def screenshot_handler(lst):
|
||||
print('Created screenshot handler')
|
||||
queue = asyncio.Queue()
|
||||
|
||||
for stor_method in lst:
|
||||
# enqueue the coroutines
|
||||
queue.put_nowait(stor_method)
|
||||
# Create five worker tasks to process the queue concurrently.
|
||||
# Create ten worker tasks to process the queue concurrently.
|
||||
tasks = []
|
||||
for i in range(5):
|
||||
for i in range(10):
|
||||
task = asyncio.create_task(worker(queue))
|
||||
tasks.append(task)
|
||||
|
||||
|
@ -40,19 +46,37 @@ async def screenshot_handler(lst):
|
|||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
|
||||
async def receive(lst):
    """Take a screenshot of every entry in ``lst``, one after another.

    Each entry is handed to ``take_screenshot`` and awaited before the next
    entry is started, so the entries are processed strictly in order.
    """
    for target in lst:
        await take_screenshot(target)
|
||||
|
||||
|
||||
async def visit(url):
    """Issue a GET request for ``url`` and return the response body as text.

    A fresh ``aiohttp.ClientSession`` is opened for the single request and
    closed again before returning.
    """
    # A single ``async with`` carrying two items is equivalent to nesting them:
    # the response context exits first, then the session context.
    async with aiohttp.ClientSession() as http_session, http_session.get(url) as response:
        body = await response.text()
        return body
|
||||
|
||||
|
||||
async def take_screenshot(url):
|
||||
url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
|
||||
#url = f'http://{url}' if ('http' not in url and 'https' not in url) else url
|
||||
url = f'https://{url}' if ('http' not in url and 'https' not in url) else url
|
||||
url.replace('www.', '')
|
||||
print(f'Taking a screenshot of: {url}')
|
||||
browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
|
||||
page = await browser.newPage()
|
||||
try:
|
||||
browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
|
||||
page = await browser.newPage()
|
||||
# 40 second navigation timeout (value below is in milliseconds)
|
||||
page.setDefaultNavigationTimeout(40000)
|
||||
await page.setUserAgent(
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
|
||||
# default wait time of 30 seconds
|
||||
# default timeout of 30 seconds
|
||||
#await page.setDefaultNavigationTimeout(20000)
|
||||
await page.goto(url)
|
||||
await page.screenshot({'path': f'D:\\repos\\theHarvester\\theHarvester\\screenshot\\{url.replace("https://", "").replace("http://", "")}.png'})
|
||||
await page.close()
|
||||
await browser.close()
|
||||
# return True
|
||||
except Exception as e:
|
||||
print(f'Exception occurred: {e} ')
|
||||
# No matter what happens make sure browser is closed
|
||||
await browser.close()
|
||||
print(f'Exception occurred: {e} for: {url} ')
|
||||
# No matter what happens make sure browser and page are closed
|
||||
return False
|
||||
|
|
Loading…
Reference in a new issue