mirror of
https://github.com/laramies/theHarvester.git
synced 2024-11-11 18:03:10 +08:00
Real commit for screenshot functionality.
This commit is contained in:
parent
685c3c6823
commit
50a6c76716
2 changed files with 58 additions and 12 deletions
|
@ -1,12 +0,0 @@
|
|||
from selenium import webdriver
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
import time
|
||||
|
||||
# Smoke-test script: open a page in headless Firefox and save a screenshot
# of it to the current working directory.
options = Options()
# The `-headless` argument is honoured by every Selenium release, whereas the
# `Options.headless` property setter was deprecated and later removed in
# Selenium 4.
options.add_argument('-headless')
browser = webdriver.Firefox(options=options)
try:
    browser.minimize_window()
    browser.get('https://leidos.com')
    browser.save_screenshot('test-screenshot.png')
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and running it in `finally` prevents a leaked Firefox instance when
    # navigation or the screenshot fails.
    browser.quit()
|
58
theHarvester/screenshot/screenshot.py
Normal file
58
theHarvester/screenshot/screenshot.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
"""
|
||||
Screenshot module that utilizes pyppeteer in async fashion
|
||||
to break urls into list and assign them to workers in a queue
|
||||
"""
|
||||
import asyncio
|
||||
from pyppeteer import launch
|
||||
|
||||
|
||||
async def worker(queue):
    """Consume awaitables from ``queue`` forever, awaiting each one in turn.

    The loop never returns on its own; the caller is expected to cancel the
    task once ``queue.join()`` has completed.

    Args:
        queue: an ``asyncio.Queue`` whose items are awaitables (for example
            un-awaited coroutine objects).
    """
    while True:
        # Get a "work item" out of the queue.
        job = await queue.get()
        try:
            await job
        except Exception as e:
            # Best effort: one failing job must not take the worker down,
            # but the failure is reported instead of vanishing silently.
            print(f'Exception occurred: {e} ')
        finally:
            # Notify the queue that the "work item" has been processed.
            # Doing this in `finally` guarantees exactly one task_done() per
            # get(), including when the worker is cancelled mid-job, so
            # queue.join() can never hang on an abandoned item.
            queue.task_done()
|
||||
|
||||
|
||||
async def screenshot_handler(lst):
    """Run every awaitable in ``lst`` using a pool of five worker tasks.

    Args:
        lst: an iterable of awaitables (for example un-awaited coroutine
            objects); each one is awaited exactly once by some worker.

    Returns once every queued item has been processed and all worker tasks
    have been cancelled.
    """
    queue = asyncio.Queue()

    for stor_method in lst:
        # enqueue the coroutines
        queue.put_nowait(stor_method)

    # Create five worker tasks to process the queue concurrently.
    tasks = [asyncio.create_task(worker(queue)) for _ in range(5)]

    try:
        # Wait until the queue is fully processed.
        await queue.join()
    finally:
        # Cancel our worker tasks. This runs in `finally` so the workers are
        # not left running if this coroutine is itself cancelled or fails
        # while waiting on the queue.
        for task in tasks:
            task.cancel()
        # Wait until all worker tasks are cancelled.
        await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
|
||||
async def take_screenshot(url):
    """Render ``url`` in headless Chromium and save a PNG of the page.

    The image is written into the directory that contains this module and is
    named after the URL with its scheme removed.

    Args:
        url: a host name or URL. ``http://`` is prepended when no HTTP(S)
            scheme is present, and every ``www.`` occurrence is removed
            before the page is visited.

    Errors raised while loading or capturing the page are printed rather
    than propagated; a failure to launch the browser still propagates.
    """
    from pathlib import Path

    # Test the scheme as a prefix: a substring test would treat hosts such as
    # "httpbin.org" as already carrying a scheme.
    if not url.startswith(('http://', 'https://')):
        url = f'http://{url}'
    # str.replace returns a new string, so the result has to be re-bound.
    url = url.replace('www.', '')
    print(f'Taking a screenshot of: {url}')

    # Build a file name that is valid on every platform: drop the scheme and
    # replace characters such as '/', ':' and '?' that cannot appear in names.
    bare = url.replace('https://', '').replace('http://', '')
    safe_name = ''.join(ch if ch.isalnum() or ch in '._-' else '_' for ch in bare)
    # Save next to this module instead of a developer-specific absolute path.
    target = Path(__file__).resolve().parent / f'{safe_name}.png'

    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
    try:
        page = await browser.newPage()
        await page.setUserAgent(
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
        # default wait time of 30 seconds
        await page.goto(url)
        await page.screenshot({'path': str(target)})
    except Exception as e:
        print(f'Exception occurred: {e} ')
    finally:
        # No matter what happens make sure browser is closed
        await browser.close()
|
Loading…
Reference in a new issue