mirror of
https://github.com/laramies/theHarvester.git
synced 2024-11-13 19:16:18 +08:00
Real commit for screenshot functionality.
This commit is contained in:
parent
685c3c6823
commit
50a6c76716
2 changed files with 58 additions and 12 deletions
|
@ -1,12 +0,0 @@
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.webdriver.firefox.options import Options
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Headless Firefox smoke test: load a single page and save a screenshot of it.
options = Options()
# Same effect as `options.headless = True`, but passed as a plain Firefox
# argument so it does not depend on the `headless` property setter, which
# newer Selenium releases deprecate.
options.add_argument('-headless')
browser = webdriver.Firefox(options=options)
try:
    browser.minimize_window()
    browser.get('https://leidos.com')
    browser.save_screenshot('test-screenshot.png')
finally:
    # Always end the WebDriver session, even when loading the page or saving
    # the screenshot fails, so no browser/driver process is left behind.
    browser.quit()
|
|
58
theHarvester/screenshot/screenshot.py
Normal file
58
theHarvester/screenshot/screenshot.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
"""
|
||||||
|
Screenshot module that uses pyppeteer asynchronously: screenshot
|
||||||
|
coroutines are placed on a queue and processed concurrently by worker tasks.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from pyppeteer import launch
|
||||||
|
|
||||||
|
|
||||||
|
async def worker(queue):
    """Run awaitables taken from *queue*, one at a time, forever.

    Each item pulled from the queue is awaited. Whether the item succeeds or
    fails, the queue is notified exactly once via ``task_done()`` so that
    ``queue.join()`` can complete. The loop never returns on its own; the
    caller is expected to cancel the task once the queue has been drained.

    Args:
        queue: an ``asyncio.Queue`` whose items are awaitables (coroutines).
    """
    while True:
        # Get a "work item" out of the queue.
        job = await queue.get()
        try:
            await job
        except Exception as e:
            # Best effort: one failing job must not stop this worker, but the
            # failure is reported instead of being silently discarded.
            print(f'Exception occurred in screenshot worker: {e!r}')
        finally:
            # Notify the queue that the "work item" has been processed.
            queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
|
async def screenshot_handler(lst, workers=5):
    """Process every awaitable in *lst* using a pool of concurrent workers.

    The awaitables are placed on an ``asyncio.Queue`` and consumed by
    ``workers`` tasks running ``worker(queue)``. The call returns once every
    queued item has been marked as done and the worker tasks have been
    cancelled.

    Args:
        lst: iterable of awaitables (coroutines) to run.
        workers: number of worker tasks to start; defaults to 5.

    Raises:
        ValueError: if ``workers`` is smaller than 1 (the queue would never
            be drained and ``join()`` would wait forever).
    """
    if workers < 1:
        raise ValueError('workers must be at least 1')

    queue = asyncio.Queue()
    for stor_method in lst:
        # enqueue the coroutines
        queue.put_nowait(stor_method)

    # Create the worker tasks that process the queue concurrently.
    tasks = [asyncio.create_task(worker(queue)) for _ in range(workers)]

    try:
        # Wait until the queue is fully processed.
        await queue.join()
    finally:
        # Workers loop forever, so they must be cancelled explicitly; doing it
        # in `finally` also cleans them up if this coroutine is interrupted
        # while waiting on the queue.
        for task in tasks:
            task.cancel()
        # Wait until all worker tasks are cancelled.
        await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def take_screenshot(url, output_dir=None):
    """Open *url* in headless Chromium (pyppeteer) and save a PNG screenshot.

    The URL is given an ``http://`` scheme when it has none, and ``www.`` is
    removed from it before navigating. The image is written to
    ``<output_dir>/<name>.png``, where ``<name>`` is the URL without its
    scheme and with characters that are unsafe in file names replaced by
    ``_``. Errors raised while loading the page or saving the image are
    printed rather than raised, and the browser is always closed.

    Args:
        url: host name or URL to capture.
        output_dir: directory to write the PNG into; defaults to the
            directory containing this module.
    """
    import os
    import re

    # Test for an actual scheme prefix; a substring test would misclassify
    # hosts such as "httpbin.org" as already having a scheme.
    if not url.lower().startswith(('http://', 'https://')):
        url = f'http://{url}'
    # str.replace returns a new string, so the result has to be assigned.
    url = url.replace('www.', '')
    print(f'Taking a screenshot of: {url}')

    if output_dir is None:
        output_dir = os.path.dirname(os.path.abspath(__file__))
    bare = url.replace('https://', '').replace('http://', '').strip('/')
    # Paths, ports and query strings contain characters ('/', ':', '?') that
    # are not valid in file names on every platform.
    file_name = re.sub(r'[^A-Za-z0-9._-]', '_', bare)
    path = os.path.join(output_dir, f'{file_name}.png')

    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
    try:
        page = await browser.newPage()
        await page.setUserAgent(
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
        # default wait time of 30 seconds
        await page.goto(url)
        await page.screenshot({'path': path})
    except Exception as e:
        print(f'Exception occurred: {e} ')
    finally:
        # No matter what happens make sure browser is closed
        await browser.close()
|
Loading…
Reference in a new issue