Real commit for screenshot functionality.

This commit is contained in:
NotoriousRebel 2020-06-25 09:04:00 -04:00
parent 685c3c6823
commit 50a6c76716
2 changed files with 58 additions and 12 deletions

View file

@@ -1,12 +0,0 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import time
# Proof-of-concept: take one screenshot of a fixed page with headless Firefox.
options = Options()
options.headless = True
browser = webdriver.Firefox(options=options)
try:
    browser.minimize_window()
    browser.get('https://leidos.com')
    browser.save_screenshot('test-screenshot.png')
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver),
    # whereas close() only closes the current window. Doing it in `finally`
    # releases the browser even when navigation or the screenshot fails.
    browser.quit()

View file

@@ -0,0 +1,58 @@
"""
Screenshot module that uses pyppeteer asynchronously: the screenshot
coroutines are placed on a queue and consumed by a pool of worker tasks.
"""
import asyncio
import os

from pyppeteer import launch
async def worker(queue):
    """Await the items pulled from *queue*, one after another, until cancelled.

    Each queue item is expected to be an awaitable. An item that raises is
    reported on stdout and then skipped, so a single failure does not stop
    the worker from processing the remaining items.

    :param queue: an ``asyncio.Queue`` whose items are awaitables.
    """
    while True:
        # Get a "work item" out of the queue.
        stor = await queue.get()
        try:
            await stor
        except Exception as e:
            # Report the failure instead of silently discarding it.
            print(f'Exception occurred while processing a queued task: {e}')
        finally:
            # Notify the queue that the "work item" has been processed,
            # whether it succeeded or failed, so queue.join() can return.
            queue.task_done()
async def screenshot_handler(lst, num_workers=5):
    """Run every awaitable in *lst* through a pool of concurrent workers.

    The awaitables are placed on an ``asyncio.Queue`` and consumed by up to
    *num_workers* ``worker`` tasks. The call returns once every queued item
    has been marked as done and the worker tasks have been cancelled.

    :param lst: iterable of awaitables (e.g. ``take_screenshot(url)`` calls).
    :param num_workers: maximum number of worker tasks to run concurrently.
    :raises ValueError: if *num_workers* is smaller than 1.
    """
    if num_workers < 1:
        raise ValueError('num_workers must be at least 1')

    queue = asyncio.Queue()
    for stor_method in lst:
        # enqueue the coroutines
        queue.put_nowait(stor_method)

    # Never start more workers than there are items to process.
    worker_count = min(num_workers, queue.qsize())
    tasks = [asyncio.create_task(worker(queue)) for _ in range(worker_count)]
    try:
        # Wait until the queue is fully processed.
        await queue.join()
    finally:
        # Cancel the workers even if the wait above is interrupted; they
        # loop forever and would otherwise be left running.
        for task in tasks:
            task.cancel()
        # Wait until all worker tasks are cancelled.
        await asyncio.gather(*tasks, return_exceptions=True)
async def take_screenshot(
    url,
    output_dir='D:\\repos\\theHarvester\\theHarvester\\screenshot\\',
):
    """Open *url* in headless Chromium (pyppeteer) and save a PNG screenshot.

    Errors raised while opening the page, navigating or saving are printed
    and swallowed (best effort); the browser is closed in every case.

    :param url: host name or full URL. ``http://`` is prepended when the
        value does not already start with ``http://`` or ``https://``.
    :param output_dir: directory the PNG is written to. It is not created
        here, so it must already exist. The default is the location that
        was previously hard-coded; pass another directory to override it.
    """
    # Test for a real scheme prefix. A substring test would treat a host
    # such as "httpbin.org" as if it already carried a scheme.
    if not url.lower().startswith(('http://', 'https://')):
        url = f'http://{url}'
    # NOTE: a `url.replace('www.', '')` call used to sit here but discarded
    # its result, so the URL was never rewritten. That effective behaviour
    # is kept: a leading "www." is left in place.
    print(f'Taking a screenshot of: {url}')

    # Derive the file name from the URL without its scheme. Characters that
    # are not safe in a file name ("/", ":", "?", ...) become "_".
    bare = url.split('://', 1)[-1]
    file_name = ''.join(ch if ch.isalnum() or ch in '.-_' else '_' for ch in bare)
    path = os.path.join(output_dir, f'{file_name}.png')

    browser = await launch(headless=True, ignoreHTTPSErrors=True, args=["--no-sandbox"])
    try:
        page = await browser.newPage()
        await page.setUserAgent(
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
        # default wait time of 30 seconds
        await page.goto(url)
        await page.screenshot({'path': path})
    except Exception as e:
        print(f'Exception occurred: {e} ')
    finally:
        # No matter what happens (including cancellation) make sure the
        # browser is closed.
        await browser.close()