From 3bd1b8e12e8145157a9db4c683061d9c339311fc Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Tue, 24 Dec 2019 00:06:04 -0500 Subject: [PATCH] Minor fixes, and converted certspotter from requests to aiohttp. --- theHarvester/discovery/baidusearch.py | 5 ---- theHarvester/discovery/certspottersearch.py | 28 +++++++++++++-------- theHarvester/discovery/suip.py | 3 ++- theHarvester/lib/core.py | 17 +++++++------ 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/theHarvester/discovery/baidusearch.py b/theHarvester/discovery/baidusearch.py index c6fe283a..d2b9ac47 100644 --- a/theHarvester/discovery/baidusearch.py +++ b/theHarvester/discovery/baidusearch.py @@ -1,6 +1,5 @@ from theHarvester.lib.core import * from theHarvester.parsers import myparser -import grequests class SearchBaidu: @@ -19,10 +18,6 @@ async def do_search(self): } base_url = f'https://{self.server}/s?wd=%40{self.word}&pnxx&oq={self.word}' urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit] - """req = (grequests.get(url, headers=headers, timeout=5) for url in urls) - responses = grequests.imap(req, size=5) - for response in responses: - self.total_results += response.content.decode('UTF-8')""" responses = await async_fetcher.fetch_all(urls, headers=headers) for response in responses: self.total_results += response diff --git a/theHarvester/discovery/certspottersearch.py b/theHarvester/discovery/certspottersearch.py index 0eb9cd78..8f46d1c9 100644 --- a/theHarvester/discovery/certspottersearch.py +++ b/theHarvester/discovery/certspottersearch.py @@ -1,5 +1,5 @@ from theHarvester.lib.core import * -import requests +import aiohttp class SearchCertspoter: @@ -8,22 +8,28 @@ def __init__(self, word): self.word = word self.totalhosts = set() - def do_search(self) -> None: + async def do_search(self) -> None: base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names' headers = {'User-Agent': Core.get_user_agent()} try: - request = requests.get(base_url, headers=headers) - response = request.json() - for dct in response: - for key, value in dct.items(): - if key == 'dns_names': - self.totalhosts.update({name for name in value if name}) + client = aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=30)) + response = await async_fetcher.fetch(client, base_url, json=True) + await client.close() + if isinstance(response, list): + for dct in response: + for key, value in dct.items(): + if key == 'dns_names': + self.totalhosts.update({name for name in value if name}) + elif isinstance(response, dict): + self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''}) + else: + self.totalhosts.union('') except Exception as e: print(e) - def get_hostnames(self) -> set: + async def get_hostnames(self) -> set: return self.totalhosts - def process(self): - self.do_search() + async def process(self): + await self.do_search() print('\tSearching results.') diff --git a/theHarvester/discovery/suip.py b/theHarvester/discovery/suip.py index fe93b048..a8c8e14e 100644 --- a/theHarvester/discovery/suip.py +++ b/theHarvester/discovery/suip.py @@ -17,7 +17,7 @@ def __init__(self, word: str): async def request(self, url, params): headers = {'User-Agent': Core.get_user_agent()} data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'} - timeout = aiohttp.ClientTimeout(total=300) + timeout = aiohttp.ClientTimeout(total=360) # by default timeout is 5 minutes we will change that to 6 minutes # Depending on the domain and if it has a lot of subdomains you may want to tweak it # The results are well worth the wait :) @@ -33,6 +33,7 @@ async def request(self, url, params): async def handler(self, url): first_data = [url, (('act', 'subfinder'),), ] second_data = [url, (('act', 'amass'),), ] + # TODO RESEARCH https://suip.biz/?act=findomain async_requests = [ self.request(url=url, params=params) for url, params in [first_data, second_data] diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py index 941f80b0..07d4c169 100644 --- a/theHarvester/lib/core.py +++ b/theHarvester/lib/core.py @@ -1,7 +1,7 @@ # coding=utf-8 import random -from typing import Set, Union, Any +from typing import Set, Union, Any, Tuple import yaml import asyncio import aiohttp @@ -375,21 +375,22 @@ def get_user_agent() -> str: class async_fetcher: @staticmethod - async def fetch(session, url, params='') -> str: + async def fetch(session, url, params='', json=False) -> Union[str, dict, list]: # This fetch method solely focuses on get requests # TODO determine if method for post requests is necessary - if len(params) == '': + if len(params) == 0: async with session.get(url, params=params) as response: - await asyncio.sleep(3) - return await response.text() + await asyncio.sleep(2) + return await response.text() if json is False else await response.json() else: async with session.get(url) as response: - await asyncio.sleep(3) - return await response.text() + await asyncio.sleep(2) + return await response.text() if json is False else await response.json() @staticmethod async def fetch_all(urls, headers='', params='') -> list: - timeout = aiohttp.ClientTimeout(total=10) + timeout = aiohttp.ClientTimeout(total=30) + # By default timeout is 5 minutes, 30 seconds should suffice if len(headers) == 0: headers = {'User-Agent': Core.get_user_agent()} if len(params) == 0: