diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index 39bb6aa1..decd886c 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -508,8 +508,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
             print('------------------')
             for l in host_ip:
                 basic_search = bingsearch.SearchBing(l, limit, start)
-                basic_search.process_vhost()
-                results = basic_search.get_allhostnames()
+                await basic_search.process_vhost()
+                results = await basic_search.get_allhostnames()
                 for result in results:
                     result = re.sub(r'[[\<\/?]*[\w]*>]*', '', result)
                     result = re.sub('<', '', result)
diff --git a/theHarvester/discovery/dnsdumpster.py b/theHarvester/discovery/dnsdumpster.py
index c3c5c6ec..c4b12764 100644
--- a/theHarvester/discovery/dnsdumpster.py
+++ b/theHarvester/discovery/dnsdumpster.py
@@ -1,6 +1,7 @@
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
+import aiohttp
+import asyncio
 
 
 class SearchDnsDumpster:
@@ -11,33 +12,33 @@ def __init__(self, word):
         self.totalresults = ""
         self.server = 'dnsdumpster.com'
 
-    def do_search(self):
+    async def do_search(self):
         try:
             agent = Core.get_user_agent()
             headers = {'User-Agent': agent}
-            session = requests.session()
+            session = aiohttp.ClientSession(headers=headers)
             # create a session to properly verify
             url = f'https://{self.server}'
-            request = session.get(url, headers=headers)
-            cookies = str(request.cookies)
-            # extract csrftoken from cookies
             csrftoken = ''
-            for ch in cookies.split("=")[1]:
-                if ch == ' ':
-                    break
-                csrftoken += ch
+            async with session.get(url, headers=headers) as resp:
+                cookies = str(resp.cookies)
+                cookies = cookies.split('csrftoken=')
+                csrftoken += cookies[1][:cookies[1].find(';')]
+            await asyncio.sleep(2)
+            # extract csrftoken from cookies
             data = {
                 'Cookie': f'csfrtoken={csrftoken}',
                 'csrfmiddlewaretoken': csrftoken,
                 'targetip': self.word}
             headers['Referer'] = url
-            post_req = session.post(url, headers=headers, data=data)
-            self.results = post_req.text
+            async with session.post(url, headers=headers, data=data) as resp:
+                self.results = await resp.text()
+            await session.close()
         except Exception as e:
             print(f'An exception occured: {e}')
         self.totalresults += self.results
 
-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.hostnames()
 
-    def process(self):
-        self.do_search()  # Only need to do it once.
+    async def process(self):
+        await self.do_search()  # Only need to do it once.
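Note: with this change, SearchDnsDumpster.process() and get_hostnames() become coroutines, so callers have to await them from a running event loop (which is why the bingsearch call sites in __main__.py above gained await). A minimal usage sketch under that assumption; the asyncio.run() entry point and the example domain are illustrative, not part of the patch:

    import asyncio
    from theHarvester.discovery import dnsdumpster

    async def main():
        # word is the target domain, per SearchDnsDumpster.__init__
        search = dnsdumpster.SearchDnsDumpster('example.com')
        await search.process()                # now a coroutine, not a plain call
        hosts = await search.get_hostnames()  # also a coroutine after this patch
        print(hosts)

    asyncio.run(main())  # requires Python 3.7+
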
diff --git a/theHarvester/discovery/suip.py b/theHarvester/discovery/suip.py
index 0e735921..f19d9539 100644
--- a/theHarvester/discovery/suip.py
+++ b/theHarvester/discovery/suip.py
@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 from bs4 import BeautifulSoup
-import aiohttp
 import asyncio
 
 
@@ -16,26 +15,15 @@ def __init__(self, word: str):
     async def request(self, url, params):
         headers = {'User-Agent': Core.get_user_agent()}
         data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=720)
-        # by default timeout is 5 minutes we will change that to 6 minutes
-        # Depending on the domain and if it has a lot of subdomains you may want to tweak it
-        # The results are well worth the wait :)
-        try:
-            async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                async with session.post(url, params=params, data=data) as resp:
-                    await asyncio.sleep(3)
-                    return await resp.text()
-        except Exception as e:
-            print(f'An exception has occurred: {e}')
-            return ''
+        return await AsyncFetcher.post_fetch(url, headers=headers, params=params, data=data)
 
     async def handler(self, url):
-        first_data = [url, (('act', 'subfinder'),), ]
-        second_data = [url, (('act', 'amass'),), ]
+        first_param = [url, (('act', 'subfinder'),), ]
+        second_param = [url, (('act', 'amass'),), ]
         # TODO RESEARCH https://suip.biz/?act=findomain
         async_requests = [
             self.request(url=url, params=params)
-            for url, params in [first_data, second_data]
+            for url, params in [first_param, second_param]
         ]
         results = await asyncio.gather(*async_requests)
         return results
@@ -66,4 +54,4 @@ async def clean_hosts(self, soup_hosts):
             if host[0] == '.':
                 self.totalhosts.add(host[1:])
             else:
-                self.totalhosts.add(host)
\ No newline at end of file
+                self.totalhosts.add(host)
diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py
index 1cd76a5f..97afc6cb 100644
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@@ -374,6 +374,27 @@ def get_user_agent() -> str:
 
 class AsyncFetcher:
 
+    @staticmethod
+    async def post_fetch(url, headers='', data='', params='', json=False):
+        if len(headers) == 0:
+            headers = {'User-Agent': Core.get_user_agent()}
+        timeout = aiohttp.ClientTimeout(total=720)
+        # by default timeout is 5 minutes, changed to 12 minutes for suip module
+        # results are well worth the wait
+        try:
+            if params == '':
+                async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                    async with session.post(url, data=data) as resp:
+                        await asyncio.sleep(3)
+                        return await resp.text() if json is False else await resp.json()
+            else:
+                async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                    async with session.post(url, data=data, params=params) as resp:
+                        await asyncio.sleep(3)
+                        return await resp.text() if json is False else await resp.json()
+        except Exception:
+            return ''
+
     @staticmethod
     async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         # This fetch method solely focuses on get requests
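For reference, a hedged sketch of how a discovery module could call the new AsyncFetcher.post_fetch helper directly, mirroring the suip.py call above. The URL, the params tuple, and the form-data dict are the ones visible in this diff; the demo wrapper and asyncio.run() entry point are illustrative only:

    import asyncio
    from theHarvester.lib.core import AsyncFetcher, Core

    async def demo():
        headers = {'User-Agent': Core.get_user_agent()}
        # post_fetch returns the response body as text by default;
        # pass json=True to get resp.json() instead, '' on any exception
        text = await AsyncFetcher.post_fetch(
            'https://suip.biz/',
            headers=headers,
            params=(('act', 'subfinder'),),
            data={'url': 'example.com', 'Submit1': 'Submit'})
        print(len(text))

    asyncio.run(demo())

Centralizing the POST logic in core.py means the 12-minute timeout and the post-request sleep live in one place, so modules like suip.py no longer each carry their own aiohttp session boilerplate.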