diff --git a/theHarvester/discovery/dnsdumpster.py b/theHarvester/discovery/dnsdumpster.py index f9bb0f44..ff39ccb8 100644 --- a/theHarvester/discovery/dnsdumpster.py +++ b/theHarvester/discovery/dnsdumpster.py @@ -33,9 +33,12 @@ async def do_search(self): csrftoken += cookies[1][:cookies[1].find(';')] await asyncio.sleep(2) + + # extract csrftoken from cookies data = { - 'Cookie': f'csfrtoken={csrftoken}', 'csrfmiddlewaretoken': csrftoken, 'targetip': self.word} + 'Cookie': f'csfrtoken={csrftoken}', 'csrfmiddlewaretoken': csrftoken, + 'targetip': self.word, 'user': 'free'} headers['Referer'] = url if self.proxy is False: async with session.post(url, headers=headers, data=data) as resp: @@ -55,3 +58,4 @@ async def get_hostnames(self): async def process(self, proxy=False): self.proxy = proxy await self.do_search() # Only need to do it once. + diff --git a/theHarvester/discovery/huntersearch.py b/theHarvester/discovery/huntersearch.py index 82ba969a..254571cb 100644 --- a/theHarvester/discovery/huntersearch.py +++ b/theHarvester/discovery/huntersearch.py @@ -1,6 +1,5 @@ from theHarvester.discovery.constants import * from theHarvester.lib.core import * -from theHarvester.parsers import myparser class SearchHunter: @@ -8,32 +7,72 @@ class SearchHunter: def __init__(self, word, limit, start): self.word = word self.limit = limit + self.limit = 10 if limit > 10 else limit self.start = start self.key = Core.hunter_key() if self.key is None: raise MissingKey('Hunter') self.total_results = "" self.counter = start - self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit={self.limit}' + self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10' self.proxy = False + self.hostnames = [] + self.emails = [] async def do_search(self): - responses = await AsyncFetcher.fetch_all([self.database], headers={'User-Agent': Core.get_user_agent()}, - proxy=self.proxy) - self.total_results += 
responses[0] + # First determine if user account is not a free account, this call is free + is_free = True + headers = {'User-Agent': Core.get_user_agent()} + acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}' + response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True) + is_free = is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() \ + == 'free' else False + # Extract total number of requests that are available for account + + total_requests_avail = response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used'] + if is_free: + response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True) + self.emails, self.hostnames = await self.parse_resp(json_resp=response[0]) + else: + # Determine total number of emails that are available + # As the most emails you can get within one query is 100 + # This is only done where paid accounts are in play + hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}' + response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True) + total_number_reqs = response[0]['data']['total'] // 100 + # Parse out meta field within initial JSON response to determine total number of results + if total_requests_avail < total_number_reqs: + print('WARNING: account does not have enough requests to gather all emails') + print(f'Total requests available: {total_requests_avail}, total requests ' + f'needed to be made: {total_number_reqs}') + print('RETURNING current results, if you would still like to ' + 'run this module comment out the if request') + return + self.limit = 100 + # max number of emails you can get per request is 100 + # increments of 100 with offset determining where to start + # See docs for more details: https://hunter.io/api-documentation/v2#domain-search + for offset in range(0, 100 * 
total_number_reqs, 100): +            req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit={self.limit}&offset={offset}' +            response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, +                                                    json=True) +            temp_emails, temp_hostnames = await self.parse_resp(response[0]) +            self.emails.extend(temp_emails) +            self.hostnames.extend(temp_hostnames) +            await asyncio.sleep(1) + +    async def parse_resp(self, json_resp): +        emails = list(sorted({email['value'] for email in json_resp['data']['emails']})) +        domains = list(sorted({source['domain'] for email in json_resp['data']['emails'] for source in email['sources'] +                               if self.word in source['domain']})) +        return emails, domains      async def process(self, proxy=False):         self.proxy = proxy         await self.do_search()  # Only need to do it once.      async def get_emails(self): -        rawres = myparser.Parser(self.total_results, self.word) -        return await rawres.emails() +        return self.emails      async def get_hostnames(self): -        rawres = myparser.Parser(self.total_results, self.word) -        return await rawres.hostnames() - -    async def get_profiles(self): -        rawres = myparser.Parser(self.total_results, self.word) -        return await rawres.profiles() +        return self.hostnames diff --git a/theHarvester/discovery/omnisint.py b/theHarvester/discovery/omnisint.py index a492886a..b2891ba1 100644 --- a/theHarvester/discovery/omnisint.py +++ b/theHarvester/discovery/omnisint.py @@ -1,23 +1,25 @@ -import requests -import json from theHarvester.lib.core import *   class SearchOmnisint:     def __init__(self, word):         self.word = word -        self.totalhosts = list() +        self.totalhosts = set() +        self.totalips = set()         self.proxy = False      async def do_search(self):         base_url = f'https://sonar.omnisint.io/all/{self.word}?page=1' -        data = requests.get(base_url, headers={'User-Agent': Core.get_user_agent()}).text -        entries = json.loads(data) -        self.totalhosts = entries +        responses = await AsyncFetcher.fetch_all([base_url], json=True, 
headers={'User-Agent': Core.get_user_agent()}, +                                                proxy=self.proxy) +        self.totalhosts = list({host for host in responses[0]})  -    async def get_hostnames(self) -> list: +    async def get_hostnames(self) -> list:         return self.totalhosts  +    async def get_ips(self) -> set: +        return self.totalips +     async def process(self, proxy=False):         self.proxy = proxy         await self.do_search()