Mirror of https://github.com/laramies/theHarvester.git
Synced 2025-02-24 14:32:57 +08:00
Fixed VirusTotal module (#1172)
* Added a call to the API endpoint to explicitly gather subdomains from ZoomEye, updated user agents, replaced orjson with ujson, and fixed a "substring not found" error.
* Updated orjson to ujson.
* Fixed a semantic error in the HTML check in the Google workaround.
* Fixed flake8 errors.
* Fixed VirusTotal to use the API.
* Fixed the VirusTotal module.
* Fixed an edge case that could cause an infinite loop.
This commit is contained in:
parent 121e23b3f9
commit c801db6725
5 changed files with 75 additions and 27 deletions
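For reference, the orjson-to-ujson swap mentioned in the commit message is close to a drop-in for the common calls, with one caveat: orjson.dumps returns bytes while ujson.dumps returns str. A minimal illustrative sketch (not taken from this diff):

    import ujson

    payload = ujson.dumps({'host': 'example.com'})  # returns str (orjson.dumps returns bytes)
    data = ujson.loads(payload)                     # round-trips back to a dict
    assert data['host'] == 'example.com'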
@@ -19,4 +19,4 @@ setuptools==63.4.1
shodan==1.28.0
slowapi==0.1.5
uvicorn==0.18.2
uvloop==0.16.0; platform_system != "Windows"
@@ -25,4 +25,4 @@

# As we are not using Windows we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
asyncio.run(__main__.entry_point())
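The fork comment above refers to aiomultiprocess.set_context, which picks the multiprocessing start method before any Pool is created. A minimal sketch of the pattern under that assumption (the worker function and inputs are illustrative, not theHarvester's):

    import asyncio
    import aiomultiprocess

    async def normalize(host: str) -> str:
        return host.lower()  # stand-in for real per-host work

    async def main():
        aiomultiprocess.set_context("fork")  # 'fork' is unavailable on Windows, hence the platform guard
        async with aiomultiprocess.Pool() as pool:
            print(await pool.map(normalize, ['A.example.com', 'B.example.com']))

    asyncio.run(main())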
@@ -154,13 +154,11 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
    if store_people:
        people_list = await search_engine.get_people()
        await db_stash.store_all(word, people_list, 'people', source)

    if store_links:
        links = await search_engine.get_links()
        linkedin_links_tracker.extend(links)
        if len(links) > 0:
            await db.store_all(word, links, 'linkedinlinks', engineitem)

    if store_interestingurls:
        iurls = await search_engine.get_interestingurls()
        interesting_urls.extend(iurls)
@@ -286,8 +284,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
            stor_lst.append(store(github_search, engineitem, store_host=True, store_emails=True))
        except MissingKey as ex:
            print(ex)
        else:
            pass

    elif engineitem == 'hackertarget':
        from theHarvester.discovery import hackertarget
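This hunk and the ones below it tidy the error handling: catching MissingKey directly makes the isinstance check and the do-nothing else: pass unnecessary, and avoids silently swallowing unrelated errors. A minimal sketch of the two forms, with MissingKey and run_module standing in for theHarvester's names:

    class MissingKey(Exception):
        pass

    def run_module():
        raise MissingKey('virustotal')  # hypothetical module that needs an API key

    # before: catch everything, then filter by type (quietly drops unrelated errors)
    try:
        run_module()
    except Exception as e:
        if isinstance(e, MissingKey):
            print(e)
        else:
            pass

    # after: catch only the error we intend to report
    try:
        run_module()
    except MissingKey as ex:
        print(ex)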
@@ -303,8 +299,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
            else:
                pass

    elif engineitem == 'intelx':
        from theHarvester.discovery import intelxsearch
@@ -388,8 +382,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
            else:
                pass

    elif engineitem == 'sublist3r':
        from theHarvester.discovery import sublist3r
@@ -432,8 +424,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
            else:
                pass

    elif engineitem == 'yahoo':
        from theHarvester.discovery import yahoosearch
@@ -449,8 +439,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
        except Exception as e:
            if isinstance(e, MissingKey):
                print(e)
            else:
                pass
    else:
        try:
            # Check if dns_brute is defined
@@ -836,4 +824,4 @@ async def entry_point():
        print('\n\n[!] ctrl+c detected from user, quitting.\n\n ')
    except Exception as error_entry_point:
        print(error_entry_point)
        sys.exit(1)
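For context, the surrounding entry point follows the usual guard pattern: report ctrl+c cleanly, print anything else, and exit nonzero. A self-contained sketch of that shape (the program body is illustrative):

    import sys
    import asyncio

    async def entry_point():
        ...  # program body

    if __name__ == '__main__':
        try:
            asyncio.run(entry_point())
        except KeyboardInterrupt:
            print('\n\n[!] ctrl+c detected from user, quitting.\n\n')
        except Exception as error_entry_point:
            print(error_entry_point)
            sys.exit(1)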
@@ -1,28 +1,88 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from pprint import pprint


class SearchVirustotal:

    def __init__(self, word):
        self.word = word
        self.key = Core.virustotal_key()
        if self.key is None:
            raise MissingKey('virustotal')
        self.totalhosts = set
        self.word = word
        self.proxy = False
        self.hostnames = []

    async def do_search(self):
        url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
        response = await AsyncFetcher.fetch_all([url], json=True,
                                                headers={'User-Agent': Core.get_user_agent(),
                                                         'X-APIKEY': self.key},
                                                proxy=self.proxy)
        entry = [host for host in response]
        pprint(entry.items())
        # TODO determine if more endpoints can yield useful info given a domain
        # based on: https://developers.virustotal.com/reference/domains-relationships
        # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
        headers = {
            'User-Agent': Core.get_user_agent(),
            "Accept": "application/json",
            "x-apikey": self.key
        }
        base_url = f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
        cursor = ''
        count = 0
        fail_counter = 0
        counter = 0
        breakcon = False
        while True:
            if breakcon:
                break
            # rate limit is 4 per minute
            # TODO add timer logic if proven to be needed
            # in the meantime sleeping 16 seconds should eliminate hitting the rate limit
            # in case the rate limit is hit, a fail counter exists and we sleep for 65 seconds
            send_url = base_url + "&cursor=" + cursor if cursor != '' and len(cursor) > 2 else base_url
            responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
            jdata = responses[0]
            if 'data' not in jdata.keys():
                await asyncio.sleep(60 + 5)
                fail_counter += 1
            if 'meta' in jdata.keys():
                cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
                if len(cursor) == 0 and 'data' in jdata.keys():
                    # if the cursor is no longer within the meta field we have hit the last entry
                    breakcon = True
                count += jdata['meta']['count']
            if count == 0 or fail_counter >= 2:
                break
            if 'data' in jdata.keys():
                data = jdata['data']
                self.hostnames.extend(await self.parse_hostnames(data, self.word))
            counter += 1
            await asyncio.sleep(16)
        self.hostnames = list(sorted(set(self.hostnames)))
        # verify domains such as x.x.com.multicdn.x.com are parsed properly
        self.hostnames = [host for host in self.hostnames if ((len(host.split('.')) >= 3)
                          and host.split('.')[-2] == self.word.split('.')[-2])]

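Distilled from the loop above, the pagination contract is: request pages of up to 40 subdomains, follow meta.cursor until it disappears, and stay under the public-tier limit of 4 requests per minute. A self-contained sketch of that contract using aiohttp (the helper name and HTTP client are my choices, not theHarvester's):

    import asyncio
    import aiohttp

    async def vt_subdomains(domain: str, api_key: str) -> list:
        base_url = f'https://www.virustotal.com/api/v3/domains/{domain}/subdomains?limit=40'
        headers = {'x-apikey': api_key, 'Accept': 'application/json'}
        hosts, cursor = [], ''
        async with aiohttp.ClientSession(headers=headers) as session:
            while True:
                url = f'{base_url}&cursor={cursor}' if cursor else base_url
                async with session.get(url) as resp:
                    jdata = await resp.json()
                hosts.extend(item['id'] for item in jdata.get('data', []))
                cursor = jdata.get('meta', {}).get('cursor', '')
                if not cursor:  # no cursor in meta means we hit the last page
                    break
                await asyncio.sleep(16)  # public API allows 4 requests per minute
        return sorted(set(hosts))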
    # async def get_hostnames(self) -> set:
    #     return self.total_results
    async def get_hostnames(self) -> list:
        return self.hostnames

    @staticmethod
    async def parse_hostnames(data, word):
        total_subdomains = set()
        for attribute in data:
            total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
            attributes = attribute['attributes']
            total_subdomains.update(
                {value['value'].replace('"', '').replace('www.', '') for value in attributes['last_dns_records'] if
                 word in value['value']})
            if 'last_https_certificate' in attributes.keys():
                total_subdomains.update({value.replace('"', '').replace('www.', '') for value in
                                         attributes['last_https_certificate']['extensions']['subject_alternative_name']
                                         if word in value})
        total_subdomains = list(sorted(total_subdomains))
        # Other false positives may occur over time and there are other ways to parse this; feel free to implement
        # them and submit a PR, or raise an issue if you run into this filtering not being enough
        # TODO determine if parsing 'v=spf1 include:_spf-x.acme.com include:_spf-x.acme.com' is worth it
        total_subdomains = [x for x in total_subdomains if
                            not str(x).endswith('edgekey.net') and not str(x).endswith('akadns.net')
                            and 'include:_spf' not in str(x)]
        total_subdomains.sort()
        return total_subdomains

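To make the false-positive filter above concrete, here is a tiny hypothetical run of the same endswith/substring checks:

    hosts = ['a.example.com',
             'example.com.edgekey.net',          # CDN artifact, dropped
             'v=spf1 include:_spf.example.com']  # SPF record residue, dropped
    kept = [h for h in hosts
            if not h.endswith('edgekey.net') and not h.endswith('akadns.net')
            and 'include:_spf' not in h]
    print(kept)  # ['a.example.com']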
    async def process(self, proxy=False):
        self.proxy = proxy
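Putting the new module together, usage presumably mirrors the other discovery modules: construct with a domain, call process, then read the hostnames. A hedged sketch (the import path and the assumption that process invokes do_search are inferred from the code above, since the hunk cuts off after self.proxy = proxy):

    import asyncio
    from theHarvester.discovery import virustotal  # assumed module path

    async def main():
        search = virustotal.SearchVirustotal('example.com')
        await search.process()  # assumed to invoke do_search internally
        print(await search.get_hostnames())

    asyncio.run(main())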
@@ -357,4 +357,4 @@ async def fetch_all(cls, urls, headers='', params='', json=False, takeover=False
                return texts
            else:
                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
                return texts
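The fetch_all change above fans requests out concurrently with asyncio.gather. The core of that pattern in standalone form, using aiohttp (names are illustrative, not theHarvester's):

    import asyncio
    import aiohttp

    async def fetch(session: aiohttp.ClientSession, url: str) -> str:
        async with session.get(url) as resp:
            return await resp.text()

    async def fetch_all(urls: list) -> list:
        # one shared session; gather runs all requests concurrently
        async with aiohttp.ClientSession() as session:
            return await asyncio.gather(*(fetch(session, url) for url in urls))

    print(asyncio.run(fetch_all(['https://example.com'])))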