Add new modules and tweaks everywhere

L1ghtn1ng 2021-05-31 16:55:13 +01:00
parent bfc0897cfa
commit 79d5eaef75
11 changed files with 480 additions and 152 deletions

View file

@@ -1,4 +1,7 @@
apikeys:
binaryedge:
key:
bing:
key:
@@ -7,7 +10,7 @@ apikeys:
secret:
github:
key:
key:
hunter:
key:
@@ -32,3 +35,6 @@ apikeys:
spyse:
key:
zoomeye:
key:
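The two new entries above (binaryedge and zoomeye) follow the same apikeys layout as the existing sources, so they are picked up by the generic YAML lookup added to core.py further down in this commit. A minimal sketch of that lookup, assuming the file is called api-keys.yaml and sits in the working directory (theHarvester resolves its own config path):

import yaml

def read_key(name: str, path: str = 'api-keys.yaml') -> str:
    # Path and helper name are illustrative; mirrors Core.api_keys() below.
    with open(path) as fh:
        keys = yaml.safe_load(fh)['apikeys']
    return keys[name]['key']

print(read_key('binaryedge'), read_key('zoomeye'))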

View file

@@ -1,23 +1,24 @@
#!/usr/bin/env python3
from typing import Dict, List
from theHarvester.discovery import *
from theHarvester.discovery import dnssearch, takeover, shodansearch
from theHarvester.discovery.constants import *
from theHarvester.lib import hostchecker
from theHarvester.lib import reportgraph
from theHarvester.lib import stash
from theHarvester.lib import statichtmlgenerator
from theHarvester.lib.core import *
import argparse
import asyncio
import datetime
import aiofiles
import json
import netaddr
import re
import sys
import string
import secrets
async def start():
async def start(rest_args=None):
"""Main program function"""
parser = argparse.ArgumentParser(description='theHarvester is used to gather open source intelligence (OSINT) on a company or domain.')
parser.add_argument('-d', '--domain', help='Company name or domain to search.', required=True)
@@ -33,36 +34,54 @@ async def start():
parser.add_argument('-r', '--take-over', help='Check for takeovers.', default=False, action='store_true')
parser.add_argument('-n', '--dns-lookup', help='Enable DNS server lookup, default False.', default=False, action='store_true')
parser.add_argument('-c', '--dns-brute', help='Perform a DNS brute force on the domain.', default=False, action='store_true')
parser.add_argument('-f', '--filename', help='Save the results to an HTML,XML and JSON file.', default='', type=str)
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, censys, certspotter, crtsh,
parser.add_argument('-f', '--filename', help='Save the results to an XML and JSON file.', default='', type=str)
parser.add_argument('-b', '--source', help='''baidu, bing, binaryedge, bingapi, bufferoverun, censys, certspotter, crtsh,
dnsdumpster, duckduckgo, exalead, github-code, google,
hackertarget, hunter, intelx, linkedin, linkedin_links,
netcraft, omnisint, otx, pentesttools, projectdiscovery,
qwant, rapiddns, rocketreach, securityTrails, spyse, sublist3r, threatcrowd, threatminer,
trello, twitter, urlscan, virustotal, yahoo''')
trello, twitter, urlscan, virustotal, yahoo, zoomeye''')
args = parser.parse_args()
filename: str = args.filename
dnsbrute = (args.dns_brute, False)
# determines if filename is coming from rest api or user
rest_filename = ''
# indicates this call came from the REST API
if rest_args:
if rest_args.source and rest_args.source == "getsources":
return list(sorted(Core.get_supportedengines()))
elif rest_args.dns_brute:
args = rest_args
dnsbrute = (rest_args.dns_brute, True)
else:
args = rest_args
# We need to make sure the filename is random so as not to overwrite other files
filename: str = args.filename
alphabet = string.ascii_letters + string.digits
rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \
if len(filename) != 0 else ""
else:
args = parser.parse_args()
filename: str = args.filename
dnsbrute = (args.dns_brute, False)
try:
db = stash.StashManager()
await db.do_init()
except Exception:
pass
all_emails: list = []
all_hosts: list = []
all_ip: list = []
all_emails: List = []
all_hosts: List = []
all_ip: List = []
dnslookup = args.dns_lookup
dnsserver = args.dns_server
dnstld = args.dns_tld
engines = []
engines: List = []
# If the user specifies
full: list = []
ips: list = []
full: List = []
ips: List = []
google_dorking = args.google_dork
host_ip: list = []
host_ip: List = []
limit: int = args.limit
shodan = args.shodan
start: int = args.start
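The randomised filename above keeps concurrent REST API scans from overwriting each other's output: a 32-character alphanumeric prefix is prepended to whatever filename the caller supplied. A standalone sketch of that logic (the helper name is illustrative):

import secrets
import string

def rest_safe_name(filename: str) -> str:
    # Prepend a 32-character random prefix, as in the rest_filename logic above.
    if not filename:
        return ''
    alphabet = string.ascii_letters + string.digits
    prefix = ''.join(secrets.choice(alphabet) for _ in range(32))
    return f'{prefix}_{filename}'

print(rest_safe_name('scan.xml'))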
@@ -72,13 +91,16 @@ async def start():
word: str = args.domain
takeover_status = args.take_over
use_proxy = args.proxies
linkedin_people_list_tracker: list = []
linkedin_links_tracker: list = []
twitter_people_list_tracker: list = []
linkedin_people_list_tracker: List = []
linkedin_links_tracker: List = []
twitter_people_list_tracker: List = []
interesting_urls: list = []
total_asns: list = []
async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False,
store_emails: bool = False, store_ip: bool = False, store_people: bool = False,
store_links: bool = False, store_results: bool = False) -> None:
store_links: bool = False, store_results: bool = False,
store_interestingurls: bool = False, store_asns: bool = False) -> None:
"""
Persist details into the database.
The details to be stored is controlled by the parameters passed to the method.
@@ -92,6 +114,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
:param store_people: whether to store user details
:param store_links: whether to store links
:param store_results: whether to fetch details from get_results() and persist
:param store_interestingurls: whether to store interesting urls
:param store_asns: whether to store asns
"""
await search_engine.process(use_proxy) if process_param is None else await \
search_engine.process(process_param, use_proxy)
@@ -128,24 +152,28 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
await db.store_all(word, all_emails, 'email', source)
if store_people:
people_list = await search_engine.get_people()
if source == 'twitter':
twitter_people_list_tracker.extend(people_list)
if source == 'linkedin':
linkedin_people_list_tracker.extend(people_list)
await db_stash.store_all(word, people_list, 'people', source)
if len(people_list) == 0:
print('\n[*] No users found.\n\n')
else:
print('\n[*] Users found: ' + str(len(people_list)))
print('---------------------')
for usr in sorted(list(set(people_list))):
print(usr)
if store_links:
links = await search_engine.get_links()
await db.store_all(word, links, 'name', engineitem)
if len(links) == 0:
print('\n[*] No links found.\n\n')
else:
print(f'\n[*] Links found: {len(links)}')
print('---------------------')
for link in sorted(list(set(links))):
print(link)
linkedin_links_tracker.extend(links)
if len(links) > 0:
await db.store_all(word, links, 'linkedinlinks', engineitem)
if store_interestingurls:
iurls = await search_engine.get_interestingurls()
interesting_urls.extend(iurls)
if len(iurls) > 0:
await db.store_all(word, iurls, 'interestingurl', engineitem)
if store_asns:
fasns = await search_engine.get_asns()
total_asns.extend(fasns)
if len(fasns) > 0:
await db.store_all(word, fasns, 'asns', engineitem)
stor_lst = []
if args.source is not None:
@@ -163,8 +191,16 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
try:
baidu_search = baidusearch.SearchBaidu(word, limit)
stor_lst.append(store(baidu_search, engineitem, store_host=True, store_emails=True))
except Exception:
pass
except Exception as e:
print(e)
elif engineitem == 'binaryedge':
from theHarvester.discovery import binaryedgesearch
try:
binaryedge_search = binaryedgesearch.SearchBinaryEdge(word, limit)
stor_lst.append(store(binaryedge_search, engineitem, store_host=True))
except Exception as e:
print(e)
elif engineitem == 'bing' or engineitem == 'bingapi':
from theHarvester.discovery import bingsearch
@@ -220,7 +256,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
try:
from theHarvester.discovery import dnsdumpster
dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word)
stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True))
stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True, store_ip=True))
except Exception as e:
print(f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m')
@@ -272,7 +308,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
# Import locally or won't work.
try:
intelx_search = intelxsearch.SearchIntelx(word)
stor_lst.append(store(intelx_search, engineitem, store_host=True, store_emails=True))
stor_lst.append(store(intelx_search, engineitem, store_interestingurls=True, store_emails=True))
except Exception as e:
if isinstance(e, MissingKey):
print(e)
@@ -387,7 +423,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
from theHarvester.discovery import threatcrowd
try:
threatcrowd_search = threatcrowd.SearchThreatcrowd(word)
stor_lst.append(store(threatcrowd_search, engineitem, store_host=True))
stor_lst.append(store(threatcrowd_search, engineitem, store_host=True, store_ip=True))
except Exception as e:
print(e)
@@ -395,7 +431,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
from theHarvester.discovery import threatminer
try:
threatminer_search = threatminer.SearchThreatminer(word)
stor_lst.append(store(threatminer_search, engineitem, store_host=True))
stor_lst.append(store(threatminer_search, engineitem, store_host=True, store_ip=True))
except Exception as e:
print(e)
@@ -414,7 +450,8 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
from theHarvester.discovery import urlscan
try:
urlscan_search = urlscan.SearchUrlscan(word)
stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True))
stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True,
store_interestingurls=True, store_asns=True))
except Exception as e:
print(e)
@@ -424,13 +461,22 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
stor_lst.append(store(virustotal_search, engineitem, store_host=True))
elif engineitem == 'yahoo':
from theHarvester.discovery import yahoosearch
yahoo_search = yahoosearch.SearchYahoo(word, limit)
stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
elif engineitem == 'zoomeye':
from theHarvester.discovery import zoomeyesearch
zoomeye_search = zoomeyesearch.SearchZoomEye(word, limit)
stor_lst.append(store(zoomeye_search, engineitem, store_host=True, store_emails=True,
store_ip=True, store_interestingurls=True, store_asns=True))
else:
print('\033[93m[!] Invalid source.\n\n \033[0m')
sys.exit(1)
try:
# Check if dns_brute is defined
rest_args.dns_brute
except Exception:
print('\033[93m[!] Invalid source.\n\n \033[0m')
sys.exit(1)
async def worker(queue):
while True:
@@ -465,6 +511,15 @@ async def handler(lst):
await asyncio.gather(*tasks, return_exceptions=True)
await handler(lst=stor_lst)
return_ips: List = []
if rest_args is not None and len(rest_filename) == 0 and rest_args.dns_brute is False:
# Indicates user is using rest api but not wanting output to be saved to a file
full = [host if ':' in host and word in host else word in host.split(':')[0] and host for host in full]
full = list({host for host in full if host})
full.sort()
# cast to string so Rest API can understand type
return_ips.extend([str(ip) for ip in sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])])
return list(set(all_emails)), return_ips, full, '', ''
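The host-filtering comprehension above is dense: it keeps any entry whose hostname part contains the target domain, preserving host:ip pairs, and the following set comprehension drops the False placeholders. An equivalent, more explicit sketch (illustrative, not the committed code):

filtered = set()
for host in full:
    hostname = host.split(':')[0]
    # Keep 'host:ip' pairs that mention the domain, and bare hosts whose name contains it.
    if (':' in host and word in host) or word in hostname:
        filtered.add(host)
full = sorted(filtered)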
# Sanity check to see if all_emails and all_hosts are defined.
try:
all_emails
@@ -526,6 +581,9 @@ async def handler(lst):
hosts, ips = await dns_force.run()
hosts = list({host for host in hosts if ':' in host})
hosts.sort(key=lambda el: el.split(':')[0])
# Check if the REST API is being used; if so, return the found hosts
if dnsbrute[1]:
return hosts
print('\n[*] Hosts found after DNS brute force:')
db = stash.StashManager()
for host in hosts:
@@ -677,59 +735,24 @@ async def handler(lst):
if args.dns_tld is not False:
counter = 0
for word in vhost:
search = googlesearch.SearchGoogle(word, limit, counter)
await search.process(google_dorking)
emails = await search.get_emails()
hosts = await search.get_hostnames()
search_google = googlesearch.SearchGoogle(word, limit, counter)
await search_google.process(google_dorking)
emails = await search_google.get_emails()
hosts = await search_google.get_hostnames()
print(emails)
print(hosts)
else:
pass
# Reporting
if filename != "":
if filename != '':
print('\n[*] Reporting started.')
try:
print('\n[*] Reporting started.')
db = stash.StashManager()
scanboarddata = await db.getscanboarddata()
latestscanresults = await db.getlatestscanresults(word)
previousscanresults = await db.getlatestscanresults(word, previousday=True)
latestscanchartdata = await db.latestscanchartdata(word)
scanhistorydomain = await db.getscanhistorydomain(word)
pluginscanstatistics = await db.getpluginscanstatistics()
generator = statichtmlgenerator.HtmlGenerator(word)
html_code = await generator.beginhtml()
html_code += await generator.generatedashboardcode(scanboarddata)
html_code += await generator.generatelatestscanresults(latestscanresults)
if len(screenshot_tups) > 0:
html_code += await generator.generatescreenshots(screenshot_tups)
html_code += await generator.generatepreviousscanresults(previousscanresults)
graph = reportgraph.GraphGenerator(word)
await graph.init_db()
html_code += await graph.drawlatestscangraph(word, latestscanchartdata)
html_code += await graph.drawscattergraphscanhistory(word, scanhistorydomain)
html_code += await generator.generatepluginscanstatistics(pluginscanstatistics)
html_code += '<p><span style="color: #000000;">Report generated on ' + str(
datetime.datetime.now()) + '</span></p>'
html_code += '''
</body>
</html>
'''
except Exception as e:
print(e)
print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
sys.exit(1)
html_file = open(f'{filename}.html' if '.html' not in filename else filename, 'w')
html_file.write(html_code)
html_file.close()
print('[*] Reporting finished.')
print('[*] Saving files.')
try:
# XML REPORT SECTION
filename = filename.rsplit('.', 1)[0] + '.xml'
if len(rest_filename) == 0:
filename = filename.rsplit('.', 1)[0] + '.xml'
else:
filename = 'theHarvester/app/static/' + rest_filename.rsplit('.', 1)[0] + '.xml'
# TODO use aiofiles if user is using rest api
with open(filename, 'w+') as file:
file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
for x in all_emails:
@@ -767,16 +790,16 @@ async def handler(lst):
file.write('</servers>')
file.write('</theHarvester>')
print('[*] XML File saved.')
except Exception as er:
print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
print('[*] XML File saved.')
except Exception as error:
print(f'\033[93m[!] An error occurred while saving the XML file: {error} \033[0m')
try:
# JSON REPORT SECTION
filename = filename.rsplit('.', 1)[0] + '.json'
# create dict with values for json output
json_dict = dict()
json_dict: Dict = dict()
json_dict["emails"] = [email for email in all_emails]
json_dict["hosts"] = [host for host in full]
@@ -791,9 +814,9 @@ async def handler(lst):
if len(linkedin_links_tracker) > 0:
json_dict["linkedin_links"] = [link for link in list(sorted(set(linkedin_links_tracker)))]
shodan_dict = dict()
shodan_dict: Dict = dict()
if shodanres != []:
shodanalysis = []
shodanalysis: List = []
for x in shodanres:
res = x.split('SAPO')
shodan_dict[res[0]] = [res[2], [res[1]]]

View file

@@ -0,0 +1,40 @@
from theHarvester.discovery.constants import *
import asyncio
class SearchBinaryEdge:
def __init__(self, word, limit):
self.word = word
self.totalhosts = set()
self.proxy = False
self.key = Core.binaryedge_key()
self.limit = 501 if limit >= 501 else limit
self.limit = 2 if self.limit == 1 else self.limit
if self.key is None:
raise MissingKey('binaryedge')
async def do_search(self):
base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
for page in range(1, self.limit):
params = {'page': page}
response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers)
responses = response[0]
dct = responses
if ('status' in dct.keys() and 'message' in dct.keys()) and \
(dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']):
# 400 status code means no more results
break
if 'events' in dct.keys():
if len(dct['events']) == 0:
break
self.totalhosts.update({host for host in dct['events']})
await asyncio.sleep(get_delay())
async def get_hostnames(self) -> set:
return self.totalhosts
async def process(self, proxy=False):
self.proxy = proxy
await self.do_search()
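A minimal usage sketch for the new BinaryEdge module, mirroring how __main__.py drives it; the domain and limit are illustrative, and a binaryedge key must be present in the API-keys file or MissingKey is raised:

import asyncio
from theHarvester.discovery import binaryedgesearch

async def demo():
    search = binaryedgesearch.SearchBinaryEdge('example.com', 100)
    await search.process(proxy=False)
    for host in sorted(await search.get_hostnames()):
        print(host)

asyncio.run(demo())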

View file

@@ -25,8 +25,8 @@ async def do_search(self):
# Based on: https://github.com/IntelligenceX/SDK/blob/master/Python/intelxapi.py
# API requests self identification
# https://intelx.io/integrations
headers: dict = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'}
data: dict = {
headers = {'x-key': self.key, 'User-Agent': f'{Core.get_user_agent()}-theHarvester'}
data = {
"term": self.word,
"buckets": [],
"lookuplevel": 0,
@@ -59,8 +59,8 @@ async def process(self, proxy=False):
intelx_parser = intelxparser.Parser()
self.info = await intelx_parser.parse_dictionaries(self.results)
async def get_emails(self) -> Set:
async def get_emails(self):
return self.info[0]
async def get_hostnames(self) -> Set:
async def get_interestingurls(self):
return self.info[1]

View file

@@ -1,31 +1,61 @@
from theHarvester.discovery.constants import MissingKey
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
import rocketreach
import asyncio
class SearchRocketreach:
class SearchRocketReach:
def __init__(self, word):
def __init__(self, word, limit):
self.ips = set()
self.word = word
self.key = Core.rocketreach_key()
if self.key is None:
raise MissingKey('Rocketreach')
self.total_results = ""
raise MissingKey('RocketReach')
self.hosts = set()
self.proxy = False
self.baseurl = 'https://api.rocketreach.co/v2/api/search'
self.links = set()
self.limit = limit
async def do_search(self):
rr = rocketreach.Gateway(rocketreach.GatewayConfig(self.key))
s = rr.person.search().filter(current_employer=self.word)
result = s.execute()
if result.is_success:
lookup = rr.person.lookup(result.people[0].id)
if lookup.is_success:
print(repr(lookup.person))
try:
headers = {
'Api-Key': self.key,
'Content-Type': 'application/json',
'User-Agent': Core.get_user_agent()
}
import pprint as pp
# linkedin_urls = set()
for page in range(1, self.limit):
data = f'{{"query":{{"company_website_url": ["{self.word}"]}}, "start": {page}}}'
result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result[
'detail']:
# No more results can be fetched
break
if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
# Rate limit has been triggered; need to sleep extra
print(f'RocketReach requests have been throttled; '
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}')
break
if 'profiles' in dict(result).keys():
if len(result['profiles']) == 0:
break
for profile in result['profiles']:
if 'linkedin_url' in dict(profile).keys():
self.links.add(profile['linkedin_url'])
await asyncio.sleep(get_delay() + 2)
except Exception as e:
print(f'An exception has occurred: {e}')
async def get_links(self):
return self.links
async def process(self, proxy=False):
self.proxy = proxy
await self.do_search() # Only need to do it once.
# async def get_emails(self):
# rawres = myparser.Parser(self.total_results, self.word)
# return await rawres.emails()
await self.do_search()
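The rewritten module drops the rocketreach pip package and posts directly to the v2 search endpoint with a JSON body of the form {"query": {"company_website_url": ["<domain>"]}, "start": <page>}, collecting the linkedin_url value from each returned profile. A usage sketch (the discovery module path is an assumption based on the project layout; domain and limit are illustrative):

import asyncio
from theHarvester.discovery import rocketreach  # assumed module name

async def demo():
    search = rocketreach.SearchRocketReach('example.com', 5)
    await search.process(proxy=False)
    print(await search.get_links())  # LinkedIn profile URLs

asyncio.run(demo())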

View file

@@ -1,28 +1,31 @@
from typing import Coroutine
from typing import List
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
class SearchThreatcrowd:
def __init__(self, word):
self.word = word.replace(' ', '%20')
self.results: str = ""
self.totalresults: str = ""
self.hostnames = list()
self.ips = list()
self.proxy = False
async def do_search(self):
base_url = f'https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={self.word}'
headers = {'User-Agent': Core.get_user_agent()}
try:
responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy)
self.results = responses[0]
responses = await AsyncFetcher.fetch_all([base_url], headers=headers, proxy=self.proxy, json=True)
resp = responses[0]
self.ips = {ip['ip_address'] for ip in resp['resolutions'] if len(ip['ip_address']) > 4}
self.hostnames = set(list(resp['subdomains']))
except Exception as e:
print(e)
self.totalresults += self.results
async def get_hostnames(self) -> Coroutine:
return await myparser.Parser(self.results, self.word).hostnames()
async def get_ips(self) -> List:
return self.ips
async def get_hostnames(self) -> List:
return self.hostnames
async def process(self, proxy=False):
self.proxy = proxy

View file

@@ -7,16 +7,23 @@ class SearchThreatminer:
def __init__(self, word):
self.word = word
self.totalhosts = list
self.totalips = list
self.proxy = False
async def do_search(self):
url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts: set = {host for host in response[0]['results']}
second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy)
self.totalips: set = {resp['ip'] for resp in secondresp[0]['results']}
async def get_hostnames(self) -> Type[list]:
return self.totalhosts
async def get_ips(self) -> Type[list]:
return self.totalips
async def process(self, proxy=False):
self.proxy = proxy
await self.do_search()

View file

@@ -1,12 +1,14 @@
from typing import Type
from typing import List
from theHarvester.lib.core import *
class SearchUrlscan:
def __init__(self, word):
self.word = word
self.totalhosts = list
self.totalips = list
self.totalhosts = list()
self.totalips = list()
self.interestingurls = list()
self.totalasns = list()
self.proxy = False
async def do_search(self):
@@ -15,13 +17,22 @@ async def do_search(self):
resp = response[0]
self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
self.interestingurls = {f"{page['page']['url']}" for page in resp['results'] if self.word in page['page']['url']
and 'url' in page['page'].keys()}
self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
async def get_hostnames(self) -> Type[list]:
async def get_hostnames(self) -> List:
return self.totalhosts
async def get_ips(self) -> Type[list]:
async def get_ips(self) -> List:
return self.totalips
async def get_interestingurls(self) -> List:
return self.interestingurls
async def get_asns(self) -> List:
return self.totalasns
async def process(self, proxy=False):
self.proxy = proxy
await self.do_search()

View file

@@ -0,0 +1,199 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import asyncio
import re
class SearchZoomEye:
def __init__(self, word, limit):
self.word = word
self.limit = limit
self.key = Core.zoomeye_key()
if self.key is None:
raise MissingKey('zoomeye')
self.baseurl = 'https://api.zoomeye.org/host/search'
self.proxy = False
self.totalasns = list()
self.totalhosts = list()
self.interestingurls = list()
self.totalips = list()
self.totalemails = list()
# Regex used is directly from: https://github.com/GerbenJavado/LinkFinder/blob/master/linkfinder.py#L29
# Maybe one day it will be a pip package
# Regardless LinkFinder is an amazing tool!
self.iurl_regex = r"""
(?:"|') # Start newline delimiter
(
((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or //
[^"'/]{1,}\. # Match a domainname (any character + dot)
[a-zA-Z]{2,}[^"']{0,}) # The domainextension and/or path
|
((?:/|\.\./|\./) # Start with /,../,./
[^"'><,;| *()(%%$^/\\\[\]] # Next character can't be...
[^"'><,;|()]{1,}) # Rest of the characters can't be
|
([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with /
[a-zA-Z0-9_\-/]{1,} # Resource name
\.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action)
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
|
([a-zA-Z0-9_\-/]{1,}/ # REST API (no extension) with /
[a-zA-Z0-9_\-/]{3,} # Proper REST endpoints usually have 3+ chars
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
|
([a-zA-Z0-9_\-]{1,} # filename
\.(?:php|asp|aspx|jsp|json|
action|html|js|txt|xml) # . + extension
(?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
)
(?:"|') # End newline delimiter
"""
self.iurl_regex = re.compile(self.iurl_regex, re.VERBOSE)
async def do_search(self):
headers = {
'API-KEY': self.key,
'User-Agent': Core.get_user_agent()
}
params = (
('query', f'site:{self.word}'),
('page', '1'),
)
# TODO add: https://www.zoomeye.org/profile/domain to fetch subdomains more easily
# once the api endpoint is created
response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers,
params=params)
# First request determines how many pages there are in total
resp = response[0]
total_pages = int(resp['available'])
self.limit = self.limit if total_pages > self.limit else total_pages
self.limit = 3 if self.limit == 2 else self.limit
cur_page = 2 if self.limit >= 2 else -1
# Means there is only one page
# hostnames, emails, ips, asns, iurls
nomatches_counter = 0
# cur_page = -1
if cur_page == -1:
# No need to do loop just parse and leave
if 'matches' in resp.keys():
hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
self.totalasns.extend(asns)
self.interestingurls.extend(iurls)
else:
if 'matches' in resp.keys():
# Parse out initial results and then continue to loop
hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
self.totalasns.extend(asns)
self.interestingurls.extend(iurls)
for num in range(2, self.limit):
print(f'Currently on page: {num}')
params = (
('query', f'site:{self.word}'),
('page', f'{num}'),
)
response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers,
params=params)
resp = response[0]
if 'matches' not in resp.keys():
print(f'Your resp: {resp}')
print('Match not found in keys')
break
hostnames, emails, ips, asns, iurls = await self.parse_matchs(resp['matches'])
if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 \
and len(asns) == 0 and len(iurls) == 0:
nomatches_counter += 1
if nomatches_counter >= 5:
break
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
self.totalasns.extend(asns)
self.interestingurls.extend(iurls)
await asyncio.sleep(get_delay() + 2)
async def parse_matchs(self, matches):
# Helper function to parse items from match json
# ips = {match["ip"] for match in matches}
ips = set()
iurls = set()
hostnames = set()
asns = set()
emails = set()
for match in matches:
try:
ips.add(match['ip'])
if 'geoinfo' in match.keys():
asns.add(int(match['geoinfo']['asn']))
if 'rdns_new' in match.keys():
rdns_new = match['rdns_new']
if ',' in rdns_new:
parts = str(rdns_new).split(',')
rdns_new = parts[0]
if len(parts) == 2:
hostnames.add(parts[1])
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
else:
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
if 'rdns' in match.keys():
rdns = match['rdns']
rdns = rdns[:-1] if rdns[-1] == '.' else rdns
hostnames.add(rdns)
if 'portinfo' in match.keys():
# re.
temp_emails = set(await self.parse_emails(match['portinfo']['banner']))
emails.update(temp_emails)
hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner'])))
iurls = {str(iurl.group(1)).replace('"', '') for iurl
in re.finditer(self.iurl_regex, match['portinfo']['banner'])
if self.word in str(iurl.group(1))}
except Exception as e:
print(f'An exception has occurred: {e}')
return hostnames, emails, ips, asns, iurls
async def process(self, proxy=False):
self.proxy = proxy
await self.do_search() # Only need to do it once.
async def parse_emails(self, content):
rawres = myparser.Parser(content, self.word)
return await rawres.emails()
async def parse_hostnames(self, content):
rawres = myparser.Parser(content, self.word)
return await rawres.hostnames()
async def get_hostnames(self):
return set(self.totalhosts)
async def get_emails(self):
return set(self.totalemails)
async def get_ips(self):
return set(self.totalips)
async def get_asns(self):
return set(self.totalasns)
async def get_interestingurls(self):
return set(self.interestingurls)
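A usage sketch for the new ZoomEye module; the import path matches how __main__.py loads it, the domain and page limit are illustrative, and a zoomeye key must be configured or MissingKey is raised:

import asyncio
from theHarvester.discovery import zoomeyesearch

async def demo():
    search = zoomeyesearch.SearchZoomEye('example.com', 3)
    await search.process(proxy=False)
    print(await search.get_hostnames())
    print(await search.get_ips())
    print(await search.get_asns())
    print(await search.get_interestingurls())
    print(await search.get_emails())

asyncio.run(demo())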

View file

@@ -78,20 +78,20 @@ async def dnsbrute(request: Request, user_agent: str = Header(None),
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
response = RedirectResponse(app.url_path_for('bot'))
return response
dns_bruteforce = await __main__.entry_point(argparse.Namespace(dns_brute=True,
dns_lookup=False,
dns_server=False,
dns_tld=False,
domain=domain,
filename='',
google_dork=False,
limit=500,
proxies=False,
shodan=False,
source=','.join([]),
start=0,
take_over=False,
virtual_host=False))
dns_bruteforce = await __main__.start(argparse.Namespace(dns_brute=True,
dns_lookup=False,
dns_server=False,
dns_tld=False,
domain=domain,
filename='',
google_dork=False,
limit=500,
proxies=False,
shodan=False,
source=','.join([]),
start=0,
take_over=False,
virtual_host=False))
return {'dns_bruteforce': dns_bruteforce}
@@ -115,7 +115,7 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str =
response = RedirectResponse(app.url_path_for('bot'))
return response
try:
emails, ips, urls, html_filename, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute,
emails, ips, urls, xml_filename = await __main__.start(argparse.Namespace(dns_brute=dns_brute,
dns_lookup=dns_lookup,
dns_server=dns_server,
dns_tld=dns_tld,
@@ -130,7 +130,6 @@ async def query(request: Request, dns_server: str = Query(""), user_agent: str =
take_over=take_over,
virtual_host=virtual_host))
return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'html_file': f'{html_filename}',
'xml_file': f'{xml_filename}'}
return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'xml_file': f'{xml_filename}'}
except Exception as e:
return {'exception': f'{e}'}

View file

@@ -28,6 +28,10 @@ def api_keys() -> dict:
keys = yaml.safe_load(api_keys)
return keys['apikeys']
@staticmethod
def binaryedge_key() -> str:
return Core.api_keys()['binaryedge']['key']
@staticmethod
def bing_key() -> str:
return Core.api_keys()['bing']['key']
@@ -72,6 +76,10 @@ def shodan_key() -> str:
def spyse_key() -> str:
return Core.api_keys()['spyse']['key']
@staticmethod
def zoomeye_key() -> str:
return Core.api_keys()['zoomeye']['key']
@staticmethod
def proxy_list() -> List:
try:
@@ -106,6 +114,7 @@ def banner() -> None:
@staticmethod
def get_supportedengines() -> Set[Union[str, Any]]:
supportedengines = {'baidu',
'binaryedge',
'bing',
'bingapi',
'bufferoverun',
@@ -140,6 +149,7 @@ def get_supportedengines() -> Set[Union[str, Any]]:
'urlscan',
'virustotal',
'yahoo',
'zoomeye'
}
return supportedengines
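A quick check that the new sources are wired through core.py; illustrative only, and the key lookups raise KeyError if the corresponding entries are missing from the API-keys file:

from theHarvester.lib.core import Core

engines = Core.get_supportedengines()
print('binaryedge' in engines, 'zoomeye' in engines)
print(Core.binaryedge_key(), Core.zoomeye_key())  # None/empty until the keys are filled in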