reformat with ruff

Vizonex 2024-07-10 12:38:49 -05:00 committed by J.Townsend
parent 2c871d60e3
commit 309c04acd6
56 changed files with 1387 additions and 1990 deletions
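
The hunks below are the mechanical output of ruff's code formatter: double-quoted string literals become single-quoted, and calls that were previously wrapped across several lines are collapsed back onto one line where they fit the longer line limit. Ruff's formatter defaults to double quotes, so a quote-style override is implied; a pyproject.toml along the following lines would reproduce the style, but it is only a sketch and both option values are assumptions, since the project's actual configuration is not part of this diff.

# pyproject.toml (sketch; option values are assumed, not taken from this commit)
[tool.ruff]
line-length = 130  # assumed from the longest single-line calls in the hunks; the real limit is not shown here

[tool.ruff.format]
quote-style = "single"  # produces the double-quote to single-quote changes seen throughout the diff

With a configuration like this, running ruff format from the repository root rewrites the files in place, which matches a 56-file change set that touches only quoting, wrapping, and whitespace.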


@@ -1,5 +1,5 @@
#!/usr/bin/env python3
from theHarvester.restfulHarvest import main
if __name__ == "__main__":
if __name__ == '__main__':
main()


@@ -5,10 +5,8 @@
from theHarvester.theHarvester import main
if sys.version_info.major < 3 or sys.version_info.minor < 10:
print(
"\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m"
)
print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m')
sys.exit(1)
if __name__ == "__main__":
if __name__ == '__main__':
main()

File diff suppressed because it is too large


@@ -8,7 +8,7 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
url = f"https://jldc.me/anubis/subdomains/{self.word}"
url = f'https://jldc.me/anubis/subdomains/{self.word}'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts = response[0]


@@ -5,23 +5,17 @@
class SearchBaidu:
def __init__(self, word, limit) -> None:
self.word = word
self.total_results = ""
self.server = "www.baidu.com"
self.hostname = "www.baidu.com"
self.total_results = ''
self.server = 'www.baidu.com'
self.hostname = 'www.baidu.com'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
headers = {"Host": self.hostname, "User-agent": Core.get_user_agent()}
base_url = f"https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}"
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 10)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
headers = {'Host': self.hostname, 'User-agent': Core.get_user_agent()}
base_url = f'https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}'
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response


@@ -9,27 +9,23 @@ def __init__(self, word) -> None:
self.interestingurls: set = set()
self.key = Core.bevigil_key()
if self.key is None:
self.key = ""
raise MissingKey("bevigil")
self.key = ''
raise MissingKey('bevigil')
self.proxy = False
async def do_search(self) -> None:
subdomain_endpoint = f"https://osint.bevigil.com/api/{self.word}/subdomains/"
url_endpoint = f"https://osint.bevigil.com/api/{self.word}/urls/"
headers = {"X-Access-Token": self.key}
subdomain_endpoint = f'https://osint.bevigil.com/api/{self.word}/subdomains/'
url_endpoint = f'https://osint.bevigil.com/api/{self.word}/urls/'
headers = {'X-Access-Token': self.key}
responses = await AsyncFetcher.fetch_all(
[subdomain_endpoint], json=True, proxy=self.proxy, headers=headers
)
responses = await AsyncFetcher.fetch_all([subdomain_endpoint], json=True, proxy=self.proxy, headers=headers)
response = responses[0]
for subdomain in response["subdomains"]:
for subdomain in response['subdomains']:
self.totalhosts.add(subdomain)
responses = await AsyncFetcher.fetch_all(
[url_endpoint], json=True, proxy=self.proxy, headers=headers
)
responses = await AsyncFetcher.fetch_all([url_endpoint], json=True, proxy=self.proxy, headers=headers)
response = responses[0]
for url in response["urls"]:
for url in response['urls']:
self.interestingurls.add(url)
async def get_hostnames(self) -> set:


@@ -13,29 +13,25 @@ def __init__(self, word, limit) -> None:
self.limit = 501 if limit >= 501 else limit
self.limit = 2 if self.limit == 1 else self.limit
if self.key is None:
raise MissingKey("binaryedge")
raise MissingKey('binaryedge')
async def do_search(self) -> None:
base_url = f"https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}"
headers = {"X-KEY": self.key, "User-Agent": Core.get_user_agent()}
base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
for page in range(1, self.limit):
params = {"page": page}
response = await AsyncFetcher.fetch_all(
[base_url], json=True, proxy=self.proxy, params=params, headers=headers
)
params = {'page': page}
response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers)
responses = response[0]
dct = responses
if ("status" in dct.keys() and "message" in dct.keys()) and (
dct["status"] == 400
or "Bad Parameter" in dct["message"]
or "Error" in dct["message"]
if ('status' in dct.keys() and 'message' in dct.keys()) and (
dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']
):
# 400 status code means no more results
break
if "events" in dct.keys():
if len(dct["events"]) == 0:
if 'events' in dct.keys():
if len(dct['events']) == 0:
break
self.totalhosts.update({host for host in dct["events"]})
self.totalhosts.update({host for host in dct['events']})
await asyncio.sleep(get_delay())
async def get_hostnames(self) -> set:


@@ -7,12 +7,12 @@
class SearchBing:
def __init__(self, word, limit, start) -> None:
self.word = word.replace(" ", "%20")
self.word = word.replace(' ', '%20')
self.results: list[Any] = []
self.total_results = ""
self.server = "www.bing.com"
self.apiserver = "api.search.live.net"
self.hostname = "www.bing.com"
self.total_results = ''
self.server = 'www.bing.com'
self.apiserver = 'api.search.live.net'
self.hostname = 'www.bing.com'
self.limit = int(limit)
self.bingApi = Core.bing_key()
self.counter = start
@@ -20,58 +20,44 @@ def __init__(self, word, limit, start) -> None:
async def do_search(self) -> None:
headers = {
"Host": self.hostname,
"Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50",
"Accept-Language": "en-us,en",
"User-agent": Core.get_user_agent(),
'Host': self.hostname,
'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
'Accept-Language': 'en-us,en',
'User-agent': Core.get_user_agent(),
}
base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 50)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
async def do_search_api(self) -> None:
url = "https://api.bing.microsoft.com/v7.0/search?"
url = 'https://api.bing.microsoft.com/v7.0/search?'
params = {
"q": self.word,
"count": str(self.limit),
"offset": "0",
"mkt": "en-us",
"safesearch": "Off",
'q': self.word,
'count': str(self.limit),
'offset': '0',
'mkt': 'en-us',
'safesearch': 'Off',
}
headers = {
"User-Agent": Core.get_user_agent(),
"Ocp-Apim-Subscription-Key": self.bingApi,
'User-Agent': Core.get_user_agent(),
'Ocp-Apim-Subscription-Key': self.bingApi,
}
self.results = await AsyncFetcher.fetch_all(
[url], headers=headers, params=params, proxy=self.proxy
)
self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy)
for res in self.results:
self.total_results += res
async def do_search_vhost(self) -> None:
headers = {
"Host": self.hostname,
"Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50",
"Accept-Language": "en-us,en",
"User-agent": Core.get_user_agent(),
'Host': self.hostname,
'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
'Accept-Language': 'en-us,en',
'User-agent': Core.get_user_agent(),
}
base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx"
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 50)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
@@ -89,13 +75,13 @@ async def get_allhostnames(self):
async def process(self, api, proxy: bool = False) -> None:
self.proxy = proxy
if api == "yes":
if api == 'yes':
if self.bingApi is None:
raise MissingKey("BingAPI")
raise MissingKey('BingAPI')
await self.do_search_api()
else:
await self.do_search()
print(f"\tSearching {self.counter} results.")
print(f'\tSearching {self.counter} results.')
async def process_vhost(self) -> None:
await self.do_search_vhost()


@@ -8,37 +8,34 @@
class SearchBrave:
def __init__(self, word, limit):
self.word = word
self.results = ""
self.totalresults = ""
self.server = "https://search.brave.com/search?q="
self.results = ''
self.totalresults = ''
self.server = 'https://search.brave.com/search?q='
self.limit = limit
self.proxy = False
async def do_search(self):
headers = {"User-Agent": Core.get_user_agent()}
for query in [f'"{self.word}"', f"site:{self.word}"]:
headers = {'User-Agent': Core.get_user_agent()}
for query in [f'"{self.word}"', f'site:{self.word}']:
try:
for offset in range(0, 50):
# To reduce the total number of requests, only two queries are made "self.word" and site:self.word
current_url = f"{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0"
resp = await AsyncFetcher.fetch_all(
[current_url], headers=headers, proxy=self.proxy
)
current_url = f'{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0'
resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy)
self.results = resp[0]
self.totalresults += self.results
# if 'Results from Microsoft Bing.' in resp[0] \
if (
"Not many great matches came back for your search" in resp[0]
or "Your request has been flagged as being suspicious and Brave Search"
in resp[0]
or "Prove" in resp[0]
and "robot" in resp[0]
or "Robot" in resp[0]
'Not many great matches came back for your search' in resp[0]
or 'Your request has been flagged as being suspicious and Brave Search' in resp[0]
or 'Prove' in resp[0]
and 'robot' in resp[0]
or 'Robot' in resp[0]
):
break
await asyncio.sleep(get_delay() + 15)
except Exception as e:
print(f"An exception has occurred in bravesearch: {e}")
print(f'An exception has occurred in bravesearch: {e}')
await asyncio.sleep(get_delay() + 80)
continue


@@ -11,33 +11,30 @@ def __init__(self, word) -> None:
self.totalips: set = set()
self.key = Core.bufferoverun_key()
if self.key is None:
raise MissingKey("bufferoverun")
raise MissingKey('bufferoverun')
self.proxy = False
async def do_search(self) -> None:
url = f"https://tls.bufferover.run/dns?q={self.word}"
url = f'https://tls.bufferover.run/dns?q={self.word}'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
dct = response[0]
if dct["Results"]:
if dct['Results']:
self.totalhosts = {
(
host.split(",")
if "," in host
and self.word.replace("www.", "") in host.split(",")[0] in host
else host.split(",")[4]
host.split(',')
if ',' in host and self.word.replace('www.', '') in host.split(',')[0] in host
else host.split(',')[4]
)
for host in dct["Results"]
for host in dct['Results']
}
self.totalips = {
ip.split(",")[0]
for ip in dct["Results"]
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip.split(",")[0])
ip.split(',')[0] for ip in dct['Results'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip.split(',')[0])
}
async def get_hostnames(self) -> set:


@@ -15,7 +15,7 @@ def __init__(self, domain, limit: int = 500) -> None:
self.word = domain
self.key = Core.censys_key()
if self.key[0] is None or self.key[1] is None:
raise MissingKey("Censys ID and/or Secret")
raise MissingKey('Censys ID and/or Secret')
self.totalhosts: set = set()
self.emails: set = set()
self.limit = limit
@@ -26,26 +26,24 @@ async def do_search(self) -> None:
cert_search = CensysCerts(
api_id=self.key[0],
api_secret=self.key[1],
user_agent=f"censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)",
user_agent=f'censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)',
)
except CensysUnauthorizedException:
raise MissingKey("Censys ID and/or Secret")
raise MissingKey('Censys ID and/or Secret')
query = f"names: {self.word}"
query = f'names: {self.word}'
try:
response = cert_search.search(
query=query,
fields=["names", "parsed.subject.email_address"],
fields=['names', 'parsed.subject.email_address'],
max_records=self.limit,
)
for cert in response():
self.totalhosts.update(cert.get("names", []))
email_address = (
cert.get("parsed", {}).get("subject", {}).get("email_address", [])
)
self.totalhosts.update(cert.get('names', []))
email_address = cert.get('parsed', {}).get('subject', {}).get('email_address', [])
self.emails.update(email_address)
except CensysRateLimitExceededException:
print("Censys rate limit exceeded")
print('Censys rate limit exceeded')
async def get_hostnames(self) -> set:
return self.totalhosts


@@ -8,21 +8,19 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
base_url = f"https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names"
base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names'
try:
response = await AsyncFetcher.fetch_all(
[base_url], json=True, proxy=self.proxy
)
response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy)
response = response[0]
if isinstance(response, list):
for dct in response:
for key, value in dct.items():
if key == "dns_names":
if key == 'dns_names':
self.totalhosts.update({name for name in value if name})
elif isinstance(response, dict):
self.totalhosts.update({response["dns_names"] if "dns_names" in response.keys() else ""}) # type: ignore
self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''}) # type: ignore
else:
self.totalhosts.update({""})
self.totalhosts.update({''})
except Exception as e:
print(e)
@@ -32,4 +30,4 @@ async def get_hostnames(self) -> set:
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()
print("\tSearching results.")
print('\tSearching results.')


@@ -14,10 +14,10 @@ async def splitter(links):
unique_list = []
name_check = []
for url in links:
tail = url.split("/")[-1]
if len(tail) == 2 or tail == "zh-cn":
tail = url.split("/")[-2]
name = tail.split("-")
tail = url.split('/')[-1]
if len(tail) == 2 or tail == 'zh-cn':
tail = url.split('/')[-2]
name = tail.split('-')
if len(name) > 1:
joined_name = name[0] + name[1]
else:
@@ -41,12 +41,8 @@ def filter(lst):
new_lst = []
for item in lst:
item = str(item)
if (
(item[0].isalpha() or item[0].isdigit())
and ("xxx" not in item)
and (".." not in item)
):
item = item.replace("252f", "").replace("2F", "").replace("2f", "")
if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' not in item):
item = item.replace('252f', '').replace('2F', '').replace('2f', '')
new_lst.append(item.lower())
return new_lst
@@ -63,10 +59,9 @@ async def search(text: str) -> bool:
"""
for line in text.strip().splitlines():
if (
"This page appears when Google automatically detects requests coming from your computer network"
in line
or "http://www.google.com/sorry/index" in line
or "https://www.google.com/sorry/index" in line
'This page appears when Google automatically detects requests coming from your computer network' in line
or 'http://www.google.com/sorry/index' in line
or 'https://www.google.com/sorry/index' in line
):
# print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
return True
@@ -79,47 +74,37 @@ async def google_workaround(visit_url: str) -> bool | str:
:param visit_url: Url to scrape
:return: Correct html that can be parsed by BS4
"""
url = "https://websniffer.cc/"
url = 'https://websniffer.cc/'
data = {
"Cookie": "",
"url": visit_url,
"submit": "Submit",
"type": "GET&http=1.1",
"uak": str(random.randint(4, 8)), # select random UA to send to Google
'Cookie': '',
'url': visit_url,
'submit': 'Submit',
'type': 'GET&http=1.1',
'uak': str(random.randint(4, 8)), # select random UA to send to Google
}
returned_html = await AsyncFetcher.post_fetch(
url, headers={"User-Agent": Core.get_user_agent()}, data=data
)
returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
returned_html = (
"This page appears when Google automatically detects requests coming from your computer network"
if returned_html == ""
'This page appears when Google automatically detects requests coming from your computer network'
if returned_html == ''
else returned_html[0]
)
returned_html = (
"" if "Please Wait... | Cloudflare" in returned_html else returned_html
)
returned_html = '' if 'Please Wait... | Cloudflare' in returned_html else returned_html
if (
len(returned_html) == 0
or await search(returned_html)
or "&lt;html" not in returned_html
):
if len(returned_html) == 0 or await search(returned_html) or '&lt;html' not in returned_html:
# indicates that google is serving workaround a captcha
# That means we will try out second option which will utilize proxies
return True
# the html we get is malformed for BS4 as there are no greater than or less than signs
if "&lt;html&gt;" in returned_html:
start_index = returned_html.index("&lt;html&gt;")
if '&lt;html&gt;' in returned_html:
start_index = returned_html.index('&lt;html&gt;')
else:
start_index = returned_html.index("&lt;html")
start_index = returned_html.index('&lt;html')
end_index = returned_html.index("&lt;/html&gt;") + 1
end_index = returned_html.index('&lt;/html&gt;') + 1
correct_html = returned_html[start_index:end_index]
# Slice list to get the response's html
correct_html = "".join(
[ch.strip().replace("&lt;", "<").replace("&gt;", ">") for ch in correct_html]
)
correct_html = ''.join([ch.strip().replace('&lt;', '<').replace('&gt;', '>') for ch in correct_html])
return correct_html
@@ -130,9 +115,9 @@ class MissingKey(Exception):
def __init__(self, source: str | None) -> None:
if source:
self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
else:
self.message = "\n\033[93m[!] Missing CSE id. \033[0m"
self.message = '\n\033[93m[!] Missing CSE id. \033[0m'
def __str__(self) -> str:
return self.message


@@ -13,64 +13,56 @@ def __init__(self, word) -> None:
self.asns: set = set()
self.key = Core.criminalip_key()
if self.key is None:
raise MissingKey("criminalip")
raise MissingKey('criminalip')
self.proxy = False
async def do_search(self) -> None:
# https://www.criminalip.io/developer/api/post-domain-scan
# https://www.criminalip.io/developer/api/get-domain-status-id
# https://www.criminalip.io/developer/api/get-domain-report-id
url = "https://api.criminalip.io/v1/domain/scan"
url = 'https://api.criminalip.io/v1/domain/scan'
data = f'{{"query": "{self.word}"}}'
# print(f'Current key: {self.key}')
user_agent = Core.get_user_agent()
response = await AsyncFetcher.post_fetch(
url,
json=True,
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
data=data,
proxy=self.proxy,
)
# print(f'My response: {response}')
# Expected response format:
# {'data': {'scan_id': scan_id}, 'message': 'api success', 'status': 200}
if "status" in response.keys():
status = response["status"]
if 'status' in response.keys():
status = response['status']
if status != 200:
print(
f"An error has occurred searching criminalip dumping response: {response}"
)
print(f'An error has occurred searching criminalip dumping response: {response}')
else:
scan_id = response["data"]["scan_id"]
scan_id = response['data']['scan_id']
scan_percentage = 0
counter = 0
while scan_percentage != 100:
status_url = f"https://api.criminalip.io/v1/domain/status/{scan_id}"
status_url = f'https://api.criminalip.io/v1/domain/status/{scan_id}'
status_response = await AsyncFetcher.fetch_all(
[status_url],
json=True,
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
status = status_response[0]
# print(f'Status response: {status}')
# Expected format:
# {"data": {"scan_percentage": 100}, "message": "api success", "status": 200}
scan_percentage = status["data"]["scan_percentage"]
scan_percentage = status['data']['scan_percentage']
if scan_percentage == 100:
break
if scan_percentage == -2:
print(
f"CriminalIP failed to scan: {self.word} does not exist, verify manually"
)
print(
f"Dumping data: scan_response: {response} status_response: {status}"
)
print(f'CriminalIP failed to scan: {self.word} does not exist, verify manually')
print(f'Dumping data: scan_response: {response} status_response: {status}')
return
if scan_percentage == -1:
print(
f"CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}"
)
print(f'CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}')
return
# Wait for scan to finish
if counter >= 5:
@@ -80,18 +72,18 @@ async def do_search(self) -> None:
counter += 1
if counter == 10:
print(
"Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop."
'Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop.'
)
print(
f"Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}"
f'Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}'
)
return
report_url = f"https://api.criminalip.io/v1/domain/report/{scan_id}"
report_url = f'https://api.criminalip.io/v1/domain/report/{scan_id}'
scan_response = await AsyncFetcher.fetch_all(
[report_url],
json=True,
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
scan = scan_response[0]
@@ -100,125 +92,113 @@ async def do_search(self) -> None:
try:
await self.parser(scan)
except Exception as e:
print(f"An exception occurred while parsing criminalip result: {e}")
print("Dumping json: ")
print(f'An exception occurred while parsing criminalip result: {e}')
print('Dumping json: ')
print(scan)
async def parser(self, jlines):
# TODO when new scope field is added to parse lines for potential new scope!
# TODO map as_name to asn for asn data
# TODO determine if worth storing interesting urls
if "data" not in jlines.keys():
print(f"Error with criminalip data, dumping: {jlines}")
if 'data' not in jlines.keys():
print(f'Error with criminalip data, dumping: {jlines}')
return
data = jlines["data"]
for cert in data["certificates"]:
data = jlines['data']
for cert in data['certificates']:
# print(f'Current cert: {cert}')
if cert["subject"].endswith("." + self.word):
self.totalhosts.add(cert["subject"])
if cert['subject'].endswith('.' + self.word):
self.totalhosts.add(cert['subject'])
for connected_domain in data["connected_domain_subdomain"]:
for connected_domain in data['connected_domain_subdomain']:
try:
main_domain = connected_domain["main_domain"]["domain"]
subdomains = [sub["domain"] for sub in connected_domain["subdomains"]]
if main_domain.endswith("." + self.word):
main_domain = connected_domain['main_domain']['domain']
subdomains = [sub['domain'] for sub in connected_domain['subdomains']]
if main_domain.endswith('.' + self.word):
self.totalhosts.add(main_domain)
for sub in subdomains:
# print(f'Current sub: {sub}')
if sub.endswith("." + self.word):
if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
except Exception as e:
print(f"An exception has occurred: {e}")
print(f"Main line: {connected_domain}")
print(f'An exception has occurred: {e}')
print(f'Main line: {connected_domain}')
for ip_info in data["connected_ip_info"]:
self.asns.add(str(ip_info["asn"]))
domains = [sub["domain"] for sub in ip_info["domain_list"]]
for ip_info in data['connected_ip_info']:
self.asns.add(str(ip_info['asn']))
domains = [sub['domain'] for sub in ip_info['domain_list']]
for sub in domains:
if sub.endswith("." + self.word):
if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
self.totalips.add(ip_info["ip"])
self.totalips.add(ip_info['ip'])
for cookie in data["cookies"]:
if cookie["domain"] != "." + self.word and cookie["domain"].endswith(
"." + self.word
):
self.totalhosts.add(cookie["domain"])
for cookie in data['cookies']:
if cookie['domain'] != '.' + self.word and cookie['domain'].endswith('.' + self.word):
self.totalhosts.add(cookie['domain'])
for country in data["country"]:
if country["domain"].endswith("." + self.word):
self.totalhosts.add(country["domain"])
for ip in country["mapped_ips"]:
self.totalips.add(ip["ip"])
for country in data['country']:
if country['domain'].endswith('.' + self.word):
self.totalhosts.add(country['domain'])
for ip in country['mapped_ips']:
self.totalips.add(ip['ip'])
for k, v in data["dns_record"].items():
if k == "dns_record_type_a":
for ip in data["dns_record"][k]["ipv4"]:
self.totalips.add(ip["ip"])
for k, v in data['dns_record'].items():
if k == 'dns_record_type_a':
for ip in data['dns_record'][k]['ipv4']:
self.totalips.add(ip['ip'])
else:
if isinstance(v, list):
for item in v:
if isinstance(item, list):
for subitem in item:
if subitem.endswith("." + self.word):
if subitem.endswith('.' + self.word):
self.totalhosts.add(subitem)
else:
if item.endswith("." + self.word):
if item.endswith('.' + self.word):
self.totalhosts.add(item)
for domain_list in data["domain_list"]:
self.asns.add(str(domain_list["asn"]))
domains = [sub["domain"] for sub in domain_list["domain_list"]]
for domain_list in data['domain_list']:
self.asns.add(str(domain_list['asn']))
domains = [sub['domain'] for sub in domain_list['domain_list']]
for sub in domains:
if sub.endswith("." + self.word):
if sub.endswith('.' + self.word):
self.totalhosts.add(sub)
self.totalips.add(domain_list["ip"])
self.totalips.add(domain_list['ip'])
for html_page_links in data["html_page_link_domains"]:
domain = html_page_links["domain"]
if domain.endswith("." + self.word):
for html_page_links in data['html_page_link_domains']:
domain = html_page_links['domain']
if domain.endswith('.' + self.word):
self.totalhosts.add(domain)
for ip in html_page_links["mapped_ips"]:
self.totalips.add(ip["ip"])
for ip in html_page_links['mapped_ips']:
self.totalips.add(ip['ip'])
# TODO combine data['links'] and data['network_logs'] urls into one list for one run through
for link in data["links"]:
url = link["url"]
for link in data['links']:
url = link['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
for log in data["network_logs"]:
url = log["url"]
for log in data['network_logs']:
url = log['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
self.asns.add(str(log["as_number"]))
self.asns.add(str(log['as_number']))
for redirects in data["page_redirections"]:
for redirects in data['page_redirections']:
for redirect in redirects:
url = redirect["url"]
url = redirect['url']
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
self.totalhosts = {
host.replace("www.", "")
for host in self.totalhosts
if "*." + self.word != host
}
self.totalhosts = {host.replace('www.', '') for host in self.totalhosts if '*.' + self.word != host}
# print(f'hostnames: {self.totalhosts}')
# print(f'asns: {self.asns}')


@@ -10,24 +10,11 @@ def __init__(self, word) -> None:
async def do_search(self) -> list:
data: set = set()
try:
url = f"https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json"
url = f'https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
response = response[0]
data = set(
[
(
dct["name_value"][2:]
if "*." == dct["name_value"][:2]
else dct["name_value"]
)
for dct in response
]
)
data = {
domain
for domain in data
if (domain[0] != "*" and str(domain[0:4]).isnumeric() is False)
}
data = set([(dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value']) for dct in response])
data = {domain for domain in data if (domain[0] != '*' and str(domain[0:4]).isnumeric() is False)}
except Exception as e:
print(e)
clean: list = []


@@ -8,51 +8,49 @@
class SearchDnsDumpster:
def __init__(self, word) -> None:
self.word = word.replace(" ", "%20")
self.results = ""
self.totalresults = ""
self.server = "dnsdumpster.com"
self.word = word.replace(' ', '%20')
self.results = ''
self.totalresults = ''
self.server = 'dnsdumpster.com'
self.proxy = False
async def do_search(self) -> None:
try:
agent = Core.get_user_agent()
headers = {"User-Agent": agent}
headers = {'User-Agent': agent}
session = aiohttp.ClientSession(headers=headers)
# create a session to properly verify
url = f"https://{self.server}"
csrftoken = ""
url = f'https://{self.server}'
csrftoken = ''
if self.proxy is False:
async with session.get(url, headers=headers) as resp:
resp_cookies = str(resp.cookies)
cookies = resp_cookies.split("csrftoken=")
csrftoken += cookies[1][: cookies[1].find(";")]
cookies = resp_cookies.split('csrftoken=')
csrftoken += cookies[1][: cookies[1].find(';')]
else:
async with session.get(url, headers=headers, proxy=self.proxy) as resp:
resp_cookies = str(resp.cookies)
cookies = resp_cookies.split("csrftoken=")
csrftoken += cookies[1][: cookies[1].find(";")]
cookies = resp_cookies.split('csrftoken=')
csrftoken += cookies[1][: cookies[1].find(';')]
await asyncio.sleep(5)
# extract csrftoken from cookies
data = {
"Cookie": f"csfrtoken={csrftoken}",
"csrfmiddlewaretoken": csrftoken,
"targetip": self.word,
"user": "free",
'Cookie': f'csfrtoken={csrftoken}',
'csrfmiddlewaretoken': csrftoken,
'targetip': self.word,
'user': 'free',
}
headers["Referer"] = url
headers['Referer'] = url
if self.proxy is False:
async with session.post(url, headers=headers, data=data) as resp:
self.results = await resp.text()
else:
async with session.post(
url, headers=headers, data=data, proxy=self.proxy
) as resp:
async with session.post(url, headers=headers, data=data, proxy=self.proxy) as resp:
self.results = await resp.text()
await session.close()
except Exception as e:
print(f"An exception occurred: {e}")
print(f'An exception occurred: {e}')
self.totalresults += self.results
async def get_hostnames(self):


@@ -21,7 +21,7 @@
# DNS FORCE
#####################################################################
DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"
DNS_NAMES = DATA_DIR / 'wordlists' / 'dns-names.txt'
class DnsForce:
@@ -32,13 +32,13 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
# self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
# self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
self.dnsserver = dnsserver
with DNS_NAMES.open("r") as file:
with DNS_NAMES.open('r') as file:
self.list = file.readlines()
self.domain = domain.replace("www.", "")
self.list = [f"{word.strip()}.{self.domain}" for word in self.list]
self.domain = domain.replace('www.', '')
self.list = [f'{word.strip()}.{self.domain}' for word in self.list]
async def run(self):
print(f"Starting DNS brute forcing with {len(self.list)} words")
print(f'Starting DNS brute forcing with {len(self.list)} words')
checker = hostchecker.Checker(self.list, nameserver=self.dnsserver)
resolved_pair, hosts, ips = await checker.check()
return resolved_pair, hosts, ips
@@ -49,13 +49,13 @@ async def run(self):
#####################################################################
IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
PORT_REGEX = r"\d{1,5}"
NETMASK_REGEX: str = r"\d{1,2}|" + IP_REGEX
NETWORK_REGEX: str = rf"\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b"
IP_REGEX = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
PORT_REGEX = r'\d{1,5}'
NETMASK_REGEX: str = r'\d{1,2}|' + IP_REGEX
NETWORK_REGEX: str = rf'\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b'
def serialize_ip_range(ip: str, netmask: str = "24") -> str:
def serialize_ip_range(ip: str, netmask: str = '24') -> str:
"""
Serialize a network range in a constant format, 'x.x.x.x/y'.
@@ -78,12 +78,12 @@ def serialize_ip_range(ip: str, netmask: str = "24") -> str:
__ip = __ip_matches.group(1)
__netmask = netmask if netmask else __ip_matches.group(3)
if __ip and __netmask:
return str(IPv4Network(f"{__ip}/{__netmask}", strict=False))
return str(IPv4Network(f'{__ip}/{__netmask}', strict=False))
elif __ip:
return str(IPv4Network("{}/{}".format(__ip, "24"), strict=False))
return str(IPv4Network('{}/{}'.format(__ip, '24'), strict=False))
# invalid input ip
return ""
return ''
def list_ips_in_network_range(iprange: str) -> list[str]:
@@ -122,14 +122,12 @@ async def reverse_single_ip(ip: str, resolver: DNSResolver) -> str:
"""
try:
__host = await resolver.gethostbyaddr(ip)
return __host.name if __host else ""
return __host.name if __host else ''
except Exception:
return ""
return ''
async def reverse_all_ips_in_range(
iprange: str, callback: Callable, nameservers: list[str] | None = None
) -> None:
async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers: list[str] | None = None) -> None:
"""
Reverse all the IPs stored in a network range.
All the queries are made concurrently.
@@ -176,8 +174,8 @@ def log_query(ip: str) -> None:
-------
out: None.
"""
sys.stdout.write(chr(27) + "[2K" + chr(27) + "[G")
sys.stdout.write("\r" + ip + " - ")
sys.stdout.write(chr(27) + '[2K' + chr(27) + '[G')
sys.stdout.write('\r' + ip + ' - ')
sys.stdout.flush()


@@ -7,29 +7,27 @@
class SearchDuckDuckGo:
def __init__(self, word, limit) -> None:
self.word = word
self.results = ""
self.totalresults = ""
self.results = ''
self.totalresults = ''
self.dorks: list = []
self.links: list = []
self.database = "https://duckduckgo.com/?q="
self.api = "https://api.duckduckgo.com/?q=x&format=json&pretty=1" # Currently using API.
self.quantity = "100"
self.database = 'https://duckduckgo.com/?q='
self.api = 'https://api.duckduckgo.com/?q=x&format=json&pretty=1' # Currently using API.
self.quantity = '100'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
# Do normal scraping.
url = self.api.replace("x", self.word)
headers = {"User-Agent": Core.get_user_agent()}
first_resp = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
url = self.api.replace('x', self.word)
headers = {'User-Agent': Core.get_user_agent()}
first_resp = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
self.results = first_resp[0]
self.totalresults += self.results
urls = await self.crawl(self.results)
urls = {url for url in urls if len(url) > 5}
all_resps = await AsyncFetcher.fetch_all(urls)
self.totalresults += "".join(all_resps)
self.totalresults += ''.join(all_resps)
async def crawl(self, text):
"""
@@ -54,39 +52,27 @@ async def crawl(self, text):
if isinstance(val, dict): # Validation check.
for key in val.keys():
value = val.get(key)
if (
isinstance(value, str)
and value != ""
and "https://" in value
or "http://" in value
):
if isinstance(value, str) and value != '' and 'https://' in value or 'http://' in value:
urls.add(value)
if (
isinstance(val, str)
and val != ""
and "https://" in val
or "http://" in val
):
if isinstance(val, str) and val != '' and 'https://' in val or 'http://' in val:
urls.add(val)
tmp = set()
for url in urls:
if (
"<" in url and "href=" in url
): # Format is <href="https://www.website.com"/>
equal_index = url.index("=")
true_url = ""
if '<' in url and 'href=' in url: # Format is <href="https://www.website.com"/>
equal_index = url.index('=')
true_url = ''
for ch in url[equal_index + 1 :]:
if ch == '"':
tmp.add(true_url)
break
true_url += ch
else:
if url != "":
if url != '':
tmp.add(url)
return tmp
except Exception as e:
print(f"Exception occurred: {e}")
print(f'Exception occurred: {e}')
return []
async def get_emails(self):


@@ -7,19 +7,19 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.fullhunt_key()
if self.key is None:
raise MissingKey("fullhunt")
raise MissingKey('fullhunt')
self.total_results = None
self.proxy = False
async def do_search(self) -> None:
url = f"https://fullhunt.io/api/v1/domain/{self.word}/subdomains"
url = f'https://fullhunt.io/api/v1/domain/{self.word}/subdomains'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={"User-Agent": Core.get_user_agent(), "X-API-KEY": self.key},
headers={'User-Agent': Core.get_user_agent(), 'X-API-KEY': self.key},
proxy=self.proxy,
)
self.total_results = response[0]["hosts"]
self.total_results = response[0]['hosts']
async def get_hostnames(self):
return self.total_results


@@ -28,8 +28,8 @@ class ErrorResult(NamedTuple):
class SearchGithubCode:
def __init__(self, word, limit) -> None:
self.word = word
self.total_results = ""
self.server = "api.github.com"
self.total_results = ''
self.server = 'api.github.com'
self.limit = limit
self.counter: int = 0
self.page: int | None = 1
@@ -38,17 +38,17 @@ def __init__(self, word, limit) -> None:
# rate limits you more severely
# https://developer.github.com/v3/search/#rate-limit
if self.key is None:
raise MissingKey("Github")
raise MissingKey('Github')
self.proxy = False
@staticmethod
async def fragments_from_response(json_data: dict) -> list[str]:
items: list[dict[str, Any]] = json_data.get("items") or list()
items: list[dict[str, Any]] = json_data.get('items') or list()
fragments: list[str] = list()
for item in items:
matches = item.get("text_matches") or list()
matches = item.get('text_matches') or list()
for match in matches:
fragments.append(match.get("fragment"))
fragments.append(match.get('fragment'))
return [fragment for fragment in fragments if fragment is not None]
@@ -56,22 +56,20 @@ async def fragments_from_response(json_data: dict) -> list[str]:
async def page_from_response(page: str, links) -> int | None:
page_link = links.get(page)
if page_link:
parsed = urlparse.urlparse(str(page_link.get("url")))
parsed = urlparse.urlparse(str(page_link.get('url')))
params = urlparse.parse_qs(parsed.query)
pages: list[Any] = params.get("page", [None])
pages: list[Any] = params.get('page', [None])
page_number = pages[0] and int(pages[0])
return page_number
else:
return None
async def handle_response(
self, response: tuple[str, dict, int, Any]
) -> ErrorResult | RetryResult | SuccessResult:
async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
text, json_data, status, links = response
if status == 200:
results = await self.fragments_from_response(json_data)
next_page = await self.page_from_response("next", links)
last_page = await self.page_from_response("last", links)
next_page = await self.page_from_response('next', links)
last_page = await self.page_from_response('last', links)
return SuccessResult(results, next_page, last_page)
elif status == 429 or status == 403:
return RetryResult(60)
@@ -87,17 +85,15 @@ async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
else:
url = f'https://{self.server}/search/code?q="{self.word}"&page={page}'
headers = {
"Host": self.server,
"User-agent": Core.get_user_agent(),
"Accept": "application/vnd.github.v3.text-match+json",
"Authorization": f"token {self.key}",
'Host': self.server,
'User-agent': Core.get_user_agent(),
'Accept': 'application/vnd.github.v3.text-match+json',
'Authorization': f'token {self.key}',
}
async with aiohttp.ClientSession(headers=headers) as sess:
if self.proxy:
async with sess.get(
url, proxy=random.choice(Core.proxy_list())
) as resp:
async with sess.get(url, proxy=random.choice(Core.proxy_list())) as resp:
return await resp.text(), await resp.json(), resp.status, resp.links
else:
async with sess.get(url) as resp:
@@ -117,7 +113,7 @@ async def process(self, proxy: bool = False) -> None:
api_response = await self.do_search(self.page)
result = await self.handle_response(api_response)
if isinstance(result, SuccessResult):
print(f"\tSearching {self.counter} results.")
print(f'\tSearching {self.counter} results.')
for fragment in result.fragments:
self.total_results += fragment
self.counter = self.counter + 1
@@ -125,16 +121,14 @@ async def process(self, proxy: bool = False) -> None:
await asyncio.sleep(get_delay())
elif isinstance(result, RetryResult):
sleepy_time = get_delay() + result.time
print(f"\tRetrying page in {sleepy_time} seconds...")
print(f'\tRetrying page in {sleepy_time} seconds...')
await asyncio.sleep(sleepy_time)
elif isinstance(result, ErrorResult):
raise Exception(
f"\tException occurred: status_code: {result.status_code} reason: {result.body}"
)
raise Exception(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
else:
raise Exception("\tUnknown exception occurred")
raise Exception('\tUnknown exception occurred')
except Exception as e:
print(f"An exception has occurred: {e}")
print(f'An exception has occurred: {e}')
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)


@@ -8,30 +8,24 @@ class SearchHackerTarget:
def __init__(self, word) -> None:
self.word = word
self.total_results = ""
self.hostname = "https://api.hackertarget.com"
self.total_results = ''
self.hostname = 'https://api.hackertarget.com'
self.proxy = False
self.results = None
async def do_search(self) -> None:
headers = {"User-agent": Core.get_user_agent()}
headers = {'User-agent': Core.get_user_agent()}
urls = [
f"{self.hostname}/hostsearch/?q={self.word}",
f"{self.hostname}/reversedns/?q={self.word}",
f'{self.hostname}/hostsearch/?q={self.word}',
f'{self.hostname}/reversedns/?q={self.word}',
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response.replace(",", ":")
self.total_results += response.replace(',', ':')
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()
async def get_hostnames(self) -> list:
return [
result
for result in self.total_results.splitlines()
if "No PTR records found" not in result
]
return [result for result in self.total_results.splitlines() if 'No PTR records found' not in result]


@@ -12,10 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start
self.key = Core.hunter_key()
if self.key is None:
raise MissingKey("Hunter")
self.total_results = ""
raise MissingKey('Hunter')
self.total_results = ''
self.counter = start
self.database = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10"
self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10'
self.proxy = False
self.hostnames: list = []
self.emails: list = []
@@ -23,76 +23,56 @@ def __init__(self, word, limit, start) -> None:
async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free
is_free = True
headers = {"User-Agent": Core.get_user_agent()}
acc_info_url = f"https://api.hunter.io/v2/account?api_key={self.key}"
response = await AsyncFetcher.fetch_all(
[acc_info_url], headers=headers, json=True
)
headers = {'User-Agent': Core.get_user_agent()}
acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}'
response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
is_free = (
is_free
if "plan_name" in response[0]["data"].keys()
and response[0]["data"]["plan_name"].lower() == "free"
else False
is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() == 'free' else False
)
# Extract the total number of requests that are available for an account
total_requests_avail = (
response[0]["data"]["requests"]["searches"]["available"]
- response[0]["data"]["requests"]["searches"]["used"]
response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used']
)
if is_free:
response = await AsyncFetcher.fetch_all(
[self.database], headers=headers, proxy=self.proxy, json=True
)
response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else:
# Determine the total number of emails that are available
# As the most emails you can get within one query are 100
# This is only done where paid accounts are in play
hunter_dinfo_url = (
f"https://api.hunter.io/v2/email-count?domain={self.word}"
)
response = await AsyncFetcher.fetch_all(
[hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True
)
total_number_reqs = response[0]["data"]["total"] // 100
hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}'
response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True)
total_number_reqs = response[0]['data']['total'] // 100
# Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs:
print('WARNING: account does not have enough requests to gather all emails')
print(
"WARNING: account does not have enough requests to gather all emails"
)
print(
f"Total requests available: {total_requests_avail}, total requests "
f"needed to be made: {total_number_reqs}"
)
print(
"RETURNING current results, if you would still like to "
"run this module comment out the if request"
f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
)
print('RETURNING current results, if you would still like to ' 'run this module comment out the if request')
return
self.limit = 100
# max number of emails you can get per request is 100
# increments of 100 with offset determining where to start
# See docs for more details: https://hunter.io/api-documentation/v2#domain-search
for offset in range(0, 100 * total_number_reqs, 100):
req_url = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}"
response = await AsyncFetcher.fetch_all(
[req_url], headers=headers, proxy=self.proxy, json=True
)
req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}'
response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1)
async def parse_resp(self, json_resp):
emails = list(sorted({email["value"] for email in json_resp["data"]["emails"]}))
emails = list(sorted({email['value'] for email in json_resp['data']['emails']}))
domains = list(
sorted(
{
source["domain"]
for email in json_resp["data"]["emails"]
for source in email["sources"]
if self.word in source["domain"]
source['domain']
for email in json_resp['data']['emails']
for source in email['sources']
if self.word in source['domain']
}
)
)


@@ -14,8 +14,8 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.intelx_key()
if self.key is None:
raise MissingKey("Intelx")
self.database = "https://2.intelx.io"
raise MissingKey('Intelx')
self.database = 'https://2.intelx.io'
self.results: Any = None
self.info: tuple[Any, ...] = ()
self.limit: int = 10000
@@ -28,34 +28,30 @@ async def do_search(self) -> None:
# API requests self identification
# https://intelx.io/integrations
headers = {
"x-key": self.key,
"User-Agent": f"{Core.get_user_agent()}-theHarvester",
'x-key': self.key,
'User-Agent': f'{Core.get_user_agent()}-theHarvester',
}
data = {
"term": self.word,
"buckets": [],
"lookuplevel": 0,
"maxresults": self.limit,
"timeout": 5,
"datefrom": "",
"dateto": "",
"sort": 2,
"media": 0,
"terminate": [],
"target": 0,
'term': self.word,
'buckets': [],
'lookuplevel': 0,
'maxresults': self.limit,
'timeout': 5,
'datefrom': '',
'dateto': '',
'sort': 2,
'media': 0,
'terminate': [],
'target': 0,
}
total_resp = requests.post(
f"{self.database}/phonebook/search", headers=headers, json=data
)
phonebook_id = ujson.loads(total_resp.text)["id"]
total_resp = requests.post(f'{self.database}/phonebook/search', headers=headers, json=data)
phonebook_id = ujson.loads(total_resp.text)['id']
await asyncio.sleep(5)
# Fetch results from phonebook based on ID
resp = await AsyncFetcher.fetch_all(
[
f"{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}"
],
[f'{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}'],
headers=headers,
json=True,
proxy=self.proxy,
@@ -63,7 +59,7 @@ async def do_search(self) -> None:
resp = resp[0]
self.results = resp # TODO: give self.results more appropriate typing
except Exception as e:
print(f"An exception has occurred in Intelx: {e}")
print(f'An exception has occurred in Intelx: {e}')
async def process(self, proxy: bool = False):
self.proxy = proxy


@@ -9,17 +9,15 @@ def __init__(self, word) -> None:
self.totalips: list = []
self.key = Core.netlas_key()
if self.key is None:
raise MissingKey("netlas")
raise MissingKey('netlas')
self.proxy = False
async def do_search(self) -> None:
api = f"https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*"
headers = {"X-API-Key": self.key}
response = await AsyncFetcher.fetch_all(
[api], json=True, headers=headers, proxy=self.proxy
)
for domain in response[0]["items"]:
self.totalhosts.append(domain["data"]["domain"])
api = f'https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*'
headers = {'X-API-Key': self.key}
response = await AsyncFetcher.fetch_all([api], json=True, headers=headers, proxy=self.proxy)
for domain in response[0]['items']:
self.totalhosts.append(domain['data']['domain'])
async def get_hostnames(self) -> list:
return self.totalhosts


@@ -9,28 +9,26 @@
class SearchOnyphe:
def __init__(self, word) -> None:
self.word = word
self.response = ""
self.response = ''
self.totalhosts: set = set()
self.totalips: set = set()
self.asns: set = set()
self.key = Core.onyphe_key()
if self.key is None:
raise MissingKey("onyphe")
raise MissingKey('onyphe')
self.proxy = False
async def do_search(self) -> None:
# https://www.onyphe.io/docs/apis/search
# https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT
# base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}'
base_url = f"https://www.onyphe.io/api/v2/search/?q=domain:{self.word}"
base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:{self.word}'
headers = {
"User-Agent": Core.get_user_agent(),
"Content-Type": "application/json",
"Authorization": f"bearer {self.key}",
'User-Agent': Core.get_user_agent(),
'Content-Type': 'application/json',
'Authorization': f'bearer {self.key}',
}
response = await AsyncFetcher.fetch_all(
[base_url], json=True, headers=headers, proxy=self.proxy
)
response = await AsyncFetcher.fetch_all([base_url], json=True, headers=headers, proxy=self.proxy)
self.response = response[0]
await self.parse_onyphe_resp_json()
@@ -38,74 +36,52 @@ async def parse_onyphe_resp_json(self):
if isinstance(self.response, list):
self.response = self.response[0]
if not isinstance(self.response, dict):
raise Exception(f"An exception has occurred {self.response} is not a dict")
if "Success" == self.response["text"]:
if "results" in self.response.keys():
for result in self.response["results"]:
raise Exception(f'An exception has occurred {self.response} is not a dict')
if 'Success' == self.response['text']:
if 'results' in self.response.keys():
for result in self.response['results']:
try:
if "alternativeip" in result.keys():
self.totalips.update(
{altip for altip in result["alternativeip"]}
)
if "url" in result.keys() and isinstance(result["url"], list):
if 'alternativeip' in result.keys():
self.totalips.update({altip for altip in result['alternativeip']})
if 'url' in result.keys() and isinstance(result['url'], list):
self.totalhosts.update(
urlparse(url).netloc
for url in result["url"]
if urlparse(url).netloc.endswith(self.word)
urlparse(url).netloc for url in result['url'] if urlparse(url).netloc.endswith(self.word)
)
self.asns.add(result["asn"])
self.asns.add(result["geolocus"]["asn"])
self.totalips.add(result["geolocus"]["subnet"])
self.totalips.add(result["ip"])
self.totalips.add(result["subnet"])
self.asns.add(result['asn'])
self.asns.add(result['geolocus']['asn'])
self.totalips.add(result['geolocus']['subnet'])
self.totalips.add(result['ip'])
self.totalips.add(result['subnet'])
# Shouldn't be needed as API autoparses urls from html raw data
# rawres = myparser.Parser(result['data'], self.word)
# if await rawres.hostnames():
# self.totalhosts.update(set(await rawres.hostnames()))
for subdomain_key in [
"domain",
"hostname",
"subdomains",
"subject",
"reverse",
"geolocus",
'domain',
'hostname',
'subdomains',
'subject',
'reverse',
'geolocus',
]:
if subdomain_key in result.keys():
if subdomain_key == "subject":
if subdomain_key == 'subject':
self.totalhosts.update(
{
domain
for domain in result[subdomain_key][
"altname"
]
if domain.endswith(self.word)
}
{domain for domain in result[subdomain_key]['altname'] if domain.endswith(self.word)}
)
elif subdomain_key == "geolocus":
elif subdomain_key == 'geolocus':
self.totalhosts.update(
{
domain
for domain in result[subdomain_key][
"domain"
]
if domain.endswith(self.word)
}
{domain for domain in result[subdomain_key]['domain'] if domain.endswith(self.word)}
)
else:
self.totalhosts.update(
{
domain
for domain in result[subdomain_key]
if domain.endswith(self.word)
}
{domain for domain in result[subdomain_key] if domain.endswith(self.word)}
)
except Exception as e:
print(f"An exception has occurred on result: {result}: {e}")
print(f'An exception has occurred on result: {result}: {e}')
continue
else:
print(
f"Onhyphe API query did not succeed dumping current response: {self.response}"
)
print(f'Onhyphe API query did not succeed dumping current response: {self.response}')
async def get_asns(self) -> set:
return self.asns


@@ -11,16 +11,14 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
url = f"https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns"
url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
responses = response[0]
dct = responses
self.totalhosts = {host["hostname"] for host in dct["passive_dns"]}
self.totalhosts = {host['hostname'] for host in dct['passive_dns']}
# filter out ips that are just called NXDOMAIN
self.totalips = {
ip["address"]
for ip in dct["passive_dns"]
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip["address"])
ip['address'] for ip in dct['passive_dns'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip['address'])
}
async def get_hostnames(self) -> set:


@@ -12,54 +12,41 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.pentest_tools_key()
if self.key is None:
raise MissingKey("PentestTools")
raise MissingKey('PentestTools')
self.total_results: list = []
self.api = f"https://pentest-tools.com/api?key={self.key}"
self.api = f'https://pentest-tools.com/api?key={self.key}'
self.proxy = False
async def poll(self, scan_id):
while True:
time.sleep(3)
# Get the status of our scan
scan_status_data = {"op": "get_scan_status", "scan_id": scan_id}
responses = await AsyncFetcher.post_fetch(
url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy
)
scan_status_data = {'op': 'get_scan_status', 'scan_id': scan_id}
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy)
res_json = ujson.loads(responses.strip())
if res_json["op_status"] == "success":
if (
res_json["scan_status"] != "waiting"
and res_json["scan_status"] != "running"
):
if res_json['op_status'] == 'success':
if res_json['scan_status'] != 'waiting' and res_json['scan_status'] != 'running':
getoutput_data = {
"op": "get_output",
"scan_id": scan_id,
"output_format": "json",
'op': 'get_output',
'scan_id': scan_id,
'output_format': 'json',
}
responses = await AsyncFetcher.post_fetch(
url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy
)
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy)
res_json = ujson.loads(responses.strip("\n"))
res_json = ujson.loads(responses.strip('\n'))
self.total_results = await self.parse_json(res_json)
break
else:
print(
f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}"
)
print(f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}")
break
@staticmethod
async def parse_json(json_results):
status = json_results["op_status"]
if status == "success":
scan_tests = json_results["scan_output"]["output_json"]
output_data = scan_tests[0]["output_data"]
host_to_ip = [
f"{subdomain[0]}:{subdomain[1]}"
for subdomain in output_data
if len(subdomain) > 0
]
status = json_results['op_status']
if status == 'success':
scan_tests = json_results['scan_output']['output_json']
output_data = scan_tests[0]['output_data']
host_to_ip = [f'{subdomain[0]}:{subdomain[1]}' for subdomain in output_data if len(subdomain) > 0]
return host_to_ip
return []
@@ -68,20 +55,18 @@ async def get_hostnames(self) -> list:
async def do_search(self) -> None:
subdomain_payload = {
"op": "start_scan",
"tool_id": 20,
"tool_params": {
"target": f"{self.word}",
"web_details": "off",
"do_smart_search": "off",
'op': 'start_scan',
'tool_id': 20,
'tool_params': {
'target': f'{self.word}',
'web_details': 'off',
'do_smart_search': 'off',
},
}
responses = await AsyncFetcher.post_fetch(
url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy
)
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy)
res_json = ujson.loads(responses.strip())
if res_json["op_status"] == "success":
scan_id = res_json["scan_id"]
if res_json['op_status'] == 'success':
scan_id = res_json['scan_id']
await self.poll(scan_id)
async def process(self, proxy: bool = False) -> None:

View file

@@ -7,21 +7,19 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.projectdiscovery_key()
if self.key is None:
raise MissingKey("ProjectDiscovery")
raise MissingKey('ProjectDiscovery')
self.total_results = None
self.proxy = False
async def do_search(self):
url = f"https://dns.projectdiscovery.io/dns/{self.word}/subdomains"
url = f'https://dns.projectdiscovery.io/dns/{self.word}/subdomains'
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={"User-Agent": Core.get_user_agent(), "Authorization": self.key},
headers={'User-Agent': Core.get_user_agent(), 'Authorization': self.key},
proxy=self.proxy,
)
self.total_results = [
f"{domains}.{self.word}" for domains in response[0]["subdomains"]
]
self.total_results = [f'{domains}.{self.word}' for domains in response[0]['subdomains']]
async def get_hostnames(self):
return self.total_results

View file

@@ -11,33 +11,29 @@ def __init__(self, word) -> None:
async def do_search(self):
try:
headers = {"User-agent": Core.get_user_agent()}
headers = {'User-agent': Core.get_user_agent()}
# TODO see if it's worth adding sameip searches
# f'{self.hostname}/sameip/{self.word}?full=1#result'
urls = [f"https://rapiddns.io/subdomain/{self.word}?full=1#result"]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
urls = [f'https://rapiddns.io/subdomain/{self.word}?full=1#result']
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
if len(responses[0]) <= 1:
return self.total_results
soup = BeautifulSoup(responses[0], "html.parser")
rows = soup.find("table").find("tbody").find_all("tr")
soup = BeautifulSoup(responses[0], 'html.parser')
rows = soup.find('table').find('tbody').find_all('tr')
if rows:
# Validation check
for row in rows:
cells = row.find_all("td")
cells = row.find_all('td')
if len(cells) > 0:
# sanity check
subdomain = str(cells[0].get_text())
if cells[-1].get_text() == "CNAME":
self.total_results.append(f"{subdomain}")
if cells[-1].get_text() == 'CNAME':
self.total_results.append(f'{subdomain}')
else:
self.total_results.append(
f"{subdomain}:{str(cells[1].get_text()).strip()}"
)
self.total_results.append(f'{subdomain}:{str(cells[1].get_text()).strip()}')
self.total_results = list({domain for domain in self.total_results})
except Exception as e:
print(f"An exception has occurred: {str(e)}")
print(f'An exception has occurred: {str(e)}')
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
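The scraping loop above walks each row of the rapiddns results table, keeps the subdomain cell, and appends the second cell only when the record type is not CNAME. A small self-contained sketch of that parsing against a fabricated HTML fragment (not a live rapiddns response):

from bs4 import BeautifulSoup

html = ('<table><tbody>'
        '<tr><td>sub.example.com</td><td>93.184.216.34</td><td>A</td></tr>'
        '<tr><td>alias.example.com</td><td>sub.example.com</td><td>CNAME</td></tr>'
        '</tbody></table>')
soup = BeautifulSoup(html, 'html.parser')
results = []
for row in soup.find('table').find('tbody').find_all('tr'):
    cells = row.find_all('td')
    if cells:
        subdomain = cells[0].get_text()
        # CNAME rows keep only the name; other rows append the target cell, as above
        results.append(subdomain if cells[-1].get_text() == 'CNAME' else f'{subdomain}:{cells[1].get_text().strip()}')
print(results)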

View file

@@ -10,59 +10,50 @@ def __init__(self, word, limit) -> None:
self.word = word
self.key = Core.rocketreach_key()
if self.key is None:
raise MissingKey("RocketReach")
raise MissingKey('RocketReach')
self.hosts: set = set()
self.proxy = False
self.baseurl = "https://rocketreach.co/api/v2/person/search"
self.baseurl = 'https://rocketreach.co/api/v2/person/search'
self.links: set = set()
self.limit = limit
async def do_search(self) -> None:
try:
headers = {
"Api-Key": self.key,
"Content-Type": "application/json",
"User-Agent": Core.get_user_agent(),
'Api-Key': self.key,
'Content-Type': 'application/json',
'User-Agent': Core.get_user_agent(),
}
next_page = 1 # track pagination
for count in range(1, self.limit):
data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}'
result = await AsyncFetcher.post_fetch(
self.baseurl, headers=headers, data=data, json=True
)
if (
"detail" in result.keys()
and "error" in result.keys()
and "Subscribe to a plan to access" in result["detail"]
):
result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']:
# No more results can be fetched
break
if (
"detail" in result.keys()
and "Request was throttled." in result["detail"]
):
if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
# Rate limit has been triggered need to sleep extra
print(
f"RocketReach requests have been throttled; "
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}'
)
break
if "profiles" in dict(result).keys():
if len(result["profiles"]) == 0:
if 'profiles' in dict(result).keys():
if len(result['profiles']) == 0:
break
for profile in result["profiles"]:
if "linkedin_url" in dict(profile).keys():
self.links.add(profile["linkedin_url"])
if "pagination" in dict(result).keys():
next_page = int(result["pagination"]["next"])
if next_page > int(result["pagination"]["total"]):
for profile in result['profiles']:
if 'linkedin_url' in dict(profile).keys():
self.links.add(profile['linkedin_url'])
if 'pagination' in dict(result).keys():
next_page = int(result['pagination']['next'])
if next_page > int(result['pagination']['total']):
break
await asyncio.sleep(get_delay() + 5)
except Exception as e:
print(f"An exception has occurred: {e}")
print(f'An exception has occurred: {e}')
async def get_links(self):
return self.links

View file

@@ -13,55 +13,52 @@ def __init__(self, word) -> None:
self.total_hostnames: set = set()
self.key = Core.hunterhow_key()
if self.key is None:
raise MissingKey("hunterhow")
raise MissingKey('hunterhow')
self.proxy = False
async def do_search(self) -> None:
# https://hunter.how/search-api
query = f'domain.suffix="{self.word}"'
# second_query = f'domain="{self.word}"'
encoded_query = base64.urlsafe_b64encode(query.encode("utf-8")).decode("ascii")
encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii')
page = 1
page_size = 100 # can be either: 10,20,50,100)
# The interval between the start time and the end time cannot exceed one year
# Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice
today = datetime.today()
one_year_ago = today - relativedelta(days=364)
start_time = one_year_ago.strftime("%Y-%m-%d")
end_time = today.strftime("%Y-%m-%d")
start_time = one_year_ago.strftime('%Y-%m-%d')
end_time = today.strftime('%Y-%m-%d')
# two_years_ago = one_year_ago - relativedelta(days=364)
# start_time = two_years_ago.strftime('%Y-%m-%d')
# end_time = one_year_ago.strftime('%Y-%m-%d')
url = (
"https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s"
% (
# self.key, encoded_query, page, page_size, start_time, end_time
self.key,
encoded_query,
page,
page_size,
start_time,
end_time,
)
url = 'https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s' % (
# self.key, encoded_query, page, page_size, start_time, end_time
self.key,
encoded_query,
page,
page_size,
start_time,
end_time,
)
# print(f'Sending url: {url}')
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
proxy=self.proxy,
)
dct = response[0]
# print(f'json response: ')
# print(dct)
if "code" in dct.keys():
if dct["code"] == 40001:
if 'code' in dct.keys():
if dct['code'] == 40001:
print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}')
return
# total = dct['data']['total']
# TODO determine if total is ever 100 how to get more subdomains?
for sub in dct["data"]["list"]:
self.total_hostnames.add(sub["domain"])
for sub in dct['data']['list']:
self.total_hostnames.add(sub['domain'])
async def get_hostnames(self) -> set:
return self.total_hostnames
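The hunter.how request above URL-safe base64-encodes its search query and restricts the time window to just under a year. A minimal sketch of that encoding and date handling in isolation; example.com is a placeholder, and python-dateutil (already used by the module for relativedelta) is assumed to be installed:

import base64
from datetime import datetime
from dateutil.relativedelta import relativedelta

word = 'example.com'
query = f'domain.suffix="{word}"'
encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii')
# 364 days keeps the interval inside the API's one-year limit
end_time = datetime.today()
start_time = end_time - relativedelta(days=364)
print(encoded_query, start_time.strftime('%Y-%m-%d'), end_time.strftime('%Y-%m-%d'))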

View file

@@ -10,41 +10,33 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.security_trails_key()
if self.key is None:
raise MissingKey("Securitytrail")
self.results = ""
self.totalresults = ""
self.api = "https://api.securitytrails.com/v1/"
raise MissingKey('Securitytrail')
self.results = ''
self.totalresults = ''
self.api = 'https://api.securitytrails.com/v1/'
self.info: tuple[set, set] = (set(), set())
self.proxy = False
async def authenticate(self) -> None:
# Method to authenticate API key before sending requests.
headers = {"APIKEY": self.key}
url = f"{self.api}ping"
auth_responses = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
headers = {'APIKEY': self.key}
url = f'{self.api}ping'
auth_responses = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
auth_responses = auth_responses[0]
if "False" in auth_responses or "Invalid authentication" in auth_responses:
print("\tKey could not be authenticated exiting program.")
if 'False' in auth_responses or 'Invalid authentication' in auth_responses:
print('\tKey could not be authenticated exiting program.')
await asyncio.sleep(5)
async def do_search(self) -> None:
# https://api.securitytrails.com/v1/domain/domain.com
url = f"{self.api}domain/{self.word}"
headers = {"APIKEY": self.key}
response = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
await asyncio.sleep(
5
) # Not random delay because 2 seconds is required due to rate limit.
url = f'{self.api}domain/{self.word}'
headers = {'APIKEY': self.key}
response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
await asyncio.sleep(5) # Not random delay because 2 seconds is required due to rate limit.
self.results = response[0]
self.totalresults += self.results
url += "/subdomains" # Get subdomains now.
subdomain_response = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
url += '/subdomains' # Get subdomains now.
subdomain_response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
await asyncio.sleep(5)
self.results = subdomain_response[0]
self.totalresults += self.results
@@ -56,7 +48,7 @@ async def process(self, proxy: bool = False) -> None:
parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults)
self.info = await parser.parse_text()
# Create parser and set self.info to tuple returned from parsing text.
print("\tDone Searching Results")
print('\tDone Searching Results')
async def get_ips(self) -> set:
return self.info[0]

View file

@@ -10,7 +10,7 @@ class SearchShodan:
def __init__(self) -> None:
self.key = Core.shodan_key()
if self.key is None:
raise MissingKey("Shodan")
raise MissingKey('Shodan')
self.api = Shodan(self.key)
self.hostdatarow: list = []
self.tracker: OrderedDict = OrderedDict()
@@ -19,81 +19,81 @@ async def search_ip(self, ip) -> OrderedDict:
try:
ipaddress = ip
results = self.api.host(ipaddress)
asn = ""
asn = ''
domains: list = list()
hostnames: list = list()
ip_str = ""
isp = ""
org = ""
ip_str = ''
isp = ''
org = ''
ports: list = list()
title = ""
server = ""
product = ""
title = ''
server = ''
product = ''
technologies: list = list()
data_first_dict = dict(results["data"][0])
data_first_dict = dict(results['data'][0])
if "ip_str" in data_first_dict.keys():
ip_str += data_first_dict["ip_str"]
if 'ip_str' in data_first_dict.keys():
ip_str += data_first_dict['ip_str']
if "http" in data_first_dict.keys():
http_results_dict = dict(data_first_dict["http"])
if "title" in http_results_dict.keys():
title_val = str(http_results_dict["title"]).strip()
if title_val != "None":
if 'http' in data_first_dict.keys():
http_results_dict = dict(data_first_dict['http'])
if 'title' in http_results_dict.keys():
title_val = str(http_results_dict['title']).strip()
if title_val != 'None':
title += title_val
if "components" in http_results_dict.keys():
for key in http_results_dict["components"].keys():
if 'components' in http_results_dict.keys():
for key in http_results_dict['components'].keys():
technologies.append(key)
if "server" in http_results_dict.keys():
server_val = str(http_results_dict["server"]).strip()
if server_val != "None":
if 'server' in http_results_dict.keys():
server_val = str(http_results_dict['server']).strip()
if server_val != 'None':
server += server_val
for key, value in results.items():
if key == "asn":
if key == 'asn':
asn += value
if key == "domains":
if key == 'domains':
value = list(value)
value.sort()
domains.extend(value)
if key == "hostnames":
if key == 'hostnames':
value = [host.strip() for host in list(value)]
value.sort()
hostnames.extend(value)
if key == "isp":
if key == 'isp':
isp += value
if key == "org":
if key == 'org':
org += str(value)
if key == "ports":
if key == 'ports':
value = list(value)
value.sort()
ports.extend(value)
if key == "product":
if key == 'product':
product += value
technologies = list(set(technologies))
self.tracker[ip] = {
"asn": asn.strip(),
"domains": domains,
"hostnames": hostnames,
"ip_str": ip_str.strip(),
"isp": isp.strip(),
"org": org.strip(),
"ports": ports,
"product": product.strip(),
"server": server.strip(),
"technologies": technologies,
"title": title.strip(),
'asn': asn.strip(),
'domains': domains,
'hostnames': hostnames,
'ip_str': ip_str.strip(),
'isp': isp.strip(),
'org': org.strip(),
'ports': ports,
'product': product.strip(),
'server': server.strip(),
'technologies': technologies,
'title': title.strip(),
}
return self.tracker
except exception.APIError:
print(f"{ip}: Not in Shodan")
self.tracker[ip] = "Not in Shodan"
print(f'{ip}: Not in Shodan')
self.tracker[ip] = 'Not in Shodan'
except Exception as e:
# print(f'Error occurred in the Shodan IP search module: {e}')
self.tracker[ip] = f"Error occurred in the Shodan IP search module: {e}"
self.tracker[ip] = f'Error occurred in the Shodan IP search module: {e}'
finally:
return self.tracker
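The class above is a thin wrapper around the official shodan client; a hedged sketch of the underlying lookup it builds on, using a placeholder key and IP. Field availability varies per host, so the sketch reads fields with .get():

import shodan

api = shodan.Shodan('YOUR_API_KEY')  # placeholder key, not a real credential
try:
    host = api.host('8.8.8.8')  # placeholder IP for illustration
    # search_ip() above reads these same top-level fields from the response dict
    print(host.get('asn'), host.get('org'), sorted(host.get('ports', [])))
except shodan.exception.APIError as err:
    print(f'Not in Shodan or API error: {err}')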

View file

@@ -10,7 +10,7 @@ class SearchSitedossier:
def __init__(self, word):
self.word = word
self.totalhosts = set()
self.server = "www.sitedossier.com"
self.server = 'www.sitedossier.com'
self.proxy = False
async def do_search(self):
@@ -18,92 +18,75 @@ async def do_search(self):
# This site seems to yield a lot of results but is a bit annoying to scrape
# Hence the need for delays after each request to get the most results
# Feel free to tweak the delays as needed
url = f"http://{self.server}/parentdomain/{self.word}"
headers = {"User-Agent": Core.get_user_agent()}
response = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
url = f'http://{self.server}/parentdomain/{self.word}'
headers = {'User-Agent': Core.get_user_agent()}
response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
base_response = response[0]
soup = BeautifulSoup(base_response, "html.parser")
soup = BeautifulSoup(base_response, 'html.parser')
# iter_counter = 1
# iterations_needed = total_number // 100
# iterations_needed += 1
flagged_counter = 0
stop_conditions = ["End of list.", "No data currently available."]
stop_conditions = ['End of list.', 'No data currently available.']
bot_string = (
"Our web servers have detected unusual or excessive requests "
'Our web servers have detected unusual or excessive requests '
'from your computer or network. Please enter the unique "word"'
" below to confirm that you are a human interactively using this site."
' below to confirm that you are a human interactively using this site.'
)
if (
stop_conditions[0] not in base_response
and stop_conditions[1] not in base_response
stop_conditions[0] not in base_response and stop_conditions[1] not in base_response
) and bot_string not in base_response:
total_number = soup.find("i")
total_number = int(
total_number.text.strip().split(" ")[-1].replace(",", "")
)
hrefs = soup.find_all("a", href=True)
total_number = soup.find('i')
total_number = int(total_number.text.strip().split(' ')[-1].replace(',', ''))
hrefs = soup.find_all('a', href=True)
for a in hrefs:
unparsed = a["href"]
if "/site/" in unparsed:
subdomain = str(unparsed.split("/")[-1]).lower()
unparsed = a['href']
if '/site/' in unparsed:
subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay())
for i in range(101, total_number, 100):
headers = {"User-Agent": Core.get_user_agent()}
iter_url = f"http://{self.server}/parentdomain/{self.word}/{i}"
print(f"My current iter_url: {iter_url}")
response = await AsyncFetcher.fetch_all(
[iter_url], headers=headers, proxy=self.proxy
)
headers = {'User-Agent': Core.get_user_agent()}
iter_url = f'http://{self.server}/parentdomain/{self.word}/{i}'
print(f'My current iter_url: {iter_url}')
response = await AsyncFetcher.fetch_all([iter_url], headers=headers, proxy=self.proxy)
response = response[0]
if (
stop_conditions[0] in response
or stop_conditions[1] in response
or flagged_counter >= 3
):
if stop_conditions[0] in response or stop_conditions[1] in response or flagged_counter >= 3:
break
if bot_string in response:
new_sleep_time = get_delay() * 30
print(
f"Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds"
)
print(f'Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds')
flagged_counter += 1
await asyncio.sleep(new_sleep_time)
response = await AsyncFetcher.fetch_all(
[iter_url],
headers={"User-Agent": Core.get_user_agent()},
headers={'User-Agent': Core.get_user_agent()},
proxy=self.proxy,
)
response = response[0]
if bot_string in response:
new_sleep_time = get_delay() * 30 * get_delay()
print(
f"Still triggering a captcha, sleeping longer for: {new_sleep_time}"
f" and skipping this batch: {iter_url}"
f'Still triggering a captcha, sleeping longer for: {new_sleep_time}'
f' and skipping this batch: {iter_url}'
)
await asyncio.sleep(new_sleep_time)
flagged_counter += 1
if flagged_counter >= 3:
break
soup = BeautifulSoup(response, "html.parser")
hrefs = soup.find_all("a", href=True)
soup = BeautifulSoup(response, 'html.parser')
hrefs = soup.find_all('a', href=True)
for a in hrefs:
unparsed = a["href"]
if "/site/" in unparsed:
subdomain = str(unparsed.split("/")[-1]).lower()
unparsed = a['href']
if '/site/' in unparsed:
subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay())
print(f"In total found: {len(self.totalhosts)}")
print(f'In total found: {len(self.totalhosts)}')
print(self.totalhosts)
else:
print(
"Sitedossier module has triggered a captcha on first iteration, no results can be found."
)
print(
"Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module"
)
print('Sitedossier module has triggered a captcha on first iteration, no results can be found.')
print('Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module')
async def get_hostnames(self):
return self.totalhosts

View file

@@ -5,23 +5,18 @@ class SubdomainCenter:
def __init__(self, word):
self.word = word
self.results = set()
self.server = "https://api.subdomain.center/?domain="
self.server = 'https://api.subdomain.center/?domain='
self.proxy = False
async def do_search(self):
headers = {"User-Agent": Core.get_user_agent()}
headers = {'User-Agent': Core.get_user_agent()}
try:
current_url = f"{self.server}{self.word}"
resp = await AsyncFetcher.fetch_all(
[current_url], headers=headers, proxy=self.proxy, json=True
)
current_url = f'{self.server}{self.word}'
resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy, json=True)
self.results = resp[0]
self.results = {
sub[4:] if sub[:4] == "www." and sub[4:] else sub
for sub in self.results
}
self.results = {sub[4:] if sub[:4] == 'www.' and sub[4:] else sub for sub in self.results}
except Exception as e:
print(f"An exception has occurred in SubdomainCenter on : {e}")
print(f'An exception has occurred in SubdomainCenter on : {e}')
async def get_hostnames(self):
return self.results

View file

@@ -14,24 +14,20 @@ def __init__(self, word) -> None:
self.total_results: set = set()
self.proxy = False
# TODO add api support
self.server = "https://subdomainfinder.c99.nl/"
self.totalresults = ""
self.server = 'https://subdomainfinder.c99.nl/'
self.totalresults = ''
async def do_search(self) -> None:
# Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962
headers = {"User-Agent": Core.get_user_agent()}
resp = await AsyncFetcher.fetch_all(
[self.server], headers=headers, proxy=self.proxy
)
headers = {'User-Agent': Core.get_user_agent()}
resp = await AsyncFetcher.fetch_all([self.server], headers=headers, proxy=self.proxy)
data = await self.get_csrf_params(resp[0])
data["scan_subdomains"] = ""
data["domain"] = self.word
data["privatequery"] = "on"
data['scan_subdomains'] = ''
data['domain'] = self.word
data['privatequery'] = 'on'
await asyncio.sleep(get_delay())
second_resp = await AsyncFetcher.post_fetch(
self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data)
)
second_resp = await AsyncFetcher.post_fetch(self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data))
# print(second_resp)
self.totalresults += second_resp
@@ -55,10 +51,10 @@ async def process(self, proxy: bool = False) -> None:
@staticmethod
async def get_csrf_params(data):
csrf_params = {}
html = BeautifulSoup(data, "html.parser").find("div", {"class": "input-group"})
for c in html.find_all("input"):
html = BeautifulSoup(data, 'html.parser').find('div', {'class': 'input-group'})
for c in html.find_all('input'):
try:
csrf_params[c.get("name")] = c.get("value")
csrf_params[c.get('name')] = c.get('value')
except Exception:
continue

View file

@@ -18,70 +18,59 @@ def __init__(self, hosts) -> None:
async def populate_fingerprints(self):
# Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints
populate_url = "https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json"
headers = {"User-Agent": Core.get_user_agent()}
populate_url = 'https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json'
headers = {'User-Agent': Core.get_user_agent()}
response = await AsyncFetcher.fetch_all([populate_url], headers=headers)
try:
resp = response[0]
unparsed_json = ujson.loads(resp)
for unparsed_fingerprint in unparsed_json:
if unparsed_fingerprint["service"] in ["Smugsmug"]:
if unparsed_fingerprint['service'] in ['Smugsmug']:
# Subdomain must be in format domain.smugsmug.com
# This will never happen as subdomains are parsed and filtered to be in format of *.word.com
continue
if (
unparsed_fingerprint["status"] == "Vulnerable"
or unparsed_fingerprint["status"] == "Edge case"
):
self.fingerprints[unparsed_fingerprint["fingerprint"]] = (
unparsed_fingerprint["service"]
)
if unparsed_fingerprint['status'] == 'Vulnerable' or unparsed_fingerprint['status'] == 'Edge case':
self.fingerprints[unparsed_fingerprint['fingerprint']] = unparsed_fingerprint['service']
except Exception as e:
print(
f"An exception has occurred populating takeover fingerprints: {e}, defaulting to static list"
)
print(f'An exception has occurred populating takeover fingerprints: {e}, defaulting to static list')
self.fingerprints = {
"'Trying to access your account?'": "Campaign Monitor",
"404 Not Found": "Fly.io",
"404 error unknown site!": "Pantheon",
"Do you want to register *.wordpress.com?": "Wordpress",
"Domain uses DO name serves with no records in DO.": "Digital Ocean",
"It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": "LaunchRock",
"No Site For Domain": "Kinsta",
"No settings were found for this company:": "Help Scout",
"Project doesnt exist... yet!": "Readme.io",
"Repository not found": "Bitbucket",
"The feed has not been found.": "Feedpress",
"No such app": "Heroku",
"The specified bucket does not exist": "AWS/S3",
"The thing you were looking for is no longer here, or never was": "Ghost",
"There isn't a Github Pages site here.": "Github",
"This UserVoice subdomain is currently available!": "UserVoice",
"Uh oh. That page doesn't exist.": "Intercom",
"We could not find what you're looking for.": "Help Juice",
"Whatever you were looking for doesn't currently exist at this address": "Tumblr",
"is not a registered InCloud YouTrack": "JetBrains",
"page not found": "Uptimerobot",
"project not found": "Surge.sh",
"'Trying to access your account?'": 'Campaign Monitor',
'404 Not Found': 'Fly.io',
'404 error unknown site!': 'Pantheon',
'Do you want to register *.wordpress.com?': 'Wordpress',
'Domain uses DO name serves with no records in DO.': 'Digital Ocean',
"It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": 'LaunchRock',
'No Site For Domain': 'Kinsta',
'No settings were found for this company:': 'Help Scout',
'Project doesnt exist... yet!': 'Readme.io',
'Repository not found': 'Bitbucket',
'The feed has not been found.': 'Feedpress',
'No such app': 'Heroku',
'The specified bucket does not exist': 'AWS/S3',
'The thing you were looking for is no longer here, or never was': 'Ghost',
"There isn't a Github Pages site here.": 'Github',
'This UserVoice subdomain is currently available!': 'UserVoice',
"Uh oh. That page doesn't exist.": 'Intercom',
"We could not find what you're looking for.": 'Help Juice',
"Whatever you were looking for doesn't currently exist at this address": 'Tumblr',
'is not a registered InCloud YouTrack': 'JetBrains',
'page not found': 'Uptimerobot',
'project not found': 'Surge.sh',
}
async def check(self, url, resp) -> None:
# Simple function that takes response and checks if any fingerprints exist
# If a fingerprint exists figures out which one and prints it out
regex = re.compile(
"(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))"
)
regex = re.compile('(?=(' + '|'.join(map(re.escape, list(self.fingerprints.keys()))) + '))')
# Sanitize fingerprints
matches = re.findall(regex, resp)
matches = list(set(matches))
for match in matches:
print(f"\t\033[91m Takeover detected: {url}\033[1;32;40m")
print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m')
if match in self.fingerprints.keys():
# Validation check as to not error out
service = self.fingerprints[match]
print(
f"\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m"
)
print(f'\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m')
self.results[url].append({match: service})
async def do_take(self) -> None:
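The lookahead pattern built in check() above lets one findall() pass report every known fingerprint present in a response body, even when candidate strings overlap. A small self-contained illustration using two entries from the static fingerprint list; the response body is fabricated:

import re

fingerprints = {'No such app': 'Heroku', 'Repository not found': 'Bitbucket'}
regex = re.compile('(?=(' + '|'.join(map(re.escape, fingerprints)) + '))')
body = 'error page: No such app ... later in the body: Repository not found'
for match in set(re.findall(regex, body)):
    # Same validation-style lookup as check() before reporting the service
    print(f'Takeover fingerprint {match!r} -> {fingerprints[match]}')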
@@ -89,13 +78,11 @@ async def do_take(self) -> None:
if len(self.hosts) > 0:
# Returns a list of tuples in this format: (url, response)
# Filter out responses whose responses are empty strings (indicates errored)
https_hosts = [f"https://{host}" for host in self.hosts]
http_hosts = [f"http://{host}" for host in self.hosts]
https_hosts = [f'https://{host}' for host in self.hosts]
http_hosts = [f'http://{host}' for host in self.hosts]
all_hosts = https_hosts + http_hosts
shuffle(all_hosts)
resps: list = await AsyncFetcher.fetch_all(
all_hosts, takeover=True, proxy=self.proxy
)
resps: list = await AsyncFetcher.fetch_all(all_hosts, takeover=True, proxy=self.proxy)
for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1):
await self.check(url, resp)
else:

View file

@@ -9,15 +9,13 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5"
url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts = {host for host in response[0]["results"]}
second_url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2"
secondresp = await AsyncFetcher.fetch_all(
[second_url], json=True, proxy=self.proxy
)
self.totalhosts = {host for host in response[0]['results']}
second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy)
try:
self.totalips = {resp["ip"] for resp in secondresp[0]["results"]}
self.totalips = {resp['ip'] for resp in secondresp[0]['results']}
except TypeError:
pass

View file

@@ -12,12 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start
self.key = Core.tomba_key()
if self.key[0] is None or self.key[1] is None:
raise MissingKey("Tomba Key and/or Secret")
self.total_results = ""
raise MissingKey('Tomba Key and/or Secret')
self.total_results = ''
self.counter = start
self.database = (
f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10"
)
self.database = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10'
self.proxy = False
self.hostnames: list = []
self.emails: list = []
@@ -26,49 +24,38 @@ async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free
is_free = True
headers = {
"User-Agent": Core.get_user_agent(),
"X-Tomba-Key": self.key[0],
"X-Tomba-Secret": self.key[1],
'User-Agent': Core.get_user_agent(),
'X-Tomba-Key': self.key[0],
'X-Tomba-Secret': self.key[1],
}
acc_info_url = "https://api.tomba.io/v1/me"
response = await AsyncFetcher.fetch_all(
[acc_info_url], headers=headers, json=True
)
acc_info_url = 'https://api.tomba.io/v1/me'
response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
is_free = (
is_free
if "name" in response[0]["data"]["pricing"].keys()
and response[0]["data"]["pricing"]["name"].lower() == "free"
if 'name' in response[0]['data']['pricing'].keys() and response[0]['data']['pricing']['name'].lower() == 'free'
else False
)
# Extract the total number of requests that are available for an account
total_requests_avail = (
response[0]["data"]["requests"]["domains"]["available"]
- response[0]["data"]["requests"]["domains"]["used"]
response[0]['data']['requests']['domains']['available'] - response[0]['data']['requests']['domains']['used']
)
if is_free:
response = await AsyncFetcher.fetch_all(
[self.database], headers=headers, proxy=self.proxy, json=True
)
response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else:
# Determine the total number of emails that are available
# As the most emails you can get within one query are 100
# This is only done where paid accounts are in play
tomba_counter = f"https://api.tomba.io/v1/email-count?domain={self.word}"
response = await AsyncFetcher.fetch_all(
[tomba_counter], headers=headers, proxy=self.proxy, json=True
)
total_number_reqs = response[0]["data"]["total"] // 100
tomba_counter = f'https://api.tomba.io/v1/email-count?domain={self.word}'
response = await AsyncFetcher.fetch_all([tomba_counter], headers=headers, proxy=self.proxy, json=True)
total_number_reqs = response[0]['data']['total'] // 100
# Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs:
print('WARNING: The account does not have enough requests to gather all the emails.')
print(
"WARNING: The account does not have enough requests to gather all the emails."
)
print(
f"Total requests available: {total_requests_avail}, total requests "
f"needed to be made: {total_number_reqs}"
f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
)
print(
'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.'
@@ -79,24 +66,22 @@ async def do_search(self) -> None:
# increments of max number with page determining where to start
# See docs for more details: https://developer.tomba.io/#domain-search
for page in range(0, total_number_reqs + 1):
req_url = f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}"
response = await AsyncFetcher.fetch_all(
[req_url], headers=headers, proxy=self.proxy, json=True
)
req_url = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}'
response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1)
async def parse_resp(self, json_resp):
emails = list(sorted({email["email"] for email in json_resp["data"]["emails"]}))
emails = list(sorted({email['email'] for email in json_resp['data']['emails']}))
domains = list(
sorted(
{
source["website_url"]
for email in json_resp["data"]["emails"]
for source in email["sources"]
if self.word in source["website_url"]
source['website_url']
for email in json_resp['data']['emails']
for source in email['sources']
if self.word in source['website_url']
}
)
)

View file

@@ -11,25 +11,17 @@ def __init__(self, word) -> None:
self.proxy = False
async def do_search(self) -> None:
url = f"https://urlscan.io/api/v1/search/?q=domain:{self.word}"
url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
resp = response[0]
self.totalhosts = {f"{page['page']['domain']}" for page in resp["results"]}
self.totalips = {
f"{page['page']['ip']}"
for page in resp["results"]
if "ip" in page["page"].keys()
}
self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
self.interestingurls = {
f"{page['page']['url']}"
for page in resp["results"]
if self.word in page["page"]["url"] and "url" in page["page"].keys()
}
self.totalasns = {
f"{page['page']['asn']}"
for page in resp["results"]
if "asn" in page["page"].keys()
for page in resp['results']
if self.word in page['page']['url'] and 'url' in page['page'].keys()
}
self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
async def get_hostnames(self) -> set:
return self.totalhosts
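Each comprehension above pulls a single field out of every results -> page entry returned by urlscan. A tiny sketch against a fabricated response with the same shape, showing what lands in each set:

word = 'example.com'
resp = {'results': [{'page': {'domain': 'sub.example.com', 'ip': '93.184.216.34', 'url': 'https://sub.example.com/login', 'asn': 'AS15133'}}]}
hosts = {page['page']['domain'] for page in resp['results']}
ips = {page['page']['ip'] for page in resp['results'] if 'ip' in page['page'].keys()}
urls = {page['page']['url'] for page in resp['results'] if word in page['page']['url'] and 'url' in page['page'].keys()}
asns = {page['page']['asn'] for page in resp['results'] if 'asn' in page['page'].keys()}
print(hosts, ips, urls, asns)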

View file

@@ -8,7 +8,7 @@ class SearchVirustotal:
def __init__(self, word) -> None:
self.key = Core.virustotal_key()
if self.key is None:
raise MissingKey("virustotal")
raise MissingKey('virustotal')
self.word = word
self.proxy = False
self.hostnames: list = []
@@ -18,14 +18,12 @@ async def do_search(self) -> None:
# based on: https://developers.virustotal.com/reference/domains-relationships
# base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
headers = {
"User-Agent": Core.get_user_agent(),
"Accept": "application/json",
"x-apikey": self.key,
'User-Agent': Core.get_user_agent(),
'Accept': 'application/json',
'x-apikey': self.key,
}
base_url = (
f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
)
cursor = ""
base_url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
cursor = ''
count = 0
fail_counter = 0
counter = 0
@@ -37,42 +35,29 @@ async def do_search(self) -> None:
# TODO add timer logic if proven to be needed
# in the meantime sleeping 16 seconds should eliminate hitting the rate limit
# in case rate limit is hit, fail counter exists and sleep for 65 seconds
send_url = (
base_url + "&cursor=" + cursor
if cursor != "" and len(cursor) > 2
else base_url
)
responses = await AsyncFetcher.fetch_all(
[send_url], headers=headers, proxy=self.proxy, json=True
)
send_url = base_url + '&cursor=' + cursor if cursor != '' and len(cursor) > 2 else base_url
responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
jdata = responses[0]
if "data" not in jdata.keys():
if 'data' not in jdata.keys():
await asyncio.sleep(60 + 5)
fail_counter += 1
if "meta" in jdata.keys():
cursor = (
jdata["meta"]["cursor"] if "cursor" in jdata["meta"].keys() else ""
)
if len(cursor) == 0 and "data" in jdata.keys():
if 'meta' in jdata.keys():
cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
if len(cursor) == 0 and 'data' in jdata.keys():
# if cursor no longer is within the meta field have hit last entry
breakcon = True
count += jdata["meta"]["count"]
count += jdata['meta']['count']
if count == 0 or fail_counter >= 2:
break
if "data" in jdata.keys():
data = jdata["data"]
if 'data' in jdata.keys():
data = jdata['data']
self.hostnames.extend(await self.parse_hostnames(data, self.word))
counter += 1
await asyncio.sleep(16)
self.hostnames = list(sorted(set(self.hostnames)))
# verify domains such as x.x.com.multicdn.x.com are parsed properly
self.hostnames = [
host
for host in self.hostnames
if (
(len(host.split(".")) >= 3)
and host.split(".")[-2] == self.word.split(".")[-2]
)
host for host in self.hostnames if ((len(host.split('.')) >= 3) and host.split('.')[-2] == self.word.split('.')[-2])
]
async def get_hostnames(self) -> list:
@@ -82,22 +67,20 @@ async def get_hostnames(self) -> list:
async def parse_hostnames(data, word):
total_subdomains = set()
for attribute in data:
total_subdomains.add(attribute["id"].replace('"', "").replace("www.", ""))
attributes = attribute["attributes"]
total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
attributes = attribute['attributes']
total_subdomains.update(
{
value["value"].replace('"', "").replace("www.", "")
for value in attributes["last_dns_records"]
if word in value["value"]
value['value'].replace('"', '').replace('www.', '')
for value in attributes['last_dns_records']
if word in value['value']
}
)
if "last_https_certificate" in attributes.keys():
if 'last_https_certificate' in attributes.keys():
total_subdomains.update(
{
value.replace('"', "").replace("www.", "")
for value in attributes["last_https_certificate"]["extensions"][
"subject_alternative_name"
]
value.replace('"', '').replace('www.', '')
for value in attributes['last_https_certificate']['extensions']['subject_alternative_name']
if word in value
}
)
@@ -108,9 +91,7 @@ async def parse_hostnames(data, word):
total_subdomains = [
x
for x in total_subdomains
if "edgekey.net" not in str(x)
and "akadns.net" not in str(x)
and "include:_spf" not in str(x)
if 'edgekey.net' not in str(x) and 'akadns.net' not in str(x) and 'include:_spf' not in str(x)
]
total_subdomains.sort()
return total_subdomains

View file

@@ -5,22 +5,16 @@
class SearchYahoo:
def __init__(self, word, limit) -> None:
self.word = word
self.total_results = ""
self.server = "search.yahoo.com"
self.total_results = ''
self.server = 'search.yahoo.com'
self.limit = limit
self.proxy = False
async def do_search(self) -> None:
base_url = f"https://{self.server}/search?p=%40{self.word}&b=xx&pz=10"
headers = {"Host": self.server, "User-agent": Core.get_user_agent()}
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 10)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10'
headers = {'Host': self.server, 'User-agent': Core.get_user_agent()}
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for response in responses:
self.total_results += response
@@ -35,8 +29,8 @@ async def get_emails(self):
# strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld
for email in toparse_emails:
email = str(email)
if "-" in email and email[0].isdigit() and email.index("-") <= 9:
while email[0] == "-" or email[0].isdigit():
if '-' in email and email[0].isdigit() and email.index('-') <= 9:
while email[0] == '-' or email[0].isdigit():
email = email[1:]
emails.add(email)
return list(emails)

View file

@@ -16,8 +16,8 @@ def __init__(self, word, limit) -> None:
# If you wish to extract as many subdomains as possible visit the fetch_subdomains
# To see how
if self.key is None:
raise MissingKey("zoomeye")
self.baseurl = "https://api.zoomeye.org/host/search"
raise MissingKey('zoomeye')
self.baseurl = 'https://api.zoomeye.org/host/search'
self.proxy = False
self.totalasns: list = list()
self.totalhosts: list = list()
@@ -58,40 +58,38 @@ def __init__(self, word, limit) -> None:
async def fetch_subdomains(self) -> None:
# Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
subdomain_search_endpoint = (
f"https://api.zoomeye.org/domain/search?q={self.word}&type=0&"
)
subdomain_search_endpoint = f'https://api.zoomeye.org/domain/search?q={self.word}&type=0&'
response = await AsyncFetcher.fetch_all(
[subdomain_search_endpoint + "page=1"],
[subdomain_search_endpoint + 'page=1'],
json=True,
proxy=self.proxy,
headers=headers,
)
# Make initial request to determine total number of subdomains
resp = response[0]
if resp["status"] != 200:
if resp['status'] != 200:
return
total = resp["total"]
total = resp['total']
# max number of results per request seems to be 30
# NOTE: If you wish to get as many subdomains as possible
# Change the line below to:
# self.limit = (total // 30) + 1
self.limit = self.limit if total > self.limit else (total // 30) + 1
self.totalhosts.extend([item["name"] for item in resp["list"]])
self.totalhosts.extend([item['name'] for item in resp['list']])
for i in range(2, self.limit):
response = await AsyncFetcher.fetch_all(
[subdomain_search_endpoint + f"page={i}"],
[subdomain_search_endpoint + f'page={i}'],
json=True,
proxy=self.proxy,
headers=headers,
)
resp = response[0]
if resp["status"] != 200:
if resp['status'] != 200:
return
found_subdomains = [item["name"] for item in resp["list"]]
found_subdomains = [item['name'] for item in resp['list']]
if len(found_subdomains) == 0:
break
self.totalhosts.extend(found_subdomains)
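For the paging arithmetic above (at most 30 subdomains per response), a quick worked example with a fabricated total of 95 results:

total = 95
pages_needed = (total // 30) + 1  # three full pages of 30 plus one partial page -> 4
print(pages_needed)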
@@ -99,19 +97,17 @@ async def fetch_subdomains(self) -> None:
await asyncio.sleep(get_delay() + 1)
async def do_search(self) -> None:
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
# Fetch subdomains first
await self.fetch_subdomains()
params = (
("query", f"site:{self.word}"),
("page", "1"),
)
response = await AsyncFetcher.fetch_all(
[self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params
('query', f'site:{self.word}'),
('page', '1'),
)
response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params)
# The First request determines how many pages there in total
resp = response[0]
total_pages = int(resp["available"])
total_pages = int(resp['available'])
self.limit = self.limit if total_pages > self.limit else total_pages
self.limit = 3 if self.limit == 2 else self.limit
cur_page = 2 if self.limit >= 2 else -1
@@ -121,21 +117,17 @@ async def do_search(self) -> None:
# cur_page = -1
if cur_page == -1:
# No need to do loop just parse and leave
if "matches" in resp.keys():
hostnames, emails, ips, asns, iurls = await self.parse_matches(
resp["matches"]
)
if 'matches' in resp.keys():
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
self.totalasns.extend(asns)
self.interestingurls.extend(iurls)
else:
if "matches" in resp.keys():
if 'matches' in resp.keys():
# Parse out initial results and then continue to loop
hostnames, emails, ips, asns, iurls = await self.parse_matches(
resp["matches"]
)
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
self.totalhosts.extend(hostnames)
self.totalemails.extend(emails)
self.totalips.extend(ips)
@@ -145,8 +137,8 @@ async def do_search(self) -> None:
for num in range(2, self.limit):
# print(f'Currently on page: {num}')
params = (
("query", f"site:{self.word}"),
("page", f"{num}"),
('query', f'site:{self.word}'),
('page', f'{num}'),
)
response = await AsyncFetcher.fetch_all(
[self.baseurl],
@@ -156,22 +148,14 @@ async def do_search(self) -> None:
params=params,
)
resp = response[0]
if "matches" not in resp.keys():
print(f"Your resp: {resp}")
print("Match not found in keys")
if 'matches' not in resp.keys():
print(f'Your resp: {resp}')
print('Match not found in keys')
break
hostnames, emails, ips, asns, iurls = await self.parse_matches(
resp["matches"]
)
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
if (
len(hostnames) == 0
and len(emails) == 0
and len(ips) == 0
and len(asns) == 0
and len(iurls) == 0
):
if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 and len(asns) == 0 and len(iurls) == 0:
nomatches_counter += 1
if nomatches_counter >= 5:
@@ -196,48 +180,42 @@ async def parse_matches(self, matches):
emails = set()
for match in matches:
try:
ips.add(match["ip"])
ips.add(match['ip'])
if "geoinfo" in match.keys():
if 'geoinfo' in match.keys():
asns.add(f"AS{match['geoinfo']['asn']}")
if "rdns_new" in match.keys():
rdns_new = match["rdns_new"]
if 'rdns_new' in match.keys():
rdns_new = match['rdns_new']
if "," in rdns_new:
parts = str(rdns_new).split(",")
if ',' in rdns_new:
parts = str(rdns_new).split(',')
rdns_new = parts[0]
if len(parts) == 2:
hostnames.add(parts[1])
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
else:
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new)
if "rdns" in match.keys():
rdns = match["rdns"]
rdns = rdns[:-1] if rdns[-1] == "." else rdns
if 'rdns' in match.keys():
rdns = match['rdns']
rdns = rdns[:-1] if rdns[-1] == '.' else rdns
hostnames.add(rdns)
if "portinfo" in match.keys():
if 'portinfo' in match.keys():
# re.
temp_emails = set(
await self.parse_emails(match["portinfo"]["banner"])
)
temp_emails = set(await self.parse_emails(match['portinfo']['banner']))
emails.update(temp_emails)
hostnames.update(
set(await self.parse_hostnames(match["portinfo"]["banner"]))
)
hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner'])))
iurls = {
str(iurl.group(1)).replace('"', "")
for iurl in re.finditer(
self.iurl_regex, match["portinfo"]["banner"]
)
str(iurl.group(1)).replace('"', '')
for iurl in re.finditer(self.iurl_regex, match['portinfo']['banner'])
if self.word in str(iurl.group(1))
}
except Exception as e:
print(f"An exception has occurred: {e}")
print(f'An exception has occurred: {e}')
return hostnames, emails, ips, asns, iurls
async def process(self, proxy: bool = False) -> None:

View file

@@ -1 +1 @@
__all__ = ["hostchecker"]
__all__ = ['hostchecker']

View file

@@ -12,36 +12,32 @@
limiter = Limiter(key_func=get_remote_address)
app = FastAPI(
title="Restful Harvest",
description="Rest API for theHarvester powered by FastAPI",
version="0.0.2",
title='Restful Harvest',
description='Rest API for theHarvester powered by FastAPI',
version='0.0.2',
)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore
# This is where we will host files that arise if the user specifies a filename
try:
app.mount(
"/static", StaticFiles(directory="theHarvester/lib/api/static/"), name="static"
)
app.mount('/static', StaticFiles(directory='theHarvester/lib/api/static/'), name='static')
except RuntimeError:
static_path = os.path.expanduser("~/.local/share/theHarvester/static/")
static_path = os.path.expanduser('~/.local/share/theHarvester/static/')
if not os.path.isdir(static_path):
os.makedirs(static_path)
app.mount(
"/static",
'/static',
StaticFiles(directory=static_path),
name="static",
name='static',
)
@app.get("/")
@app.get('/')
async def root(*, user_agent: str = Header(None)) -> Response:
# very basic user agent filtering
if user_agent and (
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
):
response = RedirectResponse(app.url_path_for("bot"))
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
response = RedirectResponse(app.url_path_for('bot'))
return response
return HTMLResponse(
@@ -70,36 +66,34 @@ async def root(*, user_agent: str = Header(None)) -> Response:
)
@app.get("/nicebot")
@app.get('/nicebot')
async def bot() -> dict[str, str]:
# nice bot
string = {"bot": "These are not the droids you are looking for"}
string = {'bot': 'These are not the droids you are looking for'}
return string
@app.get("/sources", response_class=UJSONResponse)
@limiter.limit("5/minute")
@app.get('/sources', response_class=UJSONResponse)
@limiter.limit('5/minute')
async def getsources(request: Request):
# Endpoint for user to query for available sources theHarvester supports
# Rate limit of 5 requests per minute
sources = __main__.Core.get_supportedengines()
return {"sources": sources}
return {'sources': sources}
@app.get("/dnsbrute")
@limiter.limit("5/minute")
@app.get('/dnsbrute')
@limiter.limit('5/minute')
async def dnsbrute(
request: Request,
user_agent: str = Header(None),
domain: str = Query(..., description="Domain to be brute forced"),
domain: str = Query(..., description='Domain to be brute forced'),
) -> Response:
# Endpoint for user to signal to do DNS brute forcing
# Rate limit of 5 requests per minute
# basic user agent filtering
if user_agent and (
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
):
response = RedirectResponse(app.url_path_for("bot"))
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
response = RedirectResponse(app.url_path_for('bot'))
return response
dns_bruteforce = await __main__.start(
argparse.Namespace(
@@ -108,49 +102,45 @@ async def dnsbrute(
dns_server=False,
dns_tld=False,
domain=domain,
filename="",
filename='',
google_dork=False,
limit=500,
proxies=False,
shodan=False,
source=",".join([]),
source=','.join([]),
start=0,
take_over=False,
virtual_host=False,
)
)
return UJSONResponse({"dns_bruteforce": dns_bruteforce})
return UJSONResponse({'dns_bruteforce': dns_bruteforce})
@app.get("/query")
@limiter.limit("2/minute")
@app.get('/query')
@limiter.limit('2/minute')
async def query(
request: Request,
dns_server: str = Query(""),
dns_server: str = Query(''),
user_agent: str = Header(None),
dns_brute: bool = Query(False),
dns_lookup: bool = Query(False),
dns_tld: bool = Query(False),
filename: str = Query(""),
filename: str = Query(''),
google_dork: bool = Query(False),
proxies: bool = Query(False),
shodan: bool = Query(False),
take_over: bool = Query(False),
virtual_host: bool = Query(False),
source: list[str] = Query(
..., description="Data sources to query comma separated with no space"
),
source: list[str] = Query(..., description='Data sources to query comma separated with no space'),
limit: int = Query(500),
start: int = Query(0),
domain: str = Query(..., description="Domain to be harvested"),
domain: str = Query(..., description='Domain to be harvested'),
) -> Response:
# Query function that allows user to query theHarvester rest API
# Rate limit of 2 requests per minute
# basic user agent filtering
if user_agent and (
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
):
response = RedirectResponse(app.url_path_for("bot"))
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
response = RedirectResponse(app.url_path_for('bot'))
return response
try:
(
@@ -175,7 +165,7 @@ async def query(
limit=limit,
proxies=proxies,
shodan=shodan,
source=",".join(source),
source=','.join(source),
start=start,
take_over=take_over,
virtual_host=virtual_host,
@@ -184,18 +174,16 @@ async def query(
return UJSONResponse(
{
"asns": asns,
"interesting_urls": iurls,
"twitter_people": twitter_people_list,
"linkedin_people": linkedin_people_list,
"linkedin_links": linkedin_links,
"trello_urls": aurls,
"ips": aips,
"emails": aemails,
"hosts": ahosts,
'asns': asns,
'interesting_urls': iurls,
'twitter_people': twitter_people_list,
'linkedin_people': linkedin_people_list,
'linkedin_links': linkedin_links,
'trello_urls': aurls,
'ips': aips,
'emails': aemails,
'hosts': ahosts,
}
)
except Exception:
return UJSONResponse(
{"exception": "Please contact the server administrator to check the issue"}
)
return UJSONResponse({'exception': 'Please contact the server administrator to check the issue'})
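The endpoints above pair FastAPI routes with slowapi's Limiter to enforce per-route limits using the same '5/minute'-style limit strings. A stripped-down sketch of that wiring; the /ping route and its limit are illustrative, not part of the project:

from fastapi import FastAPI, Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)
app = FastAPI(title='Rate limit sketch')
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

@app.get('/ping')
@limiter.limit('5/minute')  # same limit-string style as the routes above
async def ping(request: Request) -> dict[str, str]:
    # slowapi needs the Request argument to identify the caller
    return {'status': 'ok'}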

View file

@@ -23,100 +23,94 @@ async def main() -> None:
Just a simple example of how to interact with the rest api
you can easily use requests instead of aiohttp or whatever you best see fit
"""
url = "http://127.0.0.1:5000"
domain = "netflix.com"
query_url = (
f"{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}"
)
url = 'http://127.0.0.1:5000'
domain = 'netflix.com'
query_url = f'{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}'
async with aiohttp.ClientSession() as session:
fetched_json = await fetch_json(session, query_url)
total_asns = fetched_json["asns"]
interesting_urls = fetched_json["interesting_urls"]
twitter_people_list_tracker = fetched_json["twitter_people"]
linkedin_people_list_tracker = fetched_json["linkedin_people"]
linkedin_links_tracker = fetched_json["linkedin_links"]
trello_urls = fetched_json["trello_urls"]
ips = fetched_json["ips"]
emails = fetched_json["emails"]
hosts = fetched_json["hosts"]
total_asns = fetched_json['asns']
interesting_urls = fetched_json['interesting_urls']
twitter_people_list_tracker = fetched_json['twitter_people']
linkedin_people_list_tracker = fetched_json['linkedin_people']
linkedin_links_tracker = fetched_json['linkedin_links']
trello_urls = fetched_json['trello_urls']
ips = fetched_json['ips']
emails = fetched_json['emails']
hosts = fetched_json['hosts']
if len(total_asns) > 0:
print(f"\n[*] ASNS found: {len(total_asns)}")
print("--------------------")
print(f'\n[*] ASNS found: {len(total_asns)}')
print('--------------------')
total_asns = list(sorted(set(total_asns)))
for asn in total_asns:
print(asn)
if len(interesting_urls) > 0:
print(f"\n[*] Interesting Urls found: {len(interesting_urls)}")
print("--------------------")
print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
print('--------------------')
interesting_urls = list(sorted(set(interesting_urls)))
for iurl in interesting_urls:
print(iurl)
if len(twitter_people_list_tracker) == 0:
print("\n[*] No Twitter users found.\n\n")
print('\n[*] No Twitter users found.\n\n')
else:
if len(twitter_people_list_tracker) >= 1:
print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker)))
print("---------------------")
print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker)))
print('---------------------')
twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker)))
for usr in twitter_people_list_tracker:
print(usr)
if len(linkedin_people_list_tracker) == 0:
print("\n[*] No LinkedIn users found.\n\n")
print('\n[*] No LinkedIn users found.\n\n')
else:
if len(linkedin_people_list_tracker) >= 1:
print(
"\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker))
)
print("---------------------")
linkedin_people_list_tracker = list(
sorted(set(linkedin_people_list_tracker))
)
print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker)))
print('---------------------')
linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker)))
for usr in linkedin_people_list_tracker:
print(usr)
if len(linkedin_links_tracker) == 0:
print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}")
print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
print("---------------------")
print('---------------------')
for link in linkedin_links_tracker:
print(link)
length_urls = len(trello_urls)
total = length_urls
print("\n[*] Trello URLs found: " + str(total))
print("--------------------")
print('\n[*] Trello URLs found: ' + str(total))
print('--------------------')
all_urls = list(sorted(set(trello_urls)))
for url in sorted(all_urls):
print(url)
if len(ips) == 0:
print("\n[*] No IPs found.")
print('\n[*] No IPs found.')
else:
print("\n[*] IPs found: " + str(len(ips)))
print("-------------------")
print('\n[*] IPs found: ' + str(len(ips)))
print('-------------------')
# use netaddr as the list may contain ipv4 and ipv6 addresses
ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)])
print("\n".join(map(str, ip_list)))
print('\n'.join(map(str, ip_list)))
if len(emails) == 0:
print("\n[*] No emails found.")
print('\n[*] No emails found.')
else:
print("\n[*] Emails found: " + str(len(emails)))
print("----------------------")
print('\n[*] Emails found: ' + str(len(emails)))
print('----------------------')
all_emails = sorted(list(set(emails)))
print("\n".join(all_emails))
print('\n'.join(all_emails))
if len(hosts) == 0:
print("\n[*] No hosts found.\n\n")
print('\n[*] No hosts found.\n\n')
else:
print("\n[*] Hosts found: " + str(len(hosts)))
print("---------------------")
print("\n".join(hosts))
print('\n[*] Hosts found: ' + str(len(hosts)))
print('---------------------')
print('\n'.join(hosts))
if __name__ == "__main__":
if __name__ == '__main__':
asyncio.run(main())
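The docstring above notes that requests could be used instead of aiohttp; below is a minimal synchronous sketch of the same query under that assumption. It reuses only the endpoint, parameters, and JSON keys shown in this file; nothing else about it comes from the commit.

import requests


def query_sync(domain: str = 'netflix.com', limit: int = 300) -> dict:
    # Same /query endpoint and parameters as the aiohttp example above.
    params = {'limit': limit, 'source': 'bing,baidu,duckduckgo,dogpile', 'domain': domain}
    resp = requests.get('http://127.0.0.1:5000/query', params=params, timeout=600)
    resp.raise_for_status()
    return resp.json()


if __name__ == '__main__':
    data = query_sync()
    print(f"Hosts: {len(data['hosts'])}, emails: {len(data['emails'])}, ASNs: {len(data['asns'])}")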

View file

@@ -19,11 +19,11 @@
if TYPE_CHECKING:
from collections.abc import Sized
DATA_DIR = Path(__file__).parents[1] / "data"
DATA_DIR = Path(__file__).parents[1] / 'data'
CONFIG_DIRS = [
Path("/etc/theHarvester/"),
Path("/usr/local/etc/theHarvester/"),
Path("~/.theHarvester"),
Path('/etc/theHarvester/'),
Path('/usr/local/etc/theHarvester/'),
Path('~/.theHarvester'),
]
@@ -35,7 +35,7 @@ def _read_config(filename: str) -> str:
with contextlib.suppress(FileNotFoundError):
file = path.expanduser() / filename
config = file.read_text()
print(f"Read {filename} from {file}")
print(f'Read {filename} from {file}')
return config
# Fallback to creating default in user's home dir
@@ -43,168 +43,160 @@ def _read_config(filename: str) -> str:
dest = CONFIG_DIRS[-1].expanduser() / filename
dest.parent.mkdir(exist_ok=True)
dest.write_text(default)
print(f"Created default {filename} at {dest}")
print(f'Created default {filename} at {dest}')
return default
@staticmethod
def api_keys() -> dict:
keys = yaml.safe_load(Core._read_config("api-keys.yaml"))
return keys["apikeys"]
keys = yaml.safe_load(Core._read_config('api-keys.yaml'))
return keys['apikeys']
@staticmethod
def bevigil_key() -> str:
return Core.api_keys()["bevigil"]["key"]
return Core.api_keys()['bevigil']['key']
@staticmethod
def binaryedge_key() -> str:
return Core.api_keys()["binaryedge"]["key"]
return Core.api_keys()['binaryedge']['key']
@staticmethod
def bing_key() -> str:
return Core.api_keys()["bing"]["key"]
return Core.api_keys()['bing']['key']
@staticmethod
def bufferoverun_key() -> str:
return Core.api_keys()["bufferoverun"]["key"]
return Core.api_keys()['bufferoverun']['key']
@staticmethod
def censys_key() -> tuple:
return Core.api_keys()["censys"]["id"], Core.api_keys()["censys"]["secret"]
return Core.api_keys()['censys']['id'], Core.api_keys()['censys']['secret']
@staticmethod
def criminalip_key() -> str:
return Core.api_keys()["criminalip"]["key"]
return Core.api_keys()['criminalip']['key']
@staticmethod
def fullhunt_key() -> str:
return Core.api_keys()["fullhunt"]["key"]
return Core.api_keys()['fullhunt']['key']
@staticmethod
def github_key() -> str:
return Core.api_keys()["github"]["key"]
return Core.api_keys()['github']['key']
@staticmethod
def hunter_key() -> str:
return Core.api_keys()["hunter"]["key"]
return Core.api_keys()['hunter']['key']
@staticmethod
def hunterhow_key() -> str:
return Core.api_keys()["hunterhow"]["key"]
return Core.api_keys()['hunterhow']['key']
@staticmethod
def intelx_key() -> str:
return Core.api_keys()["intelx"]["key"]
return Core.api_keys()['intelx']['key']
@staticmethod
def netlas_key() -> str:
return Core.api_keys()["netlas"]["key"]
return Core.api_keys()['netlas']['key']
@staticmethod
def pentest_tools_key() -> str:
return Core.api_keys()["pentestTools"]["key"]
return Core.api_keys()['pentestTools']['key']
@staticmethod
def onyphe_key() -> str:
return Core.api_keys()["onyphe"]["key"]
return Core.api_keys()['onyphe']['key']
@staticmethod
def projectdiscovery_key() -> str:
return Core.api_keys()["projectDiscovery"]["key"]
return Core.api_keys()['projectDiscovery']['key']
@staticmethod
def rocketreach_key() -> str:
return Core.api_keys()["rocketreach"]["key"]
return Core.api_keys()['rocketreach']['key']
@staticmethod
def security_trails_key() -> str:
return Core.api_keys()["securityTrails"]["key"]
return Core.api_keys()['securityTrails']['key']
@staticmethod
def shodan_key() -> str:
return Core.api_keys()["shodan"]["key"]
return Core.api_keys()['shodan']['key']
@staticmethod
def zoomeye_key() -> str:
return Core.api_keys()["zoomeye"]["key"]
return Core.api_keys()['zoomeye']['key']
@staticmethod
def tomba_key() -> tuple[str, str]:
return Core.api_keys()["tomba"]["key"], Core.api_keys()["tomba"]["secret"]
return Core.api_keys()['tomba']['key'], Core.api_keys()['tomba']['secret']
@staticmethod
def virustotal_key() -> str:
return Core.api_keys()["virustotal"]["key"]
return Core.api_keys()['virustotal']['key']
@staticmethod
def proxy_list() -> list:
keys = yaml.safe_load(Core._read_config("proxies.yaml"))
http_list = (
[f"http://{proxy}" for proxy in keys["http"]]
if keys["http"] is not None
else []
)
keys = yaml.safe_load(Core._read_config('proxies.yaml'))
http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
return http_list
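For reference, a minimal sketch of config files that would satisfy the accessors above; the 'apikeys'/'http' layout is inferred from how api_keys() and proxy_list() consume them, and only a few of the supported services are shown.

import yaml

API_KEYS_YAML = """
apikeys:
  bevigil:
    key: ''
  censys:
    id: ''
    secret: ''
  shodan:
    key: ''
"""

PROXIES_YAML = """
http:
  - 127.0.0.1:8080
"""

keys = yaml.safe_load(API_KEYS_YAML)['apikeys']
proxies = [f'http://{proxy}' for proxy in (yaml.safe_load(PROXIES_YAML)['http'] or [])]
print(keys['shodan']['key'], proxies)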
@staticmethod
def banner() -> None:
print("*******************************************************************")
print("* _ _ _ *")
print(r"* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *")
print('*******************************************************************')
print('* _ _ _ *')
print(r'* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *')
print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *")
print(r"* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *")
print(r"* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *")
print("* *")
print(
"* theHarvester {version}{filler}*".format(
version=version(), filler=" " * (51 - len(version()))
)
)
print("* Coded by Christian Martorella *")
print("* Edge-Security Research *")
print("* cmartorella@edge-security.com *")
print("* *")
print("*******************************************************************")
print(r'* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *')
print(r'* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *')
print('* *')
print('* theHarvester {version}{filler}*'.format(version=version(), filler=' ' * (51 - len(version()))))
print('* Coded by Christian Martorella *')
print('* Edge-Security Research *')
print('* cmartorella@edge-security.com *')
print('* *')
print('*******************************************************************')
@staticmethod
def get_supportedengines() -> list[str | Any]:
supportedengines = [
"anubis",
"baidu",
"bevigil",
"binaryedge",
"bing",
"bingapi",
"bufferoverun",
"brave",
"censys",
"certspotter",
"criminalip",
"crtsh",
"dnsdumpster",
"duckduckgo",
"fullhunt",
"github-code",
"hackertarget",
"hunter",
"hunterhow",
"intelx",
"netlas",
"onyphe",
"otx",
"pentesttools",
"projectdiscovery",
"rapiddns",
"rocketreach",
"securityTrails",
"sitedossier",
"subdomaincenter",
"subdomainfinderc99",
"threatminer",
"tomba",
"urlscan",
"virustotal",
"yahoo",
"zoomeye",
'anubis',
'baidu',
'bevigil',
'binaryedge',
'bing',
'bingapi',
'bufferoverun',
'brave',
'censys',
'certspotter',
'criminalip',
'crtsh',
'dnsdumpster',
'duckduckgo',
'fullhunt',
'github-code',
'hackertarget',
'hunter',
'hunterhow',
'intelx',
'netlas',
'onyphe',
'otx',
'pentesttools',
'projectdiscovery',
'rapiddns',
'rocketreach',
'securityTrails',
'sitedossier',
'subdomaincenter',
'subdomainfinderc99',
'threatminer',
'tomba',
'urlscan',
'virustotal',
'yahoo',
'zoomeye',
]
return supportedengines
@@ -214,58 +206,58 @@ def get_user_agent() -> str:
# Lasted updated 7/2/23
# TODO use bs4 to auto parse user agents
user_agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43",
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37",
"Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43',
'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37',
'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
]
return random.choice(user_agents)
@@ -278,129 +270,85 @@ async def post_fetch(
cls,
url,
headers=None,
data: str | dict[str, str] = "",
params: str = "",
data: str | dict[str, str] = '',
params: str = '',
json: bool = False,
proxy: bool = False,
):
if headers is None:
headers = {}
if len(headers) == 0:
headers = {"User-Agent": Core.get_user_agent()}
headers = {'User-Agent': Core.get_user_agent()}
timeout = aiohttp.ClientTimeout(total=720)
# By default, timeout is 5 minutes, changed to 12-minutes
# results are well worth the wait
try:
if proxy:
proxy = random.choice(cls().proxy_list)
if params != "":
async with aiohttp.ClientSession(
headers=headers, timeout=timeout
) as session:
async with session.get(
url, params=params, proxy=proxy
) as response:
if params != '':
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
async with session.get(url, params=params, proxy=proxy) as response:
await asyncio.sleep(5)
return (
await response.text()
if json is False
else await response.json()
)
return await response.text() if json is False else await response.json()
else:
async with aiohttp.ClientSession(
headers=headers, timeout=timeout
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
async with session.get(url, proxy=proxy) as response:
await asyncio.sleep(5)
return (
await response.text()
if json is False
else await response.json()
)
elif params == "":
return await response.text() if json is False else await response.json()
elif params == '':
if isinstance(data, str):
data = json_loader.loads(data)
async with aiohttp.ClientSession(
headers=headers, timeout=timeout
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
async with session.post(url, data=data) as resp:
await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json()
else:
if isinstance(data, str):
data = json_loader.loads(data)
async with aiohttp.ClientSession(
headers=headers, timeout=timeout
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.post(
url, data=data, ssl=sslcontext, params=params
) as resp:
async with session.post(url, data=data, ssl=sslcontext, params=params) as resp:
await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json()
except Exception as e:
print(f"An exception has occurred in post_fetch: {e}")
return ""
print(f'An exception has occurred in post_fetch: {e}')
return ''
@classmethod
async def fetch(
cls, session, url, params: Sized = "", json: bool = False, proxy: str = ""
) -> str | dict | list | bool:
async def fetch(cls, session, url, params: Sized = '', json: bool = False, proxy: str = '') -> str | dict | list | bool:
# This fetch method solely focuses on get requests
try:
# Wrap in try except due to 0x89 png/jpg files
# This fetch method solely focuses on get requests
if proxy != "":
if proxy != '':
proxy = str(random.choice(cls().proxy_list))
if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(
url, ssl=sslcontext, params=params, proxy=proxy
) as response:
return (
await response.text()
if json is False
else await response.json()
)
async with session.get(url, ssl=sslcontext, params=params, proxy=proxy) as response:
return await response.text() if json is False else await response.json()
else:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(
url, ssl=sslcontext, proxy=proxy
) as response:
async with session.get(url, ssl=sslcontext, proxy=proxy) as response:
await asyncio.sleep(5)
return (
await response.text()
if json is False
else await response.json()
)
return await response.text() if json is False else await response.json()
if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext, params=params) as response:
await asyncio.sleep(5)
return (
await response.text()
if json is False
else await response.json()
)
return await response.text() if json is False else await response.json()
else:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5)
return (
await response.text()
if json is False
else await response.json()
)
return await response.text() if json is False else await response.json()
except Exception as e:
print(f"An exception has occurred: {e}")
return ""
print(f'An exception has occurred: {e}')
return ''
@staticmethod
async def takeover_fetch(
session, url: str, proxy: str = ""
) -> tuple[Any, Any] | str:
async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any] | str:
# This fetch method solely focuses on get requests
try:
# Wrap in try except due to 0x89 png/jpg files
@@ -408,12 +356,10 @@ async def takeover_fetch(
# TODO determine if method for post requests is necessary
# url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
# Clean up urls with proper schemas
if proxy != "":
if "https://" in url:
if proxy != '':
if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(
url, proxy=proxy, ssl=sslcontext
) as response:
async with session.get(url, proxy=proxy, ssl=sslcontext) as response:
await asyncio.sleep(5)
return url, await response.text()
else:
@@ -421,7 +367,7 @@ async def takeover_fetch(
await asyncio.sleep(5)
return url, await response.text()
else:
if "https://" in url:
if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5)
@@ -431,15 +377,15 @@ async def takeover_fetch(
await asyncio.sleep(5)
return url, await response.text()
except Exception as e:
print(f"Takeover check error: {e}")
return url, ""
print(f'Takeover check error: {e}')
return url, ''
@classmethod
async def fetch_all(
cls,
urls,
headers=None,
params: Sized = "",
params: Sized = '',
json: bool = False,
takeover: bool = False,
proxy: bool = False,
@@ -449,29 +395,18 @@ async def fetch_all(
headers = {}
timeout = aiohttp.ClientTimeout(total=60)
if len(headers) == 0:
headers = {"User-Agent": Core.get_user_agent()}
headers = {'User-Agent': Core.get_user_agent()}
if takeover:
async with aiohttp.ClientSession(
headers=headers, timeout=aiohttp.ClientTimeout(total=15)
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
if proxy:
return await asyncio.gather(
*[
AsyncFetcher.takeover_fetch(
session, url, proxy=random.choice(cls().proxy_list)
)
for url in urls
]
*[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls]
)
else:
return await asyncio.gather(
*[AsyncFetcher.takeover_fetch(session, url) for url in urls]
)
return await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
if len(params) == 0:
async with aiohttp.ClientSession(
headers=headers, timeout=timeout, max_field_size=13000
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=timeout, max_field_size=13000) as session:
if proxy:
return await asyncio.gather(
*[
@@ -485,14 +420,10 @@ async def fetch_all(
]
)
else:
return await asyncio.gather(
*[AsyncFetcher.fetch(session, url, json=json) for url in urls]
)
return await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
else:
# Indicates the request has certain params
async with aiohttp.ClientSession(
headers=headers, timeout=timeout
) as session:
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
if proxy:
return await asyncio.gather(
*[
@@ -507,9 +438,4 @@ async def fetch_all(
]
)
else:
return await asyncio.gather(
*[
AsyncFetcher.fetch(session, url, params, json)
for url in urls
]
)
return await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
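A minimal usage sketch for the reformatted fetch_all(): fetch two JSON endpoints concurrently with the default headers and no proxy. The import path is assumed to be theHarvester.lib.core, and the URLs are placeholders rather than anything theHarvester queries itself.

import asyncio

from theHarvester.lib.core import AsyncFetcher  # assumed import path


async def demo() -> None:
    urls = ['https://httpbin.org/json', 'https://httpbin.org/uuid']
    # json=True asks fetch() to return parsed bodies instead of raw text.
    responses = await AsyncFetcher.fetch_all(urls, json=True, proxy=False)
    for response in responses:
        print(response)


if __name__ == '__main__':
    asyncio.run(demo())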

View file

@@ -40,13 +40,13 @@ async def resolve_host(host, resolver) -> str:
result = await resolver.gethostbyname(host, socket.AF_INET)
addresses = result.addresses
if addresses == [] or addresses is None or result is None:
return f"{host}:"
return f'{host}:'
else:
addresses = ",".join(map(str, list(sorted(set(addresses)))))
addresses = ','.join(map(str, list(sorted(set(addresses)))))
# addresses = list(sorted(addresses))
return f"{host}:{addresses}"
return f'{host}:{addresses}'
except Exception:
return f"{host}:"
return f'{host}:'
# https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
@staticmethod
@@ -57,9 +57,7 @@ def chunks(lst, n):
async def query_all(self, resolver, hosts) -> list[Any]:
# TODO chunk list into 50 pieces regardless of IPs and subnets
results = await asyncio.gather(
*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts]
)
results = await asyncio.gather(*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts])
return results
async def check(self):
@@ -75,9 +73,9 @@ async def check(self):
results = await self.query_all(resolver, chunk)
all_results.update(results)
for pair in results:
host, addresses = pair.split(":")
host, addresses = pair.split(':')
self.realhosts.append(host)
self.addresses.update({addr for addr in addresses.split(",")})
self.addresses.update({addr for addr in addresses.split(',')})
# address may be a list of ips
# and do a set comprehension to remove duplicates
self.realhosts.sort()
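The body of chunks() is cut off by the hunk context, but the linked Stack Overflow answer is the standard generator below; this standalone sketch shows how check() can batch hosts before handing each batch to query_all().

def chunks(lst, n):
    # Yield successive n-sized chunks from lst.
    for i in range(0, len(lst), n):
        yield lst[i:i + n]


hosts = [f'host{i}.example.com' for i in range(120)]
batches = list(chunks(hosts, 50))
print([len(batch) for batch in batches])  # [50, 50, 20]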

View file

@@ -5,7 +5,7 @@
import aiosqlite
db_path = os.path.expanduser("~/.local/share/theHarvester")
db_path = os.path.expanduser('~/.local/share/theHarvester')
if not os.path.isdir(db_path):
os.makedirs(db_path)
@@ -13,9 +13,9 @@
class StashManager:
def __init__(self) -> None:
self.db = os.path.join(db_path, "stash.sqlite")
self.results = ""
self.totalresults = ""
self.db = os.path.join(db_path, 'stash.sqlite')
self.results = ''
self.totalresults = ''
self.latestscandomain: dict = {}
self.domainscanhistory: list = []
self.scanboarddata: dict = {}
@@ -26,7 +26,7 @@ def __init__(self) -> None:
async def do_init(self) -> None:
async with aiosqlite.connect(self.db) as db:
await db.execute(
"CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)"
'CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)'
)
await db.commit()
@@ -39,7 +39,7 @@ async def store(self, domain, resource, res_type, source) -> None:
try:
async with aiosqlite.connect(self.db, timeout=30) as db:
await db.execute(
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
(self.domain, self.resource, self.type, self.date, self.source),
)
await db.commit()
@@ -52,13 +52,11 @@ async def store_all(self, domain, all, res_type, source) -> None:
self.type = res_type
self.source = source
self.date = datetime.date.today()
master_list = [
(self.domain, x, self.type, self.date, self.source) for x in self.all
]
master_list = [(self.domain, x, self.type, self.date, self.source) for x in self.all]
async with aiosqlite.connect(self.db, timeout=30) as db:
try:
await db.executemany(
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
master_list,
)
await db.commit()
@@ -68,43 +66,41 @@ async def store_all(self, domain, all, res_type, source) -> None:
async def generatedashboardcode(self, domain):
try:
# TODO refactor into generic method
self.latestscandomain["domain"] = domain
self.latestscandomain['domain'] = domain
async with aiosqlite.connect(self.db, timeout=30) as conn:
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["host"] = data[0]
self.latestscandomain['host'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["email"] = data[0]
self.latestscandomain['email'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["ip"] = data[0]
self.latestscandomain['ip'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["vhost"] = data[0]
self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["shodan"] = data[0]
cursor = await conn.execute(
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
)
self.latestscandomain['shodan'] = data[0]
cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
data = await cursor.fetchone()
self.latestscandomain["latestdate"] = data[0]
self.latestscandomain['latestdate'] = data[0]
latestdate = data[0]
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@@ -114,7 +110,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailshost = await cursor.fetchall()
self.latestscandomain["scandetailshost"] = scandetailshost
self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
(
@@ -123,7 +119,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsemail = await cursor.fetchall()
self.latestscandomain["scandetailsemail"] = scandetailsemail
self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
(
@@ -132,7 +128,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsip = await cursor.fetchall()
self.latestscandomain["scandetailsip"] = scandetailsip
self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
(
@@ -141,7 +137,7 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsvhost = await cursor.fetchall()
self.latestscandomain["scandetailsvhost"] = scandetailsvhost
self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
(
@@ -150,14 +146,12 @@ async def generatedashboardcode(self, domain):
),
)
scandetailsshodan = await cursor.fetchall()
self.latestscandomain["scandetailsshodan"] = scandetailsshodan
self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain
except Exception as e:
print(e)
async def getlatestscanresults(
self, domain, previousday: bool = False
) -> Iterable[Row | str] | None:
async def getlatestscanresults(self, domain, previousday: bool = False) -> Iterable[Row | str] | None:
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
if previousday:
@@ -170,15 +164,13 @@ async def getlatestscanresults(
(domain,),
)
previousscandate = await cursor.fetchone()
if (
not previousscandate
): # When theHarvester runs first time/day, this query will return.
if not previousscandate: # When theHarvester runs first time/day, this query will return.
self.previousscanresults = [
"No results",
"No results",
"No results",
"No results",
"No results",
'No results',
'No results',
'No results',
'No results',
'No results',
]
else:
cursor = await conn.execute(
@@ -197,9 +189,7 @@ async def getlatestscanresults(
self.previousscanresults = list(results)
return self.previousscanresults
except Exception as e:
print(
f"Error in getting the previous scan results from the database: {e}"
)
print(f'Error in getting the previous scan results from the database: {e}')
else:
try:
cursor = await conn.execute(
@@ -223,46 +213,32 @@ async def getlatestscanresults(
self.latestscanresults = list(results)
return self.latestscanresults
except Exception as e:
print(
f"Error in getting the latest scan results from the database: {e}"
)
print(f'Error in getting the latest scan results from the database: {e}')
except Exception as e:
print(f"Error connecting to theHarvester database: {e}")
print(f'Error connecting to theHarvester database: {e}')
return self.latestscanresults
async def getscanboarddata(self):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE type="host"'''
)
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
data = await cursor.fetchone()
self.scanboarddata["host"] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE type="email"'''
)
self.scanboarddata['host'] = data[0]
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
data = await cursor.fetchone()
self.scanboarddata["email"] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE type="ip"'''
)
self.scanboarddata['email'] = data[0]
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="ip"''')
data = await cursor.fetchone()
self.scanboarddata["ip"] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE type="vhost"'''
)
self.scanboarddata['ip'] = data[0]
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="vhost"''')
data = await cursor.fetchone()
self.scanboarddata["vhost"] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE type="shodan"'''
)
self.scanboarddata['vhost'] = data[0]
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="shodan"''')
data = await cursor.fetchone()
self.scanboarddata["shodan"] = data[0]
cursor = await conn.execute(
"""SELECT COUNT(DISTINCT(domain)) FROM results """
)
self.scanboarddata['shodan'] = data[0]
cursor = await conn.execute("""SELECT COUNT(DISTINCT(domain)) FROM results """)
data = await cursor.fetchone()
self.scanboarddata["domains"] = data[0]
self.scanboarddata['domains'] = data[0]
return self.scanboarddata
except Exception as e:
print(e)
@@ -302,12 +278,12 @@ async def getscanhistorydomain(self, domain):
)
countshodan = await cursor.fetchone()
results = {
"date": str(date[0]),
"hosts": str(counthost[0]),
"email": str(countemail[0]),
"ip": str(countip[0]),
"vhost": str(countvhost[0]),
"shodan": str(countshodan[0]),
'date': str(date[0]),
'hosts': str(counthost[0]),
'email': str(countemail[0]),
'ip': str(countip[0]),
'vhost': str(countvhost[0]),
'shodan': str(countshodan[0]),
}
self.domainscanhistory.append(results)
return self.domainscanhistory
@@ -333,42 +309,40 @@ async def getpluginscanstatistics(self) -> Iterable[Row] | None:
async def latestscanchartdata(self, domain):
try:
async with aiosqlite.connect(self.db, timeout=30) as conn:
self.latestscandomain["domain"] = domain
self.latestscandomain['domain'] = domain
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["host"] = data[0]
self.latestscandomain['host'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["email"] = data[0]
self.latestscandomain['email'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["ip"] = data[0]
self.latestscandomain['ip'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["vhost"] = data[0]
self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,),
)
data = await cursor.fetchone()
self.latestscandomain["shodan"] = data[0]
cursor = await conn.execute(
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
)
self.latestscandomain['shodan'] = data[0]
cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
data = await cursor.fetchone()
self.latestscandomain["latestdate"] = data[0]
self.latestscandomain['latestdate'] = data[0]
latestdate = data[0]
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@@ -378,7 +352,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailshost = await cursor.fetchall()
self.latestscandomain["scandetailshost"] = scandetailshost
self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
(
@@ -387,7 +361,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsemail = await cursor.fetchall()
self.latestscandomain["scandetailsemail"] = scandetailsemail
self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
(
@@ -396,7 +370,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsip = await cursor.fetchall()
self.latestscandomain["scandetailsip"] = scandetailsip
self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
(
@@ -405,7 +379,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsvhost = await cursor.fetchall()
self.latestscandomain["scandetailsvhost"] = scandetailsvhost
self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
(
@@ -414,7 +388,7 @@ async def latestscanchartdata(self, domain):
),
)
scandetailsshodan = await cursor.fetchall()
self.latestscandomain["scandetailsshodan"] = scandetailsshodan
self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain
except Exception as e:
print(e)
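A minimal sketch of driving the reformatted StashManager: initialise the schema, stash a handful of hosts, then read the dashboard counters back. Method names and signatures come from this file; the import path is assumed to be theHarvester.lib.stash.

import asyncio

from theHarvester.lib.stash import StashManager  # assumed import path


async def demo_stash() -> None:
    stash = StashManager()
    await stash.do_init()  # creates the results table if it does not exist
    await stash.store_all('example.com', ['a.example.com', 'b.example.com'], 'host', 'crtsh')
    print(await stash.getscanboarddata())


if __name__ == '__main__':
    asyncio.run(demo_stash())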

View file

@@ -1,4 +1,4 @@
VERSION = "4.6.0"
VERSION = '4.6.0'
def version() -> str:

View file

@@ -10,17 +10,17 @@ async def parse_dictionaries(self, results: dict) -> tuple:
:return: tuple of emails and hosts
"""
if results is not None:
for dictionary in results["selectors"]:
field = dictionary["selectorvalue"]
if "@" in field:
for dictionary in results['selectors']:
field = dictionary['selectorvalue']
if '@' in field:
self.emails.add(field)
else:
field = str(field)
if "http" in field or "https" in field:
if field[:5] == "https":
if 'http' in field or 'https' in field:
if field[:5] == 'https':
field = field[8:]
else:
field = field[7:]
self.hosts.add(field.replace(")", "").replace(",", ""))
self.hosts.add(field.replace(')', '').replace(',', ''))
return self.emails, self.hosts
return None, None
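A standalone re-creation of the selector-splitting logic above, run against a made-up payload shaped like the 'selectors' list this parser expects: values containing '@' become emails, http(s) URLs are stripped down to hosts.

results = {
    'selectors': [
        {'selectorvalue': 'alice@example.com'},
        {'selectorvalue': 'https://sub.example.com/login'},
        {'selectorvalue': 'http://example.com),'},
    ]
}
emails: set = set()
hosts: set = set()
for dictionary in results['selectors']:
    field = dictionary['selectorvalue']
    if '@' in field:
        emails.add(field)
    else:
        field = str(field)
        if 'http' in field or 'https' in field:
            # Drop the scheme the same way the parser does.
            field = field[8:] if field[:5] == 'https' else field[7:]
        hosts.add(field.replace(')', '').replace(',', ''))
print(emails, hosts)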

View file

@@ -10,59 +10,49 @@ def __init__(self, results, word) -> None:
async def genericClean(self) -> None:
self.results = (
self.results.replace("<em>", "")
.replace("<b>", "")
.replace("</b>", "")
.replace("</em>", "")
.replace("%3a", "")
.replace("<strong>", "")
.replace("</strong>", "")
.replace("<wbr>", "")
.replace("</wbr>", "")
self.results.replace('<em>', '')
.replace('<b>', '')
.replace('</b>', '')
.replace('</em>', '')
.replace('%3a', '')
.replace('<strong>', '')
.replace('</strong>', '')
.replace('<wbr>', '')
.replace('</wbr>', '')
)
for search in (
"<",
">",
":",
"=",
";",
"&",
"%3A",
"%3D",
"%3C",
"%2f",
"/",
"\\",
'<',
'>',
':',
'=',
';',
'&',
'%3A',
'%3D',
'%3C',
'%2f',
'/',
'\\',
):
self.results = self.results.replace(search, " ")
self.results = self.results.replace(search, ' ')
async def urlClean(self) -> None:
self.results = (
self.results.replace("<em>", "")
.replace("</em>", "")
.replace("%2f", "")
.replace("%3a", "")
)
for search in ("<", ">", ":", "=", ";", "&", "%3A", "%3D", "%3C"):
self.results = self.results.replace(search, " ")
self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
self.results = self.results.replace(search, ' ')
async def emails(self):
await self.genericClean()
# Local part is required, charset is flexible.
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
reg_emails = re.compile(
r"[a-zA-Z0-9.\-_+#~!$&\',;=:]+"
+ "@"
+ "[a-zA-Z0-9.-]*"
+ self.word.replace("www.", "")
)
reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', ''))
self.temp = reg_emails.findall(self.results)
emails = await self.unique()
true_emails = {
(
str(email)[1:].lower().strip()
if len(str(email)) > 1 and str(email)[0] == "."
if len(str(email)) > 1 and str(email)[0] == '.'
else len(str(email)) > 1 and str(email).lower().strip()
)
for email in emails
@@ -76,11 +66,7 @@ async def fileurls(self, file) -> list:
self.temp = reg_urls.findall(self.results)
allurls = await self.unique()
for iteration in allurls:
if (
iteration.count("webcache")
or iteration.count("google.com")
or iteration.count("search?hl")
):
if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
pass
else:
urls.append(iteration)
@@ -90,11 +76,11 @@ async def hostnames(self):
# should check both www. and not www.
hostnames = []
await self.genericClean()
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word)
reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word)
first_hostnames = reg_hosts.findall(self.results)
hostnames.extend(first_hostnames)
# TODO determine if necessary below or if only pass through is fine
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word.replace("www.", ""))
reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', ''))
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word)
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?')
second_hostnames = reg_hosts.findall(self.results)
@@ -102,31 +88,29 @@ async def hostnames(self):
return list(set(hostnames))
async def hostnames_all(self):
reg_hosts = re.compile("<cite>(.*?)</cite>")
reg_hosts = re.compile('<cite>(.*?)</cite>')
temp = reg_hosts.findall(self.results)
for iteration in temp:
if iteration.count(":"):
res = iteration.split(":")[1].split("/")[2]
if iteration.count(':'):
res = iteration.split(':')[1].split('/')[2]
else:
res = iteration.split("/")[0]
res = iteration.split('/')[0]
self.temp.append(res)
hostnames = await self.unique()
return hostnames
async def set(self):
reg_sets = re.compile(r">[a-zA-Z\d]*</a></font>")
reg_sets = re.compile(r'>[a-zA-Z\d]*</a></font>')
self.temp = reg_sets.findall(self.results)
sets = []
for iteration in self.temp:
delete = iteration.replace(">", "")
delete = delete.replace("</a</font", "")
delete = iteration.replace('>', '')
delete = delete.replace('</a</font', '')
sets.append(delete)
return sets
async def urls(self) -> Set[str]:
found = re.finditer(
r"(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*", self.results
)
found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*', self.results)
urls = {match.group().strip() for match in found}
return urls
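A quick standalone check of the email pattern assembled in emails() above, using the same pieces with word = 'example.com'; it only illustrates what the expression matches and is not part of the commit.

import re

word = 'example.com'
reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + word.replace('www.', ''))
sample = 'contact alice@mail.example.com or bob@example.com, not carol@other.org'
print(reg_emails.findall(sample))  # ['alice@mail.example.com', 'bob@example.com']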

View file

@@ -13,7 +13,7 @@ async def parse_text(self) -> tuple[set, set]:
line = self.text[index].strip()
if '"ip":' in line:
# Extract IP.
ip = ""
ip = ''
for ch in line[7:]:
if ch == '"':
break
@@ -25,17 +25,13 @@ async def parse_text(self) -> tuple[set, set]:
sub_domain_flag = 1
continue
elif sub_domain_flag > 0:
if "]" in line:
if ']' in line:
sub_domain_flag = 0
else:
if "www" in self.word:
self.word = (
str(self.word).replace("www.", "").replace("www", "")
)
if 'www' in self.word:
self.word = str(self.word).replace('www.', '').replace('www', '')
# Remove www from word if entered
self.hostnames.add(
str(line).replace('"', "").replace(",", "") + "." + self.word
)
self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word)
else:
continue
return self.ips, self.hostnames

View file

@@ -6,35 +6,35 @@
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"-H",
"--host",
default="127.0.0.1",
help="IP address to listen on default is 127.0.0.1",
'-H',
'--host',
default='127.0.0.1',
help='IP address to listen on default is 127.0.0.1',
)
parser.add_argument(
"-p",
"--port",
'-p',
'--port',
default=5000,
help="Port to bind the web server to, default is 5000",
help='Port to bind the web server to, default is 5000',
type=int,
)
parser.add_argument(
"-l",
"--log-level",
default="info",
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
'-l',
'--log-level',
default='info',
help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set',
)
parser.add_argument(
"-r",
"--reload",
'-r',
'--reload',
default=False,
help="Enable automatic reload used during development of the api",
action="store_true",
help='Enable automatic reload used during development of the api',
action='store_true',
)
args: argparse.Namespace = parser.parse_args()
uvicorn.run(
"theHarvester.lib.api.api:app",
'theHarvester.lib.api.api:app',
host=args.host,
port=args.port,
log_level=args.log_level,
@@ -42,5 +42,5 @@ def main():
)
if __name__ == "__main__":
if __name__ == '__main__':
main()
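For comparison, the programmatic equivalent of the defaults above (restfulHarvest run with no flags) is roughly the single uvicorn.run() call below; the app path string is the one passed in main().

import uvicorn

if __name__ == '__main__':
    uvicorn.run(
        'theHarvester.lib.api.api:app',
        host='127.0.0.1',
        port=5000,
        log_level='info',
        reload=False,
    )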

View file

@@ -17,27 +17,21 @@
class ScreenShotter:
def __init__(self, output) -> None:
self.output = output
self.slash = "\\" if "win" in sys.platform else "/"
self.slash = (
"" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
)
self.slash = '\\' if 'win' in sys.platform else '/'
self.slash = '' if (self.output[-1] == '\\' or self.output[-1] == '/') else self.slash
def verify_path(self) -> bool:
try:
if not os.path.isdir(self.output):
answer = input(
"[+] The output path you have entered does not exist would you like to create it (y/n): "
)
if answer.lower() == "yes" or answer.lower() == "y":
answer = input('[+] The output path you have entered does not exist would you like to create it (y/n): ')
if answer.lower() == 'yes' or answer.lower() == 'y':
os.makedirs(self.output)
return True
else:
return False
return True
except Exception as e:
print(
f"An exception has occurred while attempting to verify output path's existence: {e}"
)
print(f"An exception has occurred while attempting to verify output path's existence: {e}")
return False
@staticmethod
@@ -47,29 +41,25 @@ async def verify_installation() -> None:
async with async_playwright() as p:
browser = await p.chromium.launch()
await browser.close()
print("Playwright and Chromium are successfully installed.")
print('Playwright and Chromium are successfully installed.')
except Exception as e:
print(
f"An exception has occurred while attempting to verify installation: {e}"
)
print(f'An exception has occurred while attempting to verify installation: {e}')
@staticmethod
def chunk_list(items: Collection, chunk_size: int) -> list:
# Based off of: https://github.com/apache/incubator-sdap-ingester
return [
list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)
]
return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)]
@staticmethod
async def visit(url: str) -> tuple[str, str]:
try:
timeout = aiohttp.ClientTimeout(total=35)
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/122.0.0.0 Safari/537.36"
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/122.0.0.0 Safari/537.36'
}
url = f"http://{url}" if not url.startswith("http") else url
url = url.replace("www.", "")
url = f'http://{url}' if not url.startswith('http') else url
url = url.replace('www.', '')
sslcontext = ssl.create_default_context(cafile=certifi.where())
async with aiohttp.ClientSession(
timeout=timeout,
@@ -77,16 +67,16 @@ async def visit(url: str) -> tuple[str, str]:
connector=aiohttp.TCPConnector(ssl=sslcontext),
) as session:
async with session.get(url, verify_ssl=False) as resp:
text = await resp.text("UTF-8")
return f"http://{url}" if not url.startswith("http") else url, text
text = await resp.text('UTF-8')
return f'http://{url}' if not url.startswith('http') else url, text
except Exception as e:
print(f"An exception has occurred while attempting to visit {url} : {e}")
return "", ""
print(f'An exception has occurred while attempting to visit {url} : {e}')
return '', ''
async def take_screenshot(self, url: str) -> tuple[str, ...]:
url = f"http://{url}" if not url.startswith("http") else url
url = url.replace("www.", "")
print(f"Attempting to take a screenshot of: {url}")
url = f'http://{url}' if not url.startswith('http') else url
url = url.replace('www.', '')
print(f'Attempting to take a screenshot of: {url}')
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
# New browser context
@@ -100,10 +90,8 @@ async def take_screenshot(self, url: str) -> tuple[str, ...]:
await page.goto(url, timeout=35000)
await page.screenshot(path=path)
except Exception as e:
print(
f"An exception has occurred attempting to screenshot: {url} : {e}"
)
path = ""
print(f'An exception has occurred attempting to screenshot: {url} : {e}')
path = ''
finally:
await page.close()
await context.close()
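A minimal sketch of driving the reformatted ScreenShotter end to end: verify the output directory, confirm Playwright and Chromium are present, then screenshot hosts in chunks. The import path is assumed to be theHarvester.screenshot.screenshot; everything else follows the methods above.

import asyncio

from theHarvester.screenshot.screenshot import ScreenShotter  # assumed import path


async def demo_screenshots(hosts: list[str]) -> None:
    shooter = ScreenShotter('screenshots/')
    if not shooter.verify_path():
        return
    await shooter.verify_installation()
    for chunk in shooter.chunk_list(hosts, 5):
        # Screenshot each chunk of hosts concurrently.
        await asyncio.gather(*(shooter.take_screenshot(host) for host in chunk))


if __name__ == '__main__':
    asyncio.run(demo_screenshots(['example.com']))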

View file

@@ -6,7 +6,7 @@
def main():
platform = sys.platform
if platform == "win32":
if platform == 'win32':
# Required or things will break if trying to take screenshots
import multiprocessing
@@ -23,9 +23,9 @@ def main():
uvloop.install()
if "linux" in platform:
if 'linux' in platform:
import aiomultiprocess
# As we are not using Windows, we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
aiomultiprocess.set_context('fork')
asyncio.run(__main__.entry_point())
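To illustrate what set_context('fork') affects: aiomultiprocess starts worker processes per Pool, and on Linux the fork start method avoids re-spawning interpreter state for each worker. The Pool/map usage below follows the aiomultiprocess README; the coroutine is a placeholder, not theHarvester's own worker.

import asyncio

import aiomultiprocess


async def lookup(host: str) -> str:
    await asyncio.sleep(0)  # stand-in for real async work
    return host.upper()


async def run() -> None:
    aiomultiprocess.set_context('fork')  # Linux only, mirroring main() above
    async with aiomultiprocess.Pool() as pool:
        print(await pool.map(lookup, ['a.example.com', 'b.example.com']))


if __name__ == '__main__':
    asyncio.run(run())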