reformat with ruff

Vizonex 2024-07-10 12:38:49 -05:00 committed by J.Townsend
parent 2c871d60e3
commit 309c04acd6
56 changed files with 1387 additions and 1990 deletions
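The changes below are a mechanical re-format: double-quoted strings become single-quoted and black-style wrapped calls are collapsed onto longer lines. As a rough illustration only (not part of this commit), a pass like this can be reproduced by running ruff's formatter over the checkout; the sketch below assumes ruff is installed and that the repository's pyproject.toml selects single quotes (quote-style = "single" under [tool.ruff.format]) and the project's line length.

# Sketch only: re-run the formatter the way a commit like this is produced.
# Assumes `ruff` is on PATH and pyproject.toml carries the quote-style/line-length settings.
import subprocess

def run_ruff_format(repo_root: str = '.') -> None:
    # `ruff format` rewrites files in place using the settings from pyproject.toml
    subprocess.run(['ruff', 'format', repo_root], check=True)

if __name__ == '__main__':
    run_ruff_format()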

View file

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
 from theHarvester.restfulHarvest import main
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()

View file

@@ -5,10 +5,8 @@
 from theHarvester.theHarvester import main
 
 if sys.version_info.major < 3 or sys.version_info.minor < 10:
-    print(
-        "\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m"
-    )
+    print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m')
     sys.exit(1)
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()

File diff suppressed because it is too large

View file

@@ -8,7 +8,7 @@ def __init__(self, word) -> None:
         self.proxy = False
 
     async def do_search(self) -> None:
-        url = f"https://jldc.me/anubis/subdomains/{self.word}"
+        url = f'https://jldc.me/anubis/subdomains/{self.word}'
         response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
         self.totalhosts = response[0]

View file

@@ -5,23 +5,17 @@
 class SearchBaidu:
     def __init__(self, word, limit) -> None:
         self.word = word
-        self.total_results = ""
-        self.server = "www.baidu.com"
-        self.hostname = "www.baidu.com"
+        self.total_results = ''
+        self.server = 'www.baidu.com'
+        self.hostname = 'www.baidu.com'
         self.limit = limit
         self.proxy = False
 
     async def do_search(self) -> None:
-        headers = {"Host": self.hostname, "User-agent": Core.get_user_agent()}
-        base_url = f"https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}"
-        urls = [
-            base_url.replace("xx", str(num))
-            for num in range(0, self.limit, 10)
-            if num <= self.limit
-        ]
-        responses = await AsyncFetcher.fetch_all(
-            urls, headers=headers, proxy=self.proxy
-        )
+        headers = {'Host': self.hostname, 'User-agent': Core.get_user_agent()}
+        base_url = f'https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}'
+        urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
+        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
         for response in responses:
             self.total_results += response

View file

@@ -9,27 +9,23 @@ def __init__(self, word) -> None:
         self.interestingurls: set = set()
         self.key = Core.bevigil_key()
         if self.key is None:
-            self.key = ""
-            raise MissingKey("bevigil")
+            self.key = ''
+            raise MissingKey('bevigil')
         self.proxy = False
 
     async def do_search(self) -> None:
-        subdomain_endpoint = f"https://osint.bevigil.com/api/{self.word}/subdomains/"
-        url_endpoint = f"https://osint.bevigil.com/api/{self.word}/urls/"
-        headers = {"X-Access-Token": self.key}
+        subdomain_endpoint = f'https://osint.bevigil.com/api/{self.word}/subdomains/'
+        url_endpoint = f'https://osint.bevigil.com/api/{self.word}/urls/'
+        headers = {'X-Access-Token': self.key}
 
-        responses = await AsyncFetcher.fetch_all(
-            [subdomain_endpoint], json=True, proxy=self.proxy, headers=headers
-        )
+        responses = await AsyncFetcher.fetch_all([subdomain_endpoint], json=True, proxy=self.proxy, headers=headers)
         response = responses[0]
-        for subdomain in response["subdomains"]:
+        for subdomain in response['subdomains']:
             self.totalhosts.add(subdomain)
 
-        responses = await AsyncFetcher.fetch_all(
-            [url_endpoint], json=True, proxy=self.proxy, headers=headers
-        )
+        responses = await AsyncFetcher.fetch_all([url_endpoint], json=True, proxy=self.proxy, headers=headers)
         response = responses[0]
-        for url in response["urls"]:
+        for url in response['urls']:
             self.interestingurls.add(url)
 
     async def get_hostnames(self) -> set:

View file

@@ -13,29 +13,25 @@ def __init__(self, word, limit) -> None:
         self.limit = 501 if limit >= 501 else limit
         self.limit = 2 if self.limit == 1 else self.limit
         if self.key is None:
-            raise MissingKey("binaryedge")
+            raise MissingKey('binaryedge')
 
     async def do_search(self) -> None:
-        base_url = f"https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}"
-        headers = {"X-KEY": self.key, "User-Agent": Core.get_user_agent()}
+        base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
+        headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
         for page in range(1, self.limit):
-            params = {"page": page}
-            response = await AsyncFetcher.fetch_all(
-                [base_url], json=True, proxy=self.proxy, params=params, headers=headers
-            )
+            params = {'page': page}
+            response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers)
             responses = response[0]
             dct = responses
-            if ("status" in dct.keys() and "message" in dct.keys()) and (
-                dct["status"] == 400
-                or "Bad Parameter" in dct["message"]
-                or "Error" in dct["message"]
+            if ('status' in dct.keys() and 'message' in dct.keys()) and (
+                dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']
             ):
                 # 400 status code means no more results
                 break
-            if "events" in dct.keys():
-                if len(dct["events"]) == 0:
+            if 'events' in dct.keys():
+                if len(dct['events']) == 0:
                     break
-                self.totalhosts.update({host for host in dct["events"]})
+                self.totalhosts.update({host for host in dct['events']})
             await asyncio.sleep(get_delay())
 
     async def get_hostnames(self) -> set:

View file

@@ -7,12 +7,12 @@
 class SearchBing:
     def __init__(self, word, limit, start) -> None:
-        self.word = word.replace(" ", "%20")
+        self.word = word.replace(' ', '%20')
         self.results: list[Any] = []
-        self.total_results = ""
-        self.server = "www.bing.com"
-        self.apiserver = "api.search.live.net"
-        self.hostname = "www.bing.com"
+        self.total_results = ''
+        self.server = 'www.bing.com'
+        self.apiserver = 'api.search.live.net'
+        self.hostname = 'www.bing.com'
         self.limit = int(limit)
         self.bingApi = Core.bing_key()
         self.counter = start
@@ -20,58 +20,44 @@ def __init__(self, word, limit, start) -> None:
     async def do_search(self) -> None:
         headers = {
-            "Host": self.hostname,
-            "Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50",
-            "Accept-Language": "en-us,en",
-            "User-agent": Core.get_user_agent(),
+            'Host': self.hostname,
+            'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
+            'Accept-Language': 'en-us,en',
+            'User-agent': Core.get_user_agent(),
         }
         base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
-        urls = [
-            base_url.replace("xx", str(num))
-            for num in range(0, self.limit, 50)
-            if num <= self.limit
-        ]
-        responses = await AsyncFetcher.fetch_all(
-            urls, headers=headers, proxy=self.proxy
-        )
+        urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
         for response in responses:
             self.total_results += response
 
     async def do_search_api(self) -> None:
-        url = "https://api.bing.microsoft.com/v7.0/search?"
+        url = 'https://api.bing.microsoft.com/v7.0/search?'
         params = {
-            "q": self.word,
-            "count": str(self.limit),
-            "offset": "0",
-            "mkt": "en-us",
-            "safesearch": "Off",
+            'q': self.word,
+            'count': str(self.limit),
+            'offset': '0',
+            'mkt': 'en-us',
+            'safesearch': 'Off',
         }
         headers = {
-            "User-Agent": Core.get_user_agent(),
-            "Ocp-Apim-Subscription-Key": self.bingApi,
+            'User-Agent': Core.get_user_agent(),
+            'Ocp-Apim-Subscription-Key': self.bingApi,
         }
-        self.results = await AsyncFetcher.fetch_all(
-            [url], headers=headers, params=params, proxy=self.proxy
-        )
+        self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy)
         for res in self.results:
             self.total_results += res
 
     async def do_search_vhost(self) -> None:
         headers = {
-            "Host": self.hostname,
-            "Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50",
-            "Accept-Language": "en-us,en",
-            "User-agent": Core.get_user_agent(),
+            'Host': self.hostname,
+            'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
+            'Accept-Language': 'en-us,en',
+            'User-agent': Core.get_user_agent(),
         }
-        base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx"
-        urls = [
-            base_url.replace("xx", str(num))
-            for num in range(0, self.limit, 50)
-            if num <= self.limit
-        ]
-        responses = await AsyncFetcher.fetch_all(
-            urls, headers=headers, proxy=self.proxy
-        )
+        base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
+        urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
+        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
         for response in responses:
             self.total_results += response
@@ -89,13 +75,13 @@ async def get_allhostnames(self):
     async def process(self, api, proxy: bool = False) -> None:
         self.proxy = proxy
-        if api == "yes":
+        if api == 'yes':
             if self.bingApi is None:
-                raise MissingKey("BingAPI")
+                raise MissingKey('BingAPI')
             await self.do_search_api()
         else:
             await self.do_search()
-        print(f"\tSearching {self.counter} results.")
+        print(f'\tSearching {self.counter} results.')
 
     async def process_vhost(self) -> None:
         await self.do_search_vhost()

View file

@@ -8,37 +8,34 @@
 class SearchBrave:
     def __init__(self, word, limit):
         self.word = word
-        self.results = ""
-        self.totalresults = ""
-        self.server = "https://search.brave.com/search?q="
+        self.results = ''
+        self.totalresults = ''
+        self.server = 'https://search.brave.com/search?q='
         self.limit = limit
         self.proxy = False
 
     async def do_search(self):
-        headers = {"User-Agent": Core.get_user_agent()}
-        for query in [f'"{self.word}"', f"site:{self.word}"]:
+        headers = {'User-Agent': Core.get_user_agent()}
+        for query in [f'"{self.word}"', f'site:{self.word}']:
             try:
                 for offset in range(0, 50):
                     # To reduce the total number of requests, only two queries are made "self.word" and site:self.word
-                    current_url = f"{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0"
-                    resp = await AsyncFetcher.fetch_all(
-                        [current_url], headers=headers, proxy=self.proxy
-                    )
+                    current_url = f'{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0'
+                    resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy)
                     self.results = resp[0]
                     self.totalresults += self.results
                     # if 'Results from Microsoft Bing.' in resp[0] \
                     if (
-                        "Not many great matches came back for your search" in resp[0]
-                        or "Your request has been flagged as being suspicious and Brave Search"
-                        in resp[0]
-                        or "Prove" in resp[0]
-                        and "robot" in resp[0]
-                        or "Robot" in resp[0]
+                        'Not many great matches came back for your search' in resp[0]
+                        or 'Your request has been flagged as being suspicious and Brave Search' in resp[0]
+                        or 'Prove' in resp[0]
+                        and 'robot' in resp[0]
+                        or 'Robot' in resp[0]
                     ):
                         break
                     await asyncio.sleep(get_delay() + 15)
             except Exception as e:
-                print(f"An exception has occurred in bravesearch: {e}")
+                print(f'An exception has occurred in bravesearch: {e}')
                 await asyncio.sleep(get_delay() + 80)
                 continue

View file

@@ -11,33 +11,30 @@ def __init__(self, word) -> None:
         self.totalips: set = set()
         self.key = Core.bufferoverun_key()
         if self.key is None:
-            raise MissingKey("bufferoverun")
+            raise MissingKey('bufferoverun')
         self.proxy = False
 
     async def do_search(self) -> None:
-        url = f"https://tls.bufferover.run/dns?q={self.word}"
+        url = f'https://tls.bufferover.run/dns?q={self.word}'
         response = await AsyncFetcher.fetch_all(
             [url],
             json=True,
-            headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
+            headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
             proxy=self.proxy,
         )
         dct = response[0]
-        if dct["Results"]:
+        if dct['Results']:
             self.totalhosts = {
                 (
-                    host.split(",")
-                    if "," in host
-                    and self.word.replace("www.", "") in host.split(",")[0] in host
-                    else host.split(",")[4]
+                    host.split(',')
+                    if ',' in host and self.word.replace('www.', '') in host.split(',')[0] in host
+                    else host.split(',')[4]
                 )
-                for host in dct["Results"]
+                for host in dct['Results']
             }
             self.totalips = {
-                ip.split(",")[0]
-                for ip in dct["Results"]
-                if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip.split(",")[0])
+                ip.split(',')[0] for ip in dct['Results'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip.split(',')[0])
             }
 
     async def get_hostnames(self) -> set:

View file

@@ -15,7 +15,7 @@ def __init__(self, domain, limit: int = 500) -> None:
         self.word = domain
         self.key = Core.censys_key()
         if self.key[0] is None or self.key[1] is None:
-            raise MissingKey("Censys ID and/or Secret")
+            raise MissingKey('Censys ID and/or Secret')
         self.totalhosts: set = set()
         self.emails: set = set()
         self.limit = limit
@@ -26,26 +26,24 @@ async def do_search(self) -> None:
             cert_search = CensysCerts(
                 api_id=self.key[0],
                 api_secret=self.key[1],
-                user_agent=f"censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)",
+                user_agent=f'censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)',
             )
         except CensysUnauthorizedException:
-            raise MissingKey("Censys ID and/or Secret")
+            raise MissingKey('Censys ID and/or Secret')
 
-        query = f"names: {self.word}"
+        query = f'names: {self.word}'
         try:
             response = cert_search.search(
                 query=query,
-                fields=["names", "parsed.subject.email_address"],
+                fields=['names', 'parsed.subject.email_address'],
                 max_records=self.limit,
             )
             for cert in response():
-                self.totalhosts.update(cert.get("names", []))
-                email_address = (
-                    cert.get("parsed", {}).get("subject", {}).get("email_address", [])
-                )
+                self.totalhosts.update(cert.get('names', []))
+                email_address = cert.get('parsed', {}).get('subject', {}).get('email_address', [])
                 self.emails.update(email_address)
         except CensysRateLimitExceededException:
-            print("Censys rate limit exceeded")
+            print('Censys rate limit exceeded')
 
     async def get_hostnames(self) -> set:
         return self.totalhosts

View file

@@ -8,21 +8,19 @@ def __init__(self, word) -> None:
         self.proxy = False
 
     async def do_search(self) -> None:
-        base_url = f"https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names"
+        base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names'
         try:
-            response = await AsyncFetcher.fetch_all(
-                [base_url], json=True, proxy=self.proxy
-            )
+            response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy)
             response = response[0]
             if isinstance(response, list):
                 for dct in response:
                     for key, value in dct.items():
-                        if key == "dns_names":
+                        if key == 'dns_names':
                             self.totalhosts.update({name for name in value if name})
             elif isinstance(response, dict):
-                self.totalhosts.update({response["dns_names"] if "dns_names" in response.keys() else ""})  # type: ignore
+                self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''})  # type: ignore
             else:
-                self.totalhosts.update({""})
+                self.totalhosts.update({''})
         except Exception as e:
             print(e)
@@ -32,4 +30,4 @@ async def get_hostnames(self) -> set:
     async def process(self, proxy: bool = False) -> None:
         self.proxy = proxy
         await self.do_search()
-        print("\tSearching results.")
+        print('\tSearching results.')

View file

@@ -14,10 +14,10 @@ async def splitter(links):
     unique_list = []
     name_check = []
     for url in links:
-        tail = url.split("/")[-1]
-        if len(tail) == 2 or tail == "zh-cn":
-            tail = url.split("/")[-2]
-        name = tail.split("-")
+        tail = url.split('/')[-1]
+        if len(tail) == 2 or tail == 'zh-cn':
+            tail = url.split('/')[-2]
+        name = tail.split('-')
         if len(name) > 1:
             joined_name = name[0] + name[1]
         else:
@@ -41,12 +41,8 @@ def filter(lst):
     new_lst = []
     for item in lst:
         item = str(item)
-        if (
-            (item[0].isalpha() or item[0].isdigit())
-            and ("xxx" not in item)
-            and (".." not in item)
-        ):
-            item = item.replace("252f", "").replace("2F", "").replace("2f", "")
+        if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' not in item):
+            item = item.replace('252f', '').replace('2F', '').replace('2f', '')
             new_lst.append(item.lower())
     return new_lst
@@ -63,10 +59,9 @@ async def search(text: str) -> bool:
     """
     for line in text.strip().splitlines():
         if (
-            "This page appears when Google automatically detects requests coming from your computer network"
-            in line
-            or "http://www.google.com/sorry/index" in line
-            or "https://www.google.com/sorry/index" in line
+            'This page appears when Google automatically detects requests coming from your computer network' in line
+            or 'http://www.google.com/sorry/index' in line
+            or 'https://www.google.com/sorry/index' in line
         ):
             # print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
             return True
@@ -79,47 +74,37 @@ async def google_workaround(visit_url: str) -> bool | str:
     :param visit_url: Url to scrape
    :return: Correct html that can be parsed by BS4
    """
-    url = "https://websniffer.cc/"
+    url = 'https://websniffer.cc/'
    data = {
-        "Cookie": "",
-        "url": visit_url,
-        "submit": "Submit",
-        "type": "GET&http=1.1",
-        "uak": str(random.randint(4, 8)),  # select random UA to send to Google
+        'Cookie': '',
+        'url': visit_url,
+        'submit': 'Submit',
+        'type': 'GET&http=1.1',
+        'uak': str(random.randint(4, 8)),  # select random UA to send to Google
    }
-    returned_html = await AsyncFetcher.post_fetch(
-        url, headers={"User-Agent": Core.get_user_agent()}, data=data
-    )
+    returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
    returned_html = (
-        "This page appears when Google automatically detects requests coming from your computer network"
-        if returned_html == ""
+        'This page appears when Google automatically detects requests coming from your computer network'
+        if returned_html == ''
        else returned_html[0]
    )
-    returned_html = (
-        "" if "Please Wait... | Cloudflare" in returned_html else returned_html
-    )
+    returned_html = '' if 'Please Wait... | Cloudflare' in returned_html else returned_html
-    if (
-        len(returned_html) == 0
-        or await search(returned_html)
-        or "&lt;html" not in returned_html
-    ):
+    if len(returned_html) == 0 or await search(returned_html) or '&lt;html' not in returned_html:
        # indicates that google is serving workaround a captcha
        # That means we will try out second option which will utilize proxies
        return True
    # the html we get is malformed for BS4 as there are no greater than or less than signs
-    if "&lt;html&gt;" in returned_html:
-        start_index = returned_html.index("&lt;html&gt;")
+    if '&lt;html&gt;' in returned_html:
+        start_index = returned_html.index('&lt;html&gt;')
    else:
-        start_index = returned_html.index("&lt;html")
-    end_index = returned_html.index("&lt;/html&gt;") + 1
+        start_index = returned_html.index('&lt;html')
+    end_index = returned_html.index('&lt;/html&gt;') + 1
    correct_html = returned_html[start_index:end_index]
    # Slice list to get the response's html
-    correct_html = "".join(
-        [ch.strip().replace("&lt;", "<").replace("&gt;", ">") for ch in correct_html]
-    )
+    correct_html = ''.join([ch.strip().replace('&lt;', '<').replace('&gt;', '>') for ch in correct_html])
    return correct_html
@@ -130,9 +115,9 @@ class MissingKey(Exception):
     def __init__(self, source: str | None) -> None:
         if source:
-            self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
+            self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
         else:
-            self.message = "\n\033[93m[!] Missing CSE id. \033[0m"
+            self.message = '\n\033[93m[!] Missing CSE id. \033[0m'
 
     def __str__(self) -> str:
         return self.message

View file

@@ -13,64 +13,56 @@ def __init__(self, word) -> None:
         self.asns: set = set()
         self.key = Core.criminalip_key()
         if self.key is None:
-            raise MissingKey("criminalip")
+            raise MissingKey('criminalip')
         self.proxy = False
 
     async def do_search(self) -> None:
         # https://www.criminalip.io/developer/api/post-domain-scan
         # https://www.criminalip.io/developer/api/get-domain-status-id
         # https://www.criminalip.io/developer/api/get-domain-report-id
-        url = "https://api.criminalip.io/v1/domain/scan"
+        url = 'https://api.criminalip.io/v1/domain/scan'
         data = f'{{"query": "{self.word}"}}'
         # print(f'Current key: {self.key}')
         user_agent = Core.get_user_agent()
         response = await AsyncFetcher.post_fetch(
             url,
             json=True,
-            headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+            headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
             data=data,
             proxy=self.proxy,
         )
         # print(f'My response: {response}')
         # Expected response format:
         # {'data': {'scan_id': scan_id}, 'message': 'api success', 'status': 200}
-        if "status" in response.keys():
-            status = response["status"]
+        if 'status' in response.keys():
+            status = response['status']
             if status != 200:
-                print(
-                    f"An error has occurred searching criminalip dumping response: {response}"
-                )
+                print(f'An error has occurred searching criminalip dumping response: {response}')
             else:
-                scan_id = response["data"]["scan_id"]
+                scan_id = response['data']['scan_id']
                 scan_percentage = 0
                 counter = 0
                 while scan_percentage != 100:
-                    status_url = f"https://api.criminalip.io/v1/domain/status/{scan_id}"
+                    status_url = f'https://api.criminalip.io/v1/domain/status/{scan_id}'
                     status_response = await AsyncFetcher.fetch_all(
                         [status_url],
                         json=True,
-                        headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+                        headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
                         proxy=self.proxy,
                     )
                     status = status_response[0]
                     # print(f'Status response: {status}')
                     # Expected format:
                     # {"data": {"scan_percentage": 100}, "message": "api success", "status": 200}
-                    scan_percentage = status["data"]["scan_percentage"]
+                    scan_percentage = status['data']['scan_percentage']
                     if scan_percentage == 100:
                         break
                     if scan_percentage == -2:
-                        print(
-                            f"CriminalIP failed to scan: {self.word} does not exist, verify manually"
-                        )
-                        print(
-                            f"Dumping data: scan_response: {response} status_response: {status}"
-                        )
+                        print(f'CriminalIP failed to scan: {self.word} does not exist, verify manually')
+                        print(f'Dumping data: scan_response: {response} status_response: {status}')
                         return
                     if scan_percentage == -1:
-                        print(
-                            f"CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}"
-                        )
+                        print(f'CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}')
                         return
                     # Wait for scan to finish
                     if counter >= 5:
@@ -80,18 +72,18 @@ async def do_search(self) -> None:
                     counter += 1
                     if counter == 10:
                         print(
-                            "Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop."
+                            'Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop.'
                         )
                         print(
-                            f"Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}"
+                            f'Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}'
                         )
                         return
-                report_url = f"https://api.criminalip.io/v1/domain/report/{scan_id}"
+                report_url = f'https://api.criminalip.io/v1/domain/report/{scan_id}'
                 scan_response = await AsyncFetcher.fetch_all(
                     [report_url],
                     json=True,
-                    headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
+                    headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
                     proxy=self.proxy,
                 )
                 scan = scan_response[0]
@@ -100,125 +92,113 @@ async def do_search(self) -> None:
         try:
             await self.parser(scan)
         except Exception as e:
-            print(f"An exception occurred while parsing criminalip result: {e}")
-            print("Dumping json: ")
+            print(f'An exception occurred while parsing criminalip result: {e}')
+            print('Dumping json: ')
             print(scan)
 
     async def parser(self, jlines):
         # TODO when new scope field is added to parse lines for potential new scope!
         # TODO map as_name to asn for asn data
         # TODO determine if worth storing interesting urls
-        if "data" not in jlines.keys():
-            print(f"Error with criminalip data, dumping: {jlines}")
+        if 'data' not in jlines.keys():
+            print(f'Error with criminalip data, dumping: {jlines}')
             return
-        data = jlines["data"]
-        for cert in data["certificates"]:
+        data = jlines['data']
+        for cert in data['certificates']:
             # print(f'Current cert: {cert}')
-            if cert["subject"].endswith("." + self.word):
-                self.totalhosts.add(cert["subject"])
+            if cert['subject'].endswith('.' + self.word):
+                self.totalhosts.add(cert['subject'])
 
-        for connected_domain in data["connected_domain_subdomain"]:
+        for connected_domain in data['connected_domain_subdomain']:
             try:
-                main_domain = connected_domain["main_domain"]["domain"]
-                subdomains = [sub["domain"] for sub in connected_domain["subdomains"]]
-                if main_domain.endswith("." + self.word):
+                main_domain = connected_domain['main_domain']['domain']
+                subdomains = [sub['domain'] for sub in connected_domain['subdomains']]
+                if main_domain.endswith('.' + self.word):
                     self.totalhosts.add(main_domain)
                 for sub in subdomains:
                     # print(f'Current sub: {sub}')
-                    if sub.endswith("." + self.word):
+                    if sub.endswith('.' + self.word):
                         self.totalhosts.add(sub)
             except Exception as e:
-                print(f"An exception has occurred: {e}")
-                print(f"Main line: {connected_domain}")
+                print(f'An exception has occurred: {e}')
+                print(f'Main line: {connected_domain}')
 
-        for ip_info in data["connected_ip_info"]:
-            self.asns.add(str(ip_info["asn"]))
-            domains = [sub["domain"] for sub in ip_info["domain_list"]]
+        for ip_info in data['connected_ip_info']:
+            self.asns.add(str(ip_info['asn']))
+            domains = [sub['domain'] for sub in ip_info['domain_list']]
             for sub in domains:
-                if sub.endswith("." + self.word):
+                if sub.endswith('.' + self.word):
                     self.totalhosts.add(sub)
-            self.totalips.add(ip_info["ip"])
+            self.totalips.add(ip_info['ip'])
 
-        for cookie in data["cookies"]:
-            if cookie["domain"] != "." + self.word and cookie["domain"].endswith(
-                "." + self.word
-            ):
-                self.totalhosts.add(cookie["domain"])
+        for cookie in data['cookies']:
+            if cookie['domain'] != '.' + self.word and cookie['domain'].endswith('.' + self.word):
+                self.totalhosts.add(cookie['domain'])
 
-        for country in data["country"]:
-            if country["domain"].endswith("." + self.word):
-                self.totalhosts.add(country["domain"])
-            for ip in country["mapped_ips"]:
-                self.totalips.add(ip["ip"])
+        for country in data['country']:
+            if country['domain'].endswith('.' + self.word):
+                self.totalhosts.add(country['domain'])
+            for ip in country['mapped_ips']:
+                self.totalips.add(ip['ip'])
 
-        for k, v in data["dns_record"].items():
-            if k == "dns_record_type_a":
-                for ip in data["dns_record"][k]["ipv4"]:
-                    self.totalips.add(ip["ip"])
+        for k, v in data['dns_record'].items():
+            if k == 'dns_record_type_a':
+                for ip in data['dns_record'][k]['ipv4']:
+                    self.totalips.add(ip['ip'])
             else:
                 if isinstance(v, list):
                     for item in v:
                         if isinstance(item, list):
                             for subitem in item:
-                                if subitem.endswith("." + self.word):
+                                if subitem.endswith('.' + self.word):
                                     self.totalhosts.add(subitem)
                         else:
-                            if item.endswith("." + self.word):
+                            if item.endswith('.' + self.word):
                                 self.totalhosts.add(item)
 
-        for domain_list in data["domain_list"]:
-            self.asns.add(str(domain_list["asn"]))
-            domains = [sub["domain"] for sub in domain_list["domain_list"]]
+        for domain_list in data['domain_list']:
+            self.asns.add(str(domain_list['asn']))
+            domains = [sub['domain'] for sub in domain_list['domain_list']]
             for sub in domains:
-                if sub.endswith("." + self.word):
+                if sub.endswith('.' + self.word):
                     self.totalhosts.add(sub)
-            self.totalips.add(domain_list["ip"])
+            self.totalips.add(domain_list['ip'])
 
-        for html_page_links in data["html_page_link_domains"]:
-            domain = html_page_links["domain"]
-            if domain.endswith("." + self.word):
+        for html_page_links in data['html_page_link_domains']:
+            domain = html_page_links['domain']
+            if domain.endswith('.' + self.word):
                 self.totalhosts.add(domain)
-            for ip in html_page_links["mapped_ips"]:
-                self.totalips.add(ip["ip"])
+            for ip in html_page_links['mapped_ips']:
+                self.totalips.add(ip['ip'])
 
         # TODO combine data['links'] and data['network_logs'] urls into one list for one run through
-        for link in data["links"]:
-            url = link["url"]
+        for link in data['links']:
+            url = link['url']
             parsed_url = urlparse(url)
             netloc = parsed_url.netloc
             if self.word in netloc:
-                if (
-                    ":" in netloc and netloc.split(":")[0].endswith(self.word)
-                ) or netloc.endswith(self.word):
+                if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
                     self.totalhosts.add(netloc)
 
-        for log in data["network_logs"]:
-            url = log["url"]
+        for log in data['network_logs']:
+            url = log['url']
             parsed_url = urlparse(url)
             netloc = parsed_url.netloc
             if self.word in netloc:
-                if (
-                    ":" in netloc and netloc.split(":")[0].endswith(self.word)
-                ) or netloc.endswith(self.word):
+                if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
                     self.totalhosts.add(netloc)
-            self.asns.add(str(log["as_number"]))
+            self.asns.add(str(log['as_number']))
 
-        for redirects in data["page_redirections"]:
+        for redirects in data['page_redirections']:
             for redirect in redirects:
-                url = redirect["url"]
+                url = redirect['url']
                 parsed_url = urlparse(url)
                 netloc = parsed_url.netloc
                 if self.word in netloc:
-                    if (
-                        ":" in netloc and netloc.split(":")[0].endswith(self.word)
-                    ) or netloc.endswith(self.word):
+                    if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
                         self.totalhosts.add(netloc)
 
-        self.totalhosts = {
-            host.replace("www.", "")
-            for host in self.totalhosts
-            if "*." + self.word != host
-        }
+        self.totalhosts = {host.replace('www.', '') for host in self.totalhosts if '*.' + self.word != host}
         # print(f'hostnames: {self.totalhosts}')
         # print(f'asns: {self.asns}')

View file

@@ -10,24 +10,11 @@ def __init__(self, word) -> None:
     async def do_search(self) -> list:
         data: set = set()
         try:
-            url = f"https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json"
+            url = f'https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json'
             response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
             response = response[0]
-            data = set(
-                [
-                    (
-                        dct["name_value"][2:]
-                        if "*." == dct["name_value"][:2]
-                        else dct["name_value"]
-                    )
-                    for dct in response
-                ]
-            )
-            data = {
-                domain
-                for domain in data
-                if (domain[0] != "*" and str(domain[0:4]).isnumeric() is False)
-            }
+            data = set([(dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value']) for dct in response])
+            data = {domain for domain in data if (domain[0] != '*' and str(domain[0:4]).isnumeric() is False)}
         except Exception as e:
             print(e)
         clean: list = []

View file

@@ -8,51 +8,49 @@
 class SearchDnsDumpster:
     def __init__(self, word) -> None:
-        self.word = word.replace(" ", "%20")
-        self.results = ""
-        self.totalresults = ""
-        self.server = "dnsdumpster.com"
+        self.word = word.replace(' ', '%20')
+        self.results = ''
+        self.totalresults = ''
+        self.server = 'dnsdumpster.com'
         self.proxy = False
 
     async def do_search(self) -> None:
         try:
             agent = Core.get_user_agent()
-            headers = {"User-Agent": agent}
+            headers = {'User-Agent': agent}
             session = aiohttp.ClientSession(headers=headers)
             # create a session to properly verify
-            url = f"https://{self.server}"
-            csrftoken = ""
+            url = f'https://{self.server}'
+            csrftoken = ''
             if self.proxy is False:
                 async with session.get(url, headers=headers) as resp:
                     resp_cookies = str(resp.cookies)
-                    cookies = resp_cookies.split("csrftoken=")
-                    csrftoken += cookies[1][: cookies[1].find(";")]
+                    cookies = resp_cookies.split('csrftoken=')
+                    csrftoken += cookies[1][: cookies[1].find(';')]
             else:
                 async with session.get(url, headers=headers, proxy=self.proxy) as resp:
                     resp_cookies = str(resp.cookies)
-                    cookies = resp_cookies.split("csrftoken=")
-                    csrftoken += cookies[1][: cookies[1].find(";")]
+                    cookies = resp_cookies.split('csrftoken=')
+                    csrftoken += cookies[1][: cookies[1].find(';')]
             await asyncio.sleep(5)
 
             # extract csrftoken from cookies
             data = {
-                "Cookie": f"csfrtoken={csrftoken}",
-                "csrfmiddlewaretoken": csrftoken,
-                "targetip": self.word,
-                "user": "free",
+                'Cookie': f'csfrtoken={csrftoken}',
+                'csrfmiddlewaretoken': csrftoken,
+                'targetip': self.word,
+                'user': 'free',
             }
-            headers["Referer"] = url
+            headers['Referer'] = url
             if self.proxy is False:
                 async with session.post(url, headers=headers, data=data) as resp:
                     self.results = await resp.text()
             else:
-                async with session.post(
-                    url, headers=headers, data=data, proxy=self.proxy
-                ) as resp:
+                async with session.post(url, headers=headers, data=data, proxy=self.proxy) as resp:
                     self.results = await resp.text()
             await session.close()
         except Exception as e:
-            print(f"An exception occurred: {e}")
+            print(f'An exception occurred: {e}')
         self.totalresults += self.results
 
     async def get_hostnames(self):

View file

@@ -21,7 +21,7 @@
 # DNS FORCE
 #####################################################################
 
-DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"
+DNS_NAMES = DATA_DIR / 'wordlists' / 'dns-names.txt'
 
 
 class DnsForce:
@@ -32,13 +32,13 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
         # self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
         # self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
         self.dnsserver = dnsserver
-        with DNS_NAMES.open("r") as file:
+        with DNS_NAMES.open('r') as file:
             self.list = file.readlines()
-        self.domain = domain.replace("www.", "")
-        self.list = [f"{word.strip()}.{self.domain}" for word in self.list]
+        self.domain = domain.replace('www.', '')
+        self.list = [f'{word.strip()}.{self.domain}' for word in self.list]
 
     async def run(self):
-        print(f"Starting DNS brute forcing with {len(self.list)} words")
+        print(f'Starting DNS brute forcing with {len(self.list)} words')
         checker = hostchecker.Checker(self.list, nameserver=self.dnsserver)
         resolved_pair, hosts, ips = await checker.check()
         return resolved_pair, hosts, ips
@@ -49,13 +49,13 @@ async def run(self):
 #####################################################################
 
-IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
-PORT_REGEX = r"\d{1,5}"
-NETMASK_REGEX: str = r"\d{1,2}|" + IP_REGEX
-NETWORK_REGEX: str = rf"\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b"
+IP_REGEX = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
+PORT_REGEX = r'\d{1,5}'
+NETMASK_REGEX: str = r'\d{1,2}|' + IP_REGEX
+NETWORK_REGEX: str = rf'\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b'
 
 
-def serialize_ip_range(ip: str, netmask: str = "24") -> str:
+def serialize_ip_range(ip: str, netmask: str = '24') -> str:
     """
     Serialize a network range in a constant format, 'x.x.x.x/y'.
@@ -78,12 +78,12 @@ def serialize_ip_range(ip: str, netmask: str = "24") -> str:
     __ip = __ip_matches.group(1)
     __netmask = netmask if netmask else __ip_matches.group(3)
     if __ip and __netmask:
-        return str(IPv4Network(f"{__ip}/{__netmask}", strict=False))
+        return str(IPv4Network(f'{__ip}/{__netmask}', strict=False))
     elif __ip:
-        return str(IPv4Network("{}/{}".format(__ip, "24"), strict=False))
+        return str(IPv4Network('{}/{}'.format(__ip, '24'), strict=False))
     # invalid input ip
-    return ""
+    return ''
 
 
 def list_ips_in_network_range(iprange: str) -> list[str]:
@@ -122,14 +122,12 @@ async def reverse_single_ip(ip: str, resolver: DNSResolver) -> str:
     """
     try:
         __host = await resolver.gethostbyaddr(ip)
-        return __host.name if __host else ""
+        return __host.name if __host else ''
     except Exception:
-        return ""
+        return ''
 
 
-async def reverse_all_ips_in_range(
-    iprange: str, callback: Callable, nameservers: list[str] | None = None
-) -> None:
+async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers: list[str] | None = None) -> None:
     """
     Reverse all the IPs stored in a network range.
     All the queries are made concurrently.
@@ -176,8 +174,8 @@ def log_query(ip: str) -> None:
     -------
     out: None.
     """
-    sys.stdout.write(chr(27) + "[2K" + chr(27) + "[G")
-    sys.stdout.write("\r" + ip + " - ")
+    sys.stdout.write(chr(27) + '[2K' + chr(27) + '[G')
+    sys.stdout.write('\r' + ip + ' - ')
     sys.stdout.flush()

View file

@@ -7,29 +7,27 @@
 class SearchDuckDuckGo:
     def __init__(self, word, limit) -> None:
         self.word = word
-        self.results = ""
-        self.totalresults = ""
+        self.results = ''
+        self.totalresults = ''
         self.dorks: list = []
         self.links: list = []
-        self.database = "https://duckduckgo.com/?q="
-        self.api = "https://api.duckduckgo.com/?q=x&format=json&pretty=1"  # Currently using API.
-        self.quantity = "100"
+        self.database = 'https://duckduckgo.com/?q='
+        self.api = 'https://api.duckduckgo.com/?q=x&format=json&pretty=1'  # Currently using API.
+        self.quantity = '100'
         self.limit = limit
         self.proxy = False
 
     async def do_search(self) -> None:
         # Do normal scraping.
-        url = self.api.replace("x", self.word)
-        headers = {"User-Agent": Core.get_user_agent()}
-        first_resp = await AsyncFetcher.fetch_all(
-            [url], headers=headers, proxy=self.proxy
-        )
+        url = self.api.replace('x', self.word)
+        headers = {'User-Agent': Core.get_user_agent()}
+        first_resp = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
         self.results = first_resp[0]
         self.totalresults += self.results
         urls = await self.crawl(self.results)
         urls = {url for url in urls if len(url) > 5}
         all_resps = await AsyncFetcher.fetch_all(urls)
-        self.totalresults += "".join(all_resps)
+        self.totalresults += ''.join(all_resps)
 
     async def crawl(self, text):
         """
@@ -54,39 +52,27 @@ async def crawl(self, text):
                 if isinstance(val, dict):  # Validation check.
                     for key in val.keys():
                         value = val.get(key)
-                        if (
-                            isinstance(value, str)
-                            and value != ""
-                            and "https://" in value
-                            or "http://" in value
-                        ):
+                        if isinstance(value, str) and value != '' and 'https://' in value or 'http://' in value:
                             urls.add(value)
-                if (
-                    isinstance(val, str)
-                    and val != ""
-                    and "https://" in val
-                    or "http://" in val
-                ):
+                if isinstance(val, str) and val != '' and 'https://' in val or 'http://' in val:
                     urls.add(val)
 
             tmp = set()
             for url in urls:
-                if (
-                    "<" in url and "href=" in url
-                ):  # Format is <href="https://www.website.com"/>
-                    equal_index = url.index("=")
-                    true_url = ""
+                if '<' in url and 'href=' in url:  # Format is <href="https://www.website.com"/>
+                    equal_index = url.index('=')
+                    true_url = ''
                     for ch in url[equal_index + 1 :]:
                         if ch == '"':
                             tmp.add(true_url)
                             break
                         true_url += ch
                 else:
-                    if url != "":
+                    if url != '':
                         tmp.add(url)
             return tmp
         except Exception as e:
-            print(f"Exception occurred: {e}")
+            print(f'Exception occurred: {e}')
             return []
 
     async def get_emails(self):

View file

@@ -7,19 +7,19 @@ def __init__(self, word) -> None:
         self.word = word
         self.key = Core.fullhunt_key()
         if self.key is None:
-            raise MissingKey("fullhunt")
+            raise MissingKey('fullhunt')
         self.total_results = None
         self.proxy = False
 
     async def do_search(self) -> None:
-        url = f"https://fullhunt.io/api/v1/domain/{self.word}/subdomains"
+        url = f'https://fullhunt.io/api/v1/domain/{self.word}/subdomains'
         response = await AsyncFetcher.fetch_all(
             [url],
             json=True,
-            headers={"User-Agent": Core.get_user_agent(), "X-API-KEY": self.key},
+            headers={'User-Agent': Core.get_user_agent(), 'X-API-KEY': self.key},
             proxy=self.proxy,
         )
-        self.total_results = response[0]["hosts"]
+        self.total_results = response[0]['hosts']
 
     async def get_hostnames(self):
         return self.total_results

View file

@@ -28,8 +28,8 @@ class ErrorResult(NamedTuple):
 class SearchGithubCode:
     def __init__(self, word, limit) -> None:
         self.word = word
-        self.total_results = ""
-        self.server = "api.github.com"
+        self.total_results = ''
+        self.server = 'api.github.com'
         self.limit = limit
         self.counter: int = 0
         self.page: int | None = 1
@@ -38,17 +38,17 @@ def __init__(self, word, limit) -> None:
         # rate limits you more severely
         # https://developer.github.com/v3/search/#rate-limit
         if self.key is None:
-            raise MissingKey("Github")
+            raise MissingKey('Github')
         self.proxy = False
 
     @staticmethod
     async def fragments_from_response(json_data: dict) -> list[str]:
-        items: list[dict[str, Any]] = json_data.get("items") or list()
+        items: list[dict[str, Any]] = json_data.get('items') or list()
         fragments: list[str] = list()
         for item in items:
-            matches = item.get("text_matches") or list()
+            matches = item.get('text_matches') or list()
             for match in matches:
-                fragments.append(match.get("fragment"))
+                fragments.append(match.get('fragment'))
 
         return [fragment for fragment in fragments if fragment is not None]
@@ -56,22 +56,20 @@ async def fragments_from_response(json_data: dict) -> list[str]:
     async def page_from_response(page: str, links) -> int | None:
         page_link = links.get(page)
         if page_link:
-            parsed = urlparse.urlparse(str(page_link.get("url")))
+            parsed = urlparse.urlparse(str(page_link.get('url')))
             params = urlparse.parse_qs(parsed.query)
-            pages: list[Any] = params.get("page", [None])
+            pages: list[Any] = params.get('page', [None])
             page_number = pages[0] and int(pages[0])
             return page_number
         else:
             return None
 
-    async def handle_response(
-        self, response: tuple[str, dict, int, Any]
-    ) -> ErrorResult | RetryResult | SuccessResult:
+    async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
         text, json_data, status, links = response
         if status == 200:
             results = await self.fragments_from_response(json_data)
-            next_page = await self.page_from_response("next", links)
-            last_page = await self.page_from_response("last", links)
+            next_page = await self.page_from_response('next', links)
+            last_page = await self.page_from_response('last', links)
             return SuccessResult(results, next_page, last_page)
         elif status == 429 or status == 403:
             return RetryResult(60)
@@ -87,17 +85,15 @@ async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
         else:
             url = f'https://{self.server}/search/code?q="{self.word}"&page={page}'
         headers = {
-            "Host": self.server,
-            "User-agent": Core.get_user_agent(),
-            "Accept": "application/vnd.github.v3.text-match+json",
-            "Authorization": f"token {self.key}",
+            'Host': self.server,
+            'User-agent': Core.get_user_agent(),
+            'Accept': 'application/vnd.github.v3.text-match+json',
+            'Authorization': f'token {self.key}',
         }
         async with aiohttp.ClientSession(headers=headers) as sess:
             if self.proxy:
-                async with sess.get(
-                    url, proxy=random.choice(Core.proxy_list())
-                ) as resp:
+                async with sess.get(url, proxy=random.choice(Core.proxy_list())) as resp:
                     return await resp.text(), await resp.json(), resp.status, resp.links
             else:
                 async with sess.get(url) as resp:
@@ -117,7 +113,7 @@ async def process(self, proxy: bool = False) -> None:
                 api_response = await self.do_search(self.page)
                 result = await self.handle_response(api_response)
                 if isinstance(result, SuccessResult):
-                    print(f"\tSearching {self.counter} results.")
+                    print(f'\tSearching {self.counter} results.')
                     for fragment in result.fragments:
                         self.total_results += fragment
                         self.counter = self.counter + 1
@@ -125,16 +121,14 @@ async def process(self, proxy: bool = False) -> None:
                     await asyncio.sleep(get_delay())
                 elif isinstance(result, RetryResult):
                     sleepy_time = get_delay() + result.time
-                    print(f"\tRetrying page in {sleepy_time} seconds...")
+                    print(f'\tRetrying page in {sleepy_time} seconds...')
                     await asyncio.sleep(sleepy_time)
                 elif isinstance(result, ErrorResult):
-                    raise Exception(
-                        f"\tException occurred: status_code: {result.status_code} reason: {result.body}"
-                    )
+                    raise Exception(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
                 else:
-                    raise Exception("\tUnknown exception occurred")
+                    raise Exception('\tUnknown exception occurred')
             except Exception as e:
-                print(f"An exception has occurred: {e}")
+                print(f'An exception has occurred: {e}')
 
     async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)

View file

@@ -8,30 +8,24 @@ class SearchHackerTarget:
     def __init__(self, word) -> None:
         self.word = word
-        self.total_results = ""
-        self.hostname = "https://api.hackertarget.com"
+        self.total_results = ''
+        self.hostname = 'https://api.hackertarget.com'
         self.proxy = False
         self.results = None
 
     async def do_search(self) -> None:
-        headers = {"User-agent": Core.get_user_agent()}
+        headers = {'User-agent': Core.get_user_agent()}
         urls = [
-            f"{self.hostname}/hostsearch/?q={self.word}",
-            f"{self.hostname}/reversedns/?q={self.word}",
+            f'{self.hostname}/hostsearch/?q={self.word}',
+            f'{self.hostname}/reversedns/?q={self.word}',
         ]
-        responses = await AsyncFetcher.fetch_all(
-            urls, headers=headers, proxy=self.proxy
-        )
+        responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
         for response in responses:
-            self.total_results += response.replace(",", ":")
+            self.total_results += response.replace(',', ':')
 
     async def process(self, proxy: bool = False) -> None:
         self.proxy = proxy
         await self.do_search()
 
     async def get_hostnames(self) -> list:
-        return [
-            result
-            for result in self.total_results.splitlines()
-            if "No PTR records found" not in result
-        ]
+        return [result for result in self.total_results.splitlines() if 'No PTR records found' not in result]

View file

@@ -12,10 +12,10 @@ def __init__(self, word, limit, start) -> None:
         self.start = start
         self.key = Core.hunter_key()
         if self.key is None:
-            raise MissingKey("Hunter")
-        self.total_results = ""
+            raise MissingKey('Hunter')
+        self.total_results = ''
         self.counter = start
-        self.database = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10"
+        self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10'
         self.proxy = False
         self.hostnames: list = []
         self.emails: list = []
@@ -23,76 +23,56 @@ def __init__(self, word, limit, start) -> None:
     async def do_search(self) -> None:
         # First determine if a user account is not a free account, this call is free
         is_free = True
-        headers = {"User-Agent": Core.get_user_agent()}
-        acc_info_url = f"https://api.hunter.io/v2/account?api_key={self.key}"
-        response = await AsyncFetcher.fetch_all(
-            [acc_info_url], headers=headers, json=True
-        )
+        headers = {'User-Agent': Core.get_user_agent()}
+        acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}'
+        response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
         is_free = (
-            is_free
-            if "plan_name" in response[0]["data"].keys()
-            and response[0]["data"]["plan_name"].lower() == "free"
-            else False
+            is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() == 'free' else False
         )
         # Extract the total number of requests that are available for an account
         total_requests_avail = (
-            response[0]["data"]["requests"]["searches"]["available"]
-            - response[0]["data"]["requests"]["searches"]["used"]
+            response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used']
        )
        if is_free:
-            response = await AsyncFetcher.fetch_all(
-                [self.database], headers=headers, proxy=self.proxy, json=True
-            )
+            response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
            self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
        else:
            # Determine the total number of emails that are available
            # As the most emails you can get within one query are 100
            # This is only done where paid accounts are in play
-            hunter_dinfo_url = (
-                f"https://api.hunter.io/v2/email-count?domain={self.word}"
-            )
-            response = await AsyncFetcher.fetch_all(
-                [hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True
-            )
-            total_number_reqs = response[0]["data"]["total"] // 100
+            hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}'
+            response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True)
+            total_number_reqs = response[0]['data']['total'] // 100
            # Parse out meta field within initial JSON response to determine the total number of results
            if total_requests_avail < total_number_reqs:
-                print(
-                    "WARNING: account does not have enough requests to gather all emails"
-                )
-                print(
-                    f"Total requests available: {total_requests_avail}, total requests "
-                    f"needed to be made: {total_number_reqs}"
-                )
-                print(
-                    "RETURNING current results, if you would still like to "
-                    "run this module comment out the if request"
-                )
+                print('WARNING: account does not have enough requests to gather all emails')
+                print(
+                    f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
+                )
+                print('RETURNING current results, if you would still like to ' 'run this module comment out the if request')
                return
            self.limit = 100
            # max number of emails you can get per request is 100
            # increments of 100 with offset determining where to start
            # See docs for more details: https://hunter.io/api-documentation/v2#domain-search
            for offset in range(0, 100 * total_number_reqs, 100):
req_url = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}" req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}'
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
[req_url], headers=headers, proxy=self.proxy, json=True
)
temp_emails, temp_hostnames = await self.parse_resp(response[0]) temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails) self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames) self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1) await asyncio.sleep(1)
async def parse_resp(self, json_resp): async def parse_resp(self, json_resp):
emails = list(sorted({email["value"] for email in json_resp["data"]["emails"]})) emails = list(sorted({email['value'] for email in json_resp['data']['emails']}))
domains = list( domains = list(
sorted( sorted(
{ {
source["domain"] source['domain']
for email in json_resp["data"]["emails"] for email in json_resp['data']['emails']
for source in email["sources"] for source in email['sources']
if self.word in source["domain"] if self.word in source['domain']
} }
) )
) )


@@ -14,8 +14,8 @@ def __init__(self, word) -> None:
self.word = word self.word = word
self.key = Core.intelx_key() self.key = Core.intelx_key()
if self.key is None: if self.key is None:
raise MissingKey("Intelx") raise MissingKey('Intelx')
self.database = "https://2.intelx.io" self.database = 'https://2.intelx.io'
self.results: Any = None self.results: Any = None
self.info: tuple[Any, ...] = () self.info: tuple[Any, ...] = ()
self.limit: int = 10000 self.limit: int = 10000
@@ -28,34 +28,30 @@ async def do_search(self) -> None:
# API requests self identification # API requests self identification
# https://intelx.io/integrations # https://intelx.io/integrations
headers = { headers = {
"x-key": self.key, 'x-key': self.key,
"User-Agent": f"{Core.get_user_agent()}-theHarvester", 'User-Agent': f'{Core.get_user_agent()}-theHarvester',
} }
data = { data = {
"term": self.word, 'term': self.word,
"buckets": [], 'buckets': [],
"lookuplevel": 0, 'lookuplevel': 0,
"maxresults": self.limit, 'maxresults': self.limit,
"timeout": 5, 'timeout': 5,
"datefrom": "", 'datefrom': '',
"dateto": "", 'dateto': '',
"sort": 2, 'sort': 2,
"media": 0, 'media': 0,
"terminate": [], 'terminate': [],
"target": 0, 'target': 0,
} }
total_resp = requests.post( total_resp = requests.post(f'{self.database}/phonebook/search', headers=headers, json=data)
f"{self.database}/phonebook/search", headers=headers, json=data phonebook_id = ujson.loads(total_resp.text)['id']
)
phonebook_id = ujson.loads(total_resp.text)["id"]
await asyncio.sleep(5) await asyncio.sleep(5)
# Fetch results from phonebook based on ID # Fetch results from phonebook based on ID
resp = await AsyncFetcher.fetch_all( resp = await AsyncFetcher.fetch_all(
[ [f'{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}'],
f"{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}"
],
headers=headers, headers=headers,
json=True, json=True,
proxy=self.proxy, proxy=self.proxy,
@@ -63,7 +59,7 @@ async def do_search(self) -> None:
resp = resp[0] resp = resp[0]
self.results = resp # TODO: give self.results more appropriate typing self.results = resp # TODO: give self.results more appropriate typing
except Exception as e: except Exception as e:
print(f"An exception has occurred in Intelx: {e}") print(f'An exception has occurred in Intelx: {e}')
async def process(self, proxy: bool = False): async def process(self, proxy: bool = False):
self.proxy = proxy self.proxy = proxy


@@ -9,17 +9,15 @@ def __init__(self, word) -> None:
self.totalips: list = [] self.totalips: list = []
self.key = Core.netlas_key() self.key = Core.netlas_key()
if self.key is None: if self.key is None:
raise MissingKey("netlas") raise MissingKey('netlas')
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
api = f"https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*" api = f'https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*'
headers = {"X-API-Key": self.key} headers = {'X-API-Key': self.key}
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([api], json=True, headers=headers, proxy=self.proxy)
[api], json=True, headers=headers, proxy=self.proxy for domain in response[0]['items']:
) self.totalhosts.append(domain['data']['domain'])
for domain in response[0]["items"]:
self.totalhosts.append(domain["data"]["domain"])
async def get_hostnames(self) -> list: async def get_hostnames(self) -> list:
return self.totalhosts return self.totalhosts


@@ -9,28 +9,26 @@
class SearchOnyphe: class SearchOnyphe:
def __init__(self, word) -> None: def __init__(self, word) -> None:
self.word = word self.word = word
self.response = "" self.response = ''
self.totalhosts: set = set() self.totalhosts: set = set()
self.totalips: set = set() self.totalips: set = set()
self.asns: set = set() self.asns: set = set()
self.key = Core.onyphe_key() self.key = Core.onyphe_key()
if self.key is None: if self.key is None:
raise MissingKey("onyphe") raise MissingKey('onyphe')
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
# https://www.onyphe.io/docs/apis/search # https://www.onyphe.io/docs/apis/search
# https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT # https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT
# base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}' # base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}'
base_url = f"https://www.onyphe.io/api/v2/search/?q=domain:{self.word}" base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:{self.word}'
headers = { headers = {
"User-Agent": Core.get_user_agent(), 'User-Agent': Core.get_user_agent(),
"Content-Type": "application/json", 'Content-Type': 'application/json',
"Authorization": f"bearer {self.key}", 'Authorization': f'bearer {self.key}',
} }
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([base_url], json=True, headers=headers, proxy=self.proxy)
[base_url], json=True, headers=headers, proxy=self.proxy
)
self.response = response[0] self.response = response[0]
await self.parse_onyphe_resp_json() await self.parse_onyphe_resp_json()
@@ -38,74 +36,52 @@ async def parse_onyphe_resp_json(self):
if isinstance(self.response, list): if isinstance(self.response, list):
self.response = self.response[0] self.response = self.response[0]
if not isinstance(self.response, dict): if not isinstance(self.response, dict):
raise Exception(f"An exception has occurred {self.response} is not a dict") raise Exception(f'An exception has occurred {self.response} is not a dict')
if "Success" == self.response["text"]: if 'Success' == self.response['text']:
if "results" in self.response.keys(): if 'results' in self.response.keys():
for result in self.response["results"]: for result in self.response['results']:
try: try:
if "alternativeip" in result.keys(): if 'alternativeip' in result.keys():
self.totalips.update( self.totalips.update({altip for altip in result['alternativeip']})
{altip for altip in result["alternativeip"]} if 'url' in result.keys() and isinstance(result['url'], list):
)
if "url" in result.keys() and isinstance(result["url"], list):
self.totalhosts.update( self.totalhosts.update(
urlparse(url).netloc urlparse(url).netloc for url in result['url'] if urlparse(url).netloc.endswith(self.word)
for url in result["url"]
if urlparse(url).netloc.endswith(self.word)
) )
self.asns.add(result["asn"]) self.asns.add(result['asn'])
self.asns.add(result["geolocus"]["asn"]) self.asns.add(result['geolocus']['asn'])
self.totalips.add(result["geolocus"]["subnet"]) self.totalips.add(result['geolocus']['subnet'])
self.totalips.add(result["ip"]) self.totalips.add(result['ip'])
self.totalips.add(result["subnet"]) self.totalips.add(result['subnet'])
# Shouldn't be needed as API autoparses urls from html raw data # Shouldn't be needed as API autoparses urls from html raw data
# rawres = myparser.Parser(result['data'], self.word) # rawres = myparser.Parser(result['data'], self.word)
# if await rawres.hostnames(): # if await rawres.hostnames():
# self.totalhosts.update(set(await rawres.hostnames())) # self.totalhosts.update(set(await rawres.hostnames()))
for subdomain_key in [ for subdomain_key in [
"domain", 'domain',
"hostname", 'hostname',
"subdomains", 'subdomains',
"subject", 'subject',
"reverse", 'reverse',
"geolocus", 'geolocus',
]: ]:
if subdomain_key in result.keys(): if subdomain_key in result.keys():
if subdomain_key == "subject": if subdomain_key == 'subject':
self.totalhosts.update( self.totalhosts.update(
{ {domain for domain in result[subdomain_key]['altname'] if domain.endswith(self.word)}
domain
for domain in result[subdomain_key][
"altname"
]
if domain.endswith(self.word)
}
) )
elif subdomain_key == "geolocus": elif subdomain_key == 'geolocus':
self.totalhosts.update( self.totalhosts.update(
{ {domain for domain in result[subdomain_key]['domain'] if domain.endswith(self.word)}
domain
for domain in result[subdomain_key][
"domain"
]
if domain.endswith(self.word)
}
) )
else: else:
self.totalhosts.update( self.totalhosts.update(
{ {domain for domain in result[subdomain_key] if domain.endswith(self.word)}
domain
for domain in result[subdomain_key]
if domain.endswith(self.word)
}
) )
except Exception as e: except Exception as e:
print(f"An exception has occurred on result: {result}: {e}") print(f'An exception has occurred on result: {result}: {e}')
continue continue
else: else:
print( print(f'Onhyphe API query did not succeed dumping current response: {self.response}')
f"Onhyphe API query did not succeed dumping current response: {self.response}"
)
async def get_asns(self) -> set: async def get_asns(self) -> set:
return self.asns return self.asns


@@ -11,16 +11,14 @@ def __init__(self, word) -> None:
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
url = f"https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns" url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
responses = response[0] responses = response[0]
dct = responses dct = responses
self.totalhosts = {host["hostname"] for host in dct["passive_dns"]} self.totalhosts = {host['hostname'] for host in dct['passive_dns']}
# filter out ips that are just called NXDOMAIN # filter out ips that are just called NXDOMAIN
self.totalips = { self.totalips = {
ip["address"] ip['address'] for ip in dct['passive_dns'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip['address'])
for ip in dct["passive_dns"]
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip["address"])
} }
async def get_hostnames(self) -> set: async def get_hostnames(self) -> set:


@@ -12,54 +12,41 @@ def __init__(self, word) -> None:
self.word = word self.word = word
self.key = Core.pentest_tools_key() self.key = Core.pentest_tools_key()
if self.key is None: if self.key is None:
raise MissingKey("PentestTools") raise MissingKey('PentestTools')
self.total_results: list = [] self.total_results: list = []
self.api = f"https://pentest-tools.com/api?key={self.key}" self.api = f'https://pentest-tools.com/api?key={self.key}'
self.proxy = False self.proxy = False
async def poll(self, scan_id): async def poll(self, scan_id):
while True: while True:
time.sleep(3) time.sleep(3)
# Get the status of our scan # Get the status of our scan
scan_status_data = {"op": "get_scan_status", "scan_id": scan_id} scan_status_data = {'op': 'get_scan_status', 'scan_id': scan_id}
responses = await AsyncFetcher.post_fetch( responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy)
url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy
)
res_json = ujson.loads(responses.strip()) res_json = ujson.loads(responses.strip())
if res_json["op_status"] == "success": if res_json['op_status'] == 'success':
if ( if res_json['scan_status'] != 'waiting' and res_json['scan_status'] != 'running':
res_json["scan_status"] != "waiting"
and res_json["scan_status"] != "running"
):
getoutput_data = { getoutput_data = {
"op": "get_output", 'op': 'get_output',
"scan_id": scan_id, 'scan_id': scan_id,
"output_format": "json", 'output_format': 'json',
} }
responses = await AsyncFetcher.post_fetch( responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy)
url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy
)
res_json = ujson.loads(responses.strip("\n")) res_json = ujson.loads(responses.strip('\n'))
self.total_results = await self.parse_json(res_json) self.total_results = await self.parse_json(res_json)
break break
else: else:
print( print(f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}")
f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}"
)
break break
@staticmethod @staticmethod
async def parse_json(json_results): async def parse_json(json_results):
status = json_results["op_status"] status = json_results['op_status']
if status == "success": if status == 'success':
scan_tests = json_results["scan_output"]["output_json"] scan_tests = json_results['scan_output']['output_json']
output_data = scan_tests[0]["output_data"] output_data = scan_tests[0]['output_data']
host_to_ip = [ host_to_ip = [f'{subdomain[0]}:{subdomain[1]}' for subdomain in output_data if len(subdomain) > 0]
f"{subdomain[0]}:{subdomain[1]}"
for subdomain in output_data
if len(subdomain) > 0
]
return host_to_ip return host_to_ip
return [] return []
@@ -68,20 +55,18 @@ async def get_hostnames(self) -> list:
async def do_search(self) -> None: async def do_search(self) -> None:
subdomain_payload = { subdomain_payload = {
"op": "start_scan", 'op': 'start_scan',
"tool_id": 20, 'tool_id': 20,
"tool_params": { 'tool_params': {
"target": f"{self.word}", 'target': f'{self.word}',
"web_details": "off", 'web_details': 'off',
"do_smart_search": "off", 'do_smart_search': 'off',
}, },
} }
responses = await AsyncFetcher.post_fetch( responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy)
url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy
)
res_json = ujson.loads(responses.strip()) res_json = ujson.loads(responses.strip())
if res_json["op_status"] == "success": if res_json['op_status'] == 'success':
scan_id = res_json["scan_id"] scan_id = res_json['scan_id']
await self.poll(scan_id) await self.poll(scan_id)
async def process(self, proxy: bool = False) -> None: async def process(self, proxy: bool = False) -> None:


@@ -7,21 +7,19 @@ def __init__(self, word) -> None:
self.word = word self.word = word
self.key = Core.projectdiscovery_key() self.key = Core.projectdiscovery_key()
if self.key is None: if self.key is None:
raise MissingKey("ProjectDiscovery") raise MissingKey('ProjectDiscovery')
self.total_results = None self.total_results = None
self.proxy = False self.proxy = False
async def do_search(self): async def do_search(self):
url = f"https://dns.projectdiscovery.io/dns/{self.word}/subdomains" url = f'https://dns.projectdiscovery.io/dns/{self.word}/subdomains'
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[url], [url],
json=True, json=True,
headers={"User-Agent": Core.get_user_agent(), "Authorization": self.key}, headers={'User-Agent': Core.get_user_agent(), 'Authorization': self.key},
proxy=self.proxy, proxy=self.proxy,
) )
self.total_results = [ self.total_results = [f'{domains}.{self.word}' for domains in response[0]['subdomains']]
f"{domains}.{self.word}" for domains in response[0]["subdomains"]
]
async def get_hostnames(self): async def get_hostnames(self):
return self.total_results return self.total_results


@@ -11,33 +11,29 @@ def __init__(self, word) -> None:
async def do_search(self): async def do_search(self):
try: try:
headers = {"User-agent": Core.get_user_agent()} headers = {'User-agent': Core.get_user_agent()}
# TODO see if it's worth adding sameip searches # TODO see if it's worth adding sameip searches
# f'{self.hostname}/sameip/{self.word}?full=1#result' # f'{self.hostname}/sameip/{self.word}?full=1#result'
urls = [f"https://rapiddns.io/subdomain/{self.word}?full=1#result"] urls = [f'https://rapiddns.io/subdomain/{self.word}?full=1#result']
responses = await AsyncFetcher.fetch_all( responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
urls, headers=headers, proxy=self.proxy
)
if len(responses[0]) <= 1: if len(responses[0]) <= 1:
return self.total_results return self.total_results
soup = BeautifulSoup(responses[0], "html.parser") soup = BeautifulSoup(responses[0], 'html.parser')
rows = soup.find("table").find("tbody").find_all("tr") rows = soup.find('table').find('tbody').find_all('tr')
if rows: if rows:
# Validation check # Validation check
for row in rows: for row in rows:
cells = row.find_all("td") cells = row.find_all('td')
if len(cells) > 0: if len(cells) > 0:
# sanity check # sanity check
subdomain = str(cells[0].get_text()) subdomain = str(cells[0].get_text())
if cells[-1].get_text() == "CNAME": if cells[-1].get_text() == 'CNAME':
self.total_results.append(f"{subdomain}") self.total_results.append(f'{subdomain}')
else: else:
self.total_results.append( self.total_results.append(f'{subdomain}:{str(cells[1].get_text()).strip()}')
f"{subdomain}:{str(cells[1].get_text()).strip()}"
)
self.total_results = list({domain for domain in self.total_results}) self.total_results = list({domain for domain in self.total_results})
except Exception as e: except Exception as e:
print(f"An exception has occurred: {str(e)}") print(f'An exception has occurred: {str(e)}')
async def process(self, proxy: bool = False) -> None: async def process(self, proxy: bool = False) -> None:
self.proxy = proxy self.proxy = proxy


@@ -10,59 +10,50 @@ def __init__(self, word, limit) -> None:
self.word = word self.word = word
self.key = Core.rocketreach_key() self.key = Core.rocketreach_key()
if self.key is None: if self.key is None:
raise MissingKey("RocketReach") raise MissingKey('RocketReach')
self.hosts: set = set() self.hosts: set = set()
self.proxy = False self.proxy = False
self.baseurl = "https://rocketreach.co/api/v2/person/search" self.baseurl = 'https://rocketreach.co/api/v2/person/search'
self.links: set = set() self.links: set = set()
self.limit = limit self.limit = limit
async def do_search(self) -> None: async def do_search(self) -> None:
try: try:
headers = { headers = {
"Api-Key": self.key, 'Api-Key': self.key,
"Content-Type": "application/json", 'Content-Type': 'application/json',
"User-Agent": Core.get_user_agent(), 'User-Agent': Core.get_user_agent(),
} }
next_page = 1 # track pagination next_page = 1 # track pagination
for count in range(1, self.limit): for count in range(1, self.limit):
data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}' data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}'
result = await AsyncFetcher.post_fetch( result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
self.baseurl, headers=headers, data=data, json=True if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']:
)
if (
"detail" in result.keys()
and "error" in result.keys()
and "Subscribe to a plan to access" in result["detail"]
):
# No more results can be fetched # No more results can be fetched
break break
if ( if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
"detail" in result.keys()
and "Request was throttled." in result["detail"]
):
# Rate limit has been triggered need to sleep extra # Rate limit has been triggered need to sleep extra
print( print(
f"RocketReach requests have been throttled; " f"RocketReach requests have been throttled; "
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}' f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}'
) )
break break
if "profiles" in dict(result).keys(): if 'profiles' in dict(result).keys():
if len(result["profiles"]) == 0: if len(result['profiles']) == 0:
break break
for profile in result["profiles"]: for profile in result['profiles']:
if "linkedin_url" in dict(profile).keys(): if 'linkedin_url' in dict(profile).keys():
self.links.add(profile["linkedin_url"]) self.links.add(profile['linkedin_url'])
if "pagination" in dict(result).keys(): if 'pagination' in dict(result).keys():
next_page = int(result["pagination"]["next"]) next_page = int(result['pagination']['next'])
if next_page > int(result["pagination"]["total"]): if next_page > int(result['pagination']['total']):
break break
await asyncio.sleep(get_delay() + 5) await asyncio.sleep(get_delay() + 5)
except Exception as e: except Exception as e:
print(f"An exception has occurred: {e}") print(f'An exception has occurred: {e}')
async def get_links(self): async def get_links(self):
return self.links return self.links


@@ -13,55 +13,52 @@ def __init__(self, word) -> None:
self.total_hostnames: set = set() self.total_hostnames: set = set()
self.key = Core.hunterhow_key() self.key = Core.hunterhow_key()
if self.key is None: if self.key is None:
raise MissingKey("hunterhow") raise MissingKey('hunterhow')
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
# https://hunter.how/search-api # https://hunter.how/search-api
query = f'domain.suffix="{self.word}"' query = f'domain.suffix="{self.word}"'
# second_query = f'domain="{self.word}"' # second_query = f'domain="{self.word}"'
encoded_query = base64.urlsafe_b64encode(query.encode("utf-8")).decode("ascii") encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii')
page = 1 page = 1
page_size = 100 # can be either: 10,20,50,100) page_size = 100 # can be either: 10,20,50,100)
# The interval between the start time and the end time cannot exceed one year # The interval between the start time and the end time cannot exceed one year
# Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice # Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice
today = datetime.today() today = datetime.today()
one_year_ago = today - relativedelta(days=364) one_year_ago = today - relativedelta(days=364)
start_time = one_year_ago.strftime("%Y-%m-%d") start_time = one_year_ago.strftime('%Y-%m-%d')
end_time = today.strftime("%Y-%m-%d") end_time = today.strftime('%Y-%m-%d')
# two_years_ago = one_year_ago - relativedelta(days=364) # two_years_ago = one_year_ago - relativedelta(days=364)
# start_time = two_years_ago.strftime('%Y-%m-%d') # start_time = two_years_ago.strftime('%Y-%m-%d')
# end_time = one_year_ago.strftime('%Y-%m-%d') # end_time = one_year_ago.strftime('%Y-%m-%d')
url = ( url = 'https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s' % (
"https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s" # self.key, encoded_query, page, page_size, start_time, end_time
% ( self.key,
# self.key, encoded_query, page, page_size, start_time, end_time encoded_query,
self.key, page,
encoded_query, page_size,
page, start_time,
page_size, end_time,
start_time,
end_time,
)
) )
# print(f'Sending url: {url}') # print(f'Sending url: {url}')
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[url], [url],
json=True, json=True,
headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"}, headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
proxy=self.proxy, proxy=self.proxy,
) )
dct = response[0] dct = response[0]
# print(f'json response: ') # print(f'json response: ')
# print(dct) # print(dct)
if "code" in dct.keys(): if 'code' in dct.keys():
if dct["code"] == 40001: if dct['code'] == 40001:
print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}') print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}')
return return
# total = dct['data']['total'] # total = dct['data']['total']
# TODO determine if total is ever 100 how to get more subdomains? # TODO determine if total is ever 100 how to get more subdomains?
for sub in dct["data"]["list"]: for sub in dct['data']['list']:
self.total_hostnames.add(sub["domain"]) self.total_hostnames.add(sub['domain'])
async def get_hostnames(self) -> set: async def get_hostnames(self) -> set:
return self.total_hostnames return self.total_hostnames


@@ -10,41 +10,33 @@ def __init__(self, word) -> None:
self.word = word self.word = word
self.key = Core.security_trails_key() self.key = Core.security_trails_key()
if self.key is None: if self.key is None:
raise MissingKey("Securitytrail") raise MissingKey('Securitytrail')
self.results = "" self.results = ''
self.totalresults = "" self.totalresults = ''
self.api = "https://api.securitytrails.com/v1/" self.api = 'https://api.securitytrails.com/v1/'
self.info: tuple[set, set] = (set(), set()) self.info: tuple[set, set] = (set(), set())
self.proxy = False self.proxy = False
async def authenticate(self) -> None: async def authenticate(self) -> None:
# Method to authenticate API key before sending requests. # Method to authenticate API key before sending requests.
headers = {"APIKEY": self.key} headers = {'APIKEY': self.key}
url = f"{self.api}ping" url = f'{self.api}ping'
auth_responses = await AsyncFetcher.fetch_all( auth_responses = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
[url], headers=headers, proxy=self.proxy
)
auth_responses = auth_responses[0] auth_responses = auth_responses[0]
if "False" in auth_responses or "Invalid authentication" in auth_responses: if 'False' in auth_responses or 'Invalid authentication' in auth_responses:
print("\tKey could not be authenticated exiting program.") print('\tKey could not be authenticated exiting program.')
await asyncio.sleep(5) await asyncio.sleep(5)
async def do_search(self) -> None: async def do_search(self) -> None:
# https://api.securitytrails.com/v1/domain/domain.com # https://api.securitytrails.com/v1/domain/domain.com
url = f"{self.api}domain/{self.word}" url = f'{self.api}domain/{self.word}'
headers = {"APIKEY": self.key} headers = {'APIKEY': self.key}
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
[url], headers=headers, proxy=self.proxy await asyncio.sleep(5) # Not random delay because 2 seconds is required due to rate limit.
)
await asyncio.sleep(
5
) # Not random delay because 2 seconds is required due to rate limit.
self.results = response[0] self.results = response[0]
self.totalresults += self.results self.totalresults += self.results
url += "/subdomains" # Get subdomains now. url += '/subdomains' # Get subdomains now.
subdomain_response = await AsyncFetcher.fetch_all( subdomain_response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
[url], headers=headers, proxy=self.proxy
)
await asyncio.sleep(5) await asyncio.sleep(5)
self.results = subdomain_response[0] self.results = subdomain_response[0]
self.totalresults += self.results self.totalresults += self.results
@@ -56,7 +48,7 @@ async def process(self, proxy: bool = False) -> None:
parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults) parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults)
self.info = await parser.parse_text() self.info = await parser.parse_text()
# Create parser and set self.info to tuple returned from parsing text. # Create parser and set self.info to tuple returned from parsing text.
print("\tDone Searching Results") print('\tDone Searching Results')
async def get_ips(self) -> set: async def get_ips(self) -> set:
return self.info[0] return self.info[0]


@@ -10,7 +10,7 @@ class SearchShodan:
def __init__(self) -> None: def __init__(self) -> None:
self.key = Core.shodan_key() self.key = Core.shodan_key()
if self.key is None: if self.key is None:
raise MissingKey("Shodan") raise MissingKey('Shodan')
self.api = Shodan(self.key) self.api = Shodan(self.key)
self.hostdatarow: list = [] self.hostdatarow: list = []
self.tracker: OrderedDict = OrderedDict() self.tracker: OrderedDict = OrderedDict()
@@ -19,81 +19,81 @@ async def search_ip(self, ip) -> OrderedDict:
try: try:
ipaddress = ip ipaddress = ip
results = self.api.host(ipaddress) results = self.api.host(ipaddress)
asn = "" asn = ''
domains: list = list() domains: list = list()
hostnames: list = list() hostnames: list = list()
ip_str = "" ip_str = ''
isp = "" isp = ''
org = "" org = ''
ports: list = list() ports: list = list()
title = "" title = ''
server = "" server = ''
product = "" product = ''
technologies: list = list() technologies: list = list()
data_first_dict = dict(results["data"][0]) data_first_dict = dict(results['data'][0])
if "ip_str" in data_first_dict.keys(): if 'ip_str' in data_first_dict.keys():
ip_str += data_first_dict["ip_str"] ip_str += data_first_dict['ip_str']
if "http" in data_first_dict.keys(): if 'http' in data_first_dict.keys():
http_results_dict = dict(data_first_dict["http"]) http_results_dict = dict(data_first_dict['http'])
if "title" in http_results_dict.keys(): if 'title' in http_results_dict.keys():
title_val = str(http_results_dict["title"]).strip() title_val = str(http_results_dict['title']).strip()
if title_val != "None": if title_val != 'None':
title += title_val title += title_val
if "components" in http_results_dict.keys(): if 'components' in http_results_dict.keys():
for key in http_results_dict["components"].keys(): for key in http_results_dict['components'].keys():
technologies.append(key) technologies.append(key)
if "server" in http_results_dict.keys(): if 'server' in http_results_dict.keys():
server_val = str(http_results_dict["server"]).strip() server_val = str(http_results_dict['server']).strip()
if server_val != "None": if server_val != 'None':
server += server_val server += server_val
for key, value in results.items(): for key, value in results.items():
if key == "asn": if key == 'asn':
asn += value asn += value
if key == "domains": if key == 'domains':
value = list(value) value = list(value)
value.sort() value.sort()
domains.extend(value) domains.extend(value)
if key == "hostnames": if key == 'hostnames':
value = [host.strip() for host in list(value)] value = [host.strip() for host in list(value)]
value.sort() value.sort()
hostnames.extend(value) hostnames.extend(value)
if key == "isp": if key == 'isp':
isp += value isp += value
if key == "org": if key == 'org':
org += str(value) org += str(value)
if key == "ports": if key == 'ports':
value = list(value) value = list(value)
value.sort() value.sort()
ports.extend(value) ports.extend(value)
if key == "product": if key == 'product':
product += value product += value
technologies = list(set(technologies)) technologies = list(set(technologies))
self.tracker[ip] = { self.tracker[ip] = {
"asn": asn.strip(), 'asn': asn.strip(),
"domains": domains, 'domains': domains,
"hostnames": hostnames, 'hostnames': hostnames,
"ip_str": ip_str.strip(), 'ip_str': ip_str.strip(),
"isp": isp.strip(), 'isp': isp.strip(),
"org": org.strip(), 'org': org.strip(),
"ports": ports, 'ports': ports,
"product": product.strip(), 'product': product.strip(),
"server": server.strip(), 'server': server.strip(),
"technologies": technologies, 'technologies': technologies,
"title": title.strip(), 'title': title.strip(),
} }
return self.tracker return self.tracker
except exception.APIError: except exception.APIError:
print(f"{ip}: Not in Shodan") print(f'{ip}: Not in Shodan')
self.tracker[ip] = "Not in Shodan" self.tracker[ip] = 'Not in Shodan'
except Exception as e: except Exception as e:
# print(f'Error occurred in the Shodan IP search module: {e}') # print(f'Error occurred in the Shodan IP search module: {e}')
self.tracker[ip] = f"Error occurred in the Shodan IP search module: {e}" self.tracker[ip] = f'Error occurred in the Shodan IP search module: {e}'
finally: finally:
return self.tracker return self.tracker


@@ -10,7 +10,7 @@ class SearchSitedossier:
def __init__(self, word): def __init__(self, word):
self.word = word self.word = word
self.totalhosts = set() self.totalhosts = set()
self.server = "www.sitedossier.com" self.server = 'www.sitedossier.com'
self.proxy = False self.proxy = False
async def do_search(self): async def do_search(self):
@@ -18,92 +18,75 @@ async def do_search(self):
# This site seems to yield a lot of results but is a bit annoying to scrape # This site seems to yield a lot of results but is a bit annoying to scrape
# Hence the need for delays after each request to get the most results # Hence the need for delays after each request to get the most results
# Feel free to tweak the delays as needed # Feel free to tweak the delays as needed
url = f"http://{self.server}/parentdomain/{self.word}" url = f'http://{self.server}/parentdomain/{self.word}'
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
[url], headers=headers, proxy=self.proxy
)
base_response = response[0] base_response = response[0]
soup = BeautifulSoup(base_response, "html.parser") soup = BeautifulSoup(base_response, 'html.parser')
# iter_counter = 1 # iter_counter = 1
# iterations_needed = total_number // 100 # iterations_needed = total_number // 100
# iterations_needed += 1 # iterations_needed += 1
flagged_counter = 0 flagged_counter = 0
stop_conditions = ["End of list.", "No data currently available."] stop_conditions = ['End of list.', 'No data currently available.']
bot_string = ( bot_string = (
"Our web servers have detected unusual or excessive requests " 'Our web servers have detected unusual or excessive requests '
'from your computer or network. Please enter the unique "word"' 'from your computer or network. Please enter the unique "word"'
" below to confirm that you are a human interactively using this site." ' below to confirm that you are a human interactively using this site.'
) )
if ( if (
stop_conditions[0] not in base_response stop_conditions[0] not in base_response and stop_conditions[1] not in base_response
and stop_conditions[1] not in base_response
) and bot_string not in base_response: ) and bot_string not in base_response:
total_number = soup.find("i") total_number = soup.find('i')
total_number = int( total_number = int(total_number.text.strip().split(' ')[-1].replace(',', ''))
total_number.text.strip().split(" ")[-1].replace(",", "") hrefs = soup.find_all('a', href=True)
)
hrefs = soup.find_all("a", href=True)
for a in hrefs: for a in hrefs:
unparsed = a["href"] unparsed = a['href']
if "/site/" in unparsed: if '/site/' in unparsed:
subdomain = str(unparsed.split("/")[-1]).lower() subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain) self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay()) await asyncio.sleep(get_delay() + 15 + get_delay())
for i in range(101, total_number, 100): for i in range(101, total_number, 100):
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
iter_url = f"http://{self.server}/parentdomain/{self.word}/{i}" iter_url = f'http://{self.server}/parentdomain/{self.word}/{i}'
print(f"My current iter_url: {iter_url}") print(f'My current iter_url: {iter_url}')
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([iter_url], headers=headers, proxy=self.proxy)
[iter_url], headers=headers, proxy=self.proxy
)
response = response[0] response = response[0]
if ( if stop_conditions[0] in response or stop_conditions[1] in response or flagged_counter >= 3:
stop_conditions[0] in response
or stop_conditions[1] in response
or flagged_counter >= 3
):
break break
if bot_string in response: if bot_string in response:
new_sleep_time = get_delay() * 30 new_sleep_time = get_delay() * 30
print( print(f'Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds')
f"Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds"
)
flagged_counter += 1 flagged_counter += 1
await asyncio.sleep(new_sleep_time) await asyncio.sleep(new_sleep_time)
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[iter_url], [iter_url],
headers={"User-Agent": Core.get_user_agent()}, headers={'User-Agent': Core.get_user_agent()},
proxy=self.proxy, proxy=self.proxy,
) )
response = response[0] response = response[0]
if bot_string in response: if bot_string in response:
new_sleep_time = get_delay() * 30 * get_delay() new_sleep_time = get_delay() * 30 * get_delay()
print( print(
f"Still triggering a captcha, sleeping longer for: {new_sleep_time}" f'Still triggering a captcha, sleeping longer for: {new_sleep_time}'
f" and skipping this batch: {iter_url}" f' and skipping this batch: {iter_url}'
) )
await asyncio.sleep(new_sleep_time) await asyncio.sleep(new_sleep_time)
flagged_counter += 1 flagged_counter += 1
if flagged_counter >= 3: if flagged_counter >= 3:
break break
soup = BeautifulSoup(response, "html.parser") soup = BeautifulSoup(response, 'html.parser')
hrefs = soup.find_all("a", href=True) hrefs = soup.find_all('a', href=True)
for a in hrefs: for a in hrefs:
unparsed = a["href"] unparsed = a['href']
if "/site/" in unparsed: if '/site/' in unparsed:
subdomain = str(unparsed.split("/")[-1]).lower() subdomain = str(unparsed.split('/')[-1]).lower()
self.totalhosts.add(subdomain) self.totalhosts.add(subdomain)
await asyncio.sleep(get_delay() + 15 + get_delay()) await asyncio.sleep(get_delay() + 15 + get_delay())
print(f"In total found: {len(self.totalhosts)}") print(f'In total found: {len(self.totalhosts)}')
print(self.totalhosts) print(self.totalhosts)
else: else:
print( print('Sitedossier module has triggered a captcha on first iteration, no results can be found.')
"Sitedossier module has triggered a captcha on first iteration, no results can be found." print('Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module')
)
print(
"Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module"
)
async def get_hostnames(self): async def get_hostnames(self):
return self.totalhosts return self.totalhosts


@@ -5,23 +5,18 @@ class SubdomainCenter:
def __init__(self, word): def __init__(self, word):
self.word = word self.word = word
self.results = set() self.results = set()
self.server = "https://api.subdomain.center/?domain=" self.server = 'https://api.subdomain.center/?domain='
self.proxy = False self.proxy = False
async def do_search(self): async def do_search(self):
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
try: try:
current_url = f"{self.server}{self.word}" current_url = f'{self.server}{self.word}'
resp = await AsyncFetcher.fetch_all( resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy, json=True)
[current_url], headers=headers, proxy=self.proxy, json=True
)
self.results = resp[0] self.results = resp[0]
self.results = { self.results = {sub[4:] if sub[:4] == 'www.' and sub[4:] else sub for sub in self.results}
sub[4:] if sub[:4] == "www." and sub[4:] else sub
for sub in self.results
}
except Exception as e: except Exception as e:
print(f"An exception has occurred in SubdomainCenter on : {e}") print(f'An exception has occurred in SubdomainCenter on : {e}')
async def get_hostnames(self): async def get_hostnames(self):
return self.results return self.results


@@ -14,24 +14,20 @@ def __init__(self, word) -> None:
self.total_results: set = set() self.total_results: set = set()
self.proxy = False self.proxy = False
# TODO add api support # TODO add api support
self.server = "https://subdomainfinder.c99.nl/" self.server = 'https://subdomainfinder.c99.nl/'
self.totalresults = "" self.totalresults = ''
async def do_search(self) -> None: async def do_search(self) -> None:
# Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962 # Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
resp = await AsyncFetcher.fetch_all( resp = await AsyncFetcher.fetch_all([self.server], headers=headers, proxy=self.proxy)
[self.server], headers=headers, proxy=self.proxy
)
data = await self.get_csrf_params(resp[0]) data = await self.get_csrf_params(resp[0])
data["scan_subdomains"] = "" data['scan_subdomains'] = ''
data["domain"] = self.word data['domain'] = self.word
data["privatequery"] = "on" data['privatequery'] = 'on'
await asyncio.sleep(get_delay()) await asyncio.sleep(get_delay())
second_resp = await AsyncFetcher.post_fetch( second_resp = await AsyncFetcher.post_fetch(self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data))
self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data)
)
# print(second_resp) # print(second_resp)
self.totalresults += second_resp self.totalresults += second_resp
@@ -55,10 +51,10 @@ async def process(self, proxy: bool = False) -> None:
@staticmethod @staticmethod
async def get_csrf_params(data): async def get_csrf_params(data):
csrf_params = {} csrf_params = {}
html = BeautifulSoup(data, "html.parser").find("div", {"class": "input-group"}) html = BeautifulSoup(data, 'html.parser').find('div', {'class': 'input-group'})
for c in html.find_all("input"): for c in html.find_all('input'):
try: try:
csrf_params[c.get("name")] = c.get("value") csrf_params[c.get('name')] = c.get('value')
except Exception: except Exception:
continue continue


@@ -18,70 +18,59 @@ def __init__(self, hosts) -> None:
async def populate_fingerprints(self): async def populate_fingerprints(self):
# Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints # Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints
populate_url = "https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json" populate_url = 'https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json'
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
response = await AsyncFetcher.fetch_all([populate_url], headers=headers) response = await AsyncFetcher.fetch_all([populate_url], headers=headers)
try: try:
resp = response[0] resp = response[0]
unparsed_json = ujson.loads(resp) unparsed_json = ujson.loads(resp)
for unparsed_fingerprint in unparsed_json: for unparsed_fingerprint in unparsed_json:
if unparsed_fingerprint["service"] in ["Smugsmug"]: if unparsed_fingerprint['service'] in ['Smugsmug']:
# Subdomain must be in format domain.smugsmug.com # Subdomain must be in format domain.smugsmug.com
# This will never happen as subdomains are parsed and filtered to be in format of *.word.com # This will never happen as subdomains are parsed and filtered to be in format of *.word.com
continue continue
if ( if unparsed_fingerprint['status'] == 'Vulnerable' or unparsed_fingerprint['status'] == 'Edge case':
unparsed_fingerprint["status"] == "Vulnerable" self.fingerprints[unparsed_fingerprint['fingerprint']] = unparsed_fingerprint['service']
or unparsed_fingerprint["status"] == "Edge case"
):
self.fingerprints[unparsed_fingerprint["fingerprint"]] = (
unparsed_fingerprint["service"]
)
except Exception as e: except Exception as e:
print( print(f'An exception has occurred populating takeover fingerprints: {e}, defaulting to static list')
f"An exception has occurred populating takeover fingerprints: {e}, defaulting to static list"
)
self.fingerprints = { self.fingerprints = {
"'Trying to access your account?'": "Campaign Monitor", "'Trying to access your account?'": 'Campaign Monitor',
"404 Not Found": "Fly.io", '404 Not Found': 'Fly.io',
"404 error unknown site!": "Pantheon", '404 error unknown site!': 'Pantheon',
"Do you want to register *.wordpress.com?": "Wordpress", 'Do you want to register *.wordpress.com?': 'Wordpress',
"Domain uses DO name serves with no records in DO.": "Digital Ocean", 'Domain uses DO name serves with no records in DO.': 'Digital Ocean',
"It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": "LaunchRock", "It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": 'LaunchRock',
"No Site For Domain": "Kinsta", 'No Site For Domain': 'Kinsta',
"No settings were found for this company:": "Help Scout", 'No settings were found for this company:': 'Help Scout',
"Project doesnt exist... yet!": "Readme.io", 'Project doesnt exist... yet!': 'Readme.io',
"Repository not found": "Bitbucket", 'Repository not found': 'Bitbucket',
"The feed has not been found.": "Feedpress", 'The feed has not been found.': 'Feedpress',
"No such app": "Heroku", 'No such app': 'Heroku',
"The specified bucket does not exist": "AWS/S3", 'The specified bucket does not exist': 'AWS/S3',
"The thing you were looking for is no longer here, or never was": "Ghost", 'The thing you were looking for is no longer here, or never was': 'Ghost',
"There isn't a Github Pages site here.": "Github", "There isn't a Github Pages site here.": 'Github',
"This UserVoice subdomain is currently available!": "UserVoice", 'This UserVoice subdomain is currently available!': 'UserVoice',
"Uh oh. That page doesn't exist.": "Intercom", "Uh oh. That page doesn't exist.": 'Intercom',
"We could not find what you're looking for.": "Help Juice", "We could not find what you're looking for.": 'Help Juice',
"Whatever you were looking for doesn't currently exist at this address": "Tumblr", "Whatever you were looking for doesn't currently exist at this address": 'Tumblr',
"is not a registered InCloud YouTrack": "JetBrains", 'is not a registered InCloud YouTrack': 'JetBrains',
"page not found": "Uptimerobot", 'page not found': 'Uptimerobot',
"project not found": "Surge.sh", 'project not found': 'Surge.sh',
} }
async def check(self, url, resp) -> None: async def check(self, url, resp) -> None:
# Simple function that takes response and checks if any fingerprints exist # Simple function that takes response and checks if any fingerprints exist
# If a fingerprint exists figures out which one and prints it out # If a fingerprint exists figures out which one and prints it out
regex = re.compile( regex = re.compile('(?=(' + '|'.join(map(re.escape, list(self.fingerprints.keys()))) + '))')
"(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))"
)
# Sanitize fingerprints # Sanitize fingerprints
matches = re.findall(regex, resp) matches = re.findall(regex, resp)
matches = list(set(matches)) matches = list(set(matches))
for match in matches: for match in matches:
print(f"\t\033[91m Takeover detected: {url}\033[1;32;40m") print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m')
if match in self.fingerprints.keys(): if match in self.fingerprints.keys():
# Validation check as to not error out # Validation check as to not error out
service = self.fingerprints[match] service = self.fingerprints[match]
print( print(f'\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m')
f"\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m"
)
self.results[url].append({match: service}) self.results[url].append({match: service})
async def do_take(self) -> None: async def do_take(self) -> None:
@@ -89,13 +78,11 @@ async def do_take(self) -> None:
if len(self.hosts) > 0: if len(self.hosts) > 0:
# Returns a list of tuples in this format: (url, response) # Returns a list of tuples in this format: (url, response)
# Filter out responses whose responses are empty strings (indicates errored) # Filter out responses whose responses are empty strings (indicates errored)
https_hosts = [f"https://{host}" for host in self.hosts] https_hosts = [f'https://{host}' for host in self.hosts]
http_hosts = [f"http://{host}" for host in self.hosts] http_hosts = [f'http://{host}' for host in self.hosts]
all_hosts = https_hosts + http_hosts all_hosts = https_hosts + http_hosts
shuffle(all_hosts) shuffle(all_hosts)
resps: list = await AsyncFetcher.fetch_all( resps: list = await AsyncFetcher.fetch_all(all_hosts, takeover=True, proxy=self.proxy)
all_hosts, takeover=True, proxy=self.proxy
)
for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1): for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1):
await self.check(url, resp) await self.check(url, resp)
else: else:


@@ -9,15 +9,13 @@ def __init__(self, word) -> None:
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5" url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts = {host for host in response[0]["results"]} self.totalhosts = {host for host in response[0]['results']}
second_url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2" second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
secondresp = await AsyncFetcher.fetch_all( secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy)
[second_url], json=True, proxy=self.proxy
)
try: try:
self.totalips = {resp["ip"] for resp in secondresp[0]["results"]} self.totalips = {resp['ip'] for resp in secondresp[0]['results']}
except TypeError: except TypeError:
pass pass


@@ -12,12 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start self.start = start
self.key = Core.tomba_key() self.key = Core.tomba_key()
if self.key[0] is None or self.key[1] is None: if self.key[0] is None or self.key[1] is None:
raise MissingKey("Tomba Key and/or Secret") raise MissingKey('Tomba Key and/or Secret')
self.total_results = "" self.total_results = ''
self.counter = start self.counter = start
self.database = ( self.database = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10'
f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10"
)
self.proxy = False self.proxy = False
self.hostnames: list = [] self.hostnames: list = []
self.emails: list = [] self.emails: list = []
@@ -26,49 +24,38 @@ async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free # First determine if a user account is not a free account, this call is free
is_free = True is_free = True
headers = { headers = {
"User-Agent": Core.get_user_agent(), 'User-Agent': Core.get_user_agent(),
"X-Tomba-Key": self.key[0], 'X-Tomba-Key': self.key[0],
"X-Tomba-Secret": self.key[1], 'X-Tomba-Secret': self.key[1],
} }
acc_info_url = "https://api.tomba.io/v1/me" acc_info_url = 'https://api.tomba.io/v1/me'
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
[acc_info_url], headers=headers, json=True
)
is_free = ( is_free = (
is_free is_free
if "name" in response[0]["data"]["pricing"].keys() if 'name' in response[0]['data']['pricing'].keys() and response[0]['data']['pricing']['name'].lower() == 'free'
and response[0]["data"]["pricing"]["name"].lower() == "free"
else False else False
) )
# Extract the total number of requests that are available for an account # Extract the total number of requests that are available for an account
total_requests_avail = ( total_requests_avail = (
response[0]["data"]["requests"]["domains"]["available"] response[0]['data']['requests']['domains']['available'] - response[0]['data']['requests']['domains']['used']
- response[0]["data"]["requests"]["domains"]["used"]
) )
if is_free: if is_free:
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
[self.database], headers=headers, proxy=self.proxy, json=True
)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0]) self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else: else:
# Determine the total number of emails that are available # Determine the total number of emails that are available
# As the most emails you can get within one query are 100 # As the most emails you can get within one query are 100
# This is only done where paid accounts are in play # This is only done where paid accounts are in play
tomba_counter = f"https://api.tomba.io/v1/email-count?domain={self.word}" tomba_counter = f'https://api.tomba.io/v1/email-count?domain={self.word}'
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([tomba_counter], headers=headers, proxy=self.proxy, json=True)
[tomba_counter], headers=headers, proxy=self.proxy, json=True total_number_reqs = response[0]['data']['total'] // 100
)
total_number_reqs = response[0]["data"]["total"] // 100
# Parse out meta field within initial JSON response to determine the total number of results # Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs: if total_requests_avail < total_number_reqs:
print('WARNING: The account does not have enough requests to gather all the emails.')
print( print(
"WARNING: The account does not have enough requests to gather all the emails." f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
)
print(
f"Total requests available: {total_requests_avail}, total requests "
f"needed to be made: {total_number_reqs}"
) )
print( print(
'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.' 'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.'
@ -79,24 +66,22 @@ async def do_search(self) -> None:
# increments of max number with page determining where to start # increments of max number with page determining where to start
# See docs for more details: https://developer.tomba.io/#domain-search # See docs for more details: https://developer.tomba.io/#domain-search
for page in range(0, total_number_reqs + 1): for page in range(0, total_number_reqs + 1):
req_url = f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}" req_url = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}'
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
[req_url], headers=headers, proxy=self.proxy, json=True
)
temp_emails, temp_hostnames = await self.parse_resp(response[0]) temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails) self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames) self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1) await asyncio.sleep(1)
async def parse_resp(self, json_resp): async def parse_resp(self, json_resp):
emails = list(sorted({email["email"] for email in json_resp["data"]["emails"]})) emails = list(sorted({email['email'] for email in json_resp['data']['emails']}))
domains = list( domains = list(
sorted( sorted(
{ {
source["website_url"] source['website_url']
for email in json_resp["data"]["emails"] for email in json_resp['data']['emails']
for source in email["sources"] for source in email['sources']
if self.word in source["website_url"] if self.word in source['website_url']
} }
) )
) )
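A hypothetical worked example of the paid-account paging arithmetic above; the 250 figure is made up.

# Suppose the /v1/email-count endpoint reported 250 addresses for the target domain:
total_emails = 250                          # made-up value
total_number_reqs = total_emails // 100     # -> 2, since at most 100 emails come back per domain-search query
# The loop above would then fetch pages 0, 1 and 2 of /v1/domain-search,
# sleeping one second between requests before merging the parsed results:
print(list(range(0, total_number_reqs + 1)))  # [0, 1, 2]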


@ -11,25 +11,17 @@ def __init__(self, word) -> None:
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
url = f"https://urlscan.io/api/v1/search/?q=domain:{self.word}" url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}'
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy) response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
resp = response[0] resp = response[0]
self.totalhosts = {f"{page['page']['domain']}" for page in resp["results"]} self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
self.totalips = { self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
f"{page['page']['ip']}"
for page in resp["results"]
if "ip" in page["page"].keys()
}
self.interestingurls = { self.interestingurls = {
f"{page['page']['url']}" f"{page['page']['url']}"
for page in resp["results"] for page in resp['results']
if self.word in page["page"]["url"] and "url" in page["page"].keys() if self.word in page['page']['url'] and 'url' in page['page'].keys()
}
self.totalasns = {
f"{page['page']['asn']}"
for page in resp["results"]
if "asn" in page["page"].keys()
} }
self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
async def get_hostnames(self) -> set: async def get_hostnames(self) -> set:
return self.totalhosts return self.totalhosts
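The comprehensions above assume a urlscan.io response of roughly the following shape. Only the field names ('results', 'page', 'domain', 'ip', 'url', 'asn') come from the code; the values are made up.

resp = {
    'results': [
        {
            'page': {
                'domain': 'app.example.com',
                'ip': '93.184.216.34',
                'url': 'https://app.example.com/login',
                'asn': 'AS15133',
            }
        }
    ]
}
hosts = {page['page']['domain'] for page in resp['results']}
ips = {page['page']['ip'] for page in resp['results'] if 'ip' in page['page']}
print(hosts, ips)  # {'app.example.com'} {'93.184.216.34'}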


@ -8,7 +8,7 @@ class SearchVirustotal:
def __init__(self, word) -> None: def __init__(self, word) -> None:
self.key = Core.virustotal_key() self.key = Core.virustotal_key()
if self.key is None: if self.key is None:
raise MissingKey("virustotal") raise MissingKey('virustotal')
self.word = word self.word = word
self.proxy = False self.proxy = False
self.hostnames: list = [] self.hostnames: list = []
@ -18,14 +18,12 @@ async def do_search(self) -> None:
# based on: https://developers.virustotal.com/reference/domains-relationships # based on: https://developers.virustotal.com/reference/domains-relationships
# base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40" # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
headers = { headers = {
"User-Agent": Core.get_user_agent(), 'User-Agent': Core.get_user_agent(),
"Accept": "application/json", 'Accept': 'application/json',
"x-apikey": self.key, 'x-apikey': self.key,
} }
base_url = ( base_url = f'https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40'
f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40" cursor = ''
)
cursor = ""
count = 0 count = 0
fail_counter = 0 fail_counter = 0
counter = 0 counter = 0
@ -37,42 +35,29 @@ async def do_search(self) -> None:
# TODO add timer logic if proven to be needed # TODO add timer logic if proven to be needed
# in the meantime sleeping 16 seconds should eliminate hitting the rate limit # in the meantime sleeping 16 seconds should eliminate hitting the rate limit
# in case rate limit is hit, fail counter exists and sleep for 65 seconds # in case rate limit is hit, fail counter exists and sleep for 65 seconds
send_url = ( send_url = base_url + '&cursor=' + cursor if cursor != '' and len(cursor) > 2 else base_url
base_url + "&cursor=" + cursor responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
if cursor != "" and len(cursor) > 2
else base_url
)
responses = await AsyncFetcher.fetch_all(
[send_url], headers=headers, proxy=self.proxy, json=True
)
jdata = responses[0] jdata = responses[0]
if "data" not in jdata.keys(): if 'data' not in jdata.keys():
await asyncio.sleep(60 + 5) await asyncio.sleep(60 + 5)
fail_counter += 1 fail_counter += 1
if "meta" in jdata.keys(): if 'meta' in jdata.keys():
cursor = ( cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
jdata["meta"]["cursor"] if "cursor" in jdata["meta"].keys() else "" if len(cursor) == 0 and 'data' in jdata.keys():
)
if len(cursor) == 0 and "data" in jdata.keys():
# if cursor no longer is within the meta field have hit last entry # if cursor no longer is within the meta field have hit last entry
breakcon = True breakcon = True
count += jdata["meta"]["count"] count += jdata['meta']['count']
if count == 0 or fail_counter >= 2: if count == 0 or fail_counter >= 2:
break break
if "data" in jdata.keys(): if 'data' in jdata.keys():
data = jdata["data"] data = jdata['data']
self.hostnames.extend(await self.parse_hostnames(data, self.word)) self.hostnames.extend(await self.parse_hostnames(data, self.word))
counter += 1 counter += 1
await asyncio.sleep(16) await asyncio.sleep(16)
self.hostnames = list(sorted(set(self.hostnames))) self.hostnames = list(sorted(set(self.hostnames)))
# verify domains such as x.x.com.multicdn.x.com are parsed properly # verify domains such as x.x.com.multicdn.x.com are parsed properly
self.hostnames = [ self.hostnames = [
host host for host in self.hostnames if ((len(host.split('.')) >= 3) and host.split('.')[-2] == self.word.split('.')[-2])
for host in self.hostnames
if (
(len(host.split(".")) >= 3)
and host.split(".")[-2] == self.word.split(".")[-2]
)
] ]
async def get_hostnames(self) -> list: async def get_hostnames(self) -> list:
@ -82,22 +67,20 @@ async def get_hostnames(self) -> list:
async def parse_hostnames(data, word): async def parse_hostnames(data, word):
total_subdomains = set() total_subdomains = set()
for attribute in data: for attribute in data:
total_subdomains.add(attribute["id"].replace('"', "").replace("www.", "")) total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
attributes = attribute["attributes"] attributes = attribute['attributes']
total_subdomains.update( total_subdomains.update(
{ {
value["value"].replace('"', "").replace("www.", "") value['value'].replace('"', '').replace('www.', '')
for value in attributes["last_dns_records"] for value in attributes['last_dns_records']
if word in value["value"] if word in value['value']
} }
) )
if "last_https_certificate" in attributes.keys(): if 'last_https_certificate' in attributes.keys():
total_subdomains.update( total_subdomains.update(
{ {
value.replace('"', "").replace("www.", "") value.replace('"', '').replace('www.', '')
for value in attributes["last_https_certificate"]["extensions"][ for value in attributes['last_https_certificate']['extensions']['subject_alternative_name']
"subject_alternative_name"
]
if word in value if word in value
} }
) )
@ -108,9 +91,7 @@ async def parse_hostnames(data, word):
total_subdomains = [ total_subdomains = [
x x
for x in total_subdomains for x in total_subdomains
if "edgekey.net" not in str(x) if 'edgekey.net' not in str(x) and 'akadns.net' not in str(x) and 'include:_spf' not in str(x)
and "akadns.net" not in str(x)
and "include:_spf" not in str(x)
] ]
total_subdomains.sort() total_subdomains.sort()
return total_subdomains return total_subdomains
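A condensed sketch of the cursor pagination the module above implements, with the fail counter, www-stripping and certificate parsing left out. The endpoint, headers and field names ('data', 'id', 'meta', 'cursor') are taken from the code; the helper name is made up.

import asyncio
import aiohttp

async def vt_subdomains(domain: str, api_key: str) -> list[str]:
    base = f'https://www.virustotal.com/api/v3/domains/{domain}/subdomains?limit=40'
    headers = {'x-apikey': api_key, 'Accept': 'application/json'}
    names: list[str] = []
    cursor = ''
    async with aiohttp.ClientSession(headers=headers) as session:
        while True:
            url = base + '&cursor=' + cursor if cursor else base
            async with session.get(url) as resp:
                jdata = await resp.json()
            names.extend(item['id'] for item in jdata.get('data', []))
            # stop once the cursor disappears from the meta block
            cursor = jdata.get('meta', {}).get('cursor', '')
            if not cursor:
                return sorted(set(names))
            await asyncio.sleep(16)  # same pause the module uses to stay under the rate limit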


@ -5,22 +5,16 @@
class SearchYahoo: class SearchYahoo:
def __init__(self, word, limit) -> None: def __init__(self, word, limit) -> None:
self.word = word self.word = word
self.total_results = "" self.total_results = ''
self.server = "search.yahoo.com" self.server = 'search.yahoo.com'
self.limit = limit self.limit = limit
self.proxy = False self.proxy = False
async def do_search(self) -> None: async def do_search(self) -> None:
base_url = f"https://{self.server}/search?p=%40{self.word}&b=xx&pz=10" base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10'
headers = {"Host": self.server, "User-agent": Core.get_user_agent()} headers = {'Host': self.server, 'User-agent': Core.get_user_agent()}
urls = [ urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
base_url.replace("xx", str(num)) responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
for num in range(0, self.limit, 10)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
for response in responses: for response in responses:
self.total_results += response self.total_results += response
@ -35,8 +29,8 @@ async def get_emails(self):
# strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld # strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld
for email in toparse_emails: for email in toparse_emails:
email = str(email) email = str(email)
if "-" in email and email[0].isdigit() and email.index("-") <= 9: if '-' in email and email[0].isdigit() and email.index('-') <= 9:
while email[0] == "-" or email[0].isdigit(): while email[0] == '-' or email[0].isdigit():
email = email[1:] email = email[1:]
emails.add(email) emails.add(email)
return list(emails) return list(emails)
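A made-up example of what the digit/dash stripping above handles: scraped search results sometimes glue a phone number onto the front of an address.

email = '800-555-0199info@example.com'   # fabricated input
if '-' in email and email[0].isdigit() and email.index('-') <= 9:
    while email[0] == '-' or email[0].isdigit():
        email = email[1:]
print(email)  # info@example.com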


@ -16,8 +16,8 @@ def __init__(self, word, limit) -> None:
# If you wish to extract as many subdomains as possible visit the fetch_subdomains # If you wish to extract as many subdomains as possible visit the fetch_subdomains
# To see how # To see how
if self.key is None: if self.key is None:
raise MissingKey("zoomeye") raise MissingKey('zoomeye')
self.baseurl = "https://api.zoomeye.org/host/search" self.baseurl = 'https://api.zoomeye.org/host/search'
self.proxy = False self.proxy = False
self.totalasns: list = list() self.totalasns: list = list()
self.totalhosts: list = list() self.totalhosts: list = list()
@ -58,40 +58,38 @@ def __init__(self, word, limit) -> None:
async def fetch_subdomains(self) -> None: async def fetch_subdomains(self) -> None:
# Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip # Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()} headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
subdomain_search_endpoint = ( subdomain_search_endpoint = f'https://api.zoomeye.org/domain/search?q={self.word}&type=0&'
f"https://api.zoomeye.org/domain/search?q={self.word}&type=0&"
)
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[subdomain_search_endpoint + "page=1"], [subdomain_search_endpoint + 'page=1'],
json=True, json=True,
proxy=self.proxy, proxy=self.proxy,
headers=headers, headers=headers,
) )
# Make initial request to determine total number of subdomains # Make initial request to determine total number of subdomains
resp = response[0] resp = response[0]
if resp["status"] != 200: if resp['status'] != 200:
return return
total = resp["total"] total = resp['total']
# max number of results per request seems to be 30 # max number of results per request seems to be 30
# NOTE: If you wish to get as many subdomains as possible # NOTE: If you wish to get as many subdomains as possible
# Change the line below to: # Change the line below to:
# self.limit = (total // 30) + 1 # self.limit = (total // 30) + 1
self.limit = self.limit if total > self.limit else (total // 30) + 1 self.limit = self.limit if total > self.limit else (total // 30) + 1
self.totalhosts.extend([item["name"] for item in resp["list"]]) self.totalhosts.extend([item['name'] for item in resp['list']])
for i in range(2, self.limit): for i in range(2, self.limit):
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[subdomain_search_endpoint + f"page={i}"], [subdomain_search_endpoint + f'page={i}'],
json=True, json=True,
proxy=self.proxy, proxy=self.proxy,
headers=headers, headers=headers,
) )
resp = response[0] resp = response[0]
if resp["status"] != 200: if resp['status'] != 200:
return return
found_subdomains = [item["name"] for item in resp["list"]] found_subdomains = [item['name'] for item in resp['list']]
if len(found_subdomains) == 0: if len(found_subdomains) == 0:
break break
self.totalhosts.extend(found_subdomains) self.totalhosts.extend(found_subdomains)
@ -99,19 +97,17 @@ async def fetch_subdomains(self) -> None:
await asyncio.sleep(get_delay() + 1) await asyncio.sleep(get_delay() + 1)
async def do_search(self) -> None: async def do_search(self) -> None:
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()} headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
# Fetch subdomains first # Fetch subdomains first
await self.fetch_subdomains() await self.fetch_subdomains()
params = ( params = (
("query", f"site:{self.word}"), ('query', f'site:{self.word}'),
("page", "1"), ('page', '1'),
)
response = await AsyncFetcher.fetch_all(
[self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params
) )
response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params)
# The First request determines how many pages there in total # The First request determines how many pages there in total
resp = response[0] resp = response[0]
total_pages = int(resp["available"]) total_pages = int(resp['available'])
self.limit = self.limit if total_pages > self.limit else total_pages self.limit = self.limit if total_pages > self.limit else total_pages
self.limit = 3 if self.limit == 2 else self.limit self.limit = 3 if self.limit == 2 else self.limit
cur_page = 2 if self.limit >= 2 else -1 cur_page = 2 if self.limit >= 2 else -1
@ -121,21 +117,17 @@ async def do_search(self) -> None:
# cur_page = -1 # cur_page = -1
if cur_page == -1: if cur_page == -1:
# No need to do loop just parse and leave # No need to do loop just parse and leave
if "matches" in resp.keys(): if 'matches' in resp.keys():
hostnames, emails, ips, asns, iurls = await self.parse_matches( hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
resp["matches"]
)
self.totalhosts.extend(hostnames) self.totalhosts.extend(hostnames)
self.totalemails.extend(emails) self.totalemails.extend(emails)
self.totalips.extend(ips) self.totalips.extend(ips)
self.totalasns.extend(asns) self.totalasns.extend(asns)
self.interestingurls.extend(iurls) self.interestingurls.extend(iurls)
else: else:
if "matches" in resp.keys(): if 'matches' in resp.keys():
# Parse out initial results and then continue to loop # Parse out initial results and then continue to loop
hostnames, emails, ips, asns, iurls = await self.parse_matches( hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
resp["matches"]
)
self.totalhosts.extend(hostnames) self.totalhosts.extend(hostnames)
self.totalemails.extend(emails) self.totalemails.extend(emails)
self.totalips.extend(ips) self.totalips.extend(ips)
@ -145,8 +137,8 @@ async def do_search(self) -> None:
for num in range(2, self.limit): for num in range(2, self.limit):
# print(f'Currently on page: {num}') # print(f'Currently on page: {num}')
params = ( params = (
("query", f"site:{self.word}"), ('query', f'site:{self.word}'),
("page", f"{num}"), ('page', f'{num}'),
) )
response = await AsyncFetcher.fetch_all( response = await AsyncFetcher.fetch_all(
[self.baseurl], [self.baseurl],
@ -156,22 +148,14 @@ async def do_search(self) -> None:
params=params, params=params,
) )
resp = response[0] resp = response[0]
if "matches" not in resp.keys(): if 'matches' not in resp.keys():
print(f"Your resp: {resp}") print(f'Your resp: {resp}')
print("Match not found in keys") print('Match not found in keys')
break break
hostnames, emails, ips, asns, iurls = await self.parse_matches( hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
resp["matches"]
)
if ( if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 and len(asns) == 0 and len(iurls) == 0:
len(hostnames) == 0
and len(emails) == 0
and len(ips) == 0
and len(asns) == 0
and len(iurls) == 0
):
nomatches_counter += 1 nomatches_counter += 1
if nomatches_counter >= 5: if nomatches_counter >= 5:
@ -196,48 +180,42 @@ async def parse_matches(self, matches):
emails = set() emails = set()
for match in matches: for match in matches:
try: try:
ips.add(match["ip"]) ips.add(match['ip'])
if "geoinfo" in match.keys(): if 'geoinfo' in match.keys():
asns.add(f"AS{match['geoinfo']['asn']}") asns.add(f"AS{match['geoinfo']['asn']}")
if "rdns_new" in match.keys(): if 'rdns_new' in match.keys():
rdns_new = match["rdns_new"] rdns_new = match['rdns_new']
if "," in rdns_new: if ',' in rdns_new:
parts = str(rdns_new).split(",") parts = str(rdns_new).split(',')
rdns_new = parts[0] rdns_new = parts[0]
if len(parts) == 2: if len(parts) == 2:
hostnames.add(parts[1]) hostnames.add(parts[1])
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new) hostnames.add(rdns_new)
else: else:
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
hostnames.add(rdns_new) hostnames.add(rdns_new)
if "rdns" in match.keys(): if 'rdns' in match.keys():
rdns = match["rdns"] rdns = match['rdns']
rdns = rdns[:-1] if rdns[-1] == "." else rdns rdns = rdns[:-1] if rdns[-1] == '.' else rdns
hostnames.add(rdns) hostnames.add(rdns)
if "portinfo" in match.keys(): if 'portinfo' in match.keys():
# re. # re.
temp_emails = set( temp_emails = set(await self.parse_emails(match['portinfo']['banner']))
await self.parse_emails(match["portinfo"]["banner"])
)
emails.update(temp_emails) emails.update(temp_emails)
hostnames.update( hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner'])))
set(await self.parse_hostnames(match["portinfo"]["banner"]))
)
iurls = { iurls = {
str(iurl.group(1)).replace('"', "") str(iurl.group(1)).replace('"', '')
for iurl in re.finditer( for iurl in re.finditer(self.iurl_regex, match['portinfo']['banner'])
self.iurl_regex, match["portinfo"]["banner"]
)
if self.word in str(iurl.group(1)) if self.word in str(iurl.group(1))
} }
except Exception as e: except Exception as e:
print(f"An exception has occurred: {e}") print(f'An exception has occurred: {e}')
return hostnames, emails, ips, asns, iurls return hostnames, emails, ips, asns, iurls
async def process(self, proxy: bool = False) -> None: async def process(self, proxy: bool = False) -> None:
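A made-up illustration of the rdns_new branch in parse_matches above. Mirroring the code, only the first of the two comma-separated names has its trailing dot removed.

hostnames: set = set()
rdns_new = 'a.example.com.,b.example.com.'   # fabricated value
if ',' in rdns_new:
    parts = str(rdns_new).split(',')
    rdns_new = parts[0]
    if len(parts) == 2:
        hostnames.add(parts[1])
    rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
    hostnames.add(rdns_new)
print(hostnames)  # {'a.example.com', 'b.example.com.'} (set order may vary)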


@ -1 +1 @@
__all__ = ["hostchecker"] __all__ = ['hostchecker']


@ -12,36 +12,32 @@
limiter = Limiter(key_func=get_remote_address) limiter = Limiter(key_func=get_remote_address)
app = FastAPI( app = FastAPI(
title="Restful Harvest", title='Restful Harvest',
description="Rest API for theHarvester powered by FastAPI", description='Rest API for theHarvester powered by FastAPI',
version="0.0.2", version='0.0.2',
) )
app.state.limiter = limiter app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore
# This is where we will host files that arise if the user specifies a filename # This is where we will host files that arise if the user specifies a filename
try: try:
app.mount( app.mount('/static', StaticFiles(directory='theHarvester/lib/api/static/'), name='static')
"/static", StaticFiles(directory="theHarvester/lib/api/static/"), name="static"
)
except RuntimeError: except RuntimeError:
static_path = os.path.expanduser("~/.local/share/theHarvester/static/") static_path = os.path.expanduser('~/.local/share/theHarvester/static/')
if not os.path.isdir(static_path): if not os.path.isdir(static_path):
os.makedirs(static_path) os.makedirs(static_path)
app.mount( app.mount(
"/static", '/static',
StaticFiles(directory=static_path), StaticFiles(directory=static_path),
name="static", name='static',
) )
@app.get("/") @app.get('/')
async def root(*, user_agent: str = Header(None)) -> Response: async def root(*, user_agent: str = Header(None)) -> Response:
# very basic user agent filtering # very basic user agent filtering
if user_agent and ( if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent response = RedirectResponse(app.url_path_for('bot'))
):
response = RedirectResponse(app.url_path_for("bot"))
return response return response
return HTMLResponse( return HTMLResponse(
@ -70,36 +66,34 @@ async def root(*, user_agent: str = Header(None)) -> Response:
) )
@app.get("/nicebot") @app.get('/nicebot')
async def bot() -> dict[str, str]: async def bot() -> dict[str, str]:
# nice bot # nice bot
string = {"bot": "These are not the droids you are looking for"} string = {'bot': 'These are not the droids you are looking for'}
return string return string
@app.get("/sources", response_class=UJSONResponse) @app.get('/sources', response_class=UJSONResponse)
@limiter.limit("5/minute") @limiter.limit('5/minute')
async def getsources(request: Request): async def getsources(request: Request):
# Endpoint for user to query for available sources theHarvester supports # Endpoint for user to query for available sources theHarvester supports
# Rate limit of 5 requests per minute # Rate limit of 5 requests per minute
sources = __main__.Core.get_supportedengines() sources = __main__.Core.get_supportedengines()
return {"sources": sources} return {'sources': sources}
@app.get("/dnsbrute") @app.get('/dnsbrute')
@limiter.limit("5/minute") @limiter.limit('5/minute')
async def dnsbrute( async def dnsbrute(
request: Request, request: Request,
user_agent: str = Header(None), user_agent: str = Header(None),
domain: str = Query(..., description="Domain to be brute forced"), domain: str = Query(..., description='Domain to be brute forced'),
) -> Response: ) -> Response:
# Endpoint for user to signal to do DNS brute forcing # Endpoint for user to signal to do DNS brute forcing
# Rate limit of 5 requests per minute # Rate limit of 5 requests per minute
# basic user agent filtering # basic user agent filtering
if user_agent and ( if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent response = RedirectResponse(app.url_path_for('bot'))
):
response = RedirectResponse(app.url_path_for("bot"))
return response return response
dns_bruteforce = await __main__.start( dns_bruteforce = await __main__.start(
argparse.Namespace( argparse.Namespace(
@ -108,49 +102,45 @@ async def dnsbrute(
dns_server=False, dns_server=False,
dns_tld=False, dns_tld=False,
domain=domain, domain=domain,
filename="", filename='',
google_dork=False, google_dork=False,
limit=500, limit=500,
proxies=False, proxies=False,
shodan=False, shodan=False,
source=",".join([]), source=','.join([]),
start=0, start=0,
take_over=False, take_over=False,
virtual_host=False, virtual_host=False,
) )
) )
return UJSONResponse({"dns_bruteforce": dns_bruteforce}) return UJSONResponse({'dns_bruteforce': dns_bruteforce})
@app.get("/query") @app.get('/query')
@limiter.limit("2/minute") @limiter.limit('2/minute')
async def query( async def query(
request: Request, request: Request,
dns_server: str = Query(""), dns_server: str = Query(''),
user_agent: str = Header(None), user_agent: str = Header(None),
dns_brute: bool = Query(False), dns_brute: bool = Query(False),
dns_lookup: bool = Query(False), dns_lookup: bool = Query(False),
dns_tld: bool = Query(False), dns_tld: bool = Query(False),
filename: str = Query(""), filename: str = Query(''),
google_dork: bool = Query(False), google_dork: bool = Query(False),
proxies: bool = Query(False), proxies: bool = Query(False),
shodan: bool = Query(False), shodan: bool = Query(False),
take_over: bool = Query(False), take_over: bool = Query(False),
virtual_host: bool = Query(False), virtual_host: bool = Query(False),
source: list[str] = Query( source: list[str] = Query(..., description='Data sources to query comma separated with no space'),
..., description="Data sources to query comma separated with no space"
),
limit: int = Query(500), limit: int = Query(500),
start: int = Query(0), start: int = Query(0),
domain: str = Query(..., description="Domain to be harvested"), domain: str = Query(..., description='Domain to be harvested'),
) -> Response: ) -> Response:
# Query function that allows user to query theHarvester rest API # Query function that allows user to query theHarvester rest API
# Rate limit of 2 requests per minute # Rate limit of 2 requests per minute
# basic user agent filtering # basic user agent filtering
if user_agent and ( if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent response = RedirectResponse(app.url_path_for('bot'))
):
response = RedirectResponse(app.url_path_for("bot"))
return response return response
try: try:
( (
@ -175,7 +165,7 @@ async def query(
limit=limit, limit=limit,
proxies=proxies, proxies=proxies,
shodan=shodan, shodan=shodan,
source=",".join(source), source=','.join(source),
start=start, start=start,
take_over=take_over, take_over=take_over,
virtual_host=virtual_host, virtual_host=virtual_host,
@ -184,18 +174,16 @@ async def query(
return UJSONResponse( return UJSONResponse(
{ {
"asns": asns, 'asns': asns,
"interesting_urls": iurls, 'interesting_urls': iurls,
"twitter_people": twitter_people_list, 'twitter_people': twitter_people_list,
"linkedin_people": linkedin_people_list, 'linkedin_people': linkedin_people_list,
"linkedin_links": linkedin_links, 'linkedin_links': linkedin_links,
"trello_urls": aurls, 'trello_urls': aurls,
"ips": aips, 'ips': aips,
"emails": aemails, 'emails': aemails,
"hosts": ahosts, 'hosts': ahosts,
} }
) )
except Exception: except Exception:
return UJSONResponse( return UJSONResponse({'exception': 'Please contact the server administrator to check the issue'})
{"exception": "Please contact the server administrator to check the issue"}
)
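The rate limiting above follows the standard slowapi pattern. Below is a minimal sketch reduced to a single endpoint; the import paths are not visible in this excerpt and are assumed from slowapi's documented layout.

from fastapi import FastAPI, Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)
app = FastAPI()
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

@app.get('/sources')
@limiter.limit('5/minute')               # same budget as the real /sources endpoint
async def sources(request: Request):     # slowapi needs the Request argument to key the client
    return {'sources': ['baidu', 'bing', 'crtsh']}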


@ -23,100 +23,94 @@ async def main() -> None:
Just a simple example of how to interact with the rest api Just a simple example of how to interact with the rest api
you can easily use requests instead of aiohttp or whatever you best see fit you can easily use requests instead of aiohttp or whatever you best see fit
""" """
url = "http://127.0.0.1:5000" url = 'http://127.0.0.1:5000'
domain = "netflix.com" domain = 'netflix.com'
query_url = ( query_url = f'{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}'
f"{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}"
)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
fetched_json = await fetch_json(session, query_url) fetched_json = await fetch_json(session, query_url)
total_asns = fetched_json["asns"] total_asns = fetched_json['asns']
interesting_urls = fetched_json["interesting_urls"] interesting_urls = fetched_json['interesting_urls']
twitter_people_list_tracker = fetched_json["twitter_people"] twitter_people_list_tracker = fetched_json['twitter_people']
linkedin_people_list_tracker = fetched_json["linkedin_people"] linkedin_people_list_tracker = fetched_json['linkedin_people']
linkedin_links_tracker = fetched_json["linkedin_links"] linkedin_links_tracker = fetched_json['linkedin_links']
trello_urls = fetched_json["trello_urls"] trello_urls = fetched_json['trello_urls']
ips = fetched_json["ips"] ips = fetched_json['ips']
emails = fetched_json["emails"] emails = fetched_json['emails']
hosts = fetched_json["hosts"] hosts = fetched_json['hosts']
if len(total_asns) > 0: if len(total_asns) > 0:
print(f"\n[*] ASNS found: {len(total_asns)}") print(f'\n[*] ASNS found: {len(total_asns)}')
print("--------------------") print('--------------------')
total_asns = list(sorted(set(total_asns))) total_asns = list(sorted(set(total_asns)))
for asn in total_asns: for asn in total_asns:
print(asn) print(asn)
if len(interesting_urls) > 0: if len(interesting_urls) > 0:
print(f"\n[*] Interesting Urls found: {len(interesting_urls)}") print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
print("--------------------") print('--------------------')
interesting_urls = list(sorted(set(interesting_urls))) interesting_urls = list(sorted(set(interesting_urls)))
for iurl in interesting_urls: for iurl in interesting_urls:
print(iurl) print(iurl)
if len(twitter_people_list_tracker) == 0: if len(twitter_people_list_tracker) == 0:
print("\n[*] No Twitter users found.\n\n") print('\n[*] No Twitter users found.\n\n')
else: else:
if len(twitter_people_list_tracker) >= 1: if len(twitter_people_list_tracker) >= 1:
print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker))) print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker)))
print("---------------------") print('---------------------')
twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker))) twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker)))
for usr in twitter_people_list_tracker: for usr in twitter_people_list_tracker:
print(usr) print(usr)
if len(linkedin_people_list_tracker) == 0: if len(linkedin_people_list_tracker) == 0:
print("\n[*] No LinkedIn users found.\n\n") print('\n[*] No LinkedIn users found.\n\n')
else: else:
if len(linkedin_people_list_tracker) >= 1: if len(linkedin_people_list_tracker) >= 1:
print( print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker)))
"\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker)) print('---------------------')
) linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker)))
print("---------------------")
linkedin_people_list_tracker = list(
sorted(set(linkedin_people_list_tracker))
)
for usr in linkedin_people_list_tracker: for usr in linkedin_people_list_tracker:
print(usr) print(usr)
if len(linkedin_links_tracker) == 0: if len(linkedin_links_tracker) == 0:
print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}") print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
linkedin_links_tracker = list(sorted(set(linkedin_links_tracker))) linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
print("---------------------") print('---------------------')
for link in linkedin_links_tracker: for link in linkedin_links_tracker:
print(link) print(link)
length_urls = len(trello_urls) length_urls = len(trello_urls)
total = length_urls total = length_urls
print("\n[*] Trello URLs found: " + str(total)) print('\n[*] Trello URLs found: ' + str(total))
print("--------------------") print('--------------------')
all_urls = list(sorted(set(trello_urls))) all_urls = list(sorted(set(trello_urls)))
for url in sorted(all_urls): for url in sorted(all_urls):
print(url) print(url)
if len(ips) == 0: if len(ips) == 0:
print("\n[*] No IPs found.") print('\n[*] No IPs found.')
else: else:
print("\n[*] IPs found: " + str(len(ips))) print('\n[*] IPs found: ' + str(len(ips)))
print("-------------------") print('-------------------')
# use netaddr as the list may contain ipv4 and ipv6 addresses # use netaddr as the list may contain ipv4 and ipv6 addresses
ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)]) ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)])
print("\n".join(map(str, ip_list))) print('\n'.join(map(str, ip_list)))
if len(emails) == 0: if len(emails) == 0:
print("\n[*] No emails found.") print('\n[*] No emails found.')
else: else:
print("\n[*] Emails found: " + str(len(emails))) print('\n[*] Emails found: ' + str(len(emails)))
print("----------------------") print('----------------------')
all_emails = sorted(list(set(emails))) all_emails = sorted(list(set(emails)))
print("\n".join(all_emails)) print('\n'.join(all_emails))
if len(hosts) == 0: if len(hosts) == 0:
print("\n[*] No hosts found.\n\n") print('\n[*] No hosts found.\n\n')
else: else:
print("\n[*] Hosts found: " + str(len(hosts))) print('\n[*] Hosts found: ' + str(len(hosts)))
print("---------------------") print('---------------------')
print("\n".join(hosts)) print('\n'.join(hosts))
if __name__ == "__main__": if __name__ == '__main__':
asyncio.run(main()) asyncio.run(main())
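A small example of why the client sorts IPs with netaddr rather than plain string comparison; the addresses are made up.

import netaddr

ips = ['93.184.216.34', '8.8.8.8', '2606:4700::1111', '10.0.0.200']
# string sorting would place '10.0.0.200' before '8.8.8.8';
# netaddr compares numerically and copes with a mix of IPv4 and IPv6
ip_list = sorted(netaddr.IPAddress(ip.strip()) for ip in set(ips))
print('\n'.join(map(str, ip_list)))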


@ -19,11 +19,11 @@
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Sized from collections.abc import Sized
DATA_DIR = Path(__file__).parents[1] / "data" DATA_DIR = Path(__file__).parents[1] / 'data'
CONFIG_DIRS = [ CONFIG_DIRS = [
Path("/etc/theHarvester/"), Path('/etc/theHarvester/'),
Path("/usr/local/etc/theHarvester/"), Path('/usr/local/etc/theHarvester/'),
Path("~/.theHarvester"), Path('~/.theHarvester'),
] ]
@ -35,7 +35,7 @@ def _read_config(filename: str) -> str:
with contextlib.suppress(FileNotFoundError): with contextlib.suppress(FileNotFoundError):
file = path.expanduser() / filename file = path.expanduser() / filename
config = file.read_text() config = file.read_text()
print(f"Read {filename} from {file}") print(f'Read {filename} from {file}')
return config return config
# Fallback to creating default in user's home dir # Fallback to creating default in user's home dir
@ -43,168 +43,160 @@ def _read_config(filename: str) -> str:
dest = CONFIG_DIRS[-1].expanduser() / filename dest = CONFIG_DIRS[-1].expanduser() / filename
dest.parent.mkdir(exist_ok=True) dest.parent.mkdir(exist_ok=True)
dest.write_text(default) dest.write_text(default)
print(f"Created default {filename} at {dest}") print(f'Created default {filename} at {dest}')
return default return default
@staticmethod @staticmethod
def api_keys() -> dict: def api_keys() -> dict:
keys = yaml.safe_load(Core._read_config("api-keys.yaml")) keys = yaml.safe_load(Core._read_config('api-keys.yaml'))
return keys["apikeys"] return keys['apikeys']
@staticmethod @staticmethod
def bevigil_key() -> str: def bevigil_key() -> str:
return Core.api_keys()["bevigil"]["key"] return Core.api_keys()['bevigil']['key']
@staticmethod @staticmethod
def binaryedge_key() -> str: def binaryedge_key() -> str:
return Core.api_keys()["binaryedge"]["key"] return Core.api_keys()['binaryedge']['key']
@staticmethod @staticmethod
def bing_key() -> str: def bing_key() -> str:
return Core.api_keys()["bing"]["key"] return Core.api_keys()['bing']['key']
@staticmethod @staticmethod
def bufferoverun_key() -> str: def bufferoverun_key() -> str:
return Core.api_keys()["bufferoverun"]["key"] return Core.api_keys()['bufferoverun']['key']
@staticmethod @staticmethod
def censys_key() -> tuple: def censys_key() -> tuple:
return Core.api_keys()["censys"]["id"], Core.api_keys()["censys"]["secret"] return Core.api_keys()['censys']['id'], Core.api_keys()['censys']['secret']
@staticmethod @staticmethod
def criminalip_key() -> str: def criminalip_key() -> str:
return Core.api_keys()["criminalip"]["key"] return Core.api_keys()['criminalip']['key']
@staticmethod @staticmethod
def fullhunt_key() -> str: def fullhunt_key() -> str:
return Core.api_keys()["fullhunt"]["key"] return Core.api_keys()['fullhunt']['key']
@staticmethod @staticmethod
def github_key() -> str: def github_key() -> str:
return Core.api_keys()["github"]["key"] return Core.api_keys()['github']['key']
@staticmethod @staticmethod
def hunter_key() -> str: def hunter_key() -> str:
return Core.api_keys()["hunter"]["key"] return Core.api_keys()['hunter']['key']
@staticmethod @staticmethod
def hunterhow_key() -> str: def hunterhow_key() -> str:
return Core.api_keys()["hunterhow"]["key"] return Core.api_keys()['hunterhow']['key']
@staticmethod @staticmethod
def intelx_key() -> str: def intelx_key() -> str:
return Core.api_keys()["intelx"]["key"] return Core.api_keys()['intelx']['key']
@staticmethod @staticmethod
def netlas_key() -> str: def netlas_key() -> str:
return Core.api_keys()["netlas"]["key"] return Core.api_keys()['netlas']['key']
@staticmethod @staticmethod
def pentest_tools_key() -> str: def pentest_tools_key() -> str:
return Core.api_keys()["pentestTools"]["key"] return Core.api_keys()['pentestTools']['key']
@staticmethod @staticmethod
def onyphe_key() -> str: def onyphe_key() -> str:
return Core.api_keys()["onyphe"]["key"] return Core.api_keys()['onyphe']['key']
@staticmethod @staticmethod
def projectdiscovery_key() -> str: def projectdiscovery_key() -> str:
return Core.api_keys()["projectDiscovery"]["key"] return Core.api_keys()['projectDiscovery']['key']
@staticmethod @staticmethod
def rocketreach_key() -> str: def rocketreach_key() -> str:
return Core.api_keys()["rocketreach"]["key"] return Core.api_keys()['rocketreach']['key']
@staticmethod @staticmethod
def security_trails_key() -> str: def security_trails_key() -> str:
return Core.api_keys()["securityTrails"]["key"] return Core.api_keys()['securityTrails']['key']
@staticmethod @staticmethod
def shodan_key() -> str: def shodan_key() -> str:
return Core.api_keys()["shodan"]["key"] return Core.api_keys()['shodan']['key']
@staticmethod @staticmethod
def zoomeye_key() -> str: def zoomeye_key() -> str:
return Core.api_keys()["zoomeye"]["key"] return Core.api_keys()['zoomeye']['key']
@staticmethod @staticmethod
def tomba_key() -> tuple[str, str]: def tomba_key() -> tuple[str, str]:
return Core.api_keys()["tomba"]["key"], Core.api_keys()["tomba"]["secret"] return Core.api_keys()['tomba']['key'], Core.api_keys()['tomba']['secret']
@staticmethod @staticmethod
def virustotal_key() -> str: def virustotal_key() -> str:
return Core.api_keys()["virustotal"]["key"] return Core.api_keys()['virustotal']['key']
@staticmethod @staticmethod
def proxy_list() -> list: def proxy_list() -> list:
keys = yaml.safe_load(Core._read_config("proxies.yaml")) keys = yaml.safe_load(Core._read_config('proxies.yaml'))
http_list = ( http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
[f"http://{proxy}" for proxy in keys["http"]]
if keys["http"] is not None
else []
)
return http_list return http_list
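Taken together, the accessors above imply roughly the following layout for api-keys.yaml and proxies.yaml. This is an assumption based only on the lookups shown; the values are placeholders.

import yaml

api_keys = yaml.safe_load('''
apikeys:
  bevigil:
    key: YOUR-BEVIGIL-KEY
  censys:
    id: YOUR-CENSYS-ID
    secret: YOUR-CENSYS-SECRET
  tomba:
    key: YOUR-TOMBA-KEY
    secret: YOUR-TOMBA-SECRET
''')
print(api_keys['apikeys']['bevigil']['key'])

proxies = yaml.safe_load('''
http:
  - 127.0.0.1:8080
''')
print([f'http://{proxy}' for proxy in proxies['http']])  # same expansion as proxy_list()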
@staticmethod @staticmethod
def banner() -> None: def banner() -> None:
print("*******************************************************************") print('*******************************************************************')
print("* _ _ _ *") print('* _ _ _ *')
print(r"* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *") print(r'* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *')
print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *") print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *")
print(r"* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *") print(r'* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *')
print(r"* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *") print(r'* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *')
print("* *") print('* *')
print( print('* theHarvester {version}{filler}*'.format(version=version(), filler=' ' * (51 - len(version()))))
"* theHarvester {version}{filler}*".format( print('* Coded by Christian Martorella *')
version=version(), filler=" " * (51 - len(version())) print('* Edge-Security Research *')
) print('* cmartorella@edge-security.com *')
) print('* *')
print("* Coded by Christian Martorella *") print('*******************************************************************')
print("* Edge-Security Research *")
print("* cmartorella@edge-security.com *")
print("* *")
print("*******************************************************************")
@staticmethod @staticmethod
def get_supportedengines() -> list[str | Any]: def get_supportedengines() -> list[str | Any]:
supportedengines = [ supportedengines = [
"anubis", 'anubis',
"baidu", 'baidu',
"bevigil", 'bevigil',
"binaryedge", 'binaryedge',
"bing", 'bing',
"bingapi", 'bingapi',
"bufferoverun", 'bufferoverun',
"brave", 'brave',
"censys", 'censys',
"certspotter", 'certspotter',
"criminalip", 'criminalip',
"crtsh", 'crtsh',
"dnsdumpster", 'dnsdumpster',
"duckduckgo", 'duckduckgo',
"fullhunt", 'fullhunt',
"github-code", 'github-code',
"hackertarget", 'hackertarget',
"hunter", 'hunter',
"hunterhow", 'hunterhow',
"intelx", 'intelx',
"netlas", 'netlas',
"onyphe", 'onyphe',
"otx", 'otx',
"pentesttools", 'pentesttools',
"projectdiscovery", 'projectdiscovery',
"rapiddns", 'rapiddns',
"rocketreach", 'rocketreach',
"securityTrails", 'securityTrails',
"sitedossier", 'sitedossier',
"subdomaincenter", 'subdomaincenter',
"subdomainfinderc99", 'subdomainfinderc99',
"threatminer", 'threatminer',
"tomba", 'tomba',
"urlscan", 'urlscan',
"virustotal", 'virustotal',
"yahoo", 'yahoo',
"zoomeye", 'zoomeye',
] ]
return supportedengines return supportedengines
@ -214,58 +206,58 @@ def get_user_agent() -> str:
# Lasted updated 7/2/23 # Lasted updated 7/2/23
# TODO use bs4 to auto parse user agents # TODO use bs4 to auto parse user agents
user_agents = [ user_agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43',
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0", 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0", 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0',
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37',
"Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0", 'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15',
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0", 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15',
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0", 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0", 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0',
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0", 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
"Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse", 'Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse',
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15", 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15',
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
] ]
return random.choice(user_agents) return random.choice(user_agents)
@ -278,129 +270,85 @@ async def post_fetch(
cls, cls,
url, url,
headers=None, headers=None,
data: str | dict[str, str] = "", data: str | dict[str, str] = '',
params: str = "", params: str = '',
json: bool = False, json: bool = False,
proxy: bool = False, proxy: bool = False,
): ):
if headers is None: if headers is None:
headers = {} headers = {}
if len(headers) == 0: if len(headers) == 0:
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
timeout = aiohttp.ClientTimeout(total=720) timeout = aiohttp.ClientTimeout(total=720)
# By default, timeout is 5 minutes, changed to 12-minutes # By default, timeout is 5 minutes, changed to 12-minutes
# results are well worth the wait # results are well worth the wait
try: try:
if proxy: if proxy:
proxy = random.choice(cls().proxy_list) proxy = random.choice(cls().proxy_list)
if params != "": if params != '':
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
headers=headers, timeout=timeout async with session.get(url, params=params, proxy=proxy) as response:
) as session:
async with session.get(
url, params=params, proxy=proxy
) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return ( return await response.text() if json is False else await response.json()
await response.text()
if json is False
else await response.json()
)
else: else:
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
headers=headers, timeout=timeout
) as session:
async with session.get(url, proxy=proxy) as response: async with session.get(url, proxy=proxy) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return ( return await response.text() if json is False else await response.json()
await response.text() elif params == '':
if json is False
else await response.json()
)
elif params == "":
if isinstance(data, str): if isinstance(data, str):
data = json_loader.loads(data) data = json_loader.loads(data)
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
headers=headers, timeout=timeout
) as session:
async with session.post(url, data=data) as resp: async with session.post(url, data=data) as resp:
await asyncio.sleep(3) await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json() return await resp.text() if json is False else await resp.json()
else: else:
if isinstance(data, str): if isinstance(data, str):
data = json_loader.loads(data) data = json_loader.loads(data)
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
headers=headers, timeout=timeout
) as session:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.post( async with session.post(url, data=data, ssl=sslcontext, params=params) as resp:
url, data=data, ssl=sslcontext, params=params
) as resp:
await asyncio.sleep(3) await asyncio.sleep(3)
return await resp.text() if json is False else await resp.json() return await resp.text() if json is False else await resp.json()
except Exception as e: except Exception as e:
print(f"An exception has occurred in post_fetch: {e}") print(f'An exception has occurred in post_fetch: {e}')
return "" return ''
@classmethod @classmethod
async def fetch( async def fetch(cls, session, url, params: Sized = '', json: bool = False, proxy: str = '') -> str | dict | list | bool:
cls, session, url, params: Sized = "", json: bool = False, proxy: str = ""
) -> str | dict | list | bool:
# This fetch method solely focuses on get requests # This fetch method solely focuses on get requests
try: try:
# Wrap in try except due to 0x89 png/jpg files # Wrap in try except due to 0x89 png/jpg files
# This fetch method solely focuses on get requests # This fetch method solely focuses on get requests
if proxy != "": if proxy != '':
proxy = str(random.choice(cls().proxy_list)) proxy = str(random.choice(cls().proxy_list))
if len(params) != 0: if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get( async with session.get(url, ssl=sslcontext, params=params, proxy=proxy) as response:
url, ssl=sslcontext, params=params, proxy=proxy return await response.text() if json is False else await response.json()
) as response:
return (
await response.text()
if json is False
else await response.json()
)
else: else:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get( async with session.get(url, ssl=sslcontext, proxy=proxy) as response:
url, ssl=sslcontext, proxy=proxy
) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return ( return await response.text() if json is False else await response.json()
await response.text()
if json is False
else await response.json()
)
if len(params) != 0: if len(params) != 0:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext, params=params) as response: async with session.get(url, ssl=sslcontext, params=params) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return ( return await response.text() if json is False else await response.json()
await response.text()
if json is False
else await response.json()
)
else: else:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response: async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return ( return await response.text() if json is False else await response.json()
await response.text()
if json is False
else await response.json()
)
except Exception as e: except Exception as e:
print(f"An exception has occurred: {e}") print(f'An exception has occurred: {e}')
return "" return ''
@staticmethod @staticmethod
async def takeover_fetch( async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any] | str:
session, url: str, proxy: str = ""
) -> tuple[Any, Any] | str:
# This fetch method solely focuses on get requests # This fetch method solely focuses on get requests
try: try:
# Wrap in try except due to 0x89 png/jpg files # Wrap in try except due to 0x89 png/jpg files
@ -408,12 +356,10 @@ async def takeover_fetch(
# TODO determine if method for post requests is necessary # TODO determine if method for post requests is necessary
# url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url # url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
# Clean up urls with proper schemas # Clean up urls with proper schemas
if proxy != "": if proxy != '':
if "https://" in url: if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get( async with session.get(url, proxy=proxy, ssl=sslcontext) as response:
url, proxy=proxy, ssl=sslcontext
) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
return url, await response.text() return url, await response.text()
else: else:
@ -421,7 +367,7 @@ async def takeover_fetch(
await asyncio.sleep(5) await asyncio.sleep(5)
return url, await response.text() return url, await response.text()
else: else:
if "https://" in url: if 'https://' in url:
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with session.get(url, ssl=sslcontext) as response: async with session.get(url, ssl=sslcontext) as response:
await asyncio.sleep(5) await asyncio.sleep(5)
@ -431,15 +377,15 @@ async def takeover_fetch(
await asyncio.sleep(5) await asyncio.sleep(5)
return url, await response.text() return url, await response.text()
except Exception as e: except Exception as e:
print(f"Takeover check error: {e}") print(f'Takeover check error: {e}')
return url, "" return url, ''
@classmethod @classmethod
async def fetch_all( async def fetch_all(
cls, cls,
urls, urls,
headers=None, headers=None,
params: Sized = "", params: Sized = '',
json: bool = False, json: bool = False,
takeover: bool = False, takeover: bool = False,
proxy: bool = False, proxy: bool = False,
@ -449,29 +395,18 @@ async def fetch_all(
headers = {} headers = {}
timeout = aiohttp.ClientTimeout(total=60) timeout = aiohttp.ClientTimeout(total=60)
if len(headers) == 0: if len(headers) == 0:
headers = {"User-Agent": Core.get_user_agent()} headers = {'User-Agent': Core.get_user_agent()}
if takeover: if takeover:
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
headers=headers, timeout=aiohttp.ClientTimeout(total=15)
) as session:
if proxy: if proxy:
return await asyncio.gather( return await asyncio.gather(
*[ *[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls]
AsyncFetcher.takeover_fetch(
session, url, proxy=random.choice(cls().proxy_list)
)
for url in urls
]
) )
else: else:
return await asyncio.gather( return await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
*[AsyncFetcher.takeover_fetch(session, url) for url in urls]
)
if len(params) == 0: if len(params) == 0:
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout, max_field_size=13000) as session:
headers=headers, timeout=timeout, max_field_size=13000
) as session:
if proxy: if proxy:
return await asyncio.gather( return await asyncio.gather(
*[ *[
@ -485,14 +420,10 @@ async def fetch_all(
] ]
) )
else: else:
return await asyncio.gather( return await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
*[AsyncFetcher.fetch(session, url, json=json) for url in urls]
)
else: else:
# Indicates the request has certain params # Indicates the request has certain params
async with aiohttp.ClientSession( async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
headers=headers, timeout=timeout
) as session:
if proxy: if proxy:
return await asyncio.gather( return await asyncio.gather(
*[ *[
@ -507,9 +438,4 @@ async def fetch_all(
] ]
) )
else: else:
return await asyncio.gather( return await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
*[
AsyncFetcher.fetch(session, url, params, json)
for url in urls
]
)
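
For orientation, a minimal usage sketch of the fetch_all helper reformatted above, in the same shape the search modules call it; the import path, endpoint URL, and domain are illustrative assumptions, not part of this commit.

import asyncio

from theHarvester.lib.core import AsyncFetcher, Core  # assumed import path

async def demo() -> None:
    headers = {'User-Agent': Core.get_user_agent()}
    urls = ['https://api.example.org/subdomains/example.com']  # hypothetical endpoint
    # json=True makes each entry in the returned list a parsed JSON body
    responses = await AsyncFetcher.fetch_all(urls, headers=headers, json=True)
    print(responses[0])

if __name__ == '__main__':
    asyncio.run(demo())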

View file

@ -40,13 +40,13 @@ async def resolve_host(host, resolver) -> str:
result = await resolver.gethostbyname(host, socket.AF_INET) result = await resolver.gethostbyname(host, socket.AF_INET)
addresses = result.addresses addresses = result.addresses
if addresses == [] or addresses is None or result is None: if addresses == [] or addresses is None or result is None:
return f"{host}:" return f'{host}:'
else: else:
addresses = ",".join(map(str, list(sorted(set(addresses))))) addresses = ','.join(map(str, list(sorted(set(addresses)))))
# addresses = list(sorted(addresses)) # addresses = list(sorted(addresses))
return f"{host}:{addresses}" return f'{host}:{addresses}'
except Exception: except Exception:
return f"{host}:" return f'{host}:'
# https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks # https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
@staticmethod @staticmethod
@ -57,9 +57,7 @@ def chunks(lst, n):
async def query_all(self, resolver, hosts) -> list[Any]: async def query_all(self, resolver, hosts) -> list[Any]:
# TODO chunk list into 50 pieces regardless of IPs and subnets # TODO chunk list into 50 pieces regardless of IPs and subnets
results = await asyncio.gather( results = await asyncio.gather(*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts])
*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts]
)
return results return results
async def check(self): async def check(self):
@ -75,9 +73,9 @@ async def check(self):
results = await self.query_all(resolver, chunk) results = await self.query_all(resolver, chunk)
all_results.update(results) all_results.update(results)
for pair in results: for pair in results:
host, addresses = pair.split(":") host, addresses = pair.split(':')
self.realhosts.append(host) self.realhosts.append(host)
self.addresses.update({addr for addr in addresses.split(",")}) self.addresses.update({addr for addr in addresses.split(',')})
# address may be a list of ips # address may be a list of ips
# and do a set comprehension to remove duplicates # and do a set comprehension to remove duplicates
self.realhosts.sort() self.realhosts.sort()
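
A standalone sketch of the per-host resolution pattern shown above, assuming the resolver is an aiodns.DNSResolver (its construction sits outside the hunk) and using a placeholder hostname.

import asyncio
import socket

import aiodns  # assumed resolver library

async def resolve(host: str) -> str:
    resolver = aiodns.DNSResolver()
    try:
        result = await resolver.gethostbyname(host, socket.AF_INET)
        # mirror the sorted, de-duplicated, comma-joined format used above
        addresses = ','.join(map(str, sorted(set(result.addresses))))
        return f'{host}:{addresses}'
    except Exception:
        return f'{host}:'

print(asyncio.run(resolve('example.com')))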

View file

@ -5,7 +5,7 @@
import aiosqlite import aiosqlite
db_path = os.path.expanduser("~/.local/share/theHarvester") db_path = os.path.expanduser('~/.local/share/theHarvester')
if not os.path.isdir(db_path): if not os.path.isdir(db_path):
os.makedirs(db_path) os.makedirs(db_path)
@ -13,9 +13,9 @@
class StashManager: class StashManager:
def __init__(self) -> None: def __init__(self) -> None:
self.db = os.path.join(db_path, "stash.sqlite") self.db = os.path.join(db_path, 'stash.sqlite')
self.results = "" self.results = ''
self.totalresults = "" self.totalresults = ''
self.latestscandomain: dict = {} self.latestscandomain: dict = {}
self.domainscanhistory: list = [] self.domainscanhistory: list = []
self.scanboarddata: dict = {} self.scanboarddata: dict = {}
@ -26,7 +26,7 @@ def __init__(self) -> None:
async def do_init(self) -> None: async def do_init(self) -> None:
async with aiosqlite.connect(self.db) as db: async with aiosqlite.connect(self.db) as db:
await db.execute( await db.execute(
"CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)" 'CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)'
) )
await db.commit() await db.commit()
@ -39,7 +39,7 @@ async def store(self, domain, resource, res_type, source) -> None:
try: try:
async with aiosqlite.connect(self.db, timeout=30) as db: async with aiosqlite.connect(self.db, timeout=30) as db:
await db.execute( await db.execute(
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)", 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
(self.domain, self.resource, self.type, self.date, self.source), (self.domain, self.resource, self.type, self.date, self.source),
) )
await db.commit() await db.commit()
@ -52,13 +52,11 @@ async def store_all(self, domain, all, res_type, source) -> None:
self.type = res_type self.type = res_type
self.source = source self.source = source
self.date = datetime.date.today() self.date = datetime.date.today()
master_list = [ master_list = [(self.domain, x, self.type, self.date, self.source) for x in self.all]
(self.domain, x, self.type, self.date, self.source) for x in self.all
]
async with aiosqlite.connect(self.db, timeout=30) as db: async with aiosqlite.connect(self.db, timeout=30) as db:
try: try:
await db.executemany( await db.executemany(
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)", 'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
master_list, master_list,
) )
await db.commit() await db.commit()
@ -68,43 +66,41 @@ async def store_all(self, domain, all, res_type, source) -> None:
async def generatedashboardcode(self, domain): async def generatedashboardcode(self, domain):
try: try:
# TODO refactor into generic method # TODO refactor into generic method
self.latestscandomain["domain"] = domain self.latestscandomain['domain'] = domain
async with aiosqlite.connect(self.db, timeout=30) as conn: async with aiosqlite.connect(self.db, timeout=30) as conn:
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["host"] = data[0] self.latestscandomain['host'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["email"] = data[0] self.latestscandomain['email'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["ip"] = data[0] self.latestscandomain['ip'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["vhost"] = data[0] self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["shodan"] = data[0] self.latestscandomain['shodan'] = data[0]
cursor = await conn.execute( cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["latestdate"] = data[0] self.latestscandomain['latestdate'] = data[0]
latestdate = data[0] latestdate = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@ -114,7 +110,7 @@ async def generatedashboardcode(self, domain):
), ),
) )
scandetailshost = await cursor.fetchall() scandetailshost = await cursor.fetchall()
self.latestscandomain["scandetailshost"] = scandetailshost self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
( (
@ -123,7 +119,7 @@ async def generatedashboardcode(self, domain):
), ),
) )
scandetailsemail = await cursor.fetchall() scandetailsemail = await cursor.fetchall()
self.latestscandomain["scandetailsemail"] = scandetailsemail self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
( (
@ -132,7 +128,7 @@ async def generatedashboardcode(self, domain):
), ),
) )
scandetailsip = await cursor.fetchall() scandetailsip = await cursor.fetchall()
self.latestscandomain["scandetailsip"] = scandetailsip self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
( (
@ -141,7 +137,7 @@ async def generatedashboardcode(self, domain):
), ),
) )
scandetailsvhost = await cursor.fetchall() scandetailsvhost = await cursor.fetchall()
self.latestscandomain["scandetailsvhost"] = scandetailsvhost self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
( (
@ -150,14 +146,12 @@ async def generatedashboardcode(self, domain):
), ),
) )
scandetailsshodan = await cursor.fetchall() scandetailsshodan = await cursor.fetchall()
self.latestscandomain["scandetailsshodan"] = scandetailsshodan self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain return self.latestscandomain
except Exception as e: except Exception as e:
print(e) print(e)
async def getlatestscanresults( async def getlatestscanresults(self, domain, previousday: bool = False) -> Iterable[Row | str] | None:
self, domain, previousday: bool = False
) -> Iterable[Row | str] | None:
try: try:
async with aiosqlite.connect(self.db, timeout=30) as conn: async with aiosqlite.connect(self.db, timeout=30) as conn:
if previousday: if previousday:
@ -170,15 +164,13 @@ async def getlatestscanresults(
(domain,), (domain,),
) )
previousscandate = await cursor.fetchone() previousscandate = await cursor.fetchone()
if ( if not previousscandate: # When theHarvester runs first time/day, this query will return.
not previousscandate
): # When theHarvester runs first time/day, this query will return.
self.previousscanresults = [ self.previousscanresults = [
"No results", 'No results',
"No results", 'No results',
"No results", 'No results',
"No results", 'No results',
"No results", 'No results',
] ]
else: else:
cursor = await conn.execute( cursor = await conn.execute(
@ -197,9 +189,7 @@ async def getlatestscanresults(
self.previousscanresults = list(results) self.previousscanresults = list(results)
return self.previousscanresults return self.previousscanresults
except Exception as e: except Exception as e:
print( print(f'Error in getting the previous scan results from the database: {e}')
f"Error in getting the previous scan results from the database: {e}"
)
else: else:
try: try:
cursor = await conn.execute( cursor = await conn.execute(
@ -223,46 +213,32 @@ async def getlatestscanresults(
self.latestscanresults = list(results) self.latestscanresults = list(results)
return self.latestscanresults return self.latestscanresults
except Exception as e: except Exception as e:
print( print(f'Error in getting the latest scan results from the database: {e}')
f"Error in getting the latest scan results from the database: {e}"
)
except Exception as e: except Exception as e:
print(f"Error connecting to theHarvester database: {e}") print(f'Error connecting to theHarvester database: {e}')
return self.latestscanresults return self.latestscanresults
async def getscanboarddata(self): async def getscanboarddata(self):
try: try:
async with aiosqlite.connect(self.db, timeout=30) as conn: async with aiosqlite.connect(self.db, timeout=30) as conn:
cursor = await conn.execute( cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
'''SELECT COUNT(*) from results WHERE type="host"'''
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["host"] = data[0] self.scanboarddata['host'] = data[0]
cursor = await conn.execute( cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
'''SELECT COUNT(*) from results WHERE type="email"'''
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["email"] = data[0] self.scanboarddata['email'] = data[0]
cursor = await conn.execute( cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="ip"''')
'''SELECT COUNT(*) from results WHERE type="ip"'''
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["ip"] = data[0] self.scanboarddata['ip'] = data[0]
cursor = await conn.execute( cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="vhost"''')
'''SELECT COUNT(*) from results WHERE type="vhost"'''
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["vhost"] = data[0] self.scanboarddata['vhost'] = data[0]
cursor = await conn.execute( cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="shodan"''')
'''SELECT COUNT(*) from results WHERE type="shodan"'''
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["shodan"] = data[0] self.scanboarddata['shodan'] = data[0]
cursor = await conn.execute( cursor = await conn.execute("""SELECT COUNT(DISTINCT(domain)) FROM results """)
"""SELECT COUNT(DISTINCT(domain)) FROM results """
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.scanboarddata["domains"] = data[0] self.scanboarddata['domains'] = data[0]
return self.scanboarddata return self.scanboarddata
except Exception as e: except Exception as e:
print(e) print(e)
@ -302,12 +278,12 @@ async def getscanhistorydomain(self, domain):
) )
countshodan = await cursor.fetchone() countshodan = await cursor.fetchone()
results = { results = {
"date": str(date[0]), 'date': str(date[0]),
"hosts": str(counthost[0]), 'hosts': str(counthost[0]),
"email": str(countemail[0]), 'email': str(countemail[0]),
"ip": str(countip[0]), 'ip': str(countip[0]),
"vhost": str(countvhost[0]), 'vhost': str(countvhost[0]),
"shodan": str(countshodan[0]), 'shodan': str(countshodan[0]),
} }
self.domainscanhistory.append(results) self.domainscanhistory.append(results)
return self.domainscanhistory return self.domainscanhistory
@ -333,42 +309,40 @@ async def getpluginscanstatistics(self) -> Iterable[Row] | None:
async def latestscanchartdata(self, domain): async def latestscanchartdata(self, domain):
try: try:
async with aiosqlite.connect(self.db, timeout=30) as conn: async with aiosqlite.connect(self.db, timeout=30) as conn:
self.latestscandomain["domain"] = domain self.latestscandomain['domain'] = domain
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["host"] = data[0] self.latestscandomain['host'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["email"] = data[0] self.latestscandomain['email'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["ip"] = data[0] self.latestscandomain['ip'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["vhost"] = data[0] self.latestscandomain['vhost'] = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', '''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
(domain,), (domain,),
) )
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["shodan"] = data[0] self.latestscandomain['shodan'] = data[0]
cursor = await conn.execute( cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
)
data = await cursor.fetchone() data = await cursor.fetchone()
self.latestscandomain["latestdate"] = data[0] self.latestscandomain['latestdate'] = data[0]
latestdate = data[0] latestdate = data[0]
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
@ -378,7 +352,7 @@ async def latestscanchartdata(self, domain):
), ),
) )
scandetailshost = await cursor.fetchall() scandetailshost = await cursor.fetchall()
self.latestscandomain["scandetailshost"] = scandetailshost self.latestscandomain['scandetailshost'] = scandetailshost
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
( (
@ -387,7 +361,7 @@ async def latestscanchartdata(self, domain):
), ),
) )
scandetailsemail = await cursor.fetchall() scandetailsemail = await cursor.fetchall()
self.latestscandomain["scandetailsemail"] = scandetailsemail self.latestscandomain['scandetailsemail'] = scandetailsemail
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
( (
@ -396,7 +370,7 @@ async def latestscanchartdata(self, domain):
), ),
) )
scandetailsip = await cursor.fetchall() scandetailsip = await cursor.fetchall()
self.latestscandomain["scandetailsip"] = scandetailsip self.latestscandomain['scandetailsip'] = scandetailsip
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
( (
@ -405,7 +379,7 @@ async def latestscanchartdata(self, domain):
), ),
) )
scandetailsvhost = await cursor.fetchall() scandetailsvhost = await cursor.fetchall()
self.latestscandomain["scandetailsvhost"] = scandetailsvhost self.latestscandomain['scandetailsvhost'] = scandetailsvhost
cursor = await conn.execute( cursor = await conn.execute(
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''', '''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
( (
@ -414,7 +388,7 @@ async def latestscanchartdata(self, domain):
), ),
) )
scandetailsshodan = await cursor.fetchall() scandetailsshodan = await cursor.fetchall()
self.latestscandomain["scandetailsshodan"] = scandetailsshodan self.latestscandomain['scandetailsshodan'] = scandetailsshodan
return self.latestscandomain return self.latestscandomain
except Exception as e: except Exception as e:
print(e) print(e)
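
A hedged usage sketch of the StashManager interface reformatted above; the import path, domain, and source label are illustrative assumptions.

import asyncio

from theHarvester.lib.stash import StashManager  # assumed import path

async def demo() -> None:
    stash = StashManager()
    await stash.do_init()  # creates ~/.local/share/theHarvester/stash.sqlite on first run
    await stash.store_all('example.com', ['a.example.com', 'b.example.com'], 'host', 'demo-source')
    print(await stash.getscanboarddata())  # per-type counts across all stored results

asyncio.run(demo())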

View file

@ -1,4 +1,4 @@
VERSION = "4.6.0" VERSION = '4.6.0'
def version() -> str: def version() -> str:

View file

@ -10,17 +10,17 @@ async def parse_dictionaries(self, results: dict) -> tuple:
:return: tuple of emails and hosts :return: tuple of emails and hosts
""" """
if results is not None: if results is not None:
for dictionary in results["selectors"]: for dictionary in results['selectors']:
field = dictionary["selectorvalue"] field = dictionary['selectorvalue']
if "@" in field: if '@' in field:
self.emails.add(field) self.emails.add(field)
else: else:
field = str(field) field = str(field)
if "http" in field or "https" in field: if 'http' in field or 'https' in field:
if field[:5] == "https": if field[:5] == 'https':
field = field[8:] field = field[8:]
else: else:
field = field[7:] field = field[7:]
self.hosts.add(field.replace(")", "").replace(",", "")) self.hosts.add(field.replace(')', '').replace(',', ''))
return self.emails, self.hosts return self.emails, self.hosts
return None, None return None, None
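
A hypothetical selector payload to illustrate what parse_dictionaries extracts; the field values are made up.

sample = {
    'selectors': [
        {'selectorvalue': 'admin@example.com'},  # contains '@' -> collected as an email
        {'selectorvalue': 'https://portal.example.com/login'},  # scheme stripped -> host entry
    ]
}
# parse_dictionaries(sample) would return roughly:
#   emails == {'admin@example.com'}
#   hosts  == {'portal.example.com/login'}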

View file

@ -10,59 +10,49 @@ def __init__(self, results, word) -> None:
async def genericClean(self) -> None: async def genericClean(self) -> None:
self.results = ( self.results = (
self.results.replace("<em>", "") self.results.replace('<em>', '')
.replace("<b>", "") .replace('<b>', '')
.replace("</b>", "") .replace('</b>', '')
.replace("</em>", "") .replace('</em>', '')
.replace("%3a", "") .replace('%3a', '')
.replace("<strong>", "") .replace('<strong>', '')
.replace("</strong>", "") .replace('</strong>', '')
.replace("<wbr>", "") .replace('<wbr>', '')
.replace("</wbr>", "") .replace('</wbr>', '')
) )
for search in ( for search in (
"<", '<',
">", '>',
":", ':',
"=", '=',
";", ';',
"&", '&',
"%3A", '%3A',
"%3D", '%3D',
"%3C", '%3C',
"%2f", '%2f',
"/", '/',
"\\", '\\',
): ):
self.results = self.results.replace(search, " ") self.results = self.results.replace(search, ' ')
async def urlClean(self) -> None: async def urlClean(self) -> None:
self.results = ( self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
self.results.replace("<em>", "") for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
.replace("</em>", "") self.results = self.results.replace(search, ' ')
.replace("%2f", "")
.replace("%3a", "")
)
for search in ("<", ">", ":", "=", ";", "&", "%3A", "%3D", "%3C"):
self.results = self.results.replace(search, " ")
async def emails(self): async def emails(self):
await self.genericClean() await self.genericClean()
# Local part is required, charset is flexible. # Local part is required, charset is flexible.
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly) # https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
reg_emails = re.compile( reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', ''))
r"[a-zA-Z0-9.\-_+#~!$&\',;=:]+"
+ "@"
+ "[a-zA-Z0-9.-]*"
+ self.word.replace("www.", "")
)
self.temp = reg_emails.findall(self.results) self.temp = reg_emails.findall(self.results)
emails = await self.unique() emails = await self.unique()
true_emails = { true_emails = {
( (
str(email)[1:].lower().strip() str(email)[1:].lower().strip()
if len(str(email)) > 1 and str(email)[0] == "." if len(str(email)) > 1 and str(email)[0] == '.'
else len(str(email)) > 1 and str(email).lower().strip() else len(str(email)) > 1 and str(email).lower().strip()
) )
for email in emails for email in emails
@ -76,11 +66,7 @@ async def fileurls(self, file) -> list:
self.temp = reg_urls.findall(self.results) self.temp = reg_urls.findall(self.results)
allurls = await self.unique() allurls = await self.unique()
for iteration in allurls: for iteration in allurls:
if ( if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
iteration.count("webcache")
or iteration.count("google.com")
or iteration.count("search?hl")
):
pass pass
else: else:
urls.append(iteration) urls.append(iteration)
@ -90,11 +76,11 @@ async def hostnames(self):
# should check both www. and not www. # should check both www. and not www.
hostnames = [] hostnames = []
await self.genericClean() await self.genericClean()
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word) reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word)
first_hostnames = reg_hosts.findall(self.results) first_hostnames = reg_hosts.findall(self.results)
hostnames.extend(first_hostnames) hostnames.extend(first_hostnames)
# TODO determine if necessary below or if only pass through is fine # TODO determine if necessary below or if only pass through is fine
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word.replace("www.", "")) reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', ''))
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word) # reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word)
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?') # reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?')
second_hostnames = reg_hosts.findall(self.results) second_hostnames = reg_hosts.findall(self.results)
@ -102,31 +88,29 @@ async def hostnames(self):
return list(set(hostnames)) return list(set(hostnames))
async def hostnames_all(self): async def hostnames_all(self):
reg_hosts = re.compile("<cite>(.*?)</cite>") reg_hosts = re.compile('<cite>(.*?)</cite>')
temp = reg_hosts.findall(self.results) temp = reg_hosts.findall(self.results)
for iteration in temp: for iteration in temp:
if iteration.count(":"): if iteration.count(':'):
res = iteration.split(":")[1].split("/")[2] res = iteration.split(':')[1].split('/')[2]
else: else:
res = iteration.split("/")[0] res = iteration.split('/')[0]
self.temp.append(res) self.temp.append(res)
hostnames = await self.unique() hostnames = await self.unique()
return hostnames return hostnames
async def set(self): async def set(self):
reg_sets = re.compile(r">[a-zA-Z\d]*</a></font>") reg_sets = re.compile(r'>[a-zA-Z\d]*</a></font>')
self.temp = reg_sets.findall(self.results) self.temp = reg_sets.findall(self.results)
sets = [] sets = []
for iteration in self.temp: for iteration in self.temp:
delete = iteration.replace(">", "") delete = iteration.replace('>', '')
delete = delete.replace("</a</font", "") delete = delete.replace('</a</font', '')
sets.append(delete) sets.append(delete)
return sets return sets
async def urls(self) -> Set[str]: async def urls(self) -> Set[str]:
found = re.finditer( found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*', self.results)
r"(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*", self.results
)
urls = {match.group().strip() for match in found} urls = {match.group().strip() for match in found}
return urls return urls
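
Pulled out for illustration, the e-mail regex assembled in emails() above, with 'example.com' standing in for self.word and a made-up input string.

import re

word = 'example.com'
reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + word.replace('www.', ''))
print(reg_emails.findall('contact info@example.com or sales@mail.example.com'))
# -> ['info@example.com', 'sales@mail.example.com']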

View file

@ -13,7 +13,7 @@ async def parse_text(self) -> tuple[set, set]:
line = self.text[index].strip() line = self.text[index].strip()
if '"ip":' in line: if '"ip":' in line:
# Extract IP. # Extract IP.
ip = "" ip = ''
for ch in line[7:]: for ch in line[7:]:
if ch == '"': if ch == '"':
break break
@ -25,17 +25,13 @@ async def parse_text(self) -> tuple[set, set]:
sub_domain_flag = 1 sub_domain_flag = 1
continue continue
elif sub_domain_flag > 0: elif sub_domain_flag > 0:
if "]" in line: if ']' in line:
sub_domain_flag = 0 sub_domain_flag = 0
else: else:
if "www" in self.word: if 'www' in self.word:
self.word = ( self.word = str(self.word).replace('www.', '').replace('www', '')
str(self.word).replace("www.", "").replace("www", "")
)
# Remove www from word if entered # Remove www from word if entered
self.hostnames.add( self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word)
str(line).replace('"', "").replace(",", "") + "." + self.word
)
else: else:
continue continue
return self.ips, self.hostnames return self.ips, self.hostnames
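
A standalone walk-through of the '"ip":' branch above; the sample line is invented.

line = '   "ip": "93.184.216.34",'.strip()
ip = ''
for ch in line[7:]:  # skip past the leading '"ip": "' prefix
    if ch == '"':
        break
    ip += ch
print(ip)  # 93.184.216.34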

View file

@ -6,35 +6,35 @@
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
"-H", '-H',
"--host", '--host',
default="127.0.0.1", default='127.0.0.1',
help="IP address to listen on default is 127.0.0.1", help='IP address to listen on default is 127.0.0.1',
) )
parser.add_argument( parser.add_argument(
"-p", '-p',
"--port", '--port',
default=5000, default=5000,
help="Port to bind the web server to, default is 5000", help='Port to bind the web server to, default is 5000',
type=int, type=int,
) )
parser.add_argument( parser.add_argument(
"-l", '-l',
"--log-level", '--log-level',
default="info", default='info',
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set", help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set',
) )
parser.add_argument( parser.add_argument(
"-r", '-r',
"--reload", '--reload',
default=False, default=False,
help="Enable automatic reload used during development of the api", help='Enable automatic reload used during development of the api',
action="store_true", action='store_true',
) )
args: argparse.Namespace = parser.parse_args() args: argparse.Namespace = parser.parse_args()
uvicorn.run( uvicorn.run(
"theHarvester.lib.api.api:app", 'theHarvester.lib.api.api:app',
host=args.host, host=args.host,
port=args.port, port=args.port,
log_level=args.log_level, log_level=args.log_level,
@ -42,5 +42,5 @@ def main():
) )
if __name__ == "__main__": if __name__ == '__main__':
main() main()
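
For comparison, the programmatic equivalent of the CLI defaults above; a sketch that assumes the FastAPI app lives at the dotted path passed to uvicorn.run in main().

import uvicorn

uvicorn.run(
    'theHarvester.lib.api.api:app',
    host='127.0.0.1',
    port=5000,
    log_level='info',
    reload=False,
)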

View file

@ -17,27 +17,21 @@
class ScreenShotter: class ScreenShotter:
def __init__(self, output) -> None: def __init__(self, output) -> None:
self.output = output self.output = output
self.slash = "\\" if "win" in sys.platform else "/" self.slash = '\\' if 'win' in sys.platform else '/'
self.slash = ( self.slash = '' if (self.output[-1] == '\\' or self.output[-1] == '/') else self.slash
"" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
)
def verify_path(self) -> bool: def verify_path(self) -> bool:
try: try:
if not os.path.isdir(self.output): if not os.path.isdir(self.output):
answer = input( answer = input('[+] The output path you have entered does not exist would you like to create it (y/n): ')
"[+] The output path you have entered does not exist would you like to create it (y/n): " if answer.lower() == 'yes' or answer.lower() == 'y':
)
if answer.lower() == "yes" or answer.lower() == "y":
os.makedirs(self.output) os.makedirs(self.output)
return True return True
else: else:
return False return False
return True return True
except Exception as e: except Exception as e:
print( print(f"An exception has occurred while attempting to verify output path's existence: {e}")
f"An exception has occurred while attempting to verify output path's existence: {e}"
)
return False return False
@staticmethod @staticmethod
@ -47,29 +41,25 @@ async def verify_installation() -> None:
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch() browser = await p.chromium.launch()
await browser.close() await browser.close()
print("Playwright and Chromium are successfully installed.") print('Playwright and Chromium are successfully installed.')
except Exception as e: except Exception as e:
print( print(f'An exception has occurred while attempting to verify installation: {e}')
f"An exception has occurred while attempting to verify installation: {e}"
)
@staticmethod @staticmethod
def chunk_list(items: Collection, chunk_size: int) -> list: def chunk_list(items: Collection, chunk_size: int) -> list:
# Based off of: https://github.com/apache/incubator-sdap-ingester # Based off of: https://github.com/apache/incubator-sdap-ingester
return [ return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)]
list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)
]
@staticmethod @staticmethod
async def visit(url: str) -> tuple[str, str]: async def visit(url: str) -> tuple[str, str]:
try: try:
timeout = aiohttp.ClientTimeout(total=35) timeout = aiohttp.ClientTimeout(total=35)
headers = { headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) " 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
"Chrome/122.0.0.0 Safari/537.36" 'Chrome/122.0.0.0 Safari/537.36'
} }
url = f"http://{url}" if not url.startswith("http") else url url = f'http://{url}' if not url.startswith('http') else url
url = url.replace("www.", "") url = url.replace('www.', '')
sslcontext = ssl.create_default_context(cafile=certifi.where()) sslcontext = ssl.create_default_context(cafile=certifi.where())
async with aiohttp.ClientSession( async with aiohttp.ClientSession(
timeout=timeout, timeout=timeout,
@ -77,16 +67,16 @@ async def visit(url: str) -> tuple[str, str]:
connector=aiohttp.TCPConnector(ssl=sslcontext), connector=aiohttp.TCPConnector(ssl=sslcontext),
) as session: ) as session:
async with session.get(url, verify_ssl=False) as resp: async with session.get(url, verify_ssl=False) as resp:
text = await resp.text("UTF-8") text = await resp.text('UTF-8')
return f"http://{url}" if not url.startswith("http") else url, text return f'http://{url}' if not url.startswith('http') else url, text
except Exception as e: except Exception as e:
print(f"An exception has occurred while attempting to visit {url} : {e}") print(f'An exception has occurred while attempting to visit {url} : {e}')
return "", "" return '', ''
async def take_screenshot(self, url: str) -> tuple[str, ...]: async def take_screenshot(self, url: str) -> tuple[str, ...]:
url = f"http://{url}" if not url.startswith("http") else url url = f'http://{url}' if not url.startswith('http') else url
url = url.replace("www.", "") url = url.replace('www.', '')
print(f"Attempting to take a screenshot of: {url}") print(f'Attempting to take a screenshot of: {url}')
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch(headless=True) browser = await p.chromium.launch(headless=True)
# New browser context # New browser context
@ -100,10 +90,8 @@ async def take_screenshot(self, url: str) -> tuple[str, ...]:
await page.goto(url, timeout=35000) await page.goto(url, timeout=35000)
await page.screenshot(path=path) await page.screenshot(path=path)
except Exception as e: except Exception as e:
print( print(f'An exception has occurred attempting to screenshot: {url} : {e}')
f"An exception has occurred attempting to screenshot: {url} : {e}" path = ''
)
path = ""
finally: finally:
await page.close() await page.close()
await context.close() await context.close()
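
A minimal driver sketch for the ScreenShotter class reformatted above; the import path, output directory, and host list are illustrative assumptions.

import asyncio

from theHarvester.screenshot.screenshot import ScreenShotter  # assumed import path

async def demo() -> None:
    shotter = ScreenShotter('.')  # write screenshots to the current directory
    if shotter.verify_path():
        await shotter.verify_installation()  # confirms Playwright + Chromium are usable
        for batch in shotter.chunk_list(['example.com', 'example.org'], 25):
            for host in batch:
                print(await shotter.take_screenshot(host))

asyncio.run(demo())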

View file

@ -6,7 +6,7 @@
def main(): def main():
platform = sys.platform platform = sys.platform
if platform == "win32": if platform == 'win32':
# Required or things will break if trying to take screenshots # Required or things will break if trying to take screenshots
import multiprocessing import multiprocessing
@ -23,9 +23,9 @@ def main():
uvloop.install() uvloop.install()
if "linux" in platform: if 'linux' in platform:
import aiomultiprocess import aiomultiprocess
# As we are not using Windows, we can change the spawn method to fork for greater performance # As we are not using Windows, we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork") aiomultiprocess.set_context('fork')
asyncio.run(__main__.entry_point()) asyncio.run(__main__.entry_point())
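
The Linux-only tuning from main() in isolation, for readers unfamiliar with aiomultiprocess contexts; behaviour on other platforms is left untouched here.

import sys

if 'linux' in sys.platform:
    import aiomultiprocess

    # fork is cheaper than the default spawn when Windows support is not required
    aiomultiprocess.set_context('fork')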