Mirror of https://github.com/laramies/theHarvester.git (synced 2024-09-20 07:16:31 +08:00)

reformat using black

This commit is contained in:
parent 4376dac5a8
commit 2c871d60e3
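The hunks below are the output of running the Black code formatter over the project, as the commit title says. As a minimal sketch of what that pass does (the exact command and configuration used for this commit are an assumption, not recorded in the diff), the same normalization can be reproduced with Black's Python API:

    # Hedged example: assumes Black is installed (pip install black); the commit itself
    # was presumably produced by running the `black` command-line tool on the repository.
    import black

    source = "parser.add_argument('-H', '--host', default='127.0.0.1')\n"
    formatted = black.format_str(source, mode=black.Mode())
    print(formatted)
    # parser.add_argument("-H", "--host", default="127.0.0.1")

Black rewrites single-quoted strings as double-quoted ones and splits calls that exceed the line-length limit across multiple lines, which is the pattern repeated in every hunk that follows.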
@@ -4,12 +4,40 @@ import argparse
import uvicorn

parser = argparse.ArgumentParser()
parser.add_argument('-H', '--host', default='127.0.0.1', help='IP address to listen on default is 127.0.0.1')
parser.add_argument('-p', '--port', default=5000, help='Port to bind the web server to, default is 5000', type=int)
parser.add_argument('-l', '--log-level', default='info', help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set')
parser.add_argument('-r', '--reload', default=False, help='Enable automatic reload used during development of the api', action='store_true')
parser.add_argument(
"-H",
"--host",
default="127.0.0.1",
help="IP address to listen on default is 127.0.0.1",
)
parser.add_argument(
"-p",
"--port",
default=5000,
help="Port to bind the web server to, default is 5000",
type=int,
)
parser.add_argument(
"-l",
"--log-level",
default="info",
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
)
parser.add_argument(
"-r",
"--reload",
default=False,
help="Enable automatic reload used during development of the api",
action="store_true",
)

args = parser.parse_args()

if __name__ == '__main__':
uvicorn.run('theHarvester.lib.api.api:app', host=args.host, port=args.port, log_level=args.log_level, reload=args.reload)
if __name__ == "__main__":
uvicorn.run(
"theHarvester.lib.api.api:app",
host=args.host,
port=args.port,
log_level=args.log_level,
reload=args.reload,
)
@@ -6,12 +6,14 @@ import sys
from theHarvester import __main__

if sys.version_info.major < 3 or sys.version_info.minor < 10:
print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m')
print(
"\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m"
)
sys.exit(1)

if __name__ == '__main__':
if __name__ == "__main__":
platform = sys.platform
if platform == 'win32':
if platform == "win32":
# Required or things will break if trying to take screenshots
import multiprocessing

@@ -19,11 +21,13 @@ if __name__ == '__main__':
try:
# See if we have winloop as a performance enhancement on windows
import winloop

asyncio.DefaultEventLoopPolicy = winloop.EventLoopPolicy
except ModuleNotFoundError:
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
else:
import uvloop

uvloop.install()

if "linux" in platform:
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
from theHarvester.restfulHarvest import main

if __name__ == '__main__':
if __name__ == "__main__":
main()

@@ -5,8 +5,10 @@
from theHarvester.theHarvester import main

if sys.version_info.major < 3 or sys.version_info.minor < 10:
print('\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m')
print(
"\033[93m[!] Make sure you have Python 3.10+ installed, quitting.\n\n \033[0m"
)
sys.exit(1)

if __name__ == '__main__':
if __name__ == "__main__":
main()

File diff suppressed because it is too large
@@ -8,7 +8,7 @@ def __init__(self, word) -> None:
self.proxy = False

async def do_search(self) -> None:
url = f'https://jldc.me/anubis/subdomains/{self.word}'
url = f"https://jldc.me/anubis/subdomains/{self.word}"
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
self.totalhosts = response[0]
@@ -5,17 +5,23 @@
class SearchBaidu:
def __init__(self, word, limit) -> None:
self.word = word
self.total_results = ''
self.server = 'www.baidu.com'
self.hostname = 'www.baidu.com'
self.total_results = ""
self.server = "www.baidu.com"
self.hostname = "www.baidu.com"
self.limit = limit
self.proxy = False

async def do_search(self) -> None:
headers = {'Host': self.hostname, 'User-agent': Core.get_user_agent()}
base_url = f'https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}'
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
headers = {"Host": self.hostname, "User-agent": Core.get_user_agent()}
base_url = f"https://{self.server}/s?wd=%40{self.word}&pn=xx&oq={self.word}"
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 10)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
for response in responses:
self.total_results += response
@@ -9,23 +9,27 @@ def __init__(self, word) -> None:
self.interestingurls: set = set()
self.key = Core.bevigil_key()
if self.key is None:
self.key = ''
raise MissingKey('bevigil')
self.key = ""
raise MissingKey("bevigil")
self.proxy = False

async def do_search(self) -> None:
subdomain_endpoint = f'https://osint.bevigil.com/api/{self.word}/subdomains/'
url_endpoint = f'https://osint.bevigil.com/api/{self.word}/urls/'
headers = {'X-Access-Token': self.key}
subdomain_endpoint = f"https://osint.bevigil.com/api/{self.word}/subdomains/"
url_endpoint = f"https://osint.bevigil.com/api/{self.word}/urls/"
headers = {"X-Access-Token": self.key}

responses = await AsyncFetcher.fetch_all([subdomain_endpoint], json=True, proxy=self.proxy, headers=headers)
responses = await AsyncFetcher.fetch_all(
[subdomain_endpoint], json=True, proxy=self.proxy, headers=headers
)
response = responses[0]
for subdomain in response['subdomains']:
for subdomain in response["subdomains"]:
self.totalhosts.add(subdomain)

responses = await AsyncFetcher.fetch_all([url_endpoint], json=True, proxy=self.proxy, headers=headers)
responses = await AsyncFetcher.fetch_all(
[url_endpoint], json=True, proxy=self.proxy, headers=headers
)
response = responses[0]
for url in response['urls']:
for url in response["urls"]:
self.interestingurls.add(url)

async def get_hostnames(self) -> set:
@@ -13,25 +13,29 @@ def __init__(self, word, limit) -> None:
self.limit = 501 if limit >= 501 else limit
self.limit = 2 if self.limit == 1 else self.limit
if self.key is None:
raise MissingKey('binaryedge')
raise MissingKey("binaryedge")

async def do_search(self) -> None:
base_url = f'https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}'
headers = {'X-KEY': self.key, 'User-Agent': Core.get_user_agent()}
base_url = f"https://api.binaryedge.io/v2/query/domains/subdomain/{self.word}"
headers = {"X-KEY": self.key, "User-Agent": Core.get_user_agent()}
for page in range(1, self.limit):
params = {'page': page}
response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy, params=params, headers=headers)
params = {"page": page}
response = await AsyncFetcher.fetch_all(
[base_url], json=True, proxy=self.proxy, params=params, headers=headers
)
responses = response[0]
dct = responses
if ('status' in dct.keys() and 'message' in dct.keys()) and (
dct['status'] == 400 or 'Bad Parameter' in dct['message'] or 'Error' in dct['message']
if ("status" in dct.keys() and "message" in dct.keys()) and (
dct["status"] == 400
or "Bad Parameter" in dct["message"]
or "Error" in dct["message"]
):
# 400 status code means no more results
break
if 'events' in dct.keys():
if len(dct['events']) == 0:
if "events" in dct.keys():
if len(dct["events"]) == 0:
break
self.totalhosts.update({host for host in dct['events']})
self.totalhosts.update({host for host in dct["events"]})
await asyncio.sleep(get_delay())

async def get_hostnames(self) -> set:
@@ -7,12 +7,12 @@

class SearchBing:
def __init__(self, word, limit, start) -> None:
self.word = word.replace(' ', '%20')
self.word = word.replace(" ", "%20")
self.results: list[Any] = []
self.total_results = ''
self.server = 'www.bing.com'
self.apiserver = 'api.search.live.net'
self.hostname = 'www.bing.com'
self.total_results = ""
self.server = "www.bing.com"
self.apiserver = "api.search.live.net"
self.hostname = "www.bing.com"
self.limit = int(limit)
self.bingApi = Core.bing_key()
self.counter = start

@@ -20,44 +20,58 @@ def __init__(self, word, limit, start) -> None:

async def do_search(self) -> None:
headers = {
'Host': self.hostname,
'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
'Accept-Language': 'en-us,en',
'User-agent': Core.get_user_agent(),
"Host": self.hostname,
"Cookie": "SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50",
"Accept-Language": "en-us,en",
"User-agent": Core.get_user_agent(),
}
base_url = f'https://{self.server}/search?q=%40"{self.word}"&count=50&first=xx'
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 50)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
for response in responses:
self.total_results += response

async def do_search_api(self) -> None:
url = 'https://api.bing.microsoft.com/v7.0/search?'
url = "https://api.bing.microsoft.com/v7.0/search?"
params = {
'q': self.word,
'count': str(self.limit),
'offset': '0',
'mkt': 'en-us',
'safesearch': 'Off',
"q": self.word,
"count": str(self.limit),
"offset": "0",
"mkt": "en-us",
"safesearch": "Off",
}
headers = {
'User-Agent': Core.get_user_agent(),
'Ocp-Apim-Subscription-Key': self.bingApi,
"User-Agent": Core.get_user_agent(),
"Ocp-Apim-Subscription-Key": self.bingApi,
}
self.results = await AsyncFetcher.fetch_all([url], headers=headers, params=params, proxy=self.proxy)
self.results = await AsyncFetcher.fetch_all(
[url], headers=headers, params=params, proxy=self.proxy
)
for res in self.results:
self.total_results += res

async def do_search_vhost(self) -> None:
headers = {
'Host': self.hostname,
'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
'Accept-Language': 'en-us,en',
'User-agent': Core.get_user_agent(),
"Host": self.hostname,
"Cookie": "mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50",
"Accept-Language": "en-us,en",
"User-agent": Core.get_user_agent(),
}
base_url = f'http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx'
urls = [base_url.replace('xx', str(num)) for num in range(0, self.limit, 50) if num <= self.limit]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
base_url = f"http://{self.server}/search?q=ip:{self.word}&go=&count=50&FORM=QBHL&qs=n&first=xx"
urls = [
base_url.replace("xx", str(num))
for num in range(0, self.limit, 50)
if num <= self.limit
]
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
for response in responses:
self.total_results += response

@@ -75,13 +89,13 @@ async def get_allhostnames(self):

async def process(self, api, proxy: bool = False) -> None:
self.proxy = proxy
if api == 'yes':
if api == "yes":
if self.bingApi is None:
raise MissingKey('BingAPI')
raise MissingKey("BingAPI")
await self.do_search_api()
else:
await self.do_search()
print(f'\tSearching {self.counter} results.')
print(f"\tSearching {self.counter} results.")

async def process_vhost(self) -> None:
await self.do_search_vhost()
@@ -8,34 +8,37 @@
class SearchBrave:
def __init__(self, word, limit):
self.word = word
self.results = ''
self.totalresults = ''
self.server = 'https://search.brave.com/search?q='
self.results = ""
self.totalresults = ""
self.server = "https://search.brave.com/search?q="
self.limit = limit
self.proxy = False

async def do_search(self):
headers = {'User-Agent': Core.get_user_agent()}
for query in [f'"{self.word}"', f'site:{self.word}']:
headers = {"User-Agent": Core.get_user_agent()}
for query in [f'"{self.word}"', f"site:{self.word}"]:
try:
for offset in range(0, 50):
# To reduce the total number of requests, only two queries are made "self.word" and site:self.word
current_url = f'{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0'
resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy)
current_url = f"{self.server}{query}&offset={offset}&source=web&show_local=0&spellcheck=0"
resp = await AsyncFetcher.fetch_all(
[current_url], headers=headers, proxy=self.proxy
)
self.results = resp[0]
self.totalresults += self.results
# if 'Results from Microsoft Bing.' in resp[0] \
if (
'Not many great matches came back for your search' in resp[0]
or 'Your request has been flagged as being suspicious and Brave Search' in resp[0]
or 'Prove' in resp[0]
and 'robot' in resp[0]
or 'Robot' in resp[0]
"Not many great matches came back for your search" in resp[0]
or "Your request has been flagged as being suspicious and Brave Search"
in resp[0]
or "Prove" in resp[0]
and "robot" in resp[0]
or "Robot" in resp[0]
):
break
await asyncio.sleep(get_delay() + 15)
except Exception as e:
print(f'An exception has occurred in bravesearch: {e}')
print(f"An exception has occurred in bravesearch: {e}")
await asyncio.sleep(get_delay() + 80)
continue
@@ -11,28 +11,33 @@ def __init__(self, word) -> None:
self.totalips: set = set()
self.key = Core.bufferoverun_key()
if self.key is None:
raise MissingKey('bufferoverun')
raise MissingKey("bufferoverun")
self.proxy = False

async def do_search(self) -> None:
url = f'https://tls.bufferover.run/dns?q={self.word}'
url = f"https://tls.bufferover.run/dns?q={self.word}"
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
proxy=self.proxy,
)
dct = response[0]
if dct['Results']:
if dct["Results"]:
self.totalhosts = {
host.split(',')
if ',' in host and self.word.replace('www.', '') in host.split(',')[0] in host
else host.split(',')[4]
for host in dct['Results']
(
host.split(",")
if "," in host
and self.word.replace("www.", "") in host.split(",")[0] in host
else host.split(",")[4]
)
for host in dct["Results"]
}

self.totalips = {
ip.split(',')[0] for ip in dct['Results'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip.split(',')[0])
ip.split(",")[0]
for ip in dct["Results"]
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip.split(",")[0])
}

async def get_hostnames(self) -> set:
@@ -15,7 +15,7 @@ def __init__(self, domain, limit: int = 500) -> None:
self.word = domain
self.key = Core.censys_key()
if self.key[0] is None or self.key[1] is None:
raise MissingKey('Censys ID and/or Secret')
raise MissingKey("Censys ID and/or Secret")
self.totalhosts: set = set()
self.emails: set = set()
self.limit = limit

@@ -26,24 +26,26 @@ async def do_search(self) -> None:
cert_search = CensysCerts(
api_id=self.key[0],
api_secret=self.key[1],
user_agent=f'censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)',
user_agent=f"censys-python/{__version__} (theHarvester/{thehavester_version}); +https://github.com/laramies/theHarvester)",
)
except CensysUnauthorizedException:
raise MissingKey('Censys ID and/or Secret')
raise MissingKey("Censys ID and/or Secret")

query = f'names: {self.word}'
query = f"names: {self.word}"
try:
response = cert_search.search(
query=query,
fields=['names', 'parsed.subject.email_address'],
fields=["names", "parsed.subject.email_address"],
max_records=self.limit,
)
for cert in response():
self.totalhosts.update(cert.get('names', []))
email_address = cert.get('parsed', {}).get('subject', {}).get('email_address', [])
self.totalhosts.update(cert.get("names", []))
email_address = (
cert.get("parsed", {}).get("subject", {}).get("email_address", [])
)
self.emails.update(email_address)
except CensysRateLimitExceededException:
print('Censys rate limit exceeded')
print("Censys rate limit exceeded")

async def get_hostnames(self) -> set:
return self.totalhosts
@@ -8,19 +8,21 @@ def __init__(self, word) -> None:
self.proxy = False

async def do_search(self) -> None:
base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names'
base_url = f"https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names"
try:
response = await AsyncFetcher.fetch_all([base_url], json=True, proxy=self.proxy)
response = await AsyncFetcher.fetch_all(
[base_url], json=True, proxy=self.proxy
)
response = response[0]
if isinstance(response, list):
for dct in response:
for key, value in dct.items():
if key == 'dns_names':
if key == "dns_names":
self.totalhosts.update({name for name in value if name})
elif isinstance(response, dict):
self.totalhosts.update({response['dns_names'] if 'dns_names' in response.keys() else ''})  # type: ignore
self.totalhosts.update({response["dns_names"] if "dns_names" in response.keys() else ""})  # type: ignore
else:
self.totalhosts.update({''})
self.totalhosts.update({""})
except Exception as e:
print(e)

@@ -30,4 +32,4 @@ async def get_hostnames(self) -> set:
async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()
print('\tSearching results.')
print("\tSearching results.")
@@ -14,10 +14,10 @@ async def splitter(links):
unique_list = []
name_check = []
for url in links:
tail = url.split('/')[-1]
if len(tail) == 2 or tail == 'zh-cn':
tail = url.split('/')[-2]
name = tail.split('-')
tail = url.split("/")[-1]
if len(tail) == 2 or tail == "zh-cn":
tail = url.split("/")[-2]
name = tail.split("-")
if len(name) > 1:
joined_name = name[0] + name[1]
else:

@@ -41,8 +41,12 @@ def filter(lst):
new_lst = []
for item in lst:
item = str(item)
if (item[0].isalpha() or item[0].isdigit()) and ('xxx' not in item) and ('..' not in item):
item = item.replace('252f', '').replace('2F', '').replace('2f', '')
if (
(item[0].isalpha() or item[0].isdigit())
and ("xxx" not in item)
and (".." not in item)
):
item = item.replace("252f", "").replace("2F", "").replace("2f", "")
new_lst.append(item.lower())
return new_lst

@@ -59,9 +63,10 @@ async def search(text: str) -> bool:
"""
for line in text.strip().splitlines():
if (
'This page appears when Google automatically detects requests coming from your computer network' in line
or 'http://www.google.com/sorry/index' in line
or 'https://www.google.com/sorry/index' in line
"This page appears when Google automatically detects requests coming from your computer network"
in line
or "http://www.google.com/sorry/index" in line
or "https://www.google.com/sorry/index" in line
):
# print('\tGoogle is blocking your IP due to too many automated requests, wait or change your IP')
return True
@@ -74,37 +79,47 @@ async def google_workaround(visit_url: str) -> bool | str:
:param visit_url: Url to scrape
:return: Correct html that can be parsed by BS4
"""
url = 'https://websniffer.cc/'
url = "https://websniffer.cc/"
data = {
'Cookie': '',
'url': visit_url,
'submit': 'Submit',
'type': 'GET&http=1.1',
'uak': str(random.randint(4, 8)),  # select random UA to send to Google
"Cookie": "",
"url": visit_url,
"submit": "Submit",
"type": "GET&http=1.1",
"uak": str(random.randint(4, 8)),  # select random UA to send to Google
}
returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
returned_html = await AsyncFetcher.post_fetch(
url, headers={"User-Agent": Core.get_user_agent()}, data=data
)
returned_html = (
'This page appears when Google automatically detects requests coming from your computer network'
if returned_html == ''
"This page appears when Google automatically detects requests coming from your computer network"
if returned_html == ""
else returned_html[0]
)

returned_html = '' if 'Please Wait... | Cloudflare' in returned_html else returned_html
returned_html = (
"" if "Please Wait... | Cloudflare" in returned_html else returned_html
)

if len(returned_html) == 0 or await search(returned_html) or '<html' not in returned_html:
if (
len(returned_html) == 0
or await search(returned_html)
or "<html" not in returned_html
):
# indicates that google is serving workaround a captcha
# That means we will try out second option which will utilize proxies
return True
# the html we get is malformed for BS4 as there are no greater than or less than signs
if '<html>' in returned_html:
start_index = returned_html.index('<html>')
if "<html>" in returned_html:
start_index = returned_html.index("<html>")
else:
start_index = returned_html.index('<html')
start_index = returned_html.index("<html")

end_index = returned_html.index('</html>') + 1
end_index = returned_html.index("</html>") + 1
correct_html = returned_html[start_index:end_index]
# Slice list to get the response's html
correct_html = ''.join([ch.strip().replace('&lt;', '<').replace('&gt;', '>') for ch in correct_html])
correct_html = "".join(
[ch.strip().replace("&lt;", "<").replace("&gt;", ">") for ch in correct_html]
)
return correct_html
@@ -115,9 +130,9 @@ class MissingKey(Exception):

def __init__(self, source: str | None) -> None:
if source:
self.message = f'\n\033[93m[!] Missing API key for {source}. \033[0m'
self.message = f"\n\033[93m[!] Missing API key for {source}. \033[0m"
else:
self.message = '\n\033[93m[!] Missing CSE id. \033[0m'
self.message = "\n\033[93m[!] Missing CSE id. \033[0m"

def __str__(self) -> str:
return self.message
@@ -13,56 +13,64 @@ def __init__(self, word) -> None:
self.asns: set = set()
self.key = Core.criminalip_key()
if self.key is None:
raise MissingKey('criminalip')
raise MissingKey("criminalip")
self.proxy = False

async def do_search(self) -> None:
# https://www.criminalip.io/developer/api/post-domain-scan
# https://www.criminalip.io/developer/api/get-domain-status-id
# https://www.criminalip.io/developer/api/get-domain-report-id
url = 'https://api.criminalip.io/v1/domain/scan'
url = "https://api.criminalip.io/v1/domain/scan"
data = f'{{"query": "{self.word}"}}'
# print(f'Current key: {self.key}')
user_agent = Core.get_user_agent()
response = await AsyncFetcher.post_fetch(
url,
json=True,
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
data=data,
proxy=self.proxy,
)
# print(f'My response: {response}')
# Expected response format:
# {'data': {'scan_id': scan_id}, 'message': 'api success', 'status': 200}
if 'status' in response.keys():
status = response['status']
if "status" in response.keys():
status = response["status"]
if status != 200:
print(f'An error has occurred searching criminalip dumping response: {response}')
print(
f"An error has occurred searching criminalip dumping response: {response}"
)
else:
scan_id = response['data']['scan_id']
scan_id = response["data"]["scan_id"]
scan_percentage = 0
counter = 0
while scan_percentage != 100:
status_url = f'https://api.criminalip.io/v1/domain/status/{scan_id}'
status_url = f"https://api.criminalip.io/v1/domain/status/{scan_id}"
status_response = await AsyncFetcher.fetch_all(
[status_url],
json=True,
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
proxy=self.proxy,
)
status = status_response[0]
# print(f'Status response: {status}')
# Expected format:
# {"data": {"scan_percentage": 100}, "message": "api success", "status": 200}
scan_percentage = status['data']['scan_percentage']
scan_percentage = status["data"]["scan_percentage"]
if scan_percentage == 100:
break
if scan_percentage == -2:
print(f'CriminalIP failed to scan: {self.word} does not exist, verify manually')
print(f'Dumping data: scan_response: {response} status_response: {status}')
print(
f"CriminalIP failed to scan: {self.word} does not exist, verify manually"
)
print(
f"Dumping data: scan_response: {response} status_response: {status}"
)
return
if scan_percentage == -1:
print(f'CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}')
print(
f"CriminalIP scan failed dumping data: scan_response: {response} status_response: {status}"
)
return
# Wait for scan to finish
if counter >= 5:

@@ -72,18 +80,18 @@ async def do_search(self) -> None:
counter += 1
if counter == 10:
print(
'Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop.'
"Ten iterations have occurred in CriminalIP waiting for scan to finish, returning to prevent infinite loop."
)
print(
f'Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}'
f"Verify results manually on CriminalIP dumping data: scan_response: {response} status_response: {status}"
)
return

report_url = f'https://api.criminalip.io/v1/domain/report/{scan_id}'
report_url = f"https://api.criminalip.io/v1/domain/report/{scan_id}"
scan_response = await AsyncFetcher.fetch_all(
[report_url],
json=True,
headers={'User-Agent': user_agent, 'x-api-key': f'{self.key}'},
headers={"User-Agent": user_agent, "x-api-key": f"{self.key}"},
proxy=self.proxy,
)
scan = scan_response[0]

@@ -92,113 +100,125 @@ async def do_search(self) -> None:
try:
await self.parser(scan)
except Exception as e:
print(f'An exception occurred while parsing criminalip result: {e}')
print('Dumping json: ')
print(f"An exception occurred while parsing criminalip result: {e}")
print("Dumping json: ")
print(scan)

async def parser(self, jlines):
# TODO when new scope field is added to parse lines for potential new scope!
# TODO map as_name to asn for asn data
# TODO determine if worth storing interesting urls
if 'data' not in jlines.keys():
print(f'Error with criminalip data, dumping: {jlines}')
if "data" not in jlines.keys():
print(f"Error with criminalip data, dumping: {jlines}")
return
data = jlines['data']
for cert in data['certificates']:
data = jlines["data"]
for cert in data["certificates"]:
# print(f'Current cert: {cert}')
if cert['subject'].endswith('.' + self.word):
self.totalhosts.add(cert['subject'])
if cert["subject"].endswith("." + self.word):
self.totalhosts.add(cert["subject"])

for connected_domain in data['connected_domain_subdomain']:
for connected_domain in data["connected_domain_subdomain"]:
try:
main_domain = connected_domain['main_domain']['domain']
subdomains = [sub['domain'] for sub in connected_domain['subdomains']]
if main_domain.endswith('.' + self.word):
main_domain = connected_domain["main_domain"]["domain"]
subdomains = [sub["domain"] for sub in connected_domain["subdomains"]]
if main_domain.endswith("." + self.word):
self.totalhosts.add(main_domain)
for sub in subdomains:
# print(f'Current sub: {sub}')
if sub.endswith('.' + self.word):
if sub.endswith("." + self.word):
self.totalhosts.add(sub)
except Exception as e:
print(f'An exception has occurred: {e}')
print(f'Main line: {connected_domain}')
print(f"An exception has occurred: {e}")
print(f"Main line: {connected_domain}")

for ip_info in data['connected_ip_info']:
self.asns.add(str(ip_info['asn']))
domains = [sub['domain'] for sub in ip_info['domain_list']]
for ip_info in data["connected_ip_info"]:
self.asns.add(str(ip_info["asn"]))
domains = [sub["domain"] for sub in ip_info["domain_list"]]
for sub in domains:
if sub.endswith('.' + self.word):
if sub.endswith("." + self.word):
self.totalhosts.add(sub)
self.totalips.add(ip_info['ip'])
self.totalips.add(ip_info["ip"])

for cookie in data['cookies']:
if cookie['domain'] != '.' + self.word and cookie['domain'].endswith('.' + self.word):
self.totalhosts.add(cookie['domain'])
for cookie in data["cookies"]:
if cookie["domain"] != "." + self.word and cookie["domain"].endswith(
"." + self.word
):
self.totalhosts.add(cookie["domain"])

for country in data['country']:
if country['domain'].endswith('.' + self.word):
self.totalhosts.add(country['domain'])
for ip in country['mapped_ips']:
self.totalips.add(ip['ip'])
for country in data["country"]:
if country["domain"].endswith("." + self.word):
self.totalhosts.add(country["domain"])
for ip in country["mapped_ips"]:
self.totalips.add(ip["ip"])

for k, v in data['dns_record'].items():
if k == 'dns_record_type_a':
for ip in data['dns_record'][k]['ipv4']:
self.totalips.add(ip['ip'])
for k, v in data["dns_record"].items():
if k == "dns_record_type_a":
for ip in data["dns_record"][k]["ipv4"]:
self.totalips.add(ip["ip"])
else:
if isinstance(v, list):
for item in v:
if isinstance(item, list):
for subitem in item:
if subitem.endswith('.' + self.word):
if subitem.endswith("." + self.word):
self.totalhosts.add(subitem)
else:
if item.endswith('.' + self.word):
if item.endswith("." + self.word):
self.totalhosts.add(item)

for domain_list in data['domain_list']:
self.asns.add(str(domain_list['asn']))
domains = [sub['domain'] for sub in domain_list['domain_list']]
for domain_list in data["domain_list"]:
self.asns.add(str(domain_list["asn"]))
domains = [sub["domain"] for sub in domain_list["domain_list"]]
for sub in domains:
if sub.endswith('.' + self.word):
if sub.endswith("." + self.word):
self.totalhosts.add(sub)
self.totalips.add(domain_list['ip'])
self.totalips.add(domain_list["ip"])

for html_page_links in data['html_page_link_domains']:
domain = html_page_links['domain']
if domain.endswith('.' + self.word):
for html_page_links in data["html_page_link_domains"]:
domain = html_page_links["domain"]
if domain.endswith("." + self.word):
self.totalhosts.add(domain)
for ip in html_page_links['mapped_ips']:
self.totalips.add(ip['ip'])
for ip in html_page_links["mapped_ips"]:
self.totalips.add(ip["ip"])

# TODO combine data['links'] and data['network_logs'] urls into one list for one run through
for link in data['links']:
url = link['url']
for link in data["links"]:
url = link["url"]
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
self.totalhosts.add(netloc)

for log in data['network_logs']:
url = log['url']
for log in data["network_logs"]:
url = log["url"]
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
self.totalhosts.add(netloc)
self.asns.add(str(log['as_number']))
self.asns.add(str(log["as_number"]))

for redirects in data['page_redirections']:
for redirects in data["page_redirections"]:
for redirect in redirects:
url = redirect['url']
url = redirect["url"]
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if self.word in netloc:
if (':' in netloc and netloc.split(':')[0].endswith(self.word)) or netloc.endswith(self.word):
if (
":" in netloc and netloc.split(":")[0].endswith(self.word)
) or netloc.endswith(self.word):
self.totalhosts.add(netloc)

self.totalhosts = {host.replace('www.', '') for host in self.totalhosts if '*.' + self.word != host}
self.totalhosts = {
host.replace("www.", "")
for host in self.totalhosts
if "*." + self.word != host
}

# print(f'hostnames: {self.totalhosts}')
# print(f'asns: {self.asns}')
@@ -10,11 +10,24 @@ def __init__(self, word) -> None:
async def do_search(self) -> list:
data: set = set()
try:
url = f'https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json'
url = f"https://crt.sh/?q=%25.{self.word}&exclude=expired&deduplicate=Y&output=json"
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
response = response[0]
data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in response])
data = {domain for domain in data if (domain[0] != '*' and str(domain[0:4]).isnumeric() is False)}
data = set(
[
(
dct["name_value"][2:]
if "*." == dct["name_value"][:2]
else dct["name_value"]
)
for dct in response
]
)
data = {
domain
for domain in data
if (domain[0] != "*" and str(domain[0:4]).isnumeric() is False)
}
except Exception as e:
print(e)
clean: list = []
@@ -8,49 +8,51 @@

class SearchDnsDumpster:
def __init__(self, word) -> None:
self.word = word.replace(' ', '%20')
self.results = ''
self.totalresults = ''
self.server = 'dnsdumpster.com'
self.word = word.replace(" ", "%20")
self.results = ""
self.totalresults = ""
self.server = "dnsdumpster.com"
self.proxy = False

async def do_search(self) -> None:
try:
agent = Core.get_user_agent()
headers = {'User-Agent': agent}
headers = {"User-Agent": agent}
session = aiohttp.ClientSession(headers=headers)
# create a session to properly verify
url = f'https://{self.server}'
csrftoken = ''
url = f"https://{self.server}"
csrftoken = ""
if self.proxy is False:
async with session.get(url, headers=headers) as resp:
resp_cookies = str(resp.cookies)
cookies = resp_cookies.split('csrftoken=')
csrftoken += cookies[1][: cookies[1].find(';')]
cookies = resp_cookies.split("csrftoken=")
csrftoken += cookies[1][: cookies[1].find(";")]
else:
async with session.get(url, headers=headers, proxy=self.proxy) as resp:
resp_cookies = str(resp.cookies)
cookies = resp_cookies.split('csrftoken=')
csrftoken += cookies[1][: cookies[1].find(';')]
cookies = resp_cookies.split("csrftoken=")
csrftoken += cookies[1][: cookies[1].find(";")]
await asyncio.sleep(5)

# extract csrftoken from cookies
data = {
'Cookie': f'csfrtoken={csrftoken}',
'csrfmiddlewaretoken': csrftoken,
'targetip': self.word,
'user': 'free',
"Cookie": f"csfrtoken={csrftoken}",
"csrfmiddlewaretoken": csrftoken,
"targetip": self.word,
"user": "free",
}
headers['Referer'] = url
headers["Referer"] = url
if self.proxy is False:
async with session.post(url, headers=headers, data=data) as resp:
self.results = await resp.text()
else:
async with session.post(url, headers=headers, data=data, proxy=self.proxy) as resp:
async with session.post(
url, headers=headers, data=data, proxy=self.proxy
) as resp:
self.results = await resp.text()
await session.close()
except Exception as e:
print(f'An exception occurred: {e}')
print(f"An exception occurred: {e}")
self.totalresults += self.results

async def get_hostnames(self):
@@ -21,7 +21,7 @@
# DNS FORCE
#####################################################################

DNS_NAMES = DATA_DIR / 'wordlists' / 'dns-names.txt'
DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"

class DnsForce:

@@ -32,13 +32,13 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
# self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
# self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
self.dnsserver = dnsserver
with DNS_NAMES.open('r') as file:
with DNS_NAMES.open("r") as file:
self.list = file.readlines()
self.domain = domain.replace('www.', '')
self.list = [f'{word.strip()}.{self.domain}' for word in self.list]
self.domain = domain.replace("www.", "")
self.list = [f"{word.strip()}.{self.domain}" for word in self.list]

async def run(self):
print(f'Starting DNS brute forcing with {len(self.list)} words')
print(f"Starting DNS brute forcing with {len(self.list)} words")
checker = hostchecker.Checker(self.list, nameserver=self.dnsserver)
resolved_pair, hosts, ips = await checker.check()
return resolved_pair, hosts, ips

@@ -49,13 +49,13 @@ async def run(self):
#####################################################################

IP_REGEX = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
PORT_REGEX = r'\d{1,5}'
NETMASK_REGEX: str = r'\d{1,2}|' + IP_REGEX
NETWORK_REGEX: str = rf'\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b'
IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
PORT_REGEX = r"\d{1,5}"
NETMASK_REGEX: str = r"\d{1,2}|" + IP_REGEX
NETWORK_REGEX: str = rf"\b({IP_REGEX})(?:\:({PORT_REGEX}))?(?:\/({NETMASK_REGEX}))?\b"

def serialize_ip_range(ip: str, netmask: str = '24') -> str:
def serialize_ip_range(ip: str, netmask: str = "24") -> str:
"""
Serialize a network range in a constant format, 'x.x.x.x/y'.

@@ -78,12 +78,12 @@ def serialize_ip_range(ip: str, netmask: str = '24') -> str:
__ip = __ip_matches.group(1)
__netmask = netmask if netmask else __ip_matches.group(3)
if __ip and __netmask:
return str(IPv4Network(f'{__ip}/{__netmask}', strict=False))
return str(IPv4Network(f"{__ip}/{__netmask}", strict=False))
elif __ip:
return str(IPv4Network('{}/{}'.format(__ip, '24'), strict=False))
return str(IPv4Network("{}/{}".format(__ip, "24"), strict=False))

# invalid input ip
return ''
return ""

def list_ips_in_network_range(iprange: str) -> list[str]:

@@ -122,12 +122,14 @@ async def reverse_single_ip(ip: str, resolver: DNSResolver) -> str:
"""
try:
__host = await resolver.gethostbyaddr(ip)
return __host.name if __host else ''
return __host.name if __host else ""
except Exception:
return ''
return ""

async def reverse_all_ips_in_range(iprange: str, callback: Callable, nameservers: list[str] | None = None) -> None:
async def reverse_all_ips_in_range(
iprange: str, callback: Callable, nameservers: list[str] | None = None
) -> None:
"""
Reverse all the IPs stored in a network range.
All the queries are made concurrently.

@@ -174,8 +176,8 @@ def log_query(ip: str) -> None:
-------
out: None.
"""
sys.stdout.write(chr(27) + '[2K' + chr(27) + '[G')
sys.stdout.write('\r' + ip + ' - ')
sys.stdout.write(chr(27) + "[2K" + chr(27) + "[G")
sys.stdout.write("\r" + ip + " - ")
sys.stdout.flush()
@@ -7,27 +7,29 @@
class SearchDuckDuckGo:
def __init__(self, word, limit) -> None:
self.word = word
self.results = ''
self.totalresults = ''
self.results = ""
self.totalresults = ""
self.dorks: list = []
self.links: list = []
self.database = 'https://duckduckgo.com/?q='
self.api = 'https://api.duckduckgo.com/?q=x&format=json&pretty=1'  # Currently using API.
self.quantity = '100'
self.database = "https://duckduckgo.com/?q="
self.api = "https://api.duckduckgo.com/?q=x&format=json&pretty=1"  # Currently using API.
self.quantity = "100"
self.limit = limit
self.proxy = False

async def do_search(self) -> None:
# Do normal scraping.
url = self.api.replace('x', self.word)
headers = {'User-Agent': Core.get_user_agent()}
first_resp = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
url = self.api.replace("x", self.word)
headers = {"User-Agent": Core.get_user_agent()}
first_resp = await AsyncFetcher.fetch_all(
[url], headers=headers, proxy=self.proxy
)
self.results = first_resp[0]
self.totalresults += self.results
urls = await self.crawl(self.results)
urls = {url for url in urls if len(url) > 5}
all_resps = await AsyncFetcher.fetch_all(urls)
self.totalresults += ''.join(all_resps)
self.totalresults += "".join(all_resps)

async def crawl(self, text):
"""

@@ -52,27 +54,39 @@ async def crawl(self, text):
if isinstance(val, dict):  # Validation check.
for key in val.keys():
value = val.get(key)
if isinstance(value, str) and value != '' and 'https://' in value or 'http://' in value:
if (
isinstance(value, str)
and value != ""
and "https://" in value
or "http://" in value
):
urls.add(value)

if isinstance(val, str) and val != '' and 'https://' in val or 'http://' in val:
if (
isinstance(val, str)
and val != ""
and "https://" in val
or "http://" in val
):
urls.add(val)
tmp = set()
for url in urls:
if '<' in url and 'href=' in url:  # Format is <href="https://www.website.com"/>
equal_index = url.index('=')
true_url = ''
if (
"<" in url and "href=" in url
):  # Format is <href="https://www.website.com"/>
equal_index = url.index("=")
true_url = ""
for ch in url[equal_index + 1 :]:
if ch == '"':
tmp.add(true_url)
break
true_url += ch
else:
if url != '':
if url != "":
tmp.add(url)
return tmp
except Exception as e:
print(f'Exception occurred: {e}')
print(f"Exception occurred: {e}")
return []

async def get_emails(self):
@@ -7,19 +7,19 @@ def __init__(self, word) -> None:
self.word = word
self.key = Core.fullhunt_key()
if self.key is None:
raise MissingKey('fullhunt')
raise MissingKey("fullhunt")
self.total_results = None
self.proxy = False

async def do_search(self) -> None:
url = f'https://fullhunt.io/api/v1/domain/{self.word}/subdomains'
url = f"https://fullhunt.io/api/v1/domain/{self.word}/subdomains"
response = await AsyncFetcher.fetch_all(
[url],
json=True,
headers={'User-Agent': Core.get_user_agent(), 'X-API-KEY': self.key},
headers={"User-Agent": Core.get_user_agent(), "X-API-KEY": self.key},
proxy=self.proxy,
)
self.total_results = response[0]['hosts']
self.total_results = response[0]["hosts"]

async def get_hostnames(self):
return self.total_results
@@ -28,8 +28,8 @@ class ErrorResult(NamedTuple):
class SearchGithubCode:
def __init__(self, word, limit) -> None:
self.word = word
self.total_results = ''
self.server = 'api.github.com'
self.total_results = ""
self.server = "api.github.com"
self.limit = limit
self.counter: int = 0
self.page: int | None = 1

@@ -38,17 +38,17 @@ def __init__(self, word, limit) -> None:
# rate limits you more severely
# https://developer.github.com/v3/search/#rate-limit
if self.key is None:
raise MissingKey('Github')
raise MissingKey("Github")
self.proxy = False

@staticmethod
async def fragments_from_response(json_data: dict) -> list[str]:
items: list[dict[str, Any]] = json_data.get('items') or list()
items: list[dict[str, Any]] = json_data.get("items") or list()
fragments: list[str] = list()
for item in items:
matches = item.get('text_matches') or list()
matches = item.get("text_matches") or list()
for match in matches:
fragments.append(match.get('fragment'))
fragments.append(match.get("fragment"))

return [fragment for fragment in fragments if fragment is not None]

@@ -56,20 +56,22 @@ async def fragments_from_response(json_data: dict) -> list[str]:
async def page_from_response(page: str, links) -> int | None:
page_link = links.get(page)
if page_link:
parsed = urlparse.urlparse(str(page_link.get('url')))
parsed = urlparse.urlparse(str(page_link.get("url")))
params = urlparse.parse_qs(parsed.query)
pages: list[Any] = params.get('page', [None])
pages: list[Any] = params.get("page", [None])
page_number = pages[0] and int(pages[0])
return page_number
else:
return None

async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
async def handle_response(
self, response: tuple[str, dict, int, Any]
) -> ErrorResult | RetryResult | SuccessResult:
text, json_data, status, links = response
if status == 200:
results = await self.fragments_from_response(json_data)
next_page = await self.page_from_response('next', links)
last_page = await self.page_from_response('last', links)
next_page = await self.page_from_response("next", links)
last_page = await self.page_from_response("last", links)
return SuccessResult(results, next_page, last_page)
elif status == 429 or status == 403:
return RetryResult(60)

@@ -85,15 +87,17 @@ async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
else:
url = f'https://{self.server}/search/code?q="{self.word}"&page={page}'
headers = {
'Host': self.server,
'User-agent': Core.get_user_agent(),
'Accept': 'application/vnd.github.v3.text-match+json',
'Authorization': f'token {self.key}',
"Host": self.server,
"User-agent": Core.get_user_agent(),
"Accept": "application/vnd.github.v3.text-match+json",
"Authorization": f"token {self.key}",
}

async with aiohttp.ClientSession(headers=headers) as sess:
if self.proxy:
async with sess.get(url, proxy=random.choice(Core.proxy_list())) as resp:
async with sess.get(
url, proxy=random.choice(Core.proxy_list())
) as resp:
return await resp.text(), await resp.json(), resp.status, resp.links
else:
async with sess.get(url) as resp:

@@ -113,7 +117,7 @@ async def process(self, proxy: bool = False) -> None:
api_response = await self.do_search(self.page)
result = await self.handle_response(api_response)
if isinstance(result, SuccessResult):
print(f'\tSearching {self.counter} results.')
print(f"\tSearching {self.counter} results.")
for fragment in result.fragments:
self.total_results += fragment
self.counter = self.counter + 1

@@ -121,14 +125,16 @@ async def process(self, proxy: bool = False) -> None:
await asyncio.sleep(get_delay())
elif isinstance(result, RetryResult):
sleepy_time = get_delay() + result.time
print(f'\tRetrying page in {sleepy_time} seconds...')
print(f"\tRetrying page in {sleepy_time} seconds...")
await asyncio.sleep(sleepy_time)
elif isinstance(result, ErrorResult):
raise Exception(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
raise Exception(
f"\tException occurred: status_code: {result.status_code} reason: {result.body}"
)
else:
raise Exception('\tUnknown exception occurred')
raise Exception("\tUnknown exception occurred")
except Exception as e:
print(f'An exception has occurred: {e}')
print(f"An exception has occurred: {e}")

async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
@@ -8,24 +8,30 @@ class SearchHackerTarget:

def __init__(self, word) -> None:
self.word = word
self.total_results = ''
self.hostname = 'https://api.hackertarget.com'
self.total_results = ""
self.hostname = "https://api.hackertarget.com"
self.proxy = False
self.results = None

async def do_search(self) -> None:
headers = {'User-agent': Core.get_user_agent()}
headers = {"User-agent": Core.get_user_agent()}
urls = [
f'{self.hostname}/hostsearch/?q={self.word}',
f'{self.hostname}/reversedns/?q={self.word}',
f"{self.hostname}/hostsearch/?q={self.word}",
f"{self.hostname}/reversedns/?q={self.word}",
]
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
responses = await AsyncFetcher.fetch_all(
urls, headers=headers, proxy=self.proxy
)
for response in responses:
self.total_results += response.replace(',', ':')
self.total_results += response.replace(",", ":")

async def process(self, proxy: bool = False) -> None:
self.proxy = proxy
await self.do_search()

async def get_hostnames(self) -> list:
return [result for result in self.total_results.splitlines() if 'No PTR records found' not in result]
return [
result
for result in self.total_results.splitlines()
if "No PTR records found" not in result
]
@@ -12,10 +12,10 @@ def __init__(self, word, limit, start) -> None:
self.start = start
self.key = Core.hunter_key()
if self.key is None:
raise MissingKey('Hunter')
self.total_results = ''
raise MissingKey("Hunter")
self.total_results = ""
self.counter = start
self.database = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10'
self.database = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit=10"
self.proxy = False
self.hostnames: list = []
self.emails: list = []

@@ -23,56 +23,76 @@ def __init__(self, word, limit, start) -> None:
async def do_search(self) -> None:
# First determine if a user account is not a free account, this call is free
is_free = True
headers = {'User-Agent': Core.get_user_agent()}
acc_info_url = f'https://api.hunter.io/v2/account?api_key={self.key}'
response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
headers = {"User-Agent": Core.get_user_agent()}
acc_info_url = f"https://api.hunter.io/v2/account?api_key={self.key}"
response = await AsyncFetcher.fetch_all(
[acc_info_url], headers=headers, json=True
)
is_free = (
is_free if 'plan_name' in response[0]['data'].keys() and response[0]['data']['plan_name'].lower() == 'free' else False
is_free
if "plan_name" in response[0]["data"].keys()
and response[0]["data"]["plan_name"].lower() == "free"
else False
)
# Extract the total number of requests that are available for an account

total_requests_avail = (
response[0]['data']['requests']['searches']['available'] - response[0]['data']['requests']['searches']['used']
response[0]["data"]["requests"]["searches"]["available"]
- response[0]["data"]["requests"]["searches"]["used"]
)
if is_free:
response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
response = await AsyncFetcher.fetch_all(
[self.database], headers=headers, proxy=self.proxy, json=True
)
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
else:
# Determine the total number of emails that are available
# As the most emails you can get within one query are 100
# This is only done where paid accounts are in play
hunter_dinfo_url = f'https://api.hunter.io/v2/email-count?domain={self.word}'
response = await AsyncFetcher.fetch_all([hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True)
total_number_reqs = response[0]['data']['total'] // 100
hunter_dinfo_url = (
f"https://api.hunter.io/v2/email-count?domain={self.word}"
)
response = await AsyncFetcher.fetch_all(
[hunter_dinfo_url], headers=headers, proxy=self.proxy, json=True
)
total_number_reqs = response[0]["data"]["total"] // 100
# Parse out meta field within initial JSON response to determine the total number of results
if total_requests_avail < total_number_reqs:
print('WARNING: account does not have enough requests to gather all emails')
print(
f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
"WARNING: account does not have enough requests to gather all emails"
)
print(
f"Total requests available: {total_requests_avail}, total requests "
f"needed to be made: {total_number_reqs}"
)
print(
"RETURNING current results, if you would still like to "
"run this module comment out the if request"
)
print('RETURNING current results, if you would still like to ' 'run this module comment out the if request')
return
self.limit = 100
# max number of emails you can get per request is 100
# increments of 100 with offset determining where to start
# See docs for more details: https://hunter.io/api-documentation/v2#domain-search
for offset in range(0, 100 * total_number_reqs, 100):
req_url = f'https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}'
response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
req_url = f"https://api.hunter.io/v2/domain-search?domain={self.word}&api_key={self.key}&limit{self.limit}&offset={offset}"
response = await AsyncFetcher.fetch_all(
[req_url], headers=headers, proxy=self.proxy, json=True
)
temp_emails, temp_hostnames = await self.parse_resp(response[0])
self.emails.extend(temp_emails)
self.hostnames.extend(temp_hostnames)
await asyncio.sleep(1)

async def parse_resp(self, json_resp):
emails = list(sorted({email['value'] for email in json_resp['data']['emails']}))
emails = list(sorted({email["value"] for email in json_resp["data"]["emails"]}))
domains = list(
sorted(
{
source['domain']
for email in json_resp['data']['emails']
for source in email['sources']
if self.word in source['domain']
source["domain"]
for email in json_resp["data"]["emails"]
|
||||
for source in email["sources"]
|
||||
if self.word in source["domain"]
|
||||
}
|
||||
)
|
||||
)
@@ -14,8 +14,8 @@ def __init__(self, word) -> None:
        self.word = word
        self.key = Core.intelx_key()
        if self.key is None:
            raise MissingKey('Intelx')
        self.database = 'https://2.intelx.io'
            raise MissingKey("Intelx")
        self.database = "https://2.intelx.io"
        self.results: Any = None
        self.info: tuple[Any, ...] = ()
        self.limit: int = 10000
@@ -28,30 +28,34 @@ async def do_search(self) -> None:
|
|||
# API requests self identification
|
||||
# https://intelx.io/integrations
|
||||
headers = {
|
||||
'x-key': self.key,
|
||||
'User-Agent': f'{Core.get_user_agent()}-theHarvester',
|
||||
"x-key": self.key,
|
||||
"User-Agent": f"{Core.get_user_agent()}-theHarvester",
|
||||
}
|
||||
data = {
|
||||
'term': self.word,
|
||||
'buckets': [],
|
||||
'lookuplevel': 0,
|
||||
'maxresults': self.limit,
|
||||
'timeout': 5,
|
||||
'datefrom': '',
|
||||
'dateto': '',
|
||||
'sort': 2,
|
||||
'media': 0,
|
||||
'terminate': [],
|
||||
'target': 0,
|
||||
"term": self.word,
|
||||
"buckets": [],
|
||||
"lookuplevel": 0,
|
||||
"maxresults": self.limit,
|
||||
"timeout": 5,
|
||||
"datefrom": "",
|
||||
"dateto": "",
|
||||
"sort": 2,
|
||||
"media": 0,
|
||||
"terminate": [],
|
||||
"target": 0,
|
||||
}
|
||||
|
||||
total_resp = requests.post(f'{self.database}/phonebook/search', headers=headers, json=data)
|
||||
phonebook_id = ujson.loads(total_resp.text)['id']
|
||||
total_resp = requests.post(
|
||||
f"{self.database}/phonebook/search", headers=headers, json=data
|
||||
)
|
||||
phonebook_id = ujson.loads(total_resp.text)["id"]
|
||||
await asyncio.sleep(5)
|
||||
|
||||
# Fetch results from phonebook based on ID
|
||||
resp = await AsyncFetcher.fetch_all(
|
||||
[f'{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}'],
|
||||
[
|
||||
f"{self.database}/phonebook/search/result?id={phonebook_id}&limit={self.limit}&offset={self.offset}"
|
||||
],
|
||||
headers=headers,
|
||||
json=True,
|
||||
proxy=self.proxy,
|
||||
|
@@ -59,7 +63,7 @@ async def do_search(self) -> None:
            resp = resp[0]
            self.results = resp # TODO: give self.results more appropriate typing
        except Exception as e:
            print(f'An exception has occurred in Intelx: {e}')
            print(f"An exception has occurred in Intelx: {e}")

    async def process(self, proxy: bool = False):
        self.proxy = proxy
@@ -9,15 +9,17 @@ def __init__(self, word) -> None:
        self.totalips: list = []
        self.key = Core.netlas_key()
        if self.key is None:
            raise MissingKey('netlas')
            raise MissingKey("netlas")
        self.proxy = False

    async def do_search(self) -> None:
        api = f'https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*'
        headers = {'X-API-Key': self.key}
        response = await AsyncFetcher.fetch_all([api], json=True, headers=headers, proxy=self.proxy)
        for domain in response[0]['items']:
            self.totalhosts.append(domain['data']['domain'])
        api = f"https://app.netlas.io/api/domains/?q=*.{self.word}&source_type=include&start=0&fields=*"
        headers = {"X-API-Key": self.key}
        response = await AsyncFetcher.fetch_all(
            [api], json=True, headers=headers, proxy=self.proxy
        )
        for domain in response[0]["items"]:
            self.totalhosts.append(domain["data"]["domain"])

    async def get_hostnames(self) -> list:
        return self.totalhosts
@@ -9,26 +9,28 @@
|
|||
class SearchOnyphe:
|
||||
def __init__(self, word) -> None:
|
||||
self.word = word
|
||||
self.response = ''
|
||||
self.response = ""
|
||||
self.totalhosts: set = set()
|
||||
self.totalips: set = set()
|
||||
self.asns: set = set()
|
||||
self.key = Core.onyphe_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('onyphe')
|
||||
raise MissingKey("onyphe")
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self) -> None:
|
||||
# https://www.onyphe.io/docs/apis/search
|
||||
# https://www.onyphe.io/search?q=domain%3Acharter.com&captcharesponse=j5cGT
|
||||
# base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:domain:{self.word}'
|
||||
base_url = f'https://www.onyphe.io/api/v2/search/?q=domain:{self.word}'
|
||||
base_url = f"https://www.onyphe.io/api/v2/search/?q=domain:{self.word}"
|
||||
headers = {
|
||||
'User-Agent': Core.get_user_agent(),
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'bearer {self.key}',
|
||||
"User-Agent": Core.get_user_agent(),
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"bearer {self.key}",
|
||||
}
|
||||
response = await AsyncFetcher.fetch_all([base_url], json=True, headers=headers, proxy=self.proxy)
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[base_url], json=True, headers=headers, proxy=self.proxy
|
||||
)
|
||||
self.response = response[0]
|
||||
await self.parse_onyphe_resp_json()
|
||||
|
||||
|
@@ -36,52 +38,74 @@ async def parse_onyphe_resp_json(self):
|
|||
if isinstance(self.response, list):
|
||||
self.response = self.response[0]
|
||||
if not isinstance(self.response, dict):
|
||||
raise Exception(f'An exception has occurred {self.response} is not a dict')
|
||||
if 'Success' == self.response['text']:
|
||||
if 'results' in self.response.keys():
|
||||
for result in self.response['results']:
|
||||
raise Exception(f"An exception has occurred {self.response} is not a dict")
|
||||
if "Success" == self.response["text"]:
|
||||
if "results" in self.response.keys():
|
||||
for result in self.response["results"]:
|
||||
try:
|
||||
if 'alternativeip' in result.keys():
|
||||
self.totalips.update({altip for altip in result['alternativeip']})
|
||||
if 'url' in result.keys() and isinstance(result['url'], list):
|
||||
self.totalhosts.update(
|
||||
urlparse(url).netloc for url in result['url'] if urlparse(url).netloc.endswith(self.word)
|
||||
if "alternativeip" in result.keys():
|
||||
self.totalips.update(
|
||||
{altip for altip in result["alternativeip"]}
|
||||
)
|
||||
self.asns.add(result['asn'])
|
||||
self.asns.add(result['geolocus']['asn'])
|
||||
self.totalips.add(result['geolocus']['subnet'])
|
||||
self.totalips.add(result['ip'])
|
||||
self.totalips.add(result['subnet'])
|
||||
if "url" in result.keys() and isinstance(result["url"], list):
|
||||
self.totalhosts.update(
|
||||
urlparse(url).netloc
|
||||
for url in result["url"]
|
||||
if urlparse(url).netloc.endswith(self.word)
|
||||
)
|
||||
self.asns.add(result["asn"])
|
||||
self.asns.add(result["geolocus"]["asn"])
|
||||
self.totalips.add(result["geolocus"]["subnet"])
|
||||
self.totalips.add(result["ip"])
|
||||
self.totalips.add(result["subnet"])
|
||||
# Shouldn't be needed as API autoparses urls from html raw data
|
||||
# rawres = myparser.Parser(result['data'], self.word)
|
||||
# if await rawres.hostnames():
|
||||
# self.totalhosts.update(set(await rawres.hostnames()))
|
||||
for subdomain_key in [
|
||||
'domain',
|
||||
'hostname',
|
||||
'subdomains',
|
||||
'subject',
|
||||
'reverse',
|
||||
'geolocus',
|
||||
"domain",
|
||||
"hostname",
|
||||
"subdomains",
|
||||
"subject",
|
||||
"reverse",
|
||||
"geolocus",
|
||||
]:
|
||||
if subdomain_key in result.keys():
|
||||
if subdomain_key == 'subject':
|
||||
if subdomain_key == "subject":
|
||||
self.totalhosts.update(
|
||||
{domain for domain in result[subdomain_key]['altname'] if domain.endswith(self.word)}
|
||||
{
|
||||
domain
|
||||
for domain in result[subdomain_key][
|
||||
"altname"
|
||||
]
|
||||
if domain.endswith(self.word)
|
||||
}
|
||||
)
|
||||
elif subdomain_key == 'geolocus':
|
||||
elif subdomain_key == "geolocus":
|
||||
self.totalhosts.update(
|
||||
{domain for domain in result[subdomain_key]['domain'] if domain.endswith(self.word)}
|
||||
{
|
||||
domain
|
||||
for domain in result[subdomain_key][
|
||||
"domain"
|
||||
]
|
||||
if domain.endswith(self.word)
|
||||
}
|
||||
)
|
||||
else:
|
||||
self.totalhosts.update(
|
||||
{domain for domain in result[subdomain_key] if domain.endswith(self.word)}
|
||||
{
|
||||
domain
|
||||
for domain in result[subdomain_key]
|
||||
if domain.endswith(self.word)
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred on result: {result}: {e}')
|
||||
print(f"An exception has occurred on result: {result}: {e}")
|
||||
continue
|
||||
else:
|
||||
            print(f'Onyphe API query did not succeed dumping current response: {self.response}')
            print(
                f"Onyphe API query did not succeed dumping current response: {self.response}"
            )
|
||||
|
||||
async def get_asns(self) -> set:
|
||||
return self.asns
@@ -11,14 +11,16 @@ def __init__(self, word) -> None:
        self.proxy = False

    async def do_search(self) -> None:
        url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns'
        url = f"https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns"
        response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
        responses = response[0]
        dct = responses
        self.totalhosts = {host['hostname'] for host in dct['passive_dns']}
        self.totalhosts = {host["hostname"] for host in dct["passive_dns"]}
        # filter out ips that are just called NXDOMAIN
        self.totalips = {
            ip['address'] for ip in dct['passive_dns'] if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip['address'])
            ip["address"]
            for ip in dct["passive_dns"]
            if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip["address"])
        }

    async def get_hostnames(self) -> set:
@@ -12,41 +12,54 @@ def __init__(self, word) -> None:
|
|||
self.word = word
|
||||
self.key = Core.pentest_tools_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('PentestTools')
|
||||
raise MissingKey("PentestTools")
|
||||
self.total_results: list = []
|
||||
self.api = f'https://pentest-tools.com/api?key={self.key}'
|
||||
self.api = f"https://pentest-tools.com/api?key={self.key}"
|
||||
self.proxy = False
|
||||
|
||||
async def poll(self, scan_id):
|
||||
while True:
|
||||
time.sleep(3)
|
||||
# Get the status of our scan
|
||||
scan_status_data = {'op': 'get_scan_status', 'scan_id': scan_id}
|
||||
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy)
|
||||
scan_status_data = {"op": "get_scan_status", "scan_id": scan_id}
|
||||
responses = await AsyncFetcher.post_fetch(
|
||||
url=self.api, data=ujson.dumps(scan_status_data), proxy=self.proxy
|
||||
)
|
||||
res_json = ujson.loads(responses.strip())
|
||||
if res_json['op_status'] == 'success':
|
||||
if res_json['scan_status'] != 'waiting' and res_json['scan_status'] != 'running':
|
||||
if res_json["op_status"] == "success":
|
||||
if (
|
||||
res_json["scan_status"] != "waiting"
|
||||
and res_json["scan_status"] != "running"
|
||||
):
|
||||
getoutput_data = {
|
||||
'op': 'get_output',
|
||||
'scan_id': scan_id,
|
||||
'output_format': 'json',
|
||||
"op": "get_output",
|
||||
"scan_id": scan_id,
|
||||
"output_format": "json",
|
||||
}
|
||||
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy)
|
||||
responses = await AsyncFetcher.post_fetch(
|
||||
url=self.api, data=ujson.dumps(getoutput_data), proxy=self.proxy
|
||||
)
|
||||
|
||||
res_json = ujson.loads(responses.strip('\n'))
|
||||
res_json = ujson.loads(responses.strip("\n"))
|
||||
self.total_results = await self.parse_json(res_json)
|
||||
break
|
||||
else:
|
||||
print(f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}")
|
||||
print(
|
||||
f"Operation get_scan_status failed because: {res_json['error']}. {res_json['details']}"
|
||||
)
|
||||
break
|
||||
|
||||
@staticmethod
|
||||
async def parse_json(json_results):
|
||||
status = json_results['op_status']
|
||||
if status == 'success':
|
||||
scan_tests = json_results['scan_output']['output_json']
|
||||
output_data = scan_tests[0]['output_data']
|
||||
host_to_ip = [f'{subdomain[0]}:{subdomain[1]}' for subdomain in output_data if len(subdomain) > 0]
|
||||
status = json_results["op_status"]
|
||||
if status == "success":
|
||||
scan_tests = json_results["scan_output"]["output_json"]
|
||||
output_data = scan_tests[0]["output_data"]
|
||||
host_to_ip = [
|
||||
f"{subdomain[0]}:{subdomain[1]}"
|
||||
for subdomain in output_data
|
||||
if len(subdomain) > 0
|
||||
]
|
||||
return host_to_ip
|
||||
return []
|
||||
|
||||
|
@@ -55,18 +68,20 @@ async def get_hostnames(self) -> list:
|
|||
|
||||
async def do_search(self) -> None:
|
||||
subdomain_payload = {
|
||||
'op': 'start_scan',
|
||||
'tool_id': 20,
|
||||
'tool_params': {
|
||||
'target': f'{self.word}',
|
||||
'web_details': 'off',
|
||||
'do_smart_search': 'off',
|
||||
"op": "start_scan",
|
||||
"tool_id": 20,
|
||||
"tool_params": {
|
||||
"target": f"{self.word}",
|
||||
"web_details": "off",
|
||||
"do_smart_search": "off",
|
||||
},
|
||||
}
|
||||
responses = await AsyncFetcher.post_fetch(url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy)
|
||||
responses = await AsyncFetcher.post_fetch(
|
||||
url=self.api, data=ujson.dumps(subdomain_payload), proxy=self.proxy
|
||||
)
|
||||
res_json = ujson.loads(responses.strip())
|
||||
if res_json['op_status'] == 'success':
|
||||
scan_id = res_json['scan_id']
|
||||
if res_json["op_status"] == "success":
|
||||
scan_id = res_json["scan_id"]
|
||||
await self.poll(scan_id)
|
||||
|
||||
async def process(self, proxy: bool = False) -> None:
|
||||
|
|
|
@@ -7,19 +7,21 @@ def __init__(self, word) -> None:
|
|||
self.word = word
|
||||
self.key = Core.projectdiscovery_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('ProjectDiscovery')
|
||||
raise MissingKey("ProjectDiscovery")
|
||||
self.total_results = None
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self):
|
||||
url = f'https://dns.projectdiscovery.io/dns/{self.word}/subdomains'
|
||||
url = f"https://dns.projectdiscovery.io/dns/{self.word}/subdomains"
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[url],
|
||||
json=True,
|
||||
headers={'User-Agent': Core.get_user_agent(), 'Authorization': self.key},
|
||||
headers={"User-Agent": Core.get_user_agent(), "Authorization": self.key},
|
||||
proxy=self.proxy,
|
||||
)
|
||||
self.total_results = [f'{domains}.{self.word}' for domains in response[0]['subdomains']]
|
||||
self.total_results = [
|
||||
f"{domains}.{self.word}" for domains in response[0]["subdomains"]
|
||||
]
|
||||
|
||||
async def get_hostnames(self):
|
||||
return self.total_results
|
||||
|
|
|
@@ -11,29 +11,33 @@ def __init__(self, word) -> None:
|
|||
|
||||
async def do_search(self):
|
||||
try:
|
||||
headers = {'User-agent': Core.get_user_agent()}
|
||||
headers = {"User-agent": Core.get_user_agent()}
|
||||
# TODO see if it's worth adding sameip searches
|
||||
# f'{self.hostname}/sameip/{self.word}?full=1#result'
|
||||
urls = [f'https://rapiddns.io/subdomain/{self.word}?full=1#result']
|
||||
responses = await AsyncFetcher.fetch_all(urls, headers=headers, proxy=self.proxy)
|
||||
urls = [f"https://rapiddns.io/subdomain/{self.word}?full=1#result"]
|
||||
responses = await AsyncFetcher.fetch_all(
|
||||
urls, headers=headers, proxy=self.proxy
|
||||
)
|
||||
if len(responses[0]) <= 1:
|
||||
return self.total_results
|
||||
soup = BeautifulSoup(responses[0], 'html.parser')
|
||||
rows = soup.find('table').find('tbody').find_all('tr')
|
||||
soup = BeautifulSoup(responses[0], "html.parser")
|
||||
rows = soup.find("table").find("tbody").find_all("tr")
|
||||
if rows:
|
||||
# Validation check
|
||||
for row in rows:
|
||||
cells = row.find_all('td')
|
||||
cells = row.find_all("td")
|
||||
if len(cells) > 0:
|
||||
# sanity check
|
||||
subdomain = str(cells[0].get_text())
|
||||
if cells[-1].get_text() == 'CNAME':
|
||||
self.total_results.append(f'{subdomain}')
|
||||
if cells[-1].get_text() == "CNAME":
|
||||
self.total_results.append(f"{subdomain}")
|
||||
else:
|
||||
self.total_results.append(f'{subdomain}:{str(cells[1].get_text()).strip()}')
|
||||
self.total_results.append(
|
||||
f"{subdomain}:{str(cells[1].get_text()).strip()}"
|
||||
)
|
||||
self.total_results = list({domain for domain in self.total_results})
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {str(e)}')
|
||||
print(f"An exception has occurred: {str(e)}")
|
||||
|
||||
async def process(self, proxy: bool = False) -> None:
|
||||
self.proxy = proxy
|
||||
|
|
|
@@ -10,50 +10,59 @@ def __init__(self, word, limit) -> None:
|
|||
self.word = word
|
||||
self.key = Core.rocketreach_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('RocketReach')
|
||||
raise MissingKey("RocketReach")
|
||||
self.hosts: set = set()
|
||||
self.proxy = False
|
||||
self.baseurl = 'https://rocketreach.co/api/v2/person/search'
|
||||
self.baseurl = "https://rocketreach.co/api/v2/person/search"
|
||||
self.links: set = set()
|
||||
self.limit = limit
|
||||
|
||||
async def do_search(self) -> None:
|
||||
try:
|
||||
headers = {
|
||||
'Api-Key': self.key,
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': Core.get_user_agent(),
|
||||
"Api-Key": self.key,
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": Core.get_user_agent(),
|
||||
}
|
||||
|
||||
next_page = 1 # track pagination
|
||||
for count in range(1, self.limit):
|
||||
data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}'
|
||||
result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
|
||||
if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']:
|
||||
result = await AsyncFetcher.post_fetch(
|
||||
self.baseurl, headers=headers, data=data, json=True
|
||||
)
|
||||
if (
|
||||
"detail" in result.keys()
|
||||
and "error" in result.keys()
|
||||
and "Subscribe to a plan to access" in result["detail"]
|
||||
):
|
||||
# No more results can be fetched
|
||||
break
|
||||
if 'detail' in result.keys() and 'Request was throttled.' in result['detail']:
|
||||
if (
|
||||
"detail" in result.keys()
|
||||
and "Request was throttled." in result["detail"]
|
||||
):
|
||||
# Rate limit has been triggered need to sleep extra
|
||||
print(
|
||||
f"RocketReach requests have been throttled; "
|
||||
f'{result["detail"].split(" ", 3)[-1].replace("available", "availability")}'
|
||||
)
|
||||
break
|
||||
if 'profiles' in dict(result).keys():
|
||||
if len(result['profiles']) == 0:
|
||||
if "profiles" in dict(result).keys():
|
||||
if len(result["profiles"]) == 0:
|
||||
break
|
||||
for profile in result['profiles']:
|
||||
if 'linkedin_url' in dict(profile).keys():
|
||||
self.links.add(profile['linkedin_url'])
|
||||
if 'pagination' in dict(result).keys():
|
||||
next_page = int(result['pagination']['next'])
|
||||
if next_page > int(result['pagination']['total']):
|
||||
for profile in result["profiles"]:
|
||||
if "linkedin_url" in dict(profile).keys():
|
||||
self.links.add(profile["linkedin_url"])
|
||||
if "pagination" in dict(result).keys():
|
||||
next_page = int(result["pagination"]["next"])
|
||||
if next_page > int(result["pagination"]["total"]):
|
||||
break
|
||||
|
||||
await asyncio.sleep(get_delay() + 5)
|
||||
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {e}')
|
||||
print(f"An exception has occurred: {e}")
|
||||
|
||||
async def get_links(self):
|
||||
return self.links
|
||||
|
|
|
@@ -13,52 +13,55 @@ def __init__(self, word) -> None:
|
|||
self.total_hostnames: set = set()
|
||||
self.key = Core.hunterhow_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('hunterhow')
|
||||
raise MissingKey("hunterhow")
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self) -> None:
|
||||
# https://hunter.how/search-api
|
||||
query = f'domain.suffix="{self.word}"'
|
||||
# second_query = f'domain="{self.word}"'
|
||||
encoded_query = base64.urlsafe_b64encode(query.encode('utf-8')).decode('ascii')
|
||||
encoded_query = base64.urlsafe_b64encode(query.encode("utf-8")).decode("ascii")
|
||||
page = 1
|
||||
page_size = 100 # can be either: 10,20,50,100)
|
||||
# The interval between the start time and the end time cannot exceed one year
|
||||
# Can not exceed one year, but years=1 does not work due to their backend, 364 will suffice
|
||||
today = datetime.today()
|
||||
one_year_ago = today - relativedelta(days=364)
|
||||
start_time = one_year_ago.strftime('%Y-%m-%d')
|
||||
end_time = today.strftime('%Y-%m-%d')
|
||||
start_time = one_year_ago.strftime("%Y-%m-%d")
|
||||
end_time = today.strftime("%Y-%m-%d")
|
||||
# two_years_ago = one_year_ago - relativedelta(days=364)
|
||||
# start_time = two_years_ago.strftime('%Y-%m-%d')
|
||||
# end_time = one_year_ago.strftime('%Y-%m-%d')
|
||||
url = 'https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s' % (
|
||||
# self.key, encoded_query, page, page_size, start_time, end_time
|
||||
self.key,
|
||||
encoded_query,
|
||||
page,
|
||||
page_size,
|
||||
start_time,
|
||||
end_time,
|
||||
url = (
|
||||
"https://api.hunter.how/search?api-key=%s&query=%s&page=%d&page_size=%d&start_time=%s&end_time=%s"
|
||||
% (
|
||||
# self.key, encoded_query, page, page_size, start_time, end_time
|
||||
self.key,
|
||||
encoded_query,
|
||||
page,
|
||||
page_size,
|
||||
start_time,
|
||||
end_time,
|
||||
)
|
||||
)
|
||||
# print(f'Sending url: {url}')
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[url],
|
||||
json=True,
|
||||
headers={'User-Agent': Core.get_user_agent(), 'x-api-key': f'{self.key}'},
|
||||
headers={"User-Agent": Core.get_user_agent(), "x-api-key": f"{self.key}"},
|
||||
proxy=self.proxy,
|
||||
)
|
||||
dct = response[0]
|
||||
# print(f'json response: ')
|
||||
# print(dct)
|
||||
if 'code' in dct.keys():
|
||||
if dct['code'] == 40001:
|
||||
if "code" in dct.keys():
|
||||
if dct["code"] == 40001:
|
||||
print(f'Code 40001 indicates for searchhunterhow: {dct["message"]}')
|
||||
return
|
||||
# total = dct['data']['total']
|
||||
# TODO determine if total is ever 100 how to get more subdomains?
|
||||
for sub in dct['data']['list']:
|
||||
self.total_hostnames.add(sub['domain'])
|
||||
for sub in dct["data"]["list"]:
|
||||
self.total_hostnames.add(sub["domain"])
|
||||
|
||||
async def get_hostnames(self) -> set:
|
||||
return self.total_hostnames
|
||||
|
|
|
@@ -10,33 +10,41 @@ def __init__(self, word) -> None:
|
|||
self.word = word
|
||||
self.key = Core.security_trails_key()
|
||||
if self.key is None:
|
||||
raise MissingKey('Securitytrail')
|
||||
self.results = ''
|
||||
self.totalresults = ''
|
||||
self.api = 'https://api.securitytrails.com/v1/'
|
||||
raise MissingKey("Securitytrail")
|
||||
self.results = ""
|
||||
self.totalresults = ""
|
||||
self.api = "https://api.securitytrails.com/v1/"
|
||||
self.info: tuple[set, set] = (set(), set())
|
||||
self.proxy = False
|
||||
|
||||
async def authenticate(self) -> None:
|
||||
# Method to authenticate API key before sending requests.
|
||||
headers = {'APIKEY': self.key}
|
||||
url = f'{self.api}ping'
|
||||
auth_responses = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
|
||||
headers = {"APIKEY": self.key}
|
||||
url = f"{self.api}ping"
|
||||
auth_responses = await AsyncFetcher.fetch_all(
|
||||
[url], headers=headers, proxy=self.proxy
|
||||
)
|
||||
auth_responses = auth_responses[0]
|
||||
if 'False' in auth_responses or 'Invalid authentication' in auth_responses:
|
||||
print('\tKey could not be authenticated exiting program.')
|
||||
if "False" in auth_responses or "Invalid authentication" in auth_responses:
|
||||
print("\tKey could not be authenticated exiting program.")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
async def do_search(self) -> None:
|
||||
# https://api.securitytrails.com/v1/domain/domain.com
|
||||
url = f'{self.api}domain/{self.word}'
|
||||
headers = {'APIKEY': self.key}
|
||||
response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
|
||||
await asyncio.sleep(5) # Not random delay because 2 seconds is required due to rate limit.
|
||||
url = f"{self.api}domain/{self.word}"
|
||||
headers = {"APIKEY": self.key}
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[url], headers=headers, proxy=self.proxy
|
||||
)
|
||||
await asyncio.sleep(
|
||||
5
|
||||
) # Not random delay because 2 seconds is required due to rate limit.
|
||||
self.results = response[0]
|
||||
self.totalresults += self.results
|
||||
url += '/subdomains' # Get subdomains now.
|
||||
subdomain_response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
|
||||
url += "/subdomains" # Get subdomains now.
|
||||
subdomain_response = await AsyncFetcher.fetch_all(
|
||||
[url], headers=headers, proxy=self.proxy
|
||||
)
|
||||
await asyncio.sleep(5)
|
||||
self.results = subdomain_response[0]
|
||||
self.totalresults += self.results
@@ -48,7 +56,7 @@ async def process(self, proxy: bool = False) -> None:
        parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults)
        self.info = await parser.parse_text()
        # Create parser and set self.info to tuple returned from parsing text.
        print('\tDone Searching Results')
        print("\tDone Searching Results")

    async def get_ips(self) -> set:
        return self.info[0]
@@ -10,7 +10,7 @@ class SearchShodan:
    def __init__(self) -> None:
        self.key = Core.shodan_key()
        if self.key is None:
            raise MissingKey('Shodan')
            raise MissingKey("Shodan")
        self.api = Shodan(self.key)
        self.hostdatarow: list = []
        self.tracker: OrderedDict = OrderedDict()
@@ -19,81 +19,81 @@ async def search_ip(self, ip) -> OrderedDict:
|
|||
try:
|
||||
ipaddress = ip
|
||||
results = self.api.host(ipaddress)
|
||||
asn = ''
|
||||
asn = ""
|
||||
domains: list = list()
|
||||
hostnames: list = list()
|
||||
ip_str = ''
|
||||
isp = ''
|
||||
org = ''
|
||||
ip_str = ""
|
||||
isp = ""
|
||||
org = ""
|
||||
ports: list = list()
|
||||
title = ''
|
||||
server = ''
|
||||
product = ''
|
||||
title = ""
|
||||
server = ""
|
||||
product = ""
|
||||
technologies: list = list()
|
||||
|
||||
data_first_dict = dict(results['data'][0])
|
||||
data_first_dict = dict(results["data"][0])
|
||||
|
||||
if 'ip_str' in data_first_dict.keys():
|
||||
ip_str += data_first_dict['ip_str']
|
||||
if "ip_str" in data_first_dict.keys():
|
||||
ip_str += data_first_dict["ip_str"]
|
||||
|
||||
if 'http' in data_first_dict.keys():
|
||||
http_results_dict = dict(data_first_dict['http'])
|
||||
if 'title' in http_results_dict.keys():
|
||||
title_val = str(http_results_dict['title']).strip()
|
||||
if title_val != 'None':
|
||||
if "http" in data_first_dict.keys():
|
||||
http_results_dict = dict(data_first_dict["http"])
|
||||
if "title" in http_results_dict.keys():
|
||||
title_val = str(http_results_dict["title"]).strip()
|
||||
if title_val != "None":
|
||||
title += title_val
|
||||
if 'components' in http_results_dict.keys():
|
||||
for key in http_results_dict['components'].keys():
|
||||
if "components" in http_results_dict.keys():
|
||||
for key in http_results_dict["components"].keys():
|
||||
technologies.append(key)
|
||||
if 'server' in http_results_dict.keys():
|
||||
server_val = str(http_results_dict['server']).strip()
|
||||
if server_val != 'None':
|
||||
if "server" in http_results_dict.keys():
|
||||
server_val = str(http_results_dict["server"]).strip()
|
||||
if server_val != "None":
|
||||
server += server_val
|
||||
|
||||
for key, value in results.items():
|
||||
if key == 'asn':
|
||||
if key == "asn":
|
||||
asn += value
|
||||
if key == 'domains':
|
||||
if key == "domains":
|
||||
value = list(value)
|
||||
value.sort()
|
||||
domains.extend(value)
|
||||
if key == 'hostnames':
|
||||
if key == "hostnames":
|
||||
value = [host.strip() for host in list(value)]
|
||||
value.sort()
|
||||
hostnames.extend(value)
|
||||
if key == 'isp':
|
||||
if key == "isp":
|
||||
isp += value
|
||||
if key == 'org':
|
||||
if key == "org":
|
||||
org += str(value)
|
||||
if key == 'ports':
|
||||
if key == "ports":
|
||||
value = list(value)
|
||||
value.sort()
|
||||
ports.extend(value)
|
||||
if key == 'product':
|
||||
if key == "product":
|
||||
product += value
|
||||
|
||||
technologies = list(set(technologies))
|
||||
|
||||
self.tracker[ip] = {
|
||||
'asn': asn.strip(),
|
||||
'domains': domains,
|
||||
'hostnames': hostnames,
|
||||
'ip_str': ip_str.strip(),
|
||||
'isp': isp.strip(),
|
||||
'org': org.strip(),
|
||||
'ports': ports,
|
||||
'product': product.strip(),
|
||||
'server': server.strip(),
|
||||
'technologies': technologies,
|
||||
'title': title.strip(),
|
||||
"asn": asn.strip(),
|
||||
"domains": domains,
|
||||
"hostnames": hostnames,
|
||||
"ip_str": ip_str.strip(),
|
||||
"isp": isp.strip(),
|
||||
"org": org.strip(),
|
||||
"ports": ports,
|
||||
"product": product.strip(),
|
||||
"server": server.strip(),
|
||||
"technologies": technologies,
|
||||
"title": title.strip(),
|
||||
}
|
||||
|
||||
return self.tracker
|
||||
except exception.APIError:
|
||||
print(f'{ip}: Not in Shodan')
|
||||
self.tracker[ip] = 'Not in Shodan'
|
||||
print(f"{ip}: Not in Shodan")
|
||||
self.tracker[ip] = "Not in Shodan"
|
||||
except Exception as e:
|
||||
# print(f'Error occurred in the Shodan IP search module: {e}')
|
||||
self.tracker[ip] = f'Error occurred in the Shodan IP search module: {e}'
|
||||
self.tracker[ip] = f"Error occurred in the Shodan IP search module: {e}"
|
||||
finally:
|
||||
return self.tracker
@@ -10,7 +10,7 @@ class SearchSitedossier:
    def __init__(self, word):
        self.word = word
        self.totalhosts = set()
        self.server = 'www.sitedossier.com'
        self.server = "www.sitedossier.com"
        self.proxy = False

    async def do_search(self):
@@ -18,75 +18,92 @@ async def do_search(self):
|
|||
# This site seems to yield a lot of results but is a bit annoying to scrape
|
||||
# Hence the need for delays after each request to get the most results
|
||||
# Feel free to tweak the delays as needed
|
||||
url = f'http://{self.server}/parentdomain/{self.word}'
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
response = await AsyncFetcher.fetch_all([url], headers=headers, proxy=self.proxy)
|
||||
url = f"http://{self.server}/parentdomain/{self.word}"
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[url], headers=headers, proxy=self.proxy
|
||||
)
|
||||
base_response = response[0]
|
||||
soup = BeautifulSoup(base_response, 'html.parser')
|
||||
soup = BeautifulSoup(base_response, "html.parser")
|
||||
# iter_counter = 1
|
||||
# iterations_needed = total_number // 100
|
||||
# iterations_needed += 1
|
||||
flagged_counter = 0
|
||||
stop_conditions = ['End of list.', 'No data currently available.']
|
||||
stop_conditions = ["End of list.", "No data currently available."]
|
||||
bot_string = (
|
||||
'Our web servers have detected unusual or excessive requests '
|
||||
"Our web servers have detected unusual or excessive requests "
|
||||
'from your computer or network. Please enter the unique "word"'
|
||||
' below to confirm that you are a human interactively using this site.'
|
||||
" below to confirm that you are a human interactively using this site."
|
||||
)
|
||||
if (
|
||||
stop_conditions[0] not in base_response and stop_conditions[1] not in base_response
|
||||
stop_conditions[0] not in base_response
|
||||
and stop_conditions[1] not in base_response
|
||||
) and bot_string not in base_response:
|
||||
total_number = soup.find('i')
|
||||
total_number = int(total_number.text.strip().split(' ')[-1].replace(',', ''))
|
||||
hrefs = soup.find_all('a', href=True)
|
||||
total_number = soup.find("i")
|
||||
total_number = int(
|
||||
total_number.text.strip().split(" ")[-1].replace(",", "")
|
||||
)
|
||||
hrefs = soup.find_all("a", href=True)
|
||||
for a in hrefs:
|
||||
unparsed = a['href']
|
||||
if '/site/' in unparsed:
|
||||
subdomain = str(unparsed.split('/')[-1]).lower()
|
||||
unparsed = a["href"]
|
||||
if "/site/" in unparsed:
|
||||
subdomain = str(unparsed.split("/")[-1]).lower()
|
||||
self.totalhosts.add(subdomain)
|
||||
await asyncio.sleep(get_delay() + 15 + get_delay())
|
||||
for i in range(101, total_number, 100):
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
iter_url = f'http://{self.server}/parentdomain/{self.word}/{i}'
|
||||
print(f'My current iter_url: {iter_url}')
|
||||
response = await AsyncFetcher.fetch_all([iter_url], headers=headers, proxy=self.proxy)
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
iter_url = f"http://{self.server}/parentdomain/{self.word}/{i}"
|
||||
print(f"My current iter_url: {iter_url}")
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[iter_url], headers=headers, proxy=self.proxy
|
||||
)
|
||||
response = response[0]
|
||||
if stop_conditions[0] in response or stop_conditions[1] in response or flagged_counter >= 3:
|
||||
if (
|
||||
stop_conditions[0] in response
|
||||
or stop_conditions[1] in response
|
||||
or flagged_counter >= 3
|
||||
):
|
||||
break
|
||||
if bot_string in response:
|
||||
new_sleep_time = get_delay() * 30
|
||||
print(f'Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds')
|
||||
print(
|
||||
f"Triggered a captcha for sitedossier sleeping for: {new_sleep_time} seconds"
|
||||
)
|
||||
flagged_counter += 1
|
||||
await asyncio.sleep(new_sleep_time)
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[iter_url],
|
||||
headers={'User-Agent': Core.get_user_agent()},
|
||||
headers={"User-Agent": Core.get_user_agent()},
|
||||
proxy=self.proxy,
|
||||
)
|
||||
response = response[0]
|
||||
if bot_string in response:
|
||||
new_sleep_time = get_delay() * 30 * get_delay()
|
||||
print(
|
||||
f'Still triggering a captcha, sleeping longer for: {new_sleep_time}'
|
||||
f' and skipping this batch: {iter_url}'
|
||||
f"Still triggering a captcha, sleeping longer for: {new_sleep_time}"
|
||||
f" and skipping this batch: {iter_url}"
|
||||
)
|
||||
await asyncio.sleep(new_sleep_time)
|
||||
flagged_counter += 1
|
||||
if flagged_counter >= 3:
|
||||
break
|
||||
soup = BeautifulSoup(response, 'html.parser')
|
||||
hrefs = soup.find_all('a', href=True)
|
||||
soup = BeautifulSoup(response, "html.parser")
|
||||
hrefs = soup.find_all("a", href=True)
|
||||
for a in hrefs:
|
||||
unparsed = a['href']
|
||||
if '/site/' in unparsed:
|
||||
subdomain = str(unparsed.split('/')[-1]).lower()
|
||||
unparsed = a["href"]
|
||||
if "/site/" in unparsed:
|
||||
subdomain = str(unparsed.split("/")[-1]).lower()
|
||||
self.totalhosts.add(subdomain)
|
||||
await asyncio.sleep(get_delay() + 15 + get_delay())
|
||||
print(f'In total found: {len(self.totalhosts)}')
|
||||
print(f"In total found: {len(self.totalhosts)}")
|
||||
print(self.totalhosts)
|
||||
else:
|
||||
print('Sitedossier module has triggered a captcha on first iteration, no results can be found.')
|
||||
print('Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module')
|
||||
print(
|
||||
"Sitedossier module has triggered a captcha on first iteration, no results can be found."
|
||||
)
|
||||
print(
|
||||
"Change IPs, manually solve the captcha, or wait before rerunning Sitedossier module"
|
||||
)
|
||||
|
||||
async def get_hostnames(self):
|
||||
return self.totalhosts
|
||||
|
|
|
@@ -5,18 +5,23 @@ class SubdomainCenter:
|
|||
def __init__(self, word):
|
||||
self.word = word
|
||||
self.results = set()
|
||||
self.server = 'https://api.subdomain.center/?domain='
|
||||
self.server = "https://api.subdomain.center/?domain="
|
||||
self.proxy = False
|
||||
|
||||
async def do_search(self):
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
try:
|
||||
current_url = f'{self.server}{self.word}'
|
||||
resp = await AsyncFetcher.fetch_all([current_url], headers=headers, proxy=self.proxy, json=True)
|
||||
current_url = f"{self.server}{self.word}"
|
||||
resp = await AsyncFetcher.fetch_all(
|
||||
[current_url], headers=headers, proxy=self.proxy, json=True
|
||||
)
|
||||
self.results = resp[0]
|
||||
self.results = {sub[4:] if sub[:4] == 'www.' and sub[4:] else sub for sub in self.results}
|
||||
self.results = {
|
||||
sub[4:] if sub[:4] == "www." and sub[4:] else sub
|
||||
for sub in self.results
|
||||
}
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred in SubdomainCenter on : {e}')
|
||||
print(f"An exception has occurred in SubdomainCenter on : {e}")
|
||||
|
||||
async def get_hostnames(self):
|
||||
return self.results
|
||||
|
|
|
@@ -14,20 +14,24 @@ def __init__(self, word) -> None:
|
|||
self.total_results: set = set()
|
||||
self.proxy = False
|
||||
# TODO add api support
|
||||
self.server = 'https://subdomainfinder.c99.nl/'
|
||||
self.totalresults = ''
|
||||
self.server = "https://subdomainfinder.c99.nl/"
|
||||
self.totalresults = ""
|
||||
|
||||
async def do_search(self) -> None:
|
||||
# Based on https://gist.github.com/th3gundy/bc83580cbe04031e9164362b33600962
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
resp = await AsyncFetcher.fetch_all([self.server], headers=headers, proxy=self.proxy)
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
resp = await AsyncFetcher.fetch_all(
|
||||
[self.server], headers=headers, proxy=self.proxy
|
||||
)
|
||||
data = await self.get_csrf_params(resp[0])
|
||||
|
||||
data['scan_subdomains'] = ''
|
||||
data['domain'] = self.word
|
||||
data['privatequery'] = 'on'
|
||||
data["scan_subdomains"] = ""
|
||||
data["domain"] = self.word
|
||||
data["privatequery"] = "on"
|
||||
await asyncio.sleep(get_delay())
|
||||
second_resp = await AsyncFetcher.post_fetch(self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data))
|
||||
second_resp = await AsyncFetcher.post_fetch(
|
||||
self.server, headers=headers, proxy=self.proxy, data=ujson.dumps(data)
|
||||
)
|
||||
|
||||
# print(second_resp)
|
||||
self.totalresults += second_resp
@@ -51,10 +55,10 @@ async def process(self, proxy: bool = False) -> None:
    @staticmethod
    async def get_csrf_params(data):
        csrf_params = {}
        html = BeautifulSoup(data, 'html.parser').find('div', {'class': 'input-group'})
        for c in html.find_all('input'):
        html = BeautifulSoup(data, "html.parser").find("div", {"class": "input-group"})
        for c in html.find_all("input"):
            try:
                csrf_params[c.get('name')] = c.get('value')
                csrf_params[c.get("name")] = c.get("value")
            except Exception:
                continue
@@ -18,59 +18,70 @@ def __init__(self, hosts) -> None:
|
|||
|
||||
async def populate_fingerprints(self):
|
||||
# Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints
|
||||
populate_url = 'https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json'
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
populate_url = "https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json"
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
response = await AsyncFetcher.fetch_all([populate_url], headers=headers)
|
||||
try:
|
||||
resp = response[0]
|
||||
unparsed_json = ujson.loads(resp)
|
||||
for unparsed_fingerprint in unparsed_json:
|
||||
if unparsed_fingerprint['service'] in ['Smugsmug']:
|
||||
if unparsed_fingerprint["service"] in ["Smugsmug"]:
|
||||
# Subdomain must be in format domain.smugsmug.com
|
||||
# This will never happen as subdomains are parsed and filtered to be in format of *.word.com
|
||||
continue
|
||||
if unparsed_fingerprint['status'] == 'Vulnerable' or unparsed_fingerprint['status'] == 'Edge case':
|
||||
self.fingerprints[unparsed_fingerprint['fingerprint']] = unparsed_fingerprint['service']
|
||||
if (
|
||||
unparsed_fingerprint["status"] == "Vulnerable"
|
||||
or unparsed_fingerprint["status"] == "Edge case"
|
||||
):
|
||||
self.fingerprints[unparsed_fingerprint["fingerprint"]] = (
|
||||
unparsed_fingerprint["service"]
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred populating takeover fingerprints: {e}, defaulting to static list')
|
||||
print(
|
||||
f"An exception has occurred populating takeover fingerprints: {e}, defaulting to static list"
|
||||
)
|
||||
self.fingerprints = {
|
||||
"'Trying to access your account?'": 'Campaign Monitor',
|
||||
'404 Not Found': 'Fly.io',
|
||||
'404 error unknown site!': 'Pantheon',
|
||||
'Do you want to register *.wordpress.com?': 'Wordpress',
|
||||
'Domain uses DO name serves with no records in DO.': 'Digital Ocean',
|
||||
"It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": 'LaunchRock',
|
||||
'No Site For Domain': 'Kinsta',
|
||||
'No settings were found for this company:': 'Help Scout',
|
||||
'Project doesnt exist... yet!': 'Readme.io',
|
||||
'Repository not found': 'Bitbucket',
|
||||
'The feed has not been found.': 'Feedpress',
|
||||
'No such app': 'Heroku',
|
||||
'The specified bucket does not exist': 'AWS/S3',
|
||||
'The thing you were looking for is no longer here, or never was': 'Ghost',
|
||||
"There isn't a Github Pages site here.": 'Github',
|
||||
'This UserVoice subdomain is currently available!': 'UserVoice',
|
||||
"Uh oh. That page doesn't exist.": 'Intercom',
|
||||
"We could not find what you're looking for.": 'Help Juice',
|
||||
"Whatever you were looking for doesn't currently exist at this address": 'Tumblr',
|
||||
'is not a registered InCloud YouTrack': 'JetBrains',
|
||||
'page not found': 'Uptimerobot',
|
||||
'project not found': 'Surge.sh',
|
||||
"'Trying to access your account?'": "Campaign Monitor",
|
||||
"404 Not Found": "Fly.io",
|
||||
"404 error unknown site!": "Pantheon",
|
||||
"Do you want to register *.wordpress.com?": "Wordpress",
|
||||
"Domain uses DO name serves with no records in DO.": "Digital Ocean",
|
||||
"It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": "LaunchRock",
|
||||
"No Site For Domain": "Kinsta",
|
||||
"No settings were found for this company:": "Help Scout",
|
||||
"Project doesnt exist... yet!": "Readme.io",
|
||||
"Repository not found": "Bitbucket",
|
||||
"The feed has not been found.": "Feedpress",
|
||||
"No such app": "Heroku",
|
||||
"The specified bucket does not exist": "AWS/S3",
|
||||
"The thing you were looking for is no longer here, or never was": "Ghost",
|
||||
"There isn't a Github Pages site here.": "Github",
|
||||
"This UserVoice subdomain is currently available!": "UserVoice",
|
||||
"Uh oh. That page doesn't exist.": "Intercom",
|
||||
"We could not find what you're looking for.": "Help Juice",
|
||||
"Whatever you were looking for doesn't currently exist at this address": "Tumblr",
|
||||
"is not a registered InCloud YouTrack": "JetBrains",
|
||||
"page not found": "Uptimerobot",
|
||||
"project not found": "Surge.sh",
|
||||
}
|
||||
|
||||
async def check(self, url, resp) -> None:
|
||||
# Simple function that takes response and checks if any fingerprints exist
|
||||
# If a fingerprint exists figures out which one and prints it out
|
||||
regex = re.compile('(?=(' + '|'.join(map(re.escape, list(self.fingerprints.keys()))) + '))')
|
||||
regex = re.compile(
|
||||
"(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))"
|
||||
)
|
||||
# Sanitize fingerprints
|
||||
matches = re.findall(regex, resp)
|
||||
matches = list(set(matches))
|
||||
for match in matches:
|
||||
print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m')
|
||||
print(f"\t\033[91m Takeover detected: {url}\033[1;32;40m")
|
||||
if match in self.fingerprints.keys():
|
||||
# Validation check as to not error out
|
||||
service = self.fingerprints[match]
|
||||
print(f'\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m')
|
||||
print(
|
||||
f"\t\033[91m Type of takeover is: {service} with match: {match}\033[1;32;40m"
|
||||
)
|
||||
self.results[url].append({match: service})
|
||||
|
||||
async def do_take(self) -> None:
|
||||
|
@ -78,11 +89,13 @@ async def do_take(self) -> None:
|
|||
if len(self.hosts) > 0:
|
||||
# Returns a list of tuples in this format: (url, response)
|
||||
# Filter out responses whose responses are empty strings (indicates errored)
|
||||
https_hosts = [f'https://{host}' for host in self.hosts]
|
||||
http_hosts = [f'http://{host}' for host in self.hosts]
|
||||
https_hosts = [f"https://{host}" for host in self.hosts]
|
||||
http_hosts = [f"http://{host}" for host in self.hosts]
|
||||
all_hosts = https_hosts + http_hosts
|
||||
shuffle(all_hosts)
|
||||
resps: list = await AsyncFetcher.fetch_all(all_hosts, takeover=True, proxy=self.proxy)
|
||||
resps: list = await AsyncFetcher.fetch_all(
|
||||
all_hosts, takeover=True, proxy=self.proxy
|
||||
)
|
||||
for url, resp in tuple(resp for resp in resps if len(resp[1]) >= 1):
|
||||
await self.check(url, resp)
|
||||
else:
@@ -9,13 +9,15 @@ def __init__(self, word) -> None:
        self.proxy = False

    async def do_search(self) -> None:
        url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5'
        url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=5"
        response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
        self.totalhosts = {host for host in response[0]['results']}
        second_url = f'https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2'
        secondresp = await AsyncFetcher.fetch_all([second_url], json=True, proxy=self.proxy)
        self.totalhosts = {host for host in response[0]["results"]}
        second_url = f"https://api.threatminer.org/v2/domain.php?q={self.word}&rt=2"
        secondresp = await AsyncFetcher.fetch_all(
            [second_url], json=True, proxy=self.proxy
        )
        try:
            self.totalips = {resp['ip'] for resp in secondresp[0]['results']}
            self.totalips = {resp["ip"] for resp in secondresp[0]["results"]}
        except TypeError:
            pass
@@ -12,10 +12,12 @@ def __init__(self, word, limit, start) -> None:
        self.start = start
        self.key = Core.tomba_key()
        if self.key[0] is None or self.key[1] is None:
            raise MissingKey('Tomba Key and/or Secret')
        self.total_results = ''
            raise MissingKey("Tomba Key and/or Secret")
        self.total_results = ""
        self.counter = start
        self.database = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10'
        self.database = (
            f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit=10"
        )
        self.proxy = False
        self.hostnames: list = []
        self.emails: list = []
@@ -24,38 +26,49 @@ async def do_search(self) -> None:
|
|||
# First determine if a user account is not a free account, this call is free
|
||||
is_free = True
|
||||
headers = {
|
||||
'User-Agent': Core.get_user_agent(),
|
||||
'X-Tomba-Key': self.key[0],
|
||||
'X-Tomba-Secret': self.key[1],
|
||||
"User-Agent": Core.get_user_agent(),
|
||||
"X-Tomba-Key": self.key[0],
|
||||
"X-Tomba-Secret": self.key[1],
|
||||
}
|
||||
acc_info_url = 'https://api.tomba.io/v1/me'
|
||||
response = await AsyncFetcher.fetch_all([acc_info_url], headers=headers, json=True)
|
||||
acc_info_url = "https://api.tomba.io/v1/me"
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[acc_info_url], headers=headers, json=True
|
||||
)
|
||||
is_free = (
|
||||
is_free
|
||||
if 'name' in response[0]['data']['pricing'].keys() and response[0]['data']['pricing']['name'].lower() == 'free'
|
||||
if "name" in response[0]["data"]["pricing"].keys()
|
||||
and response[0]["data"]["pricing"]["name"].lower() == "free"
|
||||
else False
|
||||
)
|
||||
# Extract the total number of requests that are available for an account
|
||||
|
||||
total_requests_avail = (
|
||||
response[0]['data']['requests']['domains']['available'] - response[0]['data']['requests']['domains']['used']
|
||||
response[0]["data"]["requests"]["domains"]["available"]
|
||||
- response[0]["data"]["requests"]["domains"]["used"]
|
||||
)
|
||||
|
||||
if is_free:
|
||||
response = await AsyncFetcher.fetch_all([self.database], headers=headers, proxy=self.proxy, json=True)
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[self.database], headers=headers, proxy=self.proxy, json=True
|
||||
)
|
||||
self.emails, self.hostnames = await self.parse_resp(json_resp=response[0])
|
||||
else:
|
||||
# Determine the total number of emails that are available
|
||||
# As the most emails you can get within one query are 100
|
||||
# This is only done where paid accounts are in play
|
||||
tomba_counter = f'https://api.tomba.io/v1/email-count?domain={self.word}'
|
||||
response = await AsyncFetcher.fetch_all([tomba_counter], headers=headers, proxy=self.proxy, json=True)
|
||||
total_number_reqs = response[0]['data']['total'] // 100
|
||||
tomba_counter = f"https://api.tomba.io/v1/email-count?domain={self.word}"
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[tomba_counter], headers=headers, proxy=self.proxy, json=True
|
||||
)
|
||||
total_number_reqs = response[0]["data"]["total"] // 100
|
||||
# Parse out meta field within initial JSON response to determine the total number of results
|
||||
if total_requests_avail < total_number_reqs:
|
||||
print('WARNING: The account does not have enough requests to gather all the emails.')
|
||||
print(
|
||||
f'Total requests available: {total_requests_avail}, total requests ' f'needed to be made: {total_number_reqs}'
|
||||
"WARNING: The account does not have enough requests to gather all the emails."
|
||||
)
|
||||
print(
|
||||
f"Total requests available: {total_requests_avail}, total requests "
|
||||
f"needed to be made: {total_number_reqs}"
|
||||
)
|
||||
print(
|
||||
'RETURNING current results, If you still wish to run this module despite the current results, please comment out the "if request" line.'
|
||||
|
@@ -66,22 +79,24 @@ async def do_search(self) -> None:
|
|||
# increments of max number with page determining where to start
|
||||
# See docs for more details: https://developer.tomba.io/#domain-search
|
||||
for page in range(0, total_number_reqs + 1):
|
||||
req_url = f'https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}'
|
||||
response = await AsyncFetcher.fetch_all([req_url], headers=headers, proxy=self.proxy, json=True)
|
||||
req_url = f"https://api.tomba.io/v1/domain-search?domain={self.word}&limit={self.limit}&page={page}"
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[req_url], headers=headers, proxy=self.proxy, json=True
|
||||
)
|
||||
temp_emails, temp_hostnames = await self.parse_resp(response[0])
|
||||
self.emails.extend(temp_emails)
|
||||
self.hostnames.extend(temp_hostnames)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def parse_resp(self, json_resp):
|
||||
emails = list(sorted({email['email'] for email in json_resp['data']['emails']}))
|
||||
emails = list(sorted({email["email"] for email in json_resp["data"]["emails"]}))
|
||||
domains = list(
|
||||
sorted(
|
||||
{
|
||||
source['website_url']
|
||||
for email in json_resp['data']['emails']
|
||||
for source in email['sources']
|
||||
if self.word in source['website_url']
|
||||
source["website_url"]
|
||||
for email in json_resp["data"]["emails"]
|
||||
for source in email["sources"]
|
||||
if self.word in source["website_url"]
|
||||
}
|
||||
)
|
||||
)
|
||||
|
|
|
@@ -11,17 +11,25 @@ def __init__(self, word) -> None:
|
|||
self.proxy = False
|
||||
|
||||
async def do_search(self) -> None:
|
||||
url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}'
|
||||
url = f"https://urlscan.io/api/v1/search/?q=domain:{self.word}"
|
||||
response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
|
||||
resp = response[0]
|
||||
self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
|
||||
self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
|
||||
self.totalhosts = {f"{page['page']['domain']}" for page in resp["results"]}
|
||||
self.totalips = {
|
||||
f"{page['page']['ip']}"
|
||||
for page in resp["results"]
|
||||
if "ip" in page["page"].keys()
|
||||
}
|
||||
self.interestingurls = {
|
||||
f"{page['page']['url']}"
|
||||
for page in resp['results']
|
||||
if self.word in page['page']['url'] and 'url' in page['page'].keys()
|
||||
for page in resp["results"]
|
||||
if self.word in page["page"]["url"] and "url" in page["page"].keys()
|
||||
}
|
||||
self.totalasns = {
|
||||
f"{page['page']['asn']}"
|
||||
for page in resp["results"]
|
||||
if "asn" in page["page"].keys()
|
||||
}
|
||||
self.totalasns = {f"{page['page']['asn']}" for page in resp['results'] if 'asn' in page['page'].keys()}
|
||||
|
||||
async def get_hostnames(self) -> set:
|
||||
return self.totalhosts
|
||||
|
|
|
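
For reference, the urlscan.io call above is a single unauthenticated GET whose JSON already carries everything the module keeps. A minimal sketch with plain aiohttp, using the same endpoint and response fields:

import asyncio

import aiohttp


async def urlscan_lookup(word: str) -> tuple[set, set]:
    """Collect domains and IPs for a target from urlscan.io's search API."""
    url = f"https://urlscan.io/api/v1/search/?q=domain:{word}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            data = await resp.json()
    results = data.get("results", [])
    hosts = {page["page"]["domain"] for page in results}
    ips = {page["page"]["ip"] for page in results if "ip" in page["page"]}
    return hosts, ips


# hosts, ips = asyncio.run(urlscan_lookup("example.com"))
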
@@ -8,7 +8,7 @@ class SearchVirustotal:
    def __init__(self, word) -> None:
        self.key = Core.virustotal_key()
        if self.key is None:
            raise MissingKey("virustotal")
        self.word = word
        self.proxy = False
        self.hostnames: list = []

@@ -18,12 +18,14 @@ async def do_search(self) -> None:
        # based on: https://developers.virustotal.com/reference/domains-relationships
        # base_url = "https://www.virustotal.com/api/v3/domains/domain/subdomains?limit=40"
        headers = {
            "User-Agent": Core.get_user_agent(),
            "Accept": "application/json",
            "x-apikey": self.key,
        }
        base_url = (
            f"https://www.virustotal.com/api/v3/domains/{self.word}/subdomains?limit=40"
        )
        cursor = ""
        count = 0
        fail_counter = 0
        counter = 0

@ -35,29 +37,42 @@ async def do_search(self) -> None:
|
|||
# TODO add timer logic if proven to be needed
|
||||
# in the meantime sleeping 16 seconds should eliminate hitting the rate limit
|
||||
# in case rate limit is hit, fail counter exists and sleep for 65 seconds
|
||||
send_url = base_url + '&cursor=' + cursor if cursor != '' and len(cursor) > 2 else base_url
|
||||
responses = await AsyncFetcher.fetch_all([send_url], headers=headers, proxy=self.proxy, json=True)
|
||||
send_url = (
|
||||
base_url + "&cursor=" + cursor
|
||||
if cursor != "" and len(cursor) > 2
|
||||
else base_url
|
||||
)
|
||||
responses = await AsyncFetcher.fetch_all(
|
||||
[send_url], headers=headers, proxy=self.proxy, json=True
|
||||
)
|
||||
jdata = responses[0]
|
||||
if 'data' not in jdata.keys():
|
||||
if "data" not in jdata.keys():
|
||||
await asyncio.sleep(60 + 5)
|
||||
fail_counter += 1
|
||||
if 'meta' in jdata.keys():
|
||||
cursor = jdata['meta']['cursor'] if 'cursor' in jdata['meta'].keys() else ''
|
||||
if len(cursor) == 0 and 'data' in jdata.keys():
|
||||
if "meta" in jdata.keys():
|
||||
cursor = (
|
||||
jdata["meta"]["cursor"] if "cursor" in jdata["meta"].keys() else ""
|
||||
)
|
||||
if len(cursor) == 0 and "data" in jdata.keys():
|
||||
# if the cursor is no longer in the meta field, we have hit the last entry
|
||||
breakcon = True
|
||||
count += jdata['meta']['count']
|
||||
count += jdata["meta"]["count"]
|
||||
if count == 0 or fail_counter >= 2:
|
||||
break
|
||||
if 'data' in jdata.keys():
|
||||
data = jdata['data']
|
||||
if "data" in jdata.keys():
|
||||
data = jdata["data"]
|
||||
self.hostnames.extend(await self.parse_hostnames(data, self.word))
|
||||
counter += 1
|
||||
await asyncio.sleep(16)
|
||||
self.hostnames = list(sorted(set(self.hostnames)))
|
||||
# verify domains such as x.x.com.multicdn.x.com are parsed properly
|
||||
self.hostnames = [
|
||||
host for host in self.hostnames if ((len(host.split('.')) >= 3) and host.split('.')[-2] == self.word.split('.')[-2])
|
||||
host
|
||||
for host in self.hostnames
|
||||
if (
|
||||
(len(host.split(".")) >= 3)
|
||||
and host.split(".")[-2] == self.word.split(".")[-2]
|
||||
)
|
||||
]
|
||||
|
||||
async def get_hostnames(self) -> list:
|
||||
|
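
The loop above is cursor-driven pagination: each reply's meta.cursor is appended to the next request, a missing data key is treated as a rate-limit hit, and the walk stops once the cursor disappears or two failures accumulate. A compressed sketch of the same idea (VT_KEY is a placeholder, not a real key):

import asyncio

import aiohttp

VT_KEY = "REPLACE_ME"  # placeholder; a real VirusTotal API key goes here


async def vt_subdomains(domain: str) -> list[str]:
    """Walk VirusTotal's cursor-paginated subdomains endpoint."""
    headers = {"Accept": "application/json", "x-apikey": VT_KEY}
    base = f"https://www.virustotal.com/api/v3/domains/{domain}/subdomains?limit=40"
    cursor = ""
    found: list[str] = []
    failures = 0
    async with aiohttp.ClientSession(headers=headers) as session:
        while True:
            url = base + ("&cursor=" + cursor if len(cursor) > 2 else "")
            async with session.get(url) as resp:
                jdata = await resp.json()
            if "data" not in jdata:
                # likely rate limited: back off once, give up after two misses
                failures += 1
                if failures >= 2:
                    break
                await asyncio.sleep(65)
                continue
            found.extend(item["id"] for item in jdata["data"])
            cursor = jdata.get("meta", {}).get("cursor", "")
            if not cursor:
                break
            await asyncio.sleep(16)  # stay under the public-API rate limit
    return sorted(set(found))
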
@ -67,20 +82,22 @@ async def get_hostnames(self) -> list:
|
|||
async def parse_hostnames(data, word):
|
||||
total_subdomains = set()
|
||||
for attribute in data:
|
||||
total_subdomains.add(attribute['id'].replace('"', '').replace('www.', ''))
|
||||
attributes = attribute['attributes']
|
||||
total_subdomains.add(attribute["id"].replace('"', "").replace("www.", ""))
|
||||
attributes = attribute["attributes"]
|
||||
total_subdomains.update(
|
||||
{
|
||||
value['value'].replace('"', '').replace('www.', '')
|
||||
for value in attributes['last_dns_records']
|
||||
if word in value['value']
|
||||
value["value"].replace('"', "").replace("www.", "")
|
||||
for value in attributes["last_dns_records"]
|
||||
if word in value["value"]
|
||||
}
|
||||
)
|
||||
if 'last_https_certificate' in attributes.keys():
|
||||
if "last_https_certificate" in attributes.keys():
|
||||
total_subdomains.update(
|
||||
{
|
||||
value.replace('"', '').replace('www.', '')
|
||||
for value in attributes['last_https_certificate']['extensions']['subject_alternative_name']
|
||||
value.replace('"', "").replace("www.", "")
|
||||
for value in attributes["last_https_certificate"]["extensions"][
|
||||
"subject_alternative_name"
|
||||
]
|
||||
if word in value
|
||||
}
|
||||
)
|
||||
|
@ -91,7 +108,9 @@ async def parse_hostnames(data, word):
|
|||
total_subdomains = [
|
||||
x
|
||||
for x in total_subdomains
|
||||
if 'edgekey.net' not in str(x) and 'akadns.net' not in str(x) and 'include:_spf' not in str(x)
|
||||
if "edgekey.net" not in str(x)
|
||||
and "akadns.net" not in str(x)
|
||||
and "include:_spf" not in str(x)
|
||||
]
|
||||
total_subdomains.sort()
|
||||
return total_subdomains
|
||||
|
|
|
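
The tail of this module is pure post-processing: drop CDN and SPF noise and keep hosts whose second-level label matches the target. Pulled out as a function, that filter is easy to test in isolation:

def filter_vt_hostnames(hostnames: list[str], word: str) -> list[str]:
    """Drop CDN/SPF noise and keep hosts on the same second-level domain as the target."""
    noise = ("edgekey.net", "akadns.net", "include:_spf")
    return [
        host
        for host in sorted(set(hostnames))
        if not any(marker in host for marker in noise)
        and len(host.split(".")) >= 3
        and host.split(".")[-2] == word.split(".")[-2]
    ]


# filter_vt_hostnames(["a.b.example.com", "x.edgekey.net"], "example.com")
# -> ["a.b.example.com"]
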
@@ -5,16 +5,22 @@
class SearchYahoo:
    def __init__(self, word, limit) -> None:
        self.word = word
        self.total_results = ""
        self.server = "search.yahoo.com"
        self.limit = limit
        self.proxy = False

    async def do_search(self) -> None:
        base_url = f"https://{self.server}/search?p=%40{self.word}&b=xx&pz=10"
        headers = {"Host": self.server, "User-agent": Core.get_user_agent()}
        urls = [
            base_url.replace("xx", str(num))
            for num in range(0, self.limit, 10)
            if num <= self.limit
        ]
        responses = await AsyncFetcher.fetch_all(
            urls, headers=headers, proxy=self.proxy
        )
        for response in responses:
            self.total_results += response

@@ -29,8 +35,8 @@ async def get_emails(self):
        # strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld
        for email in toparse_emails:
            email = str(email)
            if "-" in email and email[0].isdigit() and email.index("-") <= 9:
                while email[0] == "-" or email[0].isdigit():
                    email = email[1:]
            emails.add(email)
        return list(emails)
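
The digit-and-dash stripping above exists because scraped Yahoo result pages sometimes glue a phone number onto the front of an address. Extracted as a helper, the behaviour is easy to verify:

def strip_leading_phone_noise(email: str) -> str:
    """Remove digits/dashes glued onto the front of a scraped address,
    e.g. '555-123-4567john@example.com' -> 'john@example.com'."""
    if "-" in email and email[0].isdigit() and email.index("-") <= 9:
        while email and (email[0] == "-" or email[0].isdigit()):
            email = email[1:]
    return email


assert strip_leading_phone_noise("555-123-4567john@example.com") == "john@example.com"
assert strip_leading_phone_noise("john@example.com") == "john@example.com"
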
@@ -16,8 +16,8 @@ def __init__(self, word, limit) -> None:
        # If you wish to extract as many subdomains as possible visit the fetch_subdomains
        # To see how
        if self.key is None:
            raise MissingKey("zoomeye")
        self.baseurl = "https://api.zoomeye.org/host/search"
        self.proxy = False
        self.totalasns: list = list()
        self.totalhosts: list = list()

@ -58,38 +58,40 @@ def __init__(self, word, limit) -> None:
|
|||
|
||||
async def fetch_subdomains(self) -> None:
|
||||
# Based on docs from: https://www.zoomeye.org/doc#search-sub-domain-ip
|
||||
headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
|
||||
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
|
||||
|
||||
subdomain_search_endpoint = f'https://api.zoomeye.org/domain/search?q={self.word}&type=0&'
|
||||
subdomain_search_endpoint = (
|
||||
f"https://api.zoomeye.org/domain/search?q={self.word}&type=0&"
|
||||
)
|
||||
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[subdomain_search_endpoint + 'page=1'],
|
||||
[subdomain_search_endpoint + "page=1"],
|
||||
json=True,
|
||||
proxy=self.proxy,
|
||||
headers=headers,
|
||||
)
|
||||
# Make initial request to determine total number of subdomains
|
||||
resp = response[0]
|
||||
if resp['status'] != 200:
|
||||
if resp["status"] != 200:
|
||||
return
|
||||
total = resp['total']
|
||||
total = resp["total"]
|
||||
# max number of results per request seems to be 30
|
||||
# NOTE: If you wish to get as many subdomains as possible
|
||||
# Change the line below to:
|
||||
# self.limit = (total // 30) + 1
|
||||
self.limit = self.limit if total > self.limit else (total // 30) + 1
|
||||
self.totalhosts.extend([item['name'] for item in resp['list']])
|
||||
self.totalhosts.extend([item["name"] for item in resp["list"]])
|
||||
for i in range(2, self.limit):
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[subdomain_search_endpoint + f'page={i}'],
|
||||
[subdomain_search_endpoint + f"page={i}"],
|
||||
json=True,
|
||||
proxy=self.proxy,
|
||||
headers=headers,
|
||||
)
|
||||
resp = response[0]
|
||||
if resp['status'] != 200:
|
||||
if resp["status"] != 200:
|
||||
return
|
||||
found_subdomains = [item['name'] for item in resp['list']]
|
||||
found_subdomains = [item["name"] for item in resp["list"]]
|
||||
if len(found_subdomains) == 0:
|
||||
break
|
||||
self.totalhosts.extend(found_subdomains)
|
||||
|
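
The capping logic in fetch_subdomains above (at most 30 results per page, honour the user's limit unless the result set is small enough to fetch completely) reduces to a little arithmetic, sketched here as a pure function:

def pages_to_fetch(total_results: int, user_limit: int, per_page: int = 30) -> int:
    """Mirror the capping above: honour the user's limit unless the result set is small."""
    needed = (total_results // per_page) + 1
    return user_limit if total_results > user_limit else needed


assert pages_to_fetch(95, user_limit=500) == 4        # small result set: walk all 4 pages
assert pages_to_fetch(10_000, user_limit=500) == 500  # large result set: stop at the cap
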
@ -97,17 +99,19 @@ async def fetch_subdomains(self) -> None:
|
|||
await asyncio.sleep(get_delay() + 1)
|
||||
|
||||
async def do_search(self) -> None:
|
||||
headers = {'API-KEY': self.key, 'User-Agent': Core.get_user_agent()}
|
||||
headers = {"API-KEY": self.key, "User-Agent": Core.get_user_agent()}
|
||||
# Fetch subdomains first
|
||||
await self.fetch_subdomains()
|
||||
params = (
|
||||
('query', f'site:{self.word}'),
|
||||
('page', '1'),
|
||||
("query", f"site:{self.word}"),
|
||||
("page", "1"),
|
||||
)
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params
|
||||
)
|
||||
response = await AsyncFetcher.fetch_all([self.baseurl], json=True, proxy=self.proxy, headers=headers, params=params)
|
||||
# The first request determines how many pages there are in total
|
||||
resp = response[0]
|
||||
total_pages = int(resp['available'])
|
||||
total_pages = int(resp["available"])
|
||||
self.limit = self.limit if total_pages > self.limit else total_pages
|
||||
self.limit = 3 if self.limit == 2 else self.limit
|
||||
cur_page = 2 if self.limit >= 2 else -1
|
||||
|
@ -117,17 +121,21 @@ async def do_search(self) -> None:
|
|||
# cur_page = -1
|
||||
if cur_page == -1:
|
||||
# No need to loop, just parse and leave
|
||||
if 'matches' in resp.keys():
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
|
||||
if "matches" in resp.keys():
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(
|
||||
resp["matches"]
|
||||
)
|
||||
self.totalhosts.extend(hostnames)
|
||||
self.totalemails.extend(emails)
|
||||
self.totalips.extend(ips)
|
||||
self.totalasns.extend(asns)
|
||||
self.interestingurls.extend(iurls)
|
||||
else:
|
||||
if 'matches' in resp.keys():
|
||||
if "matches" in resp.keys():
|
||||
# Parse out initial results and then continue to loop
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(
|
||||
resp["matches"]
|
||||
)
|
||||
self.totalhosts.extend(hostnames)
|
||||
self.totalemails.extend(emails)
|
||||
self.totalips.extend(ips)
|
||||
|
@ -137,8 +145,8 @@ async def do_search(self) -> None:
|
|||
for num in range(2, self.limit):
|
||||
# print(f'Currently on page: {num}')
|
||||
params = (
|
||||
('query', f'site:{self.word}'),
|
||||
('page', f'{num}'),
|
||||
("query", f"site:{self.word}"),
|
||||
("page", f"{num}"),
|
||||
)
|
||||
response = await AsyncFetcher.fetch_all(
|
||||
[self.baseurl],
|
||||
|
@ -148,14 +156,22 @@ async def do_search(self) -> None:
|
|||
params=params,
|
||||
)
|
||||
resp = response[0]
|
||||
if 'matches' not in resp.keys():
|
||||
print(f'Your resp: {resp}')
|
||||
print('Match not found in keys')
|
||||
if "matches" not in resp.keys():
|
||||
print(f"Your resp: {resp}")
|
||||
print("Match not found in keys")
|
||||
break
|
||||
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(resp['matches'])
|
||||
hostnames, emails, ips, asns, iurls = await self.parse_matches(
|
||||
resp["matches"]
|
||||
)
|
||||
|
||||
if len(hostnames) == 0 and len(emails) == 0 and len(ips) == 0 and len(asns) == 0 and len(iurls) == 0:
|
||||
if (
|
||||
len(hostnames) == 0
|
||||
and len(emails) == 0
|
||||
and len(ips) == 0
|
||||
and len(asns) == 0
|
||||
and len(iurls) == 0
|
||||
):
|
||||
nomatches_counter += 1
|
||||
|
||||
if nomatches_counter >= 5:
|
||||
|
@ -180,42 +196,48 @@ async def parse_matches(self, matches):
|
|||
emails = set()
|
||||
for match in matches:
|
||||
try:
|
||||
ips.add(match['ip'])
|
||||
ips.add(match["ip"])
|
||||
|
||||
if 'geoinfo' in match.keys():
|
||||
if "geoinfo" in match.keys():
|
||||
asns.add(f"AS{match['geoinfo']['asn']}")
|
||||
|
||||
if 'rdns_new' in match.keys():
|
||||
rdns_new = match['rdns_new']
|
||||
if "rdns_new" in match.keys():
|
||||
rdns_new = match["rdns_new"]
|
||||
|
||||
if ',' in rdns_new:
|
||||
parts = str(rdns_new).split(',')
|
||||
if "," in rdns_new:
|
||||
parts = str(rdns_new).split(",")
|
||||
rdns_new = parts[0]
|
||||
if len(parts) == 2:
|
||||
hostnames.add(parts[1])
|
||||
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
|
||||
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
|
||||
hostnames.add(rdns_new)
|
||||
else:
|
||||
rdns_new = rdns_new[:-1] if rdns_new[-1] == '.' else rdns_new
|
||||
rdns_new = rdns_new[:-1] if rdns_new[-1] == "." else rdns_new
|
||||
hostnames.add(rdns_new)
|
||||
|
||||
if 'rdns' in match.keys():
|
||||
rdns = match['rdns']
|
||||
rdns = rdns[:-1] if rdns[-1] == '.' else rdns
|
||||
if "rdns" in match.keys():
|
||||
rdns = match["rdns"]
|
||||
rdns = rdns[:-1] if rdns[-1] == "." else rdns
|
||||
hostnames.add(rdns)
|
||||
|
||||
if 'portinfo' in match.keys():
|
||||
if "portinfo" in match.keys():
|
||||
# re.
|
||||
temp_emails = set(await self.parse_emails(match['portinfo']['banner']))
|
||||
temp_emails = set(
|
||||
await self.parse_emails(match["portinfo"]["banner"])
|
||||
)
|
||||
emails.update(temp_emails)
|
||||
hostnames.update(set(await self.parse_hostnames(match['portinfo']['banner'])))
|
||||
hostnames.update(
|
||||
set(await self.parse_hostnames(match["portinfo"]["banner"]))
|
||||
)
|
||||
iurls = {
|
||||
str(iurl.group(1)).replace('"', '')
|
||||
for iurl in re.finditer(self.iurl_regex, match['portinfo']['banner'])
|
||||
str(iurl.group(1)).replace('"', "")
|
||||
for iurl in re.finditer(
|
||||
self.iurl_regex, match["portinfo"]["banner"]
|
||||
)
|
||||
if self.word in str(iurl.group(1))
|
||||
}
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {e}')
|
||||
print(f"An exception has occurred: {e}")
|
||||
return hostnames, emails, ips, asns, iurls
|
||||
|
||||
async def process(self, proxy: bool = False) -> None:
|
||||
|
|
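
Much of parse_matches above is normalisation of the rdns_new field: comma-joined entries are split into two hostnames and a trailing dot is removed. A stand-alone helper doing the same job (using rstrip instead of the single-character slice, which behaves identically on well-formed records):

def normalise_rdns(rdns_new: str) -> set[str]:
    """Split comma-joined rDNS entries and strip a trailing dot, as above."""
    hostnames = set()
    if "," in rdns_new:
        parts = rdns_new.split(",")
        if len(parts) == 2:
            hostnames.add(parts[1])
        rdns_new = parts[0]
    hostnames.add(rdns_new.rstrip("."))
    return hostnames


assert normalise_rdns("mail.example.com.") == {"mail.example.com"}
assert normalise_rdns("a.example.com.,b.example.com") == {"a.example.com", "b.example.com"}
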
|
@@ -1 +1 @@
__all__ = ["hostchecker"]

@ -12,32 +12,36 @@
|
|||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
app = FastAPI(
|
||||
title='Restful Harvest',
|
||||
description='Rest API for theHarvester powered by FastAPI',
|
||||
version='0.0.2',
|
||||
title="Restful Harvest",
|
||||
description="Rest API for theHarvester powered by FastAPI",
|
||||
version="0.0.2",
|
||||
)
|
||||
app.state.limiter = limiter
|
||||
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore
|
||||
|
||||
# This is where we will host files that arise if the user specifies a filename
|
||||
try:
|
||||
app.mount('/static', StaticFiles(directory='theHarvester/lib/api/static/'), name='static')
|
||||
app.mount(
|
||||
"/static", StaticFiles(directory="theHarvester/lib/api/static/"), name="static"
|
||||
)
|
||||
except RuntimeError:
|
||||
static_path = os.path.expanduser('~/.local/share/theHarvester/static/')
|
||||
static_path = os.path.expanduser("~/.local/share/theHarvester/static/")
|
||||
if not os.path.isdir(static_path):
|
||||
os.makedirs(static_path)
|
||||
app.mount(
|
||||
'/static',
|
||||
"/static",
|
||||
StaticFiles(directory=static_path),
|
||||
name='static',
|
||||
name="static",
|
||||
)
|
||||
|
||||
|
||||
@app.get('/')
|
||||
@app.get("/")
|
||||
async def root(*, user_agent: str = Header(None)) -> Response:
|
||||
# very basic user agent filtering
|
||||
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
|
||||
response = RedirectResponse(app.url_path_for('bot'))
|
||||
if user_agent and (
|
||||
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
|
||||
):
|
||||
response = RedirectResponse(app.url_path_for("bot"))
|
||||
return response
|
||||
|
||||
return HTMLResponse(
|
||||
|
@ -66,34 +70,36 @@ async def root(*, user_agent: str = Header(None)) -> Response:
|
|||
)
|
||||
|
||||
|
||||
@app.get('/nicebot')
|
||||
@app.get("/nicebot")
|
||||
async def bot() -> dict[str, str]:
|
||||
# nice bot
|
||||
string = {'bot': 'These are not the droids you are looking for'}
|
||||
string = {"bot": "These are not the droids you are looking for"}
|
||||
return string
|
||||
|
||||
|
||||
@app.get('/sources', response_class=UJSONResponse)
|
||||
@limiter.limit('5/minute')
|
||||
@app.get("/sources", response_class=UJSONResponse)
|
||||
@limiter.limit("5/minute")
|
||||
async def getsources(request: Request):
|
||||
# Endpoint for user to query for available sources theHarvester supports
|
||||
# Rate limit of 5 requests per minute
|
||||
sources = __main__.Core.get_supportedengines()
|
||||
return {'sources': sources}
|
||||
return {"sources": sources}
|
||||
|
||||
|
||||
@app.get('/dnsbrute')
|
||||
@limiter.limit('5/minute')
|
||||
@app.get("/dnsbrute")
|
||||
@limiter.limit("5/minute")
|
||||
async def dnsbrute(
|
||||
request: Request,
|
||||
user_agent: str = Header(None),
|
||||
domain: str = Query(..., description='Domain to be brute forced'),
|
||||
domain: str = Query(..., description="Domain to be brute forced"),
|
||||
) -> Response:
|
||||
# Endpoint for user to signal to do DNS brute forcing
|
||||
# Rate limit of 5 requests per minute
|
||||
# basic user agent filtering
|
||||
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
|
||||
response = RedirectResponse(app.url_path_for('bot'))
|
||||
if user_agent and (
|
||||
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
|
||||
):
|
||||
response = RedirectResponse(app.url_path_for("bot"))
|
||||
return response
|
||||
dns_bruteforce = await __main__.start(
|
||||
argparse.Namespace(
|
||||
|
@ -102,45 +108,49 @@ async def dnsbrute(
|
|||
dns_server=False,
|
||||
dns_tld=False,
|
||||
domain=domain,
|
||||
filename='',
|
||||
filename="",
|
||||
google_dork=False,
|
||||
limit=500,
|
||||
proxies=False,
|
||||
shodan=False,
|
||||
source=','.join([]),
|
||||
source=",".join([]),
|
||||
start=0,
|
||||
take_over=False,
|
||||
virtual_host=False,
|
||||
)
|
||||
)
|
||||
return UJSONResponse({'dns_bruteforce': dns_bruteforce})
|
||||
return UJSONResponse({"dns_bruteforce": dns_bruteforce})
|
||||
|
||||
|
||||
@app.get('/query')
|
||||
@limiter.limit('2/minute')
|
||||
@app.get("/query")
|
||||
@limiter.limit("2/minute")
|
||||
async def query(
|
||||
request: Request,
|
||||
dns_server: str = Query(''),
|
||||
dns_server: str = Query(""),
|
||||
user_agent: str = Header(None),
|
||||
dns_brute: bool = Query(False),
|
||||
dns_lookup: bool = Query(False),
|
||||
dns_tld: bool = Query(False),
|
||||
filename: str = Query(''),
|
||||
filename: str = Query(""),
|
||||
google_dork: bool = Query(False),
|
||||
proxies: bool = Query(False),
|
||||
shodan: bool = Query(False),
|
||||
take_over: bool = Query(False),
|
||||
virtual_host: bool = Query(False),
|
||||
source: list[str] = Query(..., description='Data sources to query comma separated with no space'),
|
||||
source: list[str] = Query(
|
||||
..., description="Data sources to query comma separated with no space"
|
||||
),
|
||||
limit: int = Query(500),
|
||||
start: int = Query(0),
|
||||
domain: str = Query(..., description='Domain to be harvested'),
|
||||
domain: str = Query(..., description="Domain to be harvested"),
|
||||
) -> Response:
|
||||
# Query function that allows user to query theHarvester rest API
|
||||
# Rate limit of 2 requests per minute
|
||||
# basic user agent filtering
|
||||
if user_agent and ('gobuster' in user_agent or 'sqlmap' in user_agent or 'rustbuster' in user_agent):
|
||||
response = RedirectResponse(app.url_path_for('bot'))
|
||||
if user_agent and (
|
||||
"gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent
|
||||
):
|
||||
response = RedirectResponse(app.url_path_for("bot"))
|
||||
return response
|
||||
try:
|
||||
(
|
||||
|
@ -165,7 +175,7 @@ async def query(
|
|||
limit=limit,
|
||||
proxies=proxies,
|
||||
shodan=shodan,
|
||||
source=','.join(source),
|
||||
source=",".join(source),
|
||||
start=start,
|
||||
take_over=take_over,
|
||||
virtual_host=virtual_host,
|
||||
|
@ -174,16 +184,18 @@ async def query(
|
|||
|
||||
return UJSONResponse(
|
||||
{
|
||||
'asns': asns,
|
||||
'interesting_urls': iurls,
|
||||
'twitter_people': twitter_people_list,
|
||||
'linkedin_people': linkedin_people_list,
|
||||
'linkedin_links': linkedin_links,
|
||||
'trello_urls': aurls,
|
||||
'ips': aips,
|
||||
'emails': aemails,
|
||||
'hosts': ahosts,
|
||||
"asns": asns,
|
||||
"interesting_urls": iurls,
|
||||
"twitter_people": twitter_people_list,
|
||||
"linkedin_people": linkedin_people_list,
|
||||
"linkedin_links": linkedin_links,
|
||||
"trello_urls": aurls,
|
||||
"ips": aips,
|
||||
"emails": aemails,
|
||||
"hosts": ahosts,
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
return UJSONResponse({'exception': 'Please contact the server administrator to check the issue'})
|
||||
return UJSONResponse(
|
||||
{"exception": "Please contact the server administrator to check the issue"}
|
||||
)
|
||||
|
|
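
The endpoints above all follow one slowapi pattern: a module-level Limiter keyed on the client address, attached to app.state, a per-route @limiter.limit decorator, and a Request parameter so the limiter can see the caller. A minimal standalone app using the same pieces; the route names and sources list here are illustrative only:

from fastapi import FastAPI, Query, Request
from fastapi.responses import UJSONResponse
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)
app = FastAPI(title="Mini Harvest", version="0.0.1")
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


@app.get("/sources", response_class=UJSONResponse)
@limiter.limit("5/minute")  # same per-client throttle as above
async def sources(request: Request):
    return {"sources": ["baidu", "bing", "duckduckgo"]}


@app.get("/echo")
@limiter.limit("2/minute")
async def echo(request: Request, domain: str = Query(..., description="Domain to echo")):
    return {"domain": domain}
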
|
@ -23,94 +23,100 @@ async def main() -> None:
|
|||
Just a simple example of how to interact with the rest api
|
||||
you can easily use requests instead of aiohttp or whatever you best see fit
|
||||
"""
|
||||
url = 'http://127.0.0.1:5000'
|
||||
domain = 'netflix.com'
|
||||
query_url = f'{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}'
|
||||
url = "http://127.0.0.1:5000"
|
||||
domain = "netflix.com"
|
||||
query_url = (
|
||||
f"{url}/query?limit=300&source=bing,baidu,duckduckgo,dogpile&domain={domain}"
|
||||
)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
fetched_json = await fetch_json(session, query_url)
|
||||
total_asns = fetched_json['asns']
|
||||
interesting_urls = fetched_json['interesting_urls']
|
||||
twitter_people_list_tracker = fetched_json['twitter_people']
|
||||
linkedin_people_list_tracker = fetched_json['linkedin_people']
|
||||
linkedin_links_tracker = fetched_json['linkedin_links']
|
||||
trello_urls = fetched_json['trello_urls']
|
||||
ips = fetched_json['ips']
|
||||
emails = fetched_json['emails']
|
||||
hosts = fetched_json['hosts']
|
||||
total_asns = fetched_json["asns"]
|
||||
interesting_urls = fetched_json["interesting_urls"]
|
||||
twitter_people_list_tracker = fetched_json["twitter_people"]
|
||||
linkedin_people_list_tracker = fetched_json["linkedin_people"]
|
||||
linkedin_links_tracker = fetched_json["linkedin_links"]
|
||||
trello_urls = fetched_json["trello_urls"]
|
||||
ips = fetched_json["ips"]
|
||||
emails = fetched_json["emails"]
|
||||
hosts = fetched_json["hosts"]
|
||||
|
||||
if len(total_asns) > 0:
|
||||
print(f'\n[*] ASNS found: {len(total_asns)}')
|
||||
print('--------------------')
|
||||
print(f"\n[*] ASNS found: {len(total_asns)}")
|
||||
print("--------------------")
|
||||
total_asns = list(sorted(set(total_asns)))
|
||||
for asn in total_asns:
|
||||
print(asn)
|
||||
|
||||
if len(interesting_urls) > 0:
|
||||
print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
|
||||
print('--------------------')
|
||||
print(f"\n[*] Interesting Urls found: {len(interesting_urls)}")
|
||||
print("--------------------")
|
||||
interesting_urls = list(sorted(set(interesting_urls)))
|
||||
for iurl in interesting_urls:
|
||||
print(iurl)
|
||||
|
||||
if len(twitter_people_list_tracker) == 0:
|
||||
print('\n[*] No Twitter users found.\n\n')
|
||||
print("\n[*] No Twitter users found.\n\n")
|
||||
else:
|
||||
if len(twitter_people_list_tracker) >= 1:
|
||||
print('\n[*] Twitter Users found: ' + str(len(twitter_people_list_tracker)))
|
||||
print('---------------------')
|
||||
print("\n[*] Twitter Users found: " + str(len(twitter_people_list_tracker)))
|
||||
print("---------------------")
|
||||
twitter_people_list_tracker = list(sorted(set(twitter_people_list_tracker)))
|
||||
for usr in twitter_people_list_tracker:
|
||||
print(usr)
|
||||
|
||||
if len(linkedin_people_list_tracker) == 0:
|
||||
print('\n[*] No LinkedIn users found.\n\n')
|
||||
print("\n[*] No LinkedIn users found.\n\n")
|
||||
else:
|
||||
if len(linkedin_people_list_tracker) >= 1:
|
||||
print('\n[*] LinkedIn Users found: ' + str(len(linkedin_people_list_tracker)))
|
||||
print('---------------------')
|
||||
linkedin_people_list_tracker = list(sorted(set(linkedin_people_list_tracker)))
|
||||
print(
|
||||
"\n[*] LinkedIn Users found: " + str(len(linkedin_people_list_tracker))
|
||||
)
|
||||
print("---------------------")
|
||||
linkedin_people_list_tracker = list(
|
||||
sorted(set(linkedin_people_list_tracker))
|
||||
)
|
||||
for usr in linkedin_people_list_tracker:
|
||||
print(usr)
|
||||
|
||||
if len(linkedin_links_tracker) == 0:
|
||||
print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
|
||||
print(f"\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}")
|
||||
linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
|
||||
print('---------------------')
|
||||
print("---------------------")
|
||||
for link in linkedin_links_tracker:
|
||||
print(link)
|
||||
|
||||
length_urls = len(trello_urls)
|
||||
total = length_urls
|
||||
print('\n[*] Trello URLs found: ' + str(total))
|
||||
print('--------------------')
|
||||
print("\n[*] Trello URLs found: " + str(total))
|
||||
print("--------------------")
|
||||
all_urls = list(sorted(set(trello_urls)))
|
||||
for url in sorted(all_urls):
|
||||
print(url)
|
||||
|
||||
if len(ips) == 0:
|
||||
print('\n[*] No IPs found.')
|
||||
print("\n[*] No IPs found.")
|
||||
else:
|
||||
print('\n[*] IPs found: ' + str(len(ips)))
|
||||
print('-------------------')
|
||||
print("\n[*] IPs found: " + str(len(ips)))
|
||||
print("-------------------")
|
||||
# use netaddr as the list may contain ipv4 and ipv6 addresses
|
||||
ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(ips)])
|
||||
print('\n'.join(map(str, ip_list)))
|
||||
print("\n".join(map(str, ip_list)))
|
||||
|
||||
if len(emails) == 0:
|
||||
print('\n[*] No emails found.')
|
||||
print("\n[*] No emails found.")
|
||||
else:
|
||||
print('\n[*] Emails found: ' + str(len(emails)))
|
||||
print('----------------------')
|
||||
print("\n[*] Emails found: " + str(len(emails)))
|
||||
print("----------------------")
|
||||
all_emails = sorted(list(set(emails)))
|
||||
print('\n'.join(all_emails))
|
||||
print("\n".join(all_emails))
|
||||
|
||||
if len(hosts) == 0:
|
||||
print('\n[*] No hosts found.\n\n')
|
||||
print("\n[*] No hosts found.\n\n")
|
||||
else:
|
||||
print('\n[*] Hosts found: ' + str(len(hosts)))
|
||||
print('---------------------')
|
||||
print('\n'.join(hosts))
|
||||
print("\n[*] Hosts found: " + str(len(hosts)))
|
||||
print("---------------------")
|
||||
print("\n".join(hosts))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
|
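
As the docstring above notes, nothing about the API requires aiohttp; a synchronous client is a few lines of requests. Host, port and response field names below match the restfulHarvest defaults and the /query output shown earlier:

import requests


def query_harvester(domain: str, sources: str = "bing,duckduckgo", limit: int = 300) -> dict:
    """Call the locally running REST API and return its JSON payload."""
    url = "http://127.0.0.1:5000/query"
    params = {"limit": limit, "source": sources, "domain": domain}
    resp = requests.get(url, params=params, timeout=600)
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    data = query_harvester("netflix.com")
    print(f"hosts: {len(data.get('hosts', []))}, emails: {len(data.get('emails', []))}")
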
|
@@ -19,11 +19,11 @@
if TYPE_CHECKING:
    from collections.abc import Sized

DATA_DIR = Path(__file__).parents[1] / "data"
CONFIG_DIRS = [
    Path("/etc/theHarvester/"),
    Path("/usr/local/etc/theHarvester/"),
    Path("~/.theHarvester"),
]

@@ -35,7 +35,7 @@ def _read_config(filename: str) -> str:
        with contextlib.suppress(FileNotFoundError):
            file = path.expanduser() / filename
            config = file.read_text()
            print(f"Read {filename} from {file}")
            return config

        # Fallback to creating default in user's home dir

@ -43,160 +43,168 @@ def _read_config(filename: str) -> str:
|
|||
dest = CONFIG_DIRS[-1].expanduser() / filename
|
||||
dest.parent.mkdir(exist_ok=True)
|
||||
dest.write_text(default)
|
||||
print(f'Created default {filename} at {dest}')
|
||||
print(f"Created default {filename} at {dest}")
|
||||
return default
|
||||
|
||||
@staticmethod
|
||||
def api_keys() -> dict:
|
||||
keys = yaml.safe_load(Core._read_config('api-keys.yaml'))
|
||||
return keys['apikeys']
|
||||
keys = yaml.safe_load(Core._read_config("api-keys.yaml"))
|
||||
return keys["apikeys"]
|
||||
|
||||
@staticmethod
|
||||
def bevigil_key() -> str:
|
||||
return Core.api_keys()['bevigil']['key']
|
||||
return Core.api_keys()["bevigil"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def binaryedge_key() -> str:
|
||||
return Core.api_keys()['binaryedge']['key']
|
||||
return Core.api_keys()["binaryedge"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def bing_key() -> str:
|
||||
return Core.api_keys()['bing']['key']
|
||||
return Core.api_keys()["bing"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def bufferoverun_key() -> str:
|
||||
return Core.api_keys()['bufferoverun']['key']
|
||||
return Core.api_keys()["bufferoverun"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def censys_key() -> tuple:
|
||||
return Core.api_keys()['censys']['id'], Core.api_keys()['censys']['secret']
|
||||
return Core.api_keys()["censys"]["id"], Core.api_keys()["censys"]["secret"]
|
||||
|
||||
@staticmethod
|
||||
def criminalip_key() -> str:
|
||||
return Core.api_keys()['criminalip']['key']
|
||||
return Core.api_keys()["criminalip"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def fullhunt_key() -> str:
|
||||
return Core.api_keys()['fullhunt']['key']
|
||||
return Core.api_keys()["fullhunt"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def github_key() -> str:
|
||||
return Core.api_keys()['github']['key']
|
||||
return Core.api_keys()["github"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def hunter_key() -> str:
|
||||
return Core.api_keys()['hunter']['key']
|
||||
return Core.api_keys()["hunter"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def hunterhow_key() -> str:
|
||||
return Core.api_keys()['hunterhow']['key']
|
||||
return Core.api_keys()["hunterhow"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def intelx_key() -> str:
|
||||
return Core.api_keys()['intelx']['key']
|
||||
return Core.api_keys()["intelx"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def netlas_key() -> str:
|
||||
return Core.api_keys()['netlas']['key']
|
||||
return Core.api_keys()["netlas"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def pentest_tools_key() -> str:
|
||||
return Core.api_keys()['pentestTools']['key']
|
||||
return Core.api_keys()["pentestTools"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def onyphe_key() -> str:
|
||||
return Core.api_keys()['onyphe']['key']
|
||||
return Core.api_keys()["onyphe"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def projectdiscovery_key() -> str:
|
||||
return Core.api_keys()['projectDiscovery']['key']
|
||||
return Core.api_keys()["projectDiscovery"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def rocketreach_key() -> str:
|
||||
return Core.api_keys()['rocketreach']['key']
|
||||
return Core.api_keys()["rocketreach"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def security_trails_key() -> str:
|
||||
return Core.api_keys()['securityTrails']['key']
|
||||
return Core.api_keys()["securityTrails"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def shodan_key() -> str:
|
||||
return Core.api_keys()['shodan']['key']
|
||||
return Core.api_keys()["shodan"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def zoomeye_key() -> str:
|
||||
return Core.api_keys()['zoomeye']['key']
|
||||
return Core.api_keys()["zoomeye"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def tomba_key() -> tuple[str, str]:
|
||||
return Core.api_keys()['tomba']['key'], Core.api_keys()['tomba']['secret']
|
||||
return Core.api_keys()["tomba"]["key"], Core.api_keys()["tomba"]["secret"]
|
||||
|
||||
@staticmethod
|
||||
def virustotal_key() -> str:
|
||||
return Core.api_keys()['virustotal']['key']
|
||||
return Core.api_keys()["virustotal"]["key"]
|
||||
|
||||
@staticmethod
|
||||
def proxy_list() -> list:
|
||||
keys = yaml.safe_load(Core._read_config('proxies.yaml'))
|
||||
http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
|
||||
keys = yaml.safe_load(Core._read_config("proxies.yaml"))
|
||||
http_list = (
|
||||
[f"http://{proxy}" for proxy in keys["http"]]
|
||||
if keys["http"] is not None
|
||||
else []
|
||||
)
|
||||
return http_list
|
||||
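
The accessors above imply the shape of the two YAML files without showing them: api-keys.yaml nests each service under apikeys with a key (plus a secret for censys and tomba), and proxies.yaml holds a plain http list. The shipped files are not reproduced here, so the snippet below only demonstrates the minimal structure those lookups need:

import yaml

sample_api_keys = """
apikeys:
  bing:
    key: ""
  shodan:
    key: ""
  tomba:
    key: ""
    secret: ""
"""

sample_proxies = """
http:
  - 127.0.0.1:8080
"""

keys = yaml.safe_load(sample_api_keys)["apikeys"]
assert keys["tomba"]["key"] == "" and "secret" in keys["tomba"]

proxies = yaml.safe_load(sample_proxies)
http_list = [f"http://{p}" for p in proxies["http"]] if proxies["http"] is not None else []
print(http_list)  # ['http://127.0.0.1:8080']
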
|
||||
@staticmethod
|
||||
def banner() -> None:
|
||||
print('*******************************************************************')
|
||||
print('* _ _ _ *')
|
||||
print(r'* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *')
|
||||
print("*******************************************************************")
|
||||
print("* _ _ _ *")
|
||||
print(r"* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *")
|
||||
print(r"* | __| _ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *")
|
||||
print(r'* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *')
|
||||
print(r'* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *')
|
||||
print('* *')
|
||||
print('* theHarvester {version}{filler}*'.format(version=version(), filler=' ' * (51 - len(version()))))
|
||||
print('* Coded by Christian Martorella *')
|
||||
print('* Edge-Security Research *')
|
||||
print('* cmartorella@edge-security.com *')
|
||||
print('* *')
|
||||
print('*******************************************************************')
|
||||
print(r"* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *")
|
||||
print(r"* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *")
|
||||
print("* *")
|
||||
print(
|
||||
"* theHarvester {version}{filler}*".format(
|
||||
version=version(), filler=" " * (51 - len(version()))
|
||||
)
|
||||
)
|
||||
print("* Coded by Christian Martorella *")
|
||||
print("* Edge-Security Research *")
|
||||
print("* cmartorella@edge-security.com *")
|
||||
print("* *")
|
||||
print("*******************************************************************")
|
||||
|
||||
@staticmethod
|
||||
def get_supportedengines() -> list[str | Any]:
|
||||
supportedengines = [
|
||||
'anubis',
|
||||
'baidu',
|
||||
'bevigil',
|
||||
'binaryedge',
|
||||
'bing',
|
||||
'bingapi',
|
||||
'bufferoverun',
|
||||
'brave',
|
||||
'censys',
|
||||
'certspotter',
|
||||
'criminalip',
|
||||
'crtsh',
|
||||
'dnsdumpster',
|
||||
'duckduckgo',
|
||||
'fullhunt',
|
||||
'github-code',
|
||||
'hackertarget',
|
||||
'hunter',
|
||||
'hunterhow',
|
||||
'intelx',
|
||||
'netlas',
|
||||
'onyphe',
|
||||
'otx',
|
||||
'pentesttools',
|
||||
'projectdiscovery',
|
||||
'rapiddns',
|
||||
'rocketreach',
|
||||
'securityTrails',
|
||||
'sitedossier',
|
||||
'subdomaincenter',
|
||||
'subdomainfinderc99',
|
||||
'threatminer',
|
||||
'tomba',
|
||||
'urlscan',
|
||||
'virustotal',
|
||||
'yahoo',
|
||||
'zoomeye',
|
||||
"anubis",
|
||||
"baidu",
|
||||
"bevigil",
|
||||
"binaryedge",
|
||||
"bing",
|
||||
"bingapi",
|
||||
"bufferoverun",
|
||||
"brave",
|
||||
"censys",
|
||||
"certspotter",
|
||||
"criminalip",
|
||||
"crtsh",
|
||||
"dnsdumpster",
|
||||
"duckduckgo",
|
||||
"fullhunt",
|
||||
"github-code",
|
||||
"hackertarget",
|
||||
"hunter",
|
||||
"hunterhow",
|
||||
"intelx",
|
||||
"netlas",
|
||||
"onyphe",
|
||||
"otx",
|
||||
"pentesttools",
|
||||
"projectdiscovery",
|
||||
"rapiddns",
|
||||
"rocketreach",
|
||||
"securityTrails",
|
||||
"sitedossier",
|
||||
"subdomaincenter",
|
||||
"subdomainfinderc99",
|
||||
"threatminer",
|
||||
"tomba",
|
||||
"urlscan",
|
||||
"virustotal",
|
||||
"yahoo",
|
||||
"zoomeye",
|
||||
]
|
||||
return supportedengines
|
||||
|
||||
|
@ -206,58 +214,58 @@ def get_user_agent() -> str:
|
|||
# Last updated 7/2/23
|
||||
# TODO use bs4 to auto parse user agents
|
||||
user_agents = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37',
|
||||
'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:114.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.37",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 YaBrowser/23.5.2.625 Yowser/2.5 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
|
||||
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Mobile Safari/537.36 Chrome-Lighthouse",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
]
|
||||
return random.choice(user_agents)
|
||||
|
||||
|
@ -270,85 +278,129 @@ async def post_fetch(
|
|||
cls,
|
||||
url,
|
||||
headers=None,
|
||||
data: str | dict[str, str] = '',
|
||||
params: str = '',
|
||||
data: str | dict[str, str] = "",
|
||||
params: str = "",
|
||||
json: bool = False,
|
||||
proxy: bool = False,
|
||||
):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
if len(headers) == 0:
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
timeout = aiohttp.ClientTimeout(total=720)
|
||||
# By default, the timeout is 5 minutes; changed here to 12 minutes
|
||||
# results are well worth the wait
|
||||
try:
|
||||
if proxy:
|
||||
proxy = random.choice(cls().proxy_list)
|
||||
if params != '':
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
|
||||
async with session.get(url, params=params, proxy=proxy) as response:
|
||||
if params != "":
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout
|
||||
) as session:
|
||||
async with session.get(
|
||||
url, params=params, proxy=proxy
|
||||
) as response:
|
||||
await asyncio.sleep(5)
|
||||
return await response.text() if json is False else await response.json()
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
else:
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout
|
||||
) as session:
|
||||
async with session.get(url, proxy=proxy) as response:
|
||||
await asyncio.sleep(5)
|
||||
return await response.text() if json is False else await response.json()
|
||||
elif params == '':
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
elif params == "":
|
||||
if isinstance(data, str):
|
||||
data = json_loader.loads(data)
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout
|
||||
) as session:
|
||||
async with session.post(url, data=data) as resp:
|
||||
await asyncio.sleep(3)
|
||||
return await resp.text() if json is False else await resp.json()
|
||||
else:
|
||||
if isinstance(data, str):
|
||||
data = json_loader.loads(data)
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout
|
||||
) as session:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.post(url, data=data, ssl=sslcontext, params=params) as resp:
|
||||
async with session.post(
|
||||
url, data=data, ssl=sslcontext, params=params
|
||||
) as resp:
|
||||
await asyncio.sleep(3)
|
||||
return await resp.text() if json is False else await resp.json()
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred in post_fetch: {e}')
|
||||
return ''
|
||||
print(f"An exception has occurred in post_fetch: {e}")
|
||||
return ""
|
||||
|
||||
@classmethod
|
||||
async def fetch(cls, session, url, params: Sized = '', json: bool = False, proxy: str = '') -> str | dict | list | bool:
|
||||
async def fetch(
|
||||
cls, session, url, params: Sized = "", json: bool = False, proxy: str = ""
|
||||
) -> str | dict | list | bool:
|
||||
# This fetch method solely focuses on get requests
|
||||
try:
|
||||
# Wrap in try except due to 0x89 png/jpg files
|
||||
# This fetch method solely focuses on get requests
|
||||
if proxy != '':
|
||||
if proxy != "":
|
||||
proxy = str(random.choice(cls().proxy_list))
|
||||
if len(params) != 0:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, ssl=sslcontext, params=params, proxy=proxy) as response:
|
||||
return await response.text() if json is False else await response.json()
|
||||
async with session.get(
|
||||
url, ssl=sslcontext, params=params, proxy=proxy
|
||||
) as response:
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
else:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, ssl=sslcontext, proxy=proxy) as response:
|
||||
async with session.get(
|
||||
url, ssl=sslcontext, proxy=proxy
|
||||
) as response:
|
||||
await asyncio.sleep(5)
|
||||
return await response.text() if json is False else await response.json()
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
|
||||
if len(params) != 0:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, ssl=sslcontext, params=params) as response:
|
||||
await asyncio.sleep(5)
|
||||
return await response.text() if json is False else await response.json()
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
|
||||
else:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, ssl=sslcontext) as response:
|
||||
await asyncio.sleep(5)
|
||||
return await response.text() if json is False else await response.json()
|
||||
return (
|
||||
await response.text()
|
||||
if json is False
|
||||
else await response.json()
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {e}')
|
||||
return ''
|
||||
print(f"An exception has occurred: {e}")
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any] | str:
|
||||
async def takeover_fetch(
|
||||
session, url: str, proxy: str = ""
|
||||
) -> tuple[Any, Any] | str:
|
||||
# This fetch method solely focuses on get requests
|
||||
try:
|
||||
# Wrap in try except due to 0x89 png/jpg files
|
||||
|
@ -356,10 +408,12 @@ async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any]
|
|||
# TODO determine if method for post requests is necessary
|
||||
# url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
|
||||
# Clean up urls with proper schemas
|
||||
if proxy != '':
|
||||
if 'https://' in url:
|
||||
if proxy != "":
|
||||
if "https://" in url:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, proxy=proxy, ssl=sslcontext) as response:
|
||||
async with session.get(
|
||||
url, proxy=proxy, ssl=sslcontext
|
||||
) as response:
|
||||
await asyncio.sleep(5)
|
||||
return url, await response.text()
|
||||
else:
|
||||
|
@ -367,7 +421,7 @@ async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any]
|
|||
await asyncio.sleep(5)
|
||||
return url, await response.text()
|
||||
else:
|
||||
if 'https://' in url:
|
||||
if "https://" in url:
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with session.get(url, ssl=sslcontext) as response:
|
||||
await asyncio.sleep(5)
|
||||
|
@ -377,15 +431,15 @@ async def takeover_fetch(session, url: str, proxy: str = '') -> tuple[Any, Any]
|
|||
await asyncio.sleep(5)
|
||||
return url, await response.text()
|
||||
except Exception as e:
|
||||
print(f'Takeover check error: {e}')
|
||||
return url, ''
|
||||
print(f"Takeover check error: {e}")
|
||||
return url, ""
|
||||
|
||||
@classmethod
|
||||
async def fetch_all(
|
||||
cls,
|
||||
urls,
|
||||
headers=None,
|
||||
params: Sized = '',
|
||||
params: Sized = "",
|
||||
json: bool = False,
|
||||
takeover: bool = False,
|
||||
proxy: bool = False,
|
||||
|
@ -395,18 +449,29 @@ async def fetch_all(
|
|||
headers = {}
|
||||
timeout = aiohttp.ClientTimeout(total=60)
|
||||
if len(headers) == 0:
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
headers = {"User-Agent": Core.get_user_agent()}
|
||||
if takeover:
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=aiohttp.ClientTimeout(total=15)
|
||||
) as session:
|
||||
if proxy:
|
||||
return await asyncio.gather(
|
||||
*[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls]
|
||||
*[
|
||||
AsyncFetcher.takeover_fetch(
|
||||
session, url, proxy=random.choice(cls().proxy_list)
|
||||
)
|
||||
for url in urls
|
||||
]
|
||||
)
|
||||
else:
|
||||
return await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
|
||||
return await asyncio.gather(
|
||||
*[AsyncFetcher.takeover_fetch(session, url) for url in urls]
|
||||
)
|
||||
|
||||
if len(params) == 0:
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout, max_field_size=13000) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout, max_field_size=13000
|
||||
) as session:
|
||||
if proxy:
|
||||
return await asyncio.gather(
|
||||
*[
|
||||
|
@ -420,10 +485,14 @@ async def fetch_all(
|
|||
]
|
||||
)
|
||||
else:
|
||||
return await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
|
||||
return await asyncio.gather(
|
||||
*[AsyncFetcher.fetch(session, url, json=json) for url in urls]
|
||||
)
|
||||
else:
|
||||
# Indicates the request has certain params
|
||||
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
headers=headers, timeout=timeout
|
||||
) as session:
|
||||
if proxy:
|
||||
return await asyncio.gather(
|
||||
*[
|
||||
|
@ -438,4 +507,9 @@ async def fetch_all(
|
|||
]
|
||||
)
|
||||
else:
|
||||
return await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
|
||||
return await asyncio.gather(
|
||||
*[
|
||||
AsyncFetcher.fetch(session, url, params, json)
|
||||
for url in urls
|
||||
]
|
||||
)
|
||||
|
|
|
@ -40,13 +40,13 @@ async def resolve_host(host, resolver) -> str:
|
|||
result = await resolver.gethostbyname(host, socket.AF_INET)
|
||||
addresses = result.addresses
|
||||
if addresses == [] or addresses is None or result is None:
|
||||
return f'{host}:'
|
||||
return f"{host}:"
|
||||
else:
|
||||
addresses = ','.join(map(str, list(sorted(set(addresses)))))
|
||||
addresses = ",".join(map(str, list(sorted(set(addresses)))))
|
||||
# addresses = list(sorted(addresses))
|
||||
return f'{host}:{addresses}'
|
||||
return f"{host}:{addresses}"
|
||||
except Exception:
|
||||
return f'{host}:'
|
||||
return f"{host}:"
|
||||
|
||||
# https://stackoverflow.com/questions/312443/how-do-i-split-a-list-into-equally-sized-chunks
|
||||
@staticmethod
|
||||
|
@ -57,7 +57,9 @@ def chunks(lst, n):
|
|||
|
||||
async def query_all(self, resolver, hosts) -> list[Any]:
|
||||
# TODO chunk list into 50 pieces regardless of IPs and subnets
|
||||
results = await asyncio.gather(*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts])
|
||||
results = await asyncio.gather(
|
||||
*[asyncio.create_task(self.resolve_host(host, resolver)) for host in hosts]
|
||||
)
|
||||
return results
|
||||
|
||||
async def check(self):
|
||||
|
@ -73,9 +75,9 @@ async def check(self):
|
|||
results = await self.query_all(resolver, chunk)
|
||||
all_results.update(results)
|
||||
for pair in results:
|
||||
host, addresses = pair.split(':')
|
||||
host, addresses = pair.split(":")
|
||||
self.realhosts.append(host)
|
||||
self.addresses.update({addr for addr in addresses.split(',')})
|
||||
self.addresses.update({addr for addr in addresses.split(",")})
|
||||
# address may be a list of ips
|
||||
# and do a set comprehension to remove duplicates
|
||||
self.realhosts.sort()
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
import aiosqlite
|
||||
|
||||
db_path = os.path.expanduser('~/.local/share/theHarvester')
|
||||
db_path = os.path.expanduser("~/.local/share/theHarvester")
|
||||
|
||||
if not os.path.isdir(db_path):
|
||||
os.makedirs(db_path)
|
||||
|
@ -13,9 +13,9 @@
|
|||
|
||||
class StashManager:
|
||||
def __init__(self) -> None:
|
||||
self.db = os.path.join(db_path, 'stash.sqlite')
|
||||
self.results = ''
|
||||
self.totalresults = ''
|
||||
self.db = os.path.join(db_path, "stash.sqlite")
|
||||
self.results = ""
|
||||
self.totalresults = ""
|
||||
self.latestscandomain: dict = {}
|
||||
self.domainscanhistory: list = []
|
||||
self.scanboarddata: dict = {}
|
||||
|
@ -26,7 +26,7 @@ def __init__(self) -> None:
|
|||
async def do_init(self) -> None:
|
||||
async with aiosqlite.connect(self.db) as db:
|
||||
await db.execute(
|
||||
'CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)'
|
||||
"CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)"
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
|
@ -39,7 +39,7 @@ async def store(self, domain, resource, res_type, source) -> None:
|
|||
try:
|
||||
async with aiosqlite.connect(self.db, timeout=30) as db:
|
||||
await db.execute(
|
||||
'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
|
||||
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
|
||||
(self.domain, self.resource, self.type, self.date, self.source),
|
||||
)
|
||||
await db.commit()
|
||||
|
@ -52,11 +52,13 @@ async def store_all(self, domain, all, res_type, source) -> None:
|
|||
self.type = res_type
|
||||
self.source = source
|
||||
self.date = datetime.date.today()
|
||||
master_list = [(self.domain, x, self.type, self.date, self.source) for x in self.all]
|
||||
master_list = [
|
||||
(self.domain, x, self.type, self.date, self.source) for x in self.all
|
||||
]
|
||||
async with aiosqlite.connect(self.db, timeout=30) as db:
|
||||
try:
|
||||
await db.executemany(
|
||||
'INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)',
|
||||
"INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)",
|
||||
master_list,
|
||||
)
|
||||
await db.commit()
|
||||
|
@ -66,41 +68,43 @@ async def store_all(self, domain, all, res_type, source) -> None:
|
|||
async def generatedashboardcode(self, domain):
|
||||
try:
|
||||
# TODO refactor into generic method
|
||||
self.latestscandomain['domain'] = domain
|
||||
self.latestscandomain["domain"] = domain
|
||||
async with aiosqlite.connect(self.db, timeout=30) as conn:
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['host'] = data[0]
|
||||
self.latestscandomain["host"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['email'] = data[0]
|
||||
self.latestscandomain["email"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['ip'] = data[0]
|
||||
self.latestscandomain["ip"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['vhost'] = data[0]
|
||||
self.latestscandomain["vhost"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['shodan'] = data[0]
|
||||
cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
|
||||
self.latestscandomain["shodan"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['latestdate'] = data[0]
|
||||
self.latestscandomain["latestdate"] = data[0]
|
||||
latestdate = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
|
||||
|
@ -110,7 +114,7 @@ async def generatedashboardcode(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailshost = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailshost'] = scandetailshost
|
||||
self.latestscandomain["scandetailshost"] = scandetailshost
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
|
||||
(
|
||||
|
@ -119,7 +123,7 @@ async def generatedashboardcode(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsemail = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsemail'] = scandetailsemail
|
||||
self.latestscandomain["scandetailsemail"] = scandetailsemail
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
|
||||
(
|
||||
|
@ -128,7 +132,7 @@ async def generatedashboardcode(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsip = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsip'] = scandetailsip
|
||||
self.latestscandomain["scandetailsip"] = scandetailsip
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
|
||||
(
|
||||
|
@ -137,7 +141,7 @@ async def generatedashboardcode(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsvhost = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsvhost'] = scandetailsvhost
|
||||
self.latestscandomain["scandetailsvhost"] = scandetailsvhost
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
|
||||
(
|
||||
|
@ -146,12 +150,14 @@ async def generatedashboardcode(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsshodan = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsshodan'] = scandetailsshodan
|
||||
self.latestscandomain["scandetailsshodan"] = scandetailsshodan
|
||||
return self.latestscandomain
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
async def getlatestscanresults(self, domain, previousday: bool = False) -> Iterable[Row | str] | None:
|
||||
async def getlatestscanresults(
|
||||
self, domain, previousday: bool = False
|
||||
) -> Iterable[Row | str] | None:
|
||||
try:
|
||||
async with aiosqlite.connect(self.db, timeout=30) as conn:
|
||||
if previousday:
|
||||
|
@ -164,13 +170,15 @@ async def getlatestscanresults(self, domain, previousday: bool = False) -> Itera
|
|||
(domain,),
|
||||
)
|
||||
previousscandate = await cursor.fetchone()
|
||||
if not previousscandate: # When theHarvester runs first time/day, this query will return.
|
||||
if (
|
||||
not previousscandate
|
||||
): # When theHarvester runs first time/day, this query will return.
|
||||
self.previousscanresults = [
|
||||
'No results',
|
||||
'No results',
|
||||
'No results',
|
||||
'No results',
|
||||
'No results',
|
||||
"No results",
|
||||
"No results",
|
||||
"No results",
|
||||
"No results",
|
||||
"No results",
|
||||
]
|
||||
else:
|
||||
cursor = await conn.execute(
|
||||
|
@ -189,7 +197,9 @@ async def getlatestscanresults(self, domain, previousday: bool = False) -> Itera
|
|||
self.previousscanresults = list(results)
|
||||
return self.previousscanresults
|
||||
except Exception as e:
|
||||
print(f'Error in getting the previous scan results from the database: {e}')
|
||||
print(
|
||||
f"Error in getting the previous scan results from the database: {e}"
|
||||
)
|
||||
else:
|
||||
try:
|
||||
cursor = await conn.execute(
|
||||
|
@ -213,32 +223,46 @@ async def getlatestscanresults(self, domain, previousday: bool = False) -> Itera
|
|||
self.latestscanresults = list(results)
|
||||
return self.latestscanresults
|
||||
except Exception as e:
|
||||
print(f'Error in getting the latest scan results from the database: {e}')
|
||||
print(
|
||||
f"Error in getting the latest scan results from the database: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'Error connecting to theHarvester database: {e}')
|
||||
print(f"Error connecting to theHarvester database: {e}")
|
||||
return self.latestscanresults
|
||||
|
||||
async def getscanboarddata(self):
|
||||
try:
|
||||
async with aiosqlite.connect(self.db, timeout=30) as conn:
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="host"''')
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE type="host"'''
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['host'] = data[0]
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="email"''')
|
||||
self.scanboarddata["host"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE type="email"'''
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['email'] = data[0]
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="ip"''')
|
||||
self.scanboarddata["email"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE type="ip"'''
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['ip'] = data[0]
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="vhost"''')
|
||||
self.scanboarddata["ip"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE type="vhost"'''
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['vhost'] = data[0]
|
||||
cursor = await conn.execute('''SELECT COUNT(*) from results WHERE type="shodan"''')
|
||||
self.scanboarddata["vhost"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE type="shodan"'''
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['shodan'] = data[0]
|
||||
cursor = await conn.execute("""SELECT COUNT(DISTINCT(domain)) FROM results """)
|
||||
self.scanboarddata["shodan"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
"""SELECT COUNT(DISTINCT(domain)) FROM results """
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.scanboarddata['domains'] = data[0]
|
||||
self.scanboarddata["domains"] = data[0]
|
||||
return self.scanboarddata
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
@ -278,12 +302,12 @@ async def getscanhistorydomain(self, domain):
|
|||
)
|
||||
countshodan = await cursor.fetchone()
|
||||
results = {
|
||||
'date': str(date[0]),
|
||||
'hosts': str(counthost[0]),
|
||||
'email': str(countemail[0]),
|
||||
'ip': str(countip[0]),
|
||||
'vhost': str(countvhost[0]),
|
||||
'shodan': str(countshodan[0]),
|
||||
"date": str(date[0]),
|
||||
"hosts": str(counthost[0]),
|
||||
"email": str(countemail[0]),
|
||||
"ip": str(countip[0]),
|
||||
"vhost": str(countvhost[0]),
|
||||
"shodan": str(countshodan[0]),
|
||||
}
|
||||
self.domainscanhistory.append(results)
|
||||
return self.domainscanhistory
|
||||
|
@ -309,40 +333,42 @@ async def getpluginscanstatistics(self) -> Iterable[Row] | None:
|
|||
async def latestscanchartdata(self, domain):
|
||||
try:
|
||||
async with aiosqlite.connect(self.db, timeout=30) as conn:
|
||||
self.latestscandomain['domain'] = domain
|
||||
self.latestscandomain["domain"] = domain
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="host"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['host'] = data[0]
|
||||
self.latestscandomain["host"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="email"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['email'] = data[0]
|
||||
self.latestscandomain["email"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['ip'] = data[0]
|
||||
self.latestscandomain["ip"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['vhost'] = data[0]
|
||||
self.latestscandomain["vhost"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''',
|
||||
(domain,),
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['shodan'] = data[0]
|
||||
cursor = await conn.execute("""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,))
|
||||
self.latestscandomain["shodan"] = data[0]
|
||||
cursor = await conn.execute(
|
||||
"""SELECT MAX(find_date) FROM results WHERE domain=?""", (domain,)
|
||||
)
|
||||
data = await cursor.fetchone()
|
||||
self.latestscandomain['latestdate'] = data[0]
|
||||
self.latestscandomain["latestdate"] = data[0]
|
||||
latestdate = data[0]
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''',
|
||||
|
@ -352,7 +378,7 @@ async def latestscanchartdata(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailshost = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailshost'] = scandetailshost
|
||||
self.latestscandomain["scandetailshost"] = scandetailshost
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''',
|
||||
(
|
||||
|
@ -361,7 +387,7 @@ async def latestscanchartdata(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsemail = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsemail'] = scandetailsemail
|
||||
self.latestscandomain["scandetailsemail"] = scandetailsemail
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''',
|
||||
(
|
||||
|
@ -370,7 +396,7 @@ async def latestscanchartdata(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsip = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsip'] = scandetailsip
|
||||
self.latestscandomain["scandetailsip"] = scandetailsip
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''',
|
||||
(
|
||||
|
@ -379,7 +405,7 @@ async def latestscanchartdata(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsvhost = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsvhost'] = scandetailsvhost
|
||||
self.latestscandomain["scandetailsvhost"] = scandetailsvhost
|
||||
cursor = await conn.execute(
|
||||
'''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''',
|
||||
(
|
||||
|
@ -388,7 +414,7 @@ async def latestscanchartdata(self, domain):
|
|||
),
|
||||
)
|
||||
scandetailsshodan = await cursor.fetchall()
|
||||
self.latestscandomain['scandetailsshodan'] = scandetailsshodan
|
||||
self.latestscandomain["scandetailsshodan"] = scandetailsshodan
|
||||
return self.latestscandomain
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
VERSION = '4.6.0'
|
||||
VERSION = "4.6.0"
|
||||
|
||||
|
||||
def version() -> str:
|
||||
|
|
|
@ -10,17 +10,17 @@ async def parse_dictionaries(self, results: dict) -> tuple:
|
|||
:return: tuple of emails and hosts
|
||||
"""
|
||||
if results is not None:
|
||||
for dictionary in results['selectors']:
|
||||
field = dictionary['selectorvalue']
|
||||
if '@' in field:
|
||||
for dictionary in results["selectors"]:
|
||||
field = dictionary["selectorvalue"]
|
||||
if "@" in field:
|
||||
self.emails.add(field)
|
||||
else:
|
||||
field = str(field)
|
||||
if 'http' in field or 'https' in field:
|
||||
if field[:5] == 'https':
|
||||
if "http" in field or "https" in field:
|
||||
if field[:5] == "https":
|
||||
field = field[8:]
|
||||
else:
|
||||
field = field[7:]
|
||||
self.hosts.add(field.replace(')', '').replace(',', ''))
|
||||
self.hosts.add(field.replace(")", "").replace(",", ""))
|
||||
return self.emails, self.hosts
|
||||
return None, None
|
||||
|
|
|
@ -10,49 +10,61 @@ def __init__(self, results, word) -> None:
|
|||
|
||||
async def genericClean(self) -> None:
|
||||
self.results = (
|
||||
self.results.replace('<em>', '')
|
||||
.replace('<b>', '')
|
||||
.replace('</b>', '')
|
||||
.replace('</em>', '')
|
||||
.replace('%3a', '')
|
||||
.replace('<strong>', '')
|
||||
.replace('</strong>', '')
|
||||
.replace('<wbr>', '')
|
||||
.replace('</wbr>', '')
|
||||
self.results.replace("<em>", "")
|
||||
.replace("<b>", "")
|
||||
.replace("</b>", "")
|
||||
.replace("</em>", "")
|
||||
.replace("%3a", "")
|
||||
.replace("<strong>", "")
|
||||
.replace("</strong>", "")
|
||||
.replace("<wbr>", "")
|
||||
.replace("</wbr>", "")
|
||||
)
|
||||
|
||||
for search in (
|
||||
'<',
|
||||
'>',
|
||||
':',
|
||||
'=',
|
||||
';',
|
||||
'&',
|
||||
'%3A',
|
||||
'%3D',
|
||||
'%3C',
|
||||
'%2f',
|
||||
'/',
|
||||
'\\',
|
||||
"<",
|
||||
">",
|
||||
":",
|
||||
"=",
|
||||
";",
|
||||
"&",
|
||||
"%3A",
|
||||
"%3D",
|
||||
"%3C",
|
||||
"%2f",
|
||||
"/",
|
||||
"\\",
|
||||
):
|
||||
self.results = self.results.replace(search, ' ')
|
||||
self.results = self.results.replace(search, " ")
|
||||
|
||||
async def urlClean(self) -> None:
|
||||
self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
|
||||
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
|
||||
self.results = self.results.replace(search, ' ')
|
||||
self.results = (
|
||||
self.results.replace("<em>", "")
|
||||
.replace("</em>", "")
|
||||
.replace("%2f", "")
|
||||
.replace("%3a", "")
|
||||
)
|
||||
for search in ("<", ">", ":", "=", ";", "&", "%3A", "%3D", "%3C"):
|
||||
self.results = self.results.replace(search, " ")
|
||||
|
||||
async def emails(self):
|
||||
await self.genericClean()
|
||||
# Local part is required, charset is flexible.
|
||||
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
|
||||
reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', ''))
|
||||
reg_emails = re.compile(
|
||||
r"[a-zA-Z0-9.\-_+#~!$&\',;=:]+"
|
||||
+ "@"
|
||||
+ "[a-zA-Z0-9.-]*"
|
||||
+ self.word.replace("www.", "")
|
||||
)
|
||||
self.temp = reg_emails.findall(self.results)
|
||||
emails = await self.unique()
|
||||
true_emails = {
|
||||
str(email)[1:].lower().strip()
|
||||
if len(str(email)) > 1 and str(email)[0] == '.'
|
||||
else len(str(email)) > 1 and str(email).lower().strip()
|
||||
(
|
||||
str(email)[1:].lower().strip()
|
||||
if len(str(email)) > 1 and str(email)[0] == "."
|
||||
else len(str(email)) > 1 and str(email).lower().strip()
|
||||
)
|
||||
for email in emails
|
||||
}
|
||||
# if email starts with dot shift email string and make sure all emails are lowercase
|
||||
|
@ -64,7 +76,11 @@ async def fileurls(self, file) -> list:
|
|||
self.temp = reg_urls.findall(self.results)
|
||||
allurls = await self.unique()
|
||||
for iteration in allurls:
|
||||
if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
|
||||
if (
|
||||
iteration.count("webcache")
|
||||
or iteration.count("google.com")
|
||||
or iteration.count("search?hl")
|
||||
):
|
||||
pass
|
||||
else:
|
||||
urls.append(iteration)
|
||||
|
@ -74,11 +90,11 @@ async def hostnames(self):
|
|||
# should check both www. and not www.
|
||||
hostnames = []
|
||||
await self.genericClean()
|
||||
reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word)
|
||||
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word)
|
||||
first_hostnames = reg_hosts.findall(self.results)
|
||||
hostnames.extend(first_hostnames)
|
||||
# TODO determine if necessary below or if only pass through is fine
|
||||
reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', ''))
|
||||
reg_hosts = re.compile(r"[a-zA-Z0-9.-]*\." + self.word.replace("www.", ""))
|
||||
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.' + 'www.' + self.word)
|
||||
# reg_hosts = re.compile(r'www\.[a-zA-Z0-9.-]*\.(?:' + 'www.' + self.word + ')?')
|
||||
second_hostnames = reg_hosts.findall(self.results)
|
||||
|
@ -86,29 +102,31 @@ async def hostnames(self):
|
|||
return list(set(hostnames))
|
||||
|
||||
async def hostnames_all(self):
|
||||
reg_hosts = re.compile('<cite>(.*?)</cite>')
|
||||
reg_hosts = re.compile("<cite>(.*?)</cite>")
|
||||
temp = reg_hosts.findall(self.results)
|
||||
for iteration in temp:
|
||||
if iteration.count(':'):
|
||||
res = iteration.split(':')[1].split('/')[2]
|
||||
if iteration.count(":"):
|
||||
res = iteration.split(":")[1].split("/")[2]
|
||||
else:
|
||||
res = iteration.split('/')[0]
|
||||
res = iteration.split("/")[0]
|
||||
self.temp.append(res)
|
||||
hostnames = await self.unique()
|
||||
return hostnames
|
||||
|
||||
async def set(self):
|
||||
reg_sets = re.compile(r'>[a-zA-Z\d]*</a></font>')
|
||||
reg_sets = re.compile(r">[a-zA-Z\d]*</a></font>")
|
||||
self.temp = reg_sets.findall(self.results)
|
||||
sets = []
|
||||
for iteration in self.temp:
|
||||
delete = iteration.replace('>', '')
|
||||
delete = delete.replace('</a</font', '')
|
||||
delete = iteration.replace(">", "")
|
||||
delete = delete.replace("</a</font", "")
|
||||
sets.append(delete)
|
||||
return sets
|
||||
|
||||
async def urls(self) -> Set[str]:
|
||||
found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*', self.results)
|
||||
found = re.finditer(
|
||||
r"(http|https)://(www\.)?trello.com/([a-zA-Z\d\-_\.]+/?)*", self.results
|
||||
)
|
||||
urls = {match.group().strip() for match in found}
|
||||
return urls
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ async def parse_text(self) -> tuple[set, set]:
|
|||
line = self.text[index].strip()
|
||||
if '"ip":' in line:
|
||||
# Extract IP.
|
||||
ip = ''
|
||||
ip = ""
|
||||
for ch in line[7:]:
|
||||
if ch == '"':
|
||||
break
|
||||
|
@ -25,13 +25,17 @@ async def parse_text(self) -> tuple[set, set]:
|
|||
sub_domain_flag = 1
|
||||
continue
|
||||
elif sub_domain_flag > 0:
|
||||
if ']' in line:
|
||||
if "]" in line:
|
||||
sub_domain_flag = 0
|
||||
else:
|
||||
if 'www' in self.word:
|
||||
self.word = str(self.word).replace('www.', '').replace('www', '')
|
||||
if "www" in self.word:
|
||||
self.word = (
|
||||
str(self.word).replace("www.", "").replace("www", "")
|
||||
)
|
||||
# Remove www from word if entered
|
||||
self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word)
|
||||
self.hostnames.add(
|
||||
str(line).replace('"', "").replace(",", "") + "." + self.word
|
||||
)
|
||||
else:
|
||||
continue
|
||||
return self.ips, self.hostnames
|
||||
|
|
|
@ -6,35 +6,35 @@
|
|||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'-H',
|
||||
'--host',
|
||||
default='127.0.0.1',
|
||||
help='IP address to listen on default is 127.0.0.1',
|
||||
"-H",
|
||||
"--host",
|
||||
default="127.0.0.1",
|
||||
help="IP address to listen on default is 127.0.0.1",
|
||||
)
|
||||
parser.add_argument(
|
||||
'-p',
|
||||
'--port',
|
||||
"-p",
|
||||
"--port",
|
||||
default=5000,
|
||||
help='Port to bind the web server to, default is 5000',
|
||||
help="Port to bind the web server to, default is 5000",
|
||||
type=int,
|
||||
)
|
||||
parser.add_argument(
|
||||
'-l',
|
||||
'--log-level',
|
||||
default='info',
|
||||
help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set',
|
||||
"-l",
|
||||
"--log-level",
|
||||
default="info",
|
||||
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
|
||||
)
|
||||
parser.add_argument(
|
||||
'-r',
|
||||
'--reload',
|
||||
"-r",
|
||||
"--reload",
|
||||
default=False,
|
||||
help='Enable automatic reload used during development of the api',
|
||||
action='store_true',
|
||||
help="Enable automatic reload used during development of the api",
|
||||
action="store_true",
|
||||
)
|
||||
|
||||
args: argparse.Namespace = parser.parse_args()
|
||||
uvicorn.run(
|
||||
'theHarvester.lib.api.api:app',
|
||||
"theHarvester.lib.api.api:app",
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
log_level=args.log_level,
|
||||
|
@ -42,5 +42,5 @@ def main():
|
|||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -17,21 +17,27 @@
|
|||
class ScreenShotter:
|
||||
def __init__(self, output) -> None:
|
||||
self.output = output
|
||||
self.slash = '\\' if 'win' in sys.platform else '/'
|
||||
self.slash = '' if (self.output[-1] == '\\' or self.output[-1] == '/') else self.slash
|
||||
self.slash = "\\" if "win" in sys.platform else "/"
|
||||
self.slash = (
|
||||
"" if (self.output[-1] == "\\" or self.output[-1] == "/") else self.slash
|
||||
)
|
||||
|
||||
def verify_path(self) -> bool:
|
||||
try:
|
||||
if not os.path.isdir(self.output):
|
||||
answer = input('[+] The output path you have entered does not exist would you like to create it (y/n): ')
|
||||
if answer.lower() == 'yes' or answer.lower() == 'y':
|
||||
answer = input(
|
||||
"[+] The output path you have entered does not exist would you like to create it (y/n): "
|
||||
)
|
||||
if answer.lower() == "yes" or answer.lower() == "y":
|
||||
os.makedirs(self.output)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"An exception has occurred while attempting to verify output path's existence: {e}")
|
||||
print(
|
||||
f"An exception has occurred while attempting to verify output path's existence: {e}"
|
||||
)
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
|
@ -41,25 +47,29 @@ async def verify_installation() -> None:
|
|||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch()
|
||||
await browser.close()
|
||||
print('Playwright and Chromium are successfully installed.')
|
||||
print("Playwright and Chromium are successfully installed.")
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred while attempting to verify installation: {e}')
|
||||
print(
|
||||
f"An exception has occurred while attempting to verify installation: {e}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def chunk_list(items: Collection, chunk_size: int) -> list:
|
||||
# Based off of: https://github.com/apache/incubator-sdap-ingester
|
||||
return [list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)]
|
||||
return [
|
||||
list(items)[i : i + chunk_size] for i in range(0, len(items), chunk_size)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
async def visit(url: str) -> tuple[str, str]:
|
||||
try:
|
||||
timeout = aiohttp.ClientTimeout(total=35)
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
|
||||
'Chrome/122.0.0.0 Safari/537.36'
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/122.0.0.0 Safari/537.36"
|
||||
}
|
||||
url = f'http://{url}' if not url.startswith('http') else url
|
||||
url = url.replace('www.', '')
|
||||
url = f"http://{url}" if not url.startswith("http") else url
|
||||
url = url.replace("www.", "")
|
||||
sslcontext = ssl.create_default_context(cafile=certifi.where())
|
||||
async with aiohttp.ClientSession(
|
||||
timeout=timeout,
|
||||
|
@ -67,16 +77,16 @@ async def visit(url: str) -> tuple[str, str]:
|
|||
connector=aiohttp.TCPConnector(ssl=sslcontext),
|
||||
) as session:
|
||||
async with session.get(url, verify_ssl=False) as resp:
|
||||
text = await resp.text('UTF-8')
|
||||
return f'http://{url}' if not url.startswith('http') else url, text
|
||||
text = await resp.text("UTF-8")
|
||||
return f"http://{url}" if not url.startswith("http") else url, text
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred while attempting to visit {url} : {e}')
|
||||
return '', ''
|
||||
print(f"An exception has occurred while attempting to visit {url} : {e}")
|
||||
return "", ""
|
||||
|
||||
async def take_screenshot(self, url: str) -> tuple[str, ...]:
|
||||
url = f'http://{url}' if not url.startswith('http') else url
|
||||
url = url.replace('www.', '')
|
||||
print(f'Attempting to take a screenshot of: {url}')
|
||||
url = f"http://{url}" if not url.startswith("http") else url
|
||||
url = url.replace("www.", "")
|
||||
print(f"Attempting to take a screenshot of: {url}")
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(headless=True)
|
||||
# New browser context
|
||||
|
@ -90,8 +100,10 @@ async def take_screenshot(self, url: str) -> tuple[str, ...]:
|
|||
await page.goto(url, timeout=35000)
|
||||
await page.screenshot(path=path)
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred attempting to screenshot: {url} : {e}')
|
||||
path = ''
|
||||
print(
|
||||
f"An exception has occurred attempting to screenshot: {url} : {e}"
|
||||
)
|
||||
path = ""
|
||||
finally:
|
||||
await page.close()
|
||||
await context.close()
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
def main():
|
||||
platform = sys.platform
|
||||
if platform == 'win32':
|
||||
if platform == "win32":
|
||||
# Required or things will break if trying to take screenshots
|
||||
import multiprocessing
|
||||
|
||||
|
@ -14,6 +14,7 @@ def main():
|
|||
try:
|
||||
# See if we have winloop as a performance enhancement on windows
|
||||
import winloop
|
||||
|
||||
asyncio.DefaultEventLoopPolicy = winloop.EventLoopPolicy
|
||||
except ModuleNotFoundError:
|
||||
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
|
||||
|
@ -22,9 +23,9 @@ def main():
|
|||
|
||||
uvloop.install()
|
||||
|
||||
if 'linux' in platform:
|
||||
if "linux" in platform:
|
||||
import aiomultiprocess
|
||||
|
||||
# As we are not using Windows, we can change the spawn method to fork for greater performance
|
||||
aiomultiprocess.set_context('fork')
|
||||
aiomultiprocess.set_context("fork")
|
||||
asyncio.run(__main__.entry_point())
|
||||
|
|
Loading…
Reference in a new issue