mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 00:06:30 +08:00
Merge branch 'dev' into dev
This commit is contained in:
commit
40eaadfd2b
8
.github/workflows/theHarvester.yml
vendored
8
.github/workflows/theHarvester.yml
vendored
|
@ -35,10 +35,6 @@ jobs:
|
|||
run: |
|
||||
python theHarvester.py -d metasploit.com -b bing
|
||||
|
||||
- name: Run theHarvester module censys
|
||||
run: |
|
||||
python theHarvester.py -d metasploit.com -b censys
|
||||
|
||||
- name: Run theHarvester module crtsh
|
||||
run: |
|
||||
python theHarvester.py -d metasploit.com -b crtsh
|
||||
|
@ -112,6 +108,6 @@ jobs:
|
|||
- name: Test with pytest
|
||||
run: |
|
||||
pytest
|
||||
# - name: Check static type checking with mypy
|
||||
# - name: Static type checking with mypy
|
||||
# run: |
|
||||
# mypy *.py
|
||||
# mypy --pretty *.py
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -9,4 +9,4 @@ debug_results.txt
|
|||
tests/myparser.py
|
||||
venv
|
||||
.mypy_cache
|
||||
.pytest_cache
|
||||
.pytest_cache
|
|
@ -14,10 +14,10 @@ before_install:
|
|||
install:
|
||||
- python setup.py test
|
||||
script:
|
||||
- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,otx,intelx,threatcrowd,trello,twitter,virustotal,yahoo -l 200
|
||||
- python theHarvester.py -d metasploit.com -b baidu,bing,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,otx,intelx,threatcrowd,trello,twitter,virustotal,yahoo -l 200
|
||||
- pytest
|
||||
- flake8 . --count --show-source --statistics
|
||||
#- mypy *.py
|
||||
#- mypy --pretty *.py
|
||||
notifications:
|
||||
email: false
|
||||
slack:
|
||||
|
|
11
README.md
11
README.md
|
@ -19,8 +19,6 @@ Passive:
|
|||
|
||||
* bingapi: Microsoft search engine, through the API (Requires API key, see below.)
|
||||
|
||||
* censys: Censys.io search engine - www.censys.io
|
||||
|
||||
* crtsh: Comodo Certificate search - www.crt.sh
|
||||
|
||||
* dnsdumpster: DNSdumpster search engine - dnsdumpster.com
|
||||
|
@ -51,6 +49,8 @@ Passive:
|
|||
* shodan: Shodan search engine, will search for ports and banners from discovered<br>
|
||||
hosts - www.shodanhq.com
|
||||
|
||||
* Spyse: Web research tools for professionals (Requires an API key) - https://spyse.com/
|
||||
|
||||
* threatcrowd: Open source threat intelligence - www.threatcrowd.org
|
||||
|
||||
* trello: Search trello boards (Uses Google search.)
|
||||
|
@ -79,6 +79,7 @@ Add your keys to api-keys.yaml
|
|||
* intelx
|
||||
* securityTrails
|
||||
* shodan
|
||||
* spyse
|
||||
|
||||
Dependencies:
|
||||
-------------
|
||||
|
@ -89,14 +90,16 @@ Dependencies:
|
|||
Comments, bugs, or requests?
|
||||
----------------------------
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/laramies.svg?style=social&label=Follow)](https://twitter.com/laramies) Christian Martorella @laramies
|
||||
* cmartorella@edge-security.com
|
||||
cmartorella@edge-security.com
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/NotoriousRebel1.svg?style=social&label=Follow)](https://twitter.com/NotoriousRebel1) Matthew Brown @NotoriousRebel1
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/jay_townsend1.svg?style=social&label=Follow)](https://twitter.com/jay_townsend1) Jay "L1ghtn1ng" Townsend @jay_townsend1
|
||||
|
||||
Main contributors:
|
||||
------------------
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/NotoriousRebel1.svg?style=social&label=Follow)](https://twitter.com/NotoriousRebel1) Matthew Brown @NotoriousRebel1
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/jay_townsend1.svg?style=social&label=Follow)](https://twitter.com/jay_townsend1) Jay "L1ghtn1ng" Townsend @jay_townsend1
|
||||
* [![LinkedIn](https://static.licdn.com/scds/common/u/img/webpromo/btn_viewmy_160x25.png)](https://www.linkedin.com/in/janoszold/) Janos Zold
|
||||
* [![Twitter Follow](https://img.shields.io/twitter/follow/discoverscripts.svg?style=social&label=Follow)](https://twitter.com/discoverscripts) Lee Baird @discoverscripts
|
||||
* [![LinkedIn](https://static.licdn.com/scds/common/u/img/webpromo/btn_viewmy_160x25.png)](https://www.linkedin.com/in/janoszold/) Janos Zold
|
||||
|
||||
Thanks:
|
||||
-------
|
||||
|
|
|
@ -16,3 +16,6 @@ apikeys:
|
|||
|
||||
shodan:
|
||||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||
|
||||
spyse:
|
||||
key:
|
3
mypy.ini
3
mypy.ini
|
@ -1,3 +1,4 @@
|
|||
[mypy]
|
||||
ignore_missing_imports = True
|
||||
show_traceback = True
|
||||
show_traceback = True
|
||||
show_error_codes = True
|
|
@ -1,14 +1,13 @@
|
|||
aiodns==2.0.0
|
||||
beautifulsoup4==4.8.0
|
||||
censys==0.0.8
|
||||
dnspython==1.16.0
|
||||
flake8==3.7.8
|
||||
grequests==0.4.0
|
||||
mypy==0.720
|
||||
mypy==0.730
|
||||
netaddr==0.7.19
|
||||
plotly==4.1.1
|
||||
pytest==5.1.3
|
||||
pytest==5.2.0
|
||||
PyYaml==5.1.2
|
||||
requests==2.22.0
|
||||
shodan==1.17.0
|
||||
shodan==1.19.0
|
||||
texttable==1.6.2
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
[flake8]
|
||||
ignore = E501, F405, F403, E402
|
||||
exclude = theHarvester/discovery/IPy.py,theHarvester/discovery/s3_scanner.py
|
||||
ignore = E501, F405, F403, E402
|
|
@ -32,10 +32,10 @@ def start():
|
|||
parser.add_argument('-n', '--dns-lookup', help='enable DNS server lookup, default False', default=False, action='store_true')
|
||||
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
|
||||
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, crtsh, dnsdumpster,
|
||||
dogpile, duckduckgo, github-code, google,
|
||||
hunter, intelx,
|
||||
linkedin, linkedin_links, netcraft, otx, securityTrails, threatcrowd,
|
||||
linkedin, linkedin_links, netcraft, otx, securityTrails, spyse, threatcrowd,
|
||||
trello, twitter, vhost, virustotal, yahoo''')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
@ -112,19 +112,6 @@ def start():
|
|||
else:
|
||||
pass
|
||||
|
||||
elif engineitem == 'censys':
|
||||
print('\033[94m[*] Searching Censys. \033[0m')
|
||||
from theHarvester.discovery import censys
|
||||
# Import locally or won't work
|
||||
censys_search = censys.SearchCensys(word, limit)
|
||||
censys_search.process()
|
||||
all_ip = censys_search.get_ipaddresses()
|
||||
hosts = filter(censys_search.get_hostnames())
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'censys')
|
||||
db.store_all(word, all_ip, 'ip', 'censys')
|
||||
|
||||
elif engineitem == 'crtsh':
|
||||
try:
|
||||
print('\033[94m[*] Searching CRT.sh. \033[0m')
|
||||
|
@ -356,6 +343,22 @@ def start():
|
|||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'suip')
|
||||
|
||||
elif engineitem == 'spyse':
|
||||
print('\033[94m[*] Searching Spyse. \033[0m')
|
||||
from theHarvester.discovery import spyse
|
||||
try:
|
||||
spysesearch_search = spyse.SearchSpyse(word)
|
||||
spysesearch_search.process()
|
||||
hosts = filter(spysesearch_search.get_hostnames())
|
||||
all_hosts.extend(list(hosts))
|
||||
# ips = filter(spysesearch_search.get_ips())
|
||||
# all_ip.extend(list(ips))
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'spyse')
|
||||
# db.store_all(word, all_ip, 'ip', 'spyse')
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
@ -615,8 +618,7 @@ def start():
|
|||
|
||||
# Here we need to add explosion mode.
|
||||
# We have to take out the TLDs to do this.
|
||||
recursion = False
|
||||
if recursion:
|
||||
if args.dns_tld is not False:
|
||||
counter = 0
|
||||
for word in vhost:
|
||||
search = googlesearch.SearchGoogle(word, limit, counter)
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,5 @@
|
|||
__all__ = ['baidusearch',
|
||||
'bingsearch',
|
||||
'censys',
|
||||
'crtsh',
|
||||
'dnssearch',
|
||||
'dogpilesearch',
|
||||
|
@ -16,6 +15,7 @@
|
|||
'port_scanner',
|
||||
'securitytrailssearch',
|
||||
'shodansearch',
|
||||
'spyse',
|
||||
'takeover',
|
||||
'threatcrowd',
|
||||
'trello',
|
||||
|
|
|
@ -1,133 +0,0 @@
|
|||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import censysparser
|
||||
import requests
|
||||
|
||||
# TODO rewrite this module to use the censys api as the current way does not work
|
||||
# TODO And not really that maintainable as it currently stands
|
||||
|
||||
|
||||
class SearchCensys:
    """Download Censys search-result pages for a query and expose what was found.

    The IPv4 and certificate search pages of ``self.server`` are fetched with
    plain HTTP GETs and their HTML is accumulated in ``total_resultshosts`` and
    ``total_resultscerts``; ``censysparser.Parser`` is then used to pull page
    counts, IP addresses and hostnames out of that HTML.
    """

    def __init__(self, word, limit):
        """
        :param word: search term (the target domain).
        :param limit: maximum number of results wanted; at 25 results per page
            this bounds how many pages ``process()`` downloads.
        """
        self.word = word
        self.urlhost = ""  # URL fetched by do_searchhosturl()
        self.urlcert = ""  # URL fetched by do_searchcertificateurl()
        self.page = ""  # page number most recently requested, as a string
        self.resultshosts = ""
        self.resultcerts = ""
        self.total_resultshosts = ""  # concatenated HTML of every IPv4 page
        self.total_resultscerts = ""  # concatenated HTML of every certificate page
        self.server = 'censys.io'
        self.ips = []
        self.hostnamesall = []
        self.limit = limit

    def do_searchhosturl(self):
        """GET ``self.urlhost`` and append the body to ``total_resultshosts``.

        Errors are printed and swallowed so one failed page does not abort the run.
        """
        try:
            headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlhost}
            responsehost = requests.get(self.urlhost, headers=headers)
            self.resultshosts = responsehost.text
            self.total_resultshosts += self.resultshosts
        except Exception as e:
            print(f'Error occurred in the Censys module downloading pages from Censys - IP search: {e}')

    def do_searchcertificateurl(self):
        """GET ``self.urlcert`` and append the body to ``total_resultscerts``.

        Errors are printed and swallowed so one failed page does not abort the run.
        """
        try:
            headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlcert}
            responsecert = requests.get(self.urlcert, headers=headers)
            self.resultcerts = responsecert.text
            self.total_resultscerts += self.resultcerts
        except Exception as e:
            print(f'Error occurred in the Censys module downloading pages from Censys - certificates search: {e}')

    def _search_url(self, section, page):
        """Build the search URL for *section* ('ipv4' or 'certificates') and *page*."""
        return 'https://' + self.server + '/' + section + '/_search?q=' + str(self.word) + '&page=' + str(page)

    def process(self):
        """Fetch page 1 of both searches, then the remaining pages up to the limit.

        The number of pages downloaded per search is the smaller of the page
        count reported on page 1 and ``limit / 25``.
        """
        try:
            self.urlhost = self._search_url('ipv4', 1)
            self.urlcert = self._search_url('certificates', 1)
            self.do_searchhosturl()
            self.do_searchcertificateurl()

            # The parser is built from the page-1 HTML; it only supplies page counts here.
            pages = censysparser.Parser(self)
            pagestosearch = int(self.limit / 25)  # 25 results/page

            # A parser failure yields None; treat that as "no further pages".
            lasthostpage = min(pages.search_totalpageshosts() or 0, pagestosearch)
            for counter in range(2, lasthostpage + 1):
                try:
                    self.page = str(counter)
                    self.urlhost = self._search_url('ipv4', self.page)
                    print('\tSearching IP results page ' + self.page + '.')
                    self.do_searchhosturl()
                except Exception as e:
                    print(f'Error occurred in the Censys module requesting the pages: {e}')

            lastcertpage = min(pages.search_totalpagescerts() or 0, pagestosearch)
            for counter in range(2, lastcertpage + 1):
                try:
                    self.page = str(counter)
                    # Must update urlcert (not urlhost): that is the URL
                    # do_searchcertificateurl() actually downloads.
                    self.urlcert = self._search_url('certificates', self.page)
                    print(f'\tSearching certificates results page {self.page}.')
                    self.do_searchcertificateurl()
                except Exception as e:
                    print(f'Error occurred in the Censys module requesting the pages: {e}')

        except Exception as e:
            print(f'Error occurred in the main Censys module: {e}')

    def get_hostnames(self):
        """Return hostnames containing ``word`` from reverse DNS and certificates.

        The parsed IPs are posted to the ``/ipv4/getdns`` endpoint and the
        returned names are merged with the hostnames parsed from the
        certificate pages.

        :returns: ``self.hostnamesall``; on error the failure is printed and
            whatever was collected before it is returned.
        """
        try:
            ips = self.get_ipaddresses()
            headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlcert}
            response = requests.post('https://censys.io/ipv4/getdns', json={'ips': ips}, headers=headers)
            domainsfromcensys = [jdata for jdata in response.json().values() if jdata is not None]
            self.hostnamesall.extend(s for s in domainsfromcensys if str(self.word) in s)
            hostnamesfromcerts = censysparser.Parser(self)
            # The parser may hand back None after an internal error.
            self.hostnamesall.extend(hostnamesfromcerts.search_hostnamesfromcerts() or [])
        except Exception as e:
            print(f'Error occurred in the Censys module - hostname search: {e}')
        return self.hostnamesall

    def get_ipaddresses(self):
        """Return the IP addresses parsed from the downloaded IPv4 result pages.

        :returns: a list; empty when parsing fails (the error is printed).
        """
        try:
            ips = censysparser.Parser(self)
            self.ips = ips.search_ipaddresses() or []
        except Exception as e:
            print(f'Error occurred in the main Censys module - IP address search: {e}')
            self.ips = []
        return self.ips
|
|
@ -20,12 +20,10 @@ def __init__(self, word, limit):
|
|||
self.limit = limit
|
||||
|
||||
def do_search(self):
|
||||
try: # Do normal scraping.
|
||||
url = self.api.replace('x', self.word)
|
||||
headers = {'User-Agent': googleUA}
|
||||
r = requests.get(url, headers=headers)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
# Do normal scraping.
|
||||
url = self.api.replace('x', self.word)
|
||||
headers = {'User-Agent': googleUA}
|
||||
r = requests.get(url, headers=headers)
|
||||
time.sleep(getDelay())
|
||||
self.results = r.text
|
||||
self.totalresults += self.results
|
||||
|
@ -46,8 +44,8 @@ def crawl(self, text):
|
|||
urls = set()
|
||||
try:
|
||||
load = json.loads(text)
|
||||
for key in load.keys(): # Iterate through keys of dict.
|
||||
val = load.get(key)
|
||||
for keys in load.keys(): # Iterate through keys of dict.
|
||||
val = load.get(keys)
|
||||
if isinstance(val, int) or isinstance(val, dict) or val is None:
|
||||
continue
|
||||
if isinstance(val, list):
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
import re
|
||||
import requests
|
||||
|
||||
|
||||
class s3_scanner:
    """Check whether a host serves a known "unclaimed service" error page.

    The page at ``https://<host>`` is downloaded and compared against a list
    of literal HTML snippets (``self.fingerprints``); a hit is reported as a
    possible takeover.
    """

    def __init__(self, host):
        """
        :param host: hostname to probe (no scheme).
        """
        self.host = host
        self.results = ""
        self.totalresults = ""
        # Literal page fragments, matched as plain substrings (not regexes).
        self.fingerprints = [
            'www.herokucdn.com/error-pages/no-such-app.html',
            '<title>Squarespace - No Such Account</title>',
            "<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>",
            "<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>",
        ]

    def __check_http(self, bucket_url):
        # NOTE(review): this method reads self.session, S3_URL, ARGS, KEYWORDS and
        # self.__log, none of which are defined in the visible part of this module,
        # and nothing here calls it. It is left as-is; it will raise if invoked
        # unless those names are provided elsewhere - confirm before relying on it.
        check_response = self.session.head(
            S3_URL, timeout=3, headers={'Host': bucket_url})

        if check_response.status_code == 307:  # valid bucket, lets check if its public
            new_bucket_url = check_response.headers['Location']
            bucket_response = requests.request(
                'GET' if ARGS.only_interesting else 'HEAD', new_bucket_url, timeout=3)

            if bucket_response.status_code == 200\
                    and (not ARGS.only_interesting or
                         (ARGS.only_interesting and any(keyword in bucket_response.text for keyword in KEYWORDS))):
                print(f"Found bucket '{new_bucket_url}'")
                self.__log(new_bucket_url)

    def do_s3(self):
        """Download the host's front page and report any fingerprint found in it.

        ``self.temp`` is set to the list of fingerprints present in the page.
        Any error (network, TLS, ...) is printed and swallowed.
        """
        try:
            print('\t Searching takeovers for ' + self.host)
            # NOTE: certificate verification is disabled on purpose by the
            # original code (verify=False); the response is only pattern-matched.
            r = requests.get('https://' + self.host, verify=False)
            # Substring test: the fingerprints are literal HTML, and treating
            # them as regexes made e.g. "(404)" a capture group that never
            # matches the literal parentheses.
            self.temp = [fingerprint for fingerprint in self.fingerprints if fingerprint in r.text]
            if self.temp:
                print('\t\033[91m Takeover detected! - ' + self.host + '\033[1;32;40m')
        except Exception as e:
            print(e)

    def process(self):
        """Run the scan (entry point used by callers)."""
        self.do_s3()
|
34
theHarvester/discovery/spyse.py
Normal file
34
theHarvester/discovery/spyse.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib.core import *
|
||||
import requests
|
||||
from pprint import pprint
|
||||
|
||||
|
||||
class SearchSpyse:
    """Query the Spyse subdomains endpoint for hosts under a domain.

    Call ``process()`` to run the search, then ``get_hostnames()`` to read
    the hosts that were found.
    """

    API_URL = 'https://api.spyse.com/v1/subdomains'
    # Characters accepted in a candidate hostname taken from the response.
    _HOST_CHARS = frozenset('abcdefghijklmnopqrstuvwxyz0123456789.-_')

    def __init__(self, word):
        """
        :param word: domain name to search for.
        :raises MissingKey: when ``Core.spyse_key()`` returns ``None``.
        """
        self.word = word
        self.key = Core.spyse_key()
        if self.key is None:
            raise MissingKey(True)
        self.results = ''  # decoded JSON body of the most recent response
        self.totalresults = []  # hostnames collected so far, in discovery order

    def do_search(self, page=2):
        """Fetch one page of results and store the decoded JSON in ``self.results``.

        :param page: result page to request. The default of 2 preserves the
            value that was previously hard-coded.
            NOTE(review): that looks like a debugging leftover - confirm
            whether the first page should be requested instead.

        Errors are printed and swallowed; ``self.results`` is then left unchanged.
        """
        try:
            # Let requests build the query string so the domain and token are URL-encoded.
            params = {'domain': self.word, 'api_token': self.key, 'page': page}
            headers = {'User-Agent': Core.get_user_agent()}
            response = requests.get(self.API_URL, params=params, headers=headers)
            self.results = response.json()
        except Exception as e:
            print(f'An exception has occurred: {e}')

    @staticmethod
    def _iter_strings(node):
        """Yield every string found anywhere inside a decoded JSON value."""
        if isinstance(node, str):
            yield node
        elif isinstance(node, dict):
            for value in node.values():
                yield from SearchSpyse._iter_strings(value)
        elif isinstance(node, (list, tuple)):
            for item in node:
                yield from SearchSpyse._iter_strings(item)

    def _collect_hostnames(self):
        """Append hostnames under ``self.word`` found in ``self.results``.

        NOTE(review): the response schema is not visible from this module, so
        instead of reading specific keys this keeps any string value that is
        the searched domain or a name ending in ``.<domain>``. Replace with
        direct field access once the schema is confirmed against the API docs.
        """
        domain = str(self.word).lower().rstrip('.')
        suffix = '.' + domain
        seen = set(self.totalresults)
        for text in self._iter_strings(self.results):
            host = text.strip().lower().rstrip('.')
            if host in seen or not host:
                continue
            if not (host == domain or host.endswith(suffix)):
                continue
            if set(host) - self._HOST_CHARS:
                continue  # URLs, sentences, e-mail addresses, ...
            seen.add(host)
            self.totalresults.append(host)

    def get_hostnames(self):
        """Return the list of hostnames collected by ``process()``."""
        return self.totalresults

    def process(self):
        """Run the search and collect the hostnames from the response."""
        self.do_search()
        print('\tSearching results.')
        self._collect_hostnames()
|
|
@ -46,6 +46,12 @@ def shodan_key() -> str:
|
|||
keys = yaml.safe_load(api_keys)
|
||||
return keys['apikeys']['shodan']['key']
|
||||
|
||||
@staticmethod
def spyse_key() -> Union[str, None]:
    """Return the Spyse API key stored in ``api-keys.yaml``.

    The file is opened relative to the current working directory and the
    value at ``apikeys -> spyse -> key`` is returned.

    :returns: the configured key, or ``None`` when the ``key:`` entry is
        left empty in the YAML file (callers test for ``None``).
    :raises OSError: if ``api-keys.yaml`` cannot be opened.
    :raises KeyError: if the ``apikeys``/``spyse``/``key`` entries are missing.
    """
    with open('api-keys.yaml', 'r') as api_keys:
        keys = yaml.safe_load(api_keys)
    # Look the key up after the file is closed; only the parse needs the handle.
    return keys['apikeys']['spyse']['key']
|
||||
|
||||
@staticmethod
|
||||
def banner() -> None:
|
||||
print('\n\033[93m*******************************************************************')
|
||||
|
@ -67,7 +73,6 @@ def get_supportedengines() -> Set[Union[str, Any]]:
|
|||
supportedengines = {'baidu',
|
||||
'bing',
|
||||
'bingapi',
|
||||
'censys',
|
||||
'crtsh',
|
||||
'dnsdumpster',
|
||||
'dogpile',
|
||||
|
@ -83,6 +88,7 @@ def get_supportedengines() -> Set[Union[str, Any]]:
|
|||
'otx',
|
||||
'securityTrails',
|
||||
'suip',
|
||||
'spyse',
|
||||
'threatcrowd',
|
||||
'trello',
|
||||
'twitter',
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
|
||||
class Parser:
    """Extract IPs, hostnames and page counts from downloaded Censys HTML.

    The object passed to the constructor must expose ``total_resultshosts``
    and ``total_resultscerts`` (HTML text) and ``word`` (the searched domain).
    """

    def __init__(self, resultstoparse):
        """Parse both HTML blobs once so the search methods can query them."""
        self.ipaddresses = []
        self.souphosts = BeautifulSoup(resultstoparse.total_resultshosts, features='html.parser')
        self.soupcerts = BeautifulSoup(resultstoparse.total_resultscerts, features='html.parser')
        self.hostnames = []
        self.hostnamesfromcerts = []
        self.urls = []
        self.numberofpageshosts = 0
        self.numberofpagescerts = 0
        self.domain = resultstoparse.word

    @staticmethod
    def _total_pages(soup):
        """Return the page count shown in the first statistic header of *soup*.

        The header text is expected to contain ``<current>/<total>``; 0 is
        returned when there is no header or no ``/<digits>`` in it.
        """
        items = soup.findAll('span', 'SearchResultSectionHeader__statistic')
        if not items:
            return 0
        match = re.search(r'/(\d+)', items[0].text)
        return int(match.group(1)) if match else 0

    def search_hostnamesfromcerts(self):
        """Return hostnames from the certificate results that contain the domain.

        :returns: list of hostnames; empty if parsing fails (error is printed).
        """
        try:
            for hostnameitem in self.soupcerts.findAll('i', 'fa fa-fw fa-home'):
                # The names are in the text node following the icon, rendered
                # like a Python list; strip quotes/brackets/spaces and the
                # "..." truncation marker before splitting on commas.
                hostnames = str(hostnameitem.next_sibling)
                hostnamesclean = re.sub(r'[ \'\[\]]', '', hostnames)
                hostnamesclean = re.sub(r'\.\.\.', r'', hostnamesclean)
                self.hostnamesfromcerts.extend(hostnamesclean.split(','))
            domain = str(self.domain)
            # Drop empty entries and names issued to other sites.
            self.hostnamesfromcerts = [s for s in self.hostnamesfromcerts if s and domain in s]
            return self.hostnamesfromcerts
        except Exception as e:
            print('Error occurred in the Censys module: certificate hostname parser: ' + str(e))
            return []

    def search_ipaddresses(self):
        """Return the IP addresses listed as result titles on the host pages.

        :returns: list of IP strings; empty if parsing fails (error is printed).
        """
        try:
            for ipaddressitem in self.souphosts.findAll('a', 'SearchResult__title-text'):
                self.ipaddresses.append(ipaddressitem.text.strip())
            return self.ipaddresses
        except Exception as e:
            print('Error occurred in the Censys module: IP address parser: ' + str(e))
            return []

    def search_totalpageshosts(self):
        """Return (and store in ``numberofpageshosts``) the IPv4 result page count.

        :returns: the page count, or 0 when it cannot be determined.
        """
        try:
            self.numberofpageshosts = self._total_pages(self.souphosts)
            return self.numberofpageshosts
        except Exception as e:
            print('Error occurred in the Censys module IP search: page parser: ' + str(e))
            return 0

    def search_totalpagescerts(self):
        """Return (and store in ``numberofpagescerts``) the certificate page count.

        Only the certificate counter is touched; the host counter is left alone.

        :returns: the page count, or 0 when it cannot be determined.
        """
        try:
            self.numberofpagescerts = self._total_pages(self.soupcerts)
            return self.numberofpagescerts
        except Exception as e:
            print('Error occurred in the Censys module certificate search: page parser: ' + str(e))
            return 0
|
Loading…
Reference in a new issue