mirror of https://github.com/laramies/theHarvester.git (synced 2025-02-23)

Commit 6cf3d18902: Censys implementation
parent 86db14ea32
5 changed files with 164 additions and 4 deletions
.gitignore (vendored): 1 addition

@@ -4,3 +4,4 @@ tests/myparser.py
 stash.sqlite
 *.sqlite
 .vscode
+venv
censysparser.py (new normal file): 39 additions

@@ -0,0 +1,39 @@
+from bs4 import BeautifulSoup
+import re
+
+
+class parser:
+
+    def __init__(self, results):
+        self.results = results
+        self.ipaddresses = []
+        self.soup = BeautifulSoup(results.results, features="html.parser")
+        self.hostnames = []
+        self.numberofpages = 0
+
+    def search_hostnames(self):
+        try:
+            # hostnames are rendered inside <tt> tags on the Censys results page
+            hostnamelist = self.soup.findAll('tt')
+            for hostnameitem in hostnamelist:
+                self.hostnames.append(hostnameitem.text)
+            return self.hostnames
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def search_ipaddresses(self):
+        try:
+            # each search result title links to an IP address record
+            ipaddresslist = self.soup.findAll('a', 'SearchResult__title-text')
+            for ipaddressitem in ipaddresslist:
+                self.ipaddresses.append(ipaddressitem.text.strip())
+            return self.ipaddresses
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def search_numberofpages(self):
+        try:
+            items = self.soup.findAll(href=re.compile("page"))
+            for item in items:
+                if item.text != 'next':  # filter out the pagination "next" link
+                    self.numberofpages += 1
+            return self.numberofpages
+        except Exception as e:
+            print("Error occurred: " + str(e))
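A quick way to sanity-check this parser in isolation (a sketch, not part of the commit): it only needs an object exposing a .results attribute that holds raw Censys HTML, so a small hypothetical stub and a saved results page are enough to exercise it offline.

    # Sketch only: FakeSearch and censys_page1.html are hypothetical stand-ins.
    import censysparser

    class FakeSearch:
        def __init__(self, html):
            self.results = html  # raw HTML, as search_censys stores it

    with open('censys_page1.html') as f:  # a saved Censys results page
        page = FakeSearch(f.read())

    p = censysparser.parser(page)
    print(p.search_hostnames())     # hostnames found in <tt> tags
    print(p.search_ipaddresses())   # IPs taken from the result title links
    print(p.search_numberofpages())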
discovery/censys.py (new normal file): 71 additions

@@ -0,0 +1,71 @@
+import httplib
+import sys
+import random
+import requests
+import censysparser
+
+
+class search_censys:
+
+    def __init__(self, word):
+        self.word = word
+        self.url = ""
+        self.page = ""
+        self.results = ""
+        self.total_results = ""
+        self.server = "censys.io"
+        # pool of User-Agent strings to rotate through on each request
+        self.userAgent = ["Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
+                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
+                          ("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
+                           "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
+                          ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
+                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254"),
+                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42",
+                          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
+                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52",
+                          "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+                          "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
+                          "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]
+
+    def do_search(self):
+        try:
+            headers = {'user-agent': random.choice(self.userAgent), 'Accept': '*/*', 'Referer': self.url}
+            response = requests.get(self.url, headers=headers)
+            self.results = response.content
+            self.total_results += self.results
+        except Exception as e:
+            print e
+
+    def process(self, morepage=None):
+        try:
+            if morepage is not None:
+                self.page = str(morepage)
+                # rebuild from the base search URL so "&page=" parameters do not accumulate across calls
+                self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word + "&page=" + self.page
+            else:
+                self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word
+            self.do_search()
+            print "\tSearching Censys results.."
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_hostnames(self):
+        try:
+            hostnames = censysparser.parser(self)
+            return hostnames.search_hostnames()
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_ipaddresses(self):
+        try:
+            ips = censysparser.parser(self)
+            return ips.search_ipaddresses()
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_totalnumberofpages(self):
+        try:
+            pages = censysparser.parser(self)
+            return pages.search_numberofpages()
+        except Exception as e:
+            print("Error occurred: " + str(e))
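A minimal sketch of driving the new module on its own (not part of the commit), assuming Python 2 and a working directory of the theHarvester root so that censys.py's top-level "import censysparser" resolves; example.com is a placeholder domain. The "or []" guards cover the getters returning None after a swallowed exception.

    # Sketch only, assuming Python 2 and the theHarvester root on sys.path.
    from discovery import censys

    search = censys.search_censys("example.com")  # placeholder target domain
    search.process()                              # fetch the first results page
    all_hosts = search.get_hostnames() or []
    all_ip = search.get_ipaddresses() or []

    totalpages = search.get_totalnumberofpages() or 0
    pagecounter = 1
    while pagecounter < totalpages and pagecounter < 5:  # cap at 4 pages, as theHarvester does
        pagecounter += 1
        search.process(pagecounter)
        all_ip.extend(search.get_ipaddresses() or [])
        all_hosts.extend(search.get_hostnames() or [])

    print "%d hostnames, %d IP addresses" % (len(set(all_hosts)), len(set(all_ip)))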
@@ -1 +1,2 @@
 requests==2.18.4
+bs4==0.0.1
@@ -17,6 +17,7 @@
 sys.exit()

 from discovery import *
+from discovery import censys
 from lib import htmlExport
 from lib import hostchecker
@@ -94,6 +95,8 @@ def start(argv):
     takeover_check = False
     google_dorking = False
     limit = 500
+    all_emails = []
+    all_hosts = []
     dnsserver = ""
     for opt, arg in opts:
         if opt == '-l':
@@ -122,7 +125,7 @@ def start(argv):
             dnstld = True
         elif opt == '-b':
             engines = set(arg.split(','))
-            supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","all"])
+            supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","censys","all"])
             if set(engines).issubset(supportedengines):
                 print "found supported engines"
                 print "[-] Starting harvesting process for domain: " + word + "\n"
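With the engine registered above, it would be selected like any other source (hypothetical invocations, assuming theHarvester's usual -d domain and -b source flags):

    python theHarvester.py -d example.com -b censys
    python theHarvester.py -d example.com -b all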
@@ -149,6 +152,28 @@ def start(argv):
                 db=stash.stash_manager()
                 db.store_all(word,all_hosts,'host','netcraft')

+            if engineitem == "censys":
+                db=stash.stash_manager()
+                print "[-] Searching in Censys:"
+                search = censys.search_censys(word)
+                search.process()
+                all_ip = search.get_ipaddresses()
+                all_hosts = search.get_hostnames()
+                db.store_all(word,all_ip,'ipaddress','censys')
+                db.store_all(word,all_hosts,'hostname','censys')
+                totalnumberofpages = search.get_totalnumberofpages()  # as returned by Censys at the initial search
+                pagecounter = 1  # pagecounter limits how many pages to query
+                while pagecounter < totalnumberofpages and pagecounter < 5:  # pagecounter < 5: search 4 pages = 100 results
+                    pagecounter += 1
+                    search.process(pagecounter)
+                    moreips = search.get_ipaddresses()
+                    for moreipitem in moreips:
+                        db.store(word,moreipitem,'ipaddress','censys')
+                        all_ip.append(moreipitem)
+                    morehostnames = search.get_hostnames()
+                    for morehostnameitem in morehostnames:
+                        db.store(word,morehostnameitem,'hostname','censys')
+                        all_hosts.append(morehostnameitem)
+
             if engineitem == "threatcrowd":
                 print "[-] Searching in Threatcrowd:"
@@ -359,11 +384,32 @@ def start(argv):
         all_emails.extend(emails)
         #Clean up email list, sort and uniq
         all_emails=sorted(set(all_emails))

+        print "[-] Searching in Censys:"
+        search = censys.search_censys(word)
+        search.process()
+        all_ip = search.get_ipaddresses()
+        all_hosts = search.get_hostnames()
+        db.store_all(word,all_ip,'ipaddress','censys')
+        db.store_all(word,all_hosts,'hostname','censys')
+        totalnumberofpages = search.get_totalnumberofpages()  # as returned by Censys at the initial search
+        pagecounter = 1  # pagecounter limits how many pages to query
+        while pagecounter < totalnumberofpages and pagecounter < 5:  # pagecounter < 5: search 4 pages = 100 results
+            pagecounter += 1
+            search.process(pagecounter)
+            moreips = search.get_ipaddresses()
+            for moreipitem in moreips:
+                db.store(word,moreipitem,'ipaddress','censys')
+                all_ip.append(moreipitem)
+            morehostnames = search.get_hostnames()
+            for morehostnameitem in morehostnames:
+                db.store(word,morehostnameitem,'hostname','censys')
+                all_hosts.append(morehostnameitem)
+
     else:
         #if engine not in ("baidu", "bing", "crtsh","bingapi","dogpile","google", "googleCSE","virustotal","threatcrowd", "googleplus", "google-profiles","linkedin", "pgp", "twitter", "vhost", "yahoo","netcraft","all"):
         usage()
-        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, all"
+        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, censys, all"
         sys.exit()
         #else:
         # pass
@@ -625,5 +671,7 @@ def start(argv):
     start(sys.argv[1:])
 except KeyboardInterrupt:
     print "Search interrupted by user.."
+except Exception as e:
+    print e
 except:
     sys.exit()