Mirror of https://github.com/laramies/theHarvester.git

Censys implementation

commit 6cf3d18902 (parent 86db14ea32)
5 changed files with 164 additions and 4 deletions
.gitignore (vendored)

@@ -3,4 +3,5 @@
 tests/myparser.py
 stash.sqlite
 *.sqlite
 .vscode
+venv
censysparser.py (new file, 39 lines)

from bs4 import BeautifulSoup
import re


class parser:

    def __init__(self, results):
        self.results = results
        self.ipaddresses = []
        self.soup = BeautifulSoup(results.results, features="html.parser")
        self.hostnames = []
        self.numberofpages = 0

    def search_hostnames(self):
        try:
            hostnamelist = self.soup.findAll('tt')
            for hostnameitem in hostnamelist:
                self.hostnames.append(hostnameitem.text)
            return self.hostnames
        except Exception as e:
            print("Error occurred: " + str(e))

    def search_ipaddresses(self):
        try:
            ipaddresslist = self.soup.findAll('a', 'SearchResult__title-text')
            for ipaddressitem in ipaddresslist:
                self.ipaddresses.append(ipaddressitem.text.strip())
            return self.ipaddresses
        except Exception as e:
            print("Error occurred: " + str(e))

    def search_numberofpages(self):
        try:
            items = self.soup.findAll(href=re.compile("page"))
            for item in items:
                if item.text != 'next':  # skip the pagination "next" link
                    self.numberofpages += 1
            return self.numberofpages
        except Exception as e:
            print("Error occurred: " + str(e))
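The parser never fetches anything itself: it only needs an object exposing a results attribute that holds Censys search-page HTML. A minimal standalone check of the three scrapers is sketched below; the HTML snippet and the FakeSearch class are made up for illustration, while the tt tag and SearchResult__title-text class are the selectors the parser actually uses.

# Hypothetical smoke test for censysparser.py; SAMPLE_HTML stands in for a
# real censys.io results page, trimmed to just the tags the parser reads.
import censysparser

SAMPLE_HTML = """
<a class="SearchResult__title-text" href="/ipv4/93.184.216.34"> 93.184.216.34 </a>
<tt>www.example.com</tt>
<a href="?page=2">2</a>
<a href="?page=2">next</a>
"""

class FakeSearch:                    # stands in for search_censys below
    results = SAMPLE_HTML

p = censysparser.parser(FakeSearch())
print(p.search_ipaddresses())        # ['93.184.216.34']
print(p.search_hostnames())          # ['www.example.com']
print(p.search_numberofpages())      # 1 -- the 'next' link is filtered out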
discovery/censys.py (new file, 71 lines)

import httplib
import sys
import random
import requests
import censysparser


class search_censys:

    def __init__(self, word):
        self.word = word
        self.url = ""
        self.page = ""
        self.results = ""
        self.total_results = ""
        self.server = "censys.io"
        self.userAgent = ["Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
                          "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36",
                          "Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254",
                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]

    def do_search(self):
        try:
            headers = {'user-agent': random.choice(self.userAgent), 'Accept': '*/*', 'Referer': self.url}
            response = requests.get(self.url, headers=headers)
            self.results = response.content
            self.total_results += self.results
        except Exception as e:
            print e

    def process(self, morepage=None):
        try:
            # Rebuild the base URL on every call so repeated pagination
            # requests do not keep appending "&page=" parameters.
            self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word
            if morepage is not None:
                self.page = str(morepage)
                self.url = self.url + "&page=" + self.page
            self.do_search()
            print "\tSearching Censys results.."
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_hostnames(self):
        try:
            hostnames = censysparser.parser(self)
            return hostnames.search_hostnames()
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_ipaddresses(self):
        try:
            ips = censysparser.parser(self)
            return ips.search_ipaddresses()
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_totalnumberofpages(self):
        try:
            pages = censysparser.parser(self)
            return pages.search_numberofpages()
        except Exception as e:
            print("Error occurred: " + str(e))
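Taken together: do_search() rotates a random desktop/mobile User-Agent per request and stores the raw HTML in self.results, which the censysparser-backed getters then scrape. A minimal standalone driver is sketched below; it is hypothetical (not part of the commit), assumes the repo layout on the import path, and depends on censys.io still serving the HTML layout the parser expects.

# Hypothetical standalone driver for discovery/censys.py.
from discovery import censys

search = censys.search_censys("example.com")   # word: the domain to look up
search.process()                               # fetch the first results page
print search.get_ipaddresses()                 # IPs scraped from the page
print search.get_hostnames()                   # hostnames from the <tt> tags
print search.get_totalnumberofpages()          # page links found by the parser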
requirements.txt

@@ -1 +1,2 @@
 requests==2.18.4
+bs4==0.0.1
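A side note on the new pin (my observation, not from the commit): the bs4 distribution on PyPI is a tiny shim whose only effect is to install beautifulsoup4, so the pin works, but depending on beautifulsoup4 directly would name the real dependency.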
theHarvester.py

@@ -17,6 +17,7 @@
     sys.exit()

 from discovery import *
+from discovery import censys
 from lib import htmlExport
 from lib import hostchecker
@@ -94,6 +95,8 @@ def start(argv):
     takeover_check = False
     google_dorking = False
     limit = 500
+    all_emails = []
+    all_hosts = []
     dnsserver = ""
     for opt, arg in opts:
         if opt == '-l':
@@ -122,7 +125,7 @@ def start(argv):
         dnstld = True
     elif opt == '-b':
         engines = set(arg.split(','))
-        supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","all"])
+        supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","censys","all"])
         if set(engines).issubset(supportedengines):
             print "found supported engines"
             print "[-] Starting harvesting process for domain: " + word + "\n"
@@ -148,7 +151,29 @@ def start(argv):
                 all_emails = []
                 db=stash.stash_manager()
                 db.store_all(word,all_hosts,'host','netcraft')

+            if engineitem == "censys":
+                db=stash.stash_manager()
+                print "[-] Searching in Censys:"
+                search = censys.search_censys(word)
+                search.process()
+                all_ip = search.get_ipaddresses()
+                all_hosts = search.get_hostnames()
+                db.store_all(word,all_ip,'ipaddress','censys')
+                db.store_all(word,all_hosts,'hostname','censys')
+                totalnumberofpages = search.get_totalnumberofpages()  # as reported by Censys for the initial search
+                pagecounter = 1  # bounds how many result pages are queried
+                while pagecounter < totalnumberofpages and pagecounter < 5:  # fetch up to 4 more pages (pages 2-5, ~25 results each)
+                    pagecounter += 1
+                    search.process(pagecounter)
+                    moreips = search.get_ipaddresses()
+                    for moreipitem in moreips:
+                        db.store(word,moreipitem,'ipaddress','censys')
+                        all_ip.append(moreipitem)
+                    morehostnames = search.get_hostnames()
+                    for morehostnameitem in morehostnames:
+                        db.store(word,morehostnameitem,'hostname','censys')
+                        all_hosts.append(morehostnameitem)

             if engineitem == "threatcrowd":
                 print "[-] Searching in Threatcrowd:"
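The loop's bound is easy to misread because the counter is incremented before the fetch. A quick worked trace in plain Python (with a hypothetical page total) shows the commit fetches at most pages 2 through 5 on top of the initial page, i.e. up to five pages in all:

# Trace of the paging bound used above; 12 is a hypothetical page count.
totalnumberofpages = 12
pagecounter = 1
fetched = [1]                        # page 1 came from the initial process()
while pagecounter < totalnumberofpages and pagecounter < 5:
    pagecounter += 1                 # increment first...
    fetched.append(pagecounter)      # ...then fetch, so pages 2-5 get queried
print(fetched)                       # [1, 2, 3, 4, 5] -> at most 5 pages total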
@@ -359,11 +384,32 @@ def start(argv):
         all_emails.extend(emails)
         #Clean up email list, sort and uniq
         all_emails=sorted(set(all_emails))

+        print "[-] Searching in Censys:"
+        search = censys.search_censys(word)
+        search.process()
+        all_ip = search.get_ipaddresses()
+        all_hosts = search.get_hostnames()
+        db.store_all(word,all_ip,'ipaddress','censys')
+        db.store_all(word,all_hosts,'hostname','censys')
+        totalnumberofpages = search.get_totalnumberofpages()  # as reported by Censys for the initial search
+        pagecounter = 1  # bounds how many result pages are queried
+        while pagecounter < totalnumberofpages and pagecounter < 5:  # fetch up to 4 more pages (pages 2-5, ~25 results each)
+            pagecounter += 1
+            search.process(pagecounter)
+            moreips = search.get_ipaddresses()
+            for moreipitem in moreips:
+                db.store(word,moreipitem,'ipaddress','censys')
+                all_ip.append(moreipitem)
+            morehostnames = search.get_hostnames()
+            for morehostnameitem in morehostnames:
+                db.store(word,morehostnameitem,'hostname','censys')
+                all_hosts.append(morehostnameitem)
     else:

         #if engine not in ("baidu", "bing", "crtsh","bingapi","dogpile","google", "googleCSE","virustotal","threatcrowd", "googleplus", "google-profiles","linkedin", "pgp", "twitter", "vhost", "yahoo","netcraft","all"):
         usage()
-        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, all"
+        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, censys, all"
         sys.exit()
         #else:
         #    pass
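This block is a verbatim copy of the per-engine Censys block added earlier in the same function. A small helper along these lines (hypothetical, not part of the commit) would let both call sites share it; it uses only calls that appear in the diff (search_censys and the stash manager's store/store_all):

# Hypothetical refactor of the duplicated Censys block above.
def harvest_censys(word, db, max_pages=5):
    search = censys.search_censys(word)
    search.process()                           # initial query, page 1
    all_ip = search.get_ipaddresses()
    all_hosts = search.get_hostnames()
    db.store_all(word, all_ip, 'ipaddress', 'censys')
    db.store_all(word, all_hosts, 'hostname', 'censys')
    totalpages = search.get_totalnumberofpages()  # taken from page 1 only
    pagecounter = 1
    while pagecounter < totalpages and pagecounter < max_pages:
        pagecounter += 1
        search.process(pagecounter)            # fetch the next results page
        for ip in search.get_ipaddresses():
            db.store(word, ip, 'ipaddress', 'censys')
            all_ip.append(ip)
        for hostname in search.get_hostnames():
            db.store(word, hostname, 'hostname', 'censys')
            all_hosts.append(hostname)
    return all_ip, all_hosts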
@@ -625,5 +671,7 @@ def start(argv):
         start(sys.argv[1:])
     except KeyboardInterrupt:
         print "Search interrupted by user.."
+    except Exception as e:
+        print e
     except:
         sys.exit()