mirror of https://github.com/laramies/theHarvester.git (synced 2025-02-23)

Commit 6cf3d18902: Censys implementation
parent 86db14ea32
5 changed files with 164 additions and 4 deletions
.gitignore (vendored): 1 addition

@@ -4,3 +4,4 @@ tests/myparser.py
 stash.sqlite
 *.sqlite
 .vscode
+venv
censysparser.py (new normal file): 39 additions

@@ -0,0 +1,39 @@
+from bs4 import BeautifulSoup
+import re
+
+
+class parser:
+
+    def __init__(self, results):
+        self.results = results
+        self.ipaddresses = []
+        self.soup = BeautifulSoup(results.results, features="html.parser")
+        self.hostnames = []
+        self.numberofpages = 0
+
+    def search_hostnames(self):
+        try:
+            # hostnames are rendered inside <tt> tags on the Censys results page
+            hostnamelist = self.soup.findAll('tt')
+            for hostnameitem in hostnamelist:
+                self.hostnames.append(hostnameitem.text)
+            return self.hostnames
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def search_ipaddresses(self):
+        try:
+            # each search result title links to an IP address record
+            ipaddresslist = self.soup.findAll('a', 'SearchResult__title-text')
+            for ipaddressitem in ipaddresslist:
+                self.ipaddresses.append(ipaddressitem.text.strip())
+            return self.ipaddresses
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def search_numberofpages(self):
+        try:
+            items = self.soup.findAll(href=re.compile("page"))
+            for item in items:
+                if item.text != 'next':  # filter out the pagination "next" link
+                    self.numberofpages += 1
+            return self.numberofpages
+        except Exception as e:
+            print("Error occurred: " + str(e))
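A quick way to sanity-check this parser in isolation (a sketch, not part of the commit): it only needs an object exposing a .results attribute that holds raw Censys HTML, so a small hypothetical stub and a saved results page are enough to exercise it offline.

    # Sketch only: FakeSearch and censys_page1.html are hypothetical stand-ins.
    import censysparser

    class FakeSearch:
        def __init__(self, html):
            self.results = html  # raw HTML, as search_censys stores it

    with open('censys_page1.html') as f:  # a saved Censys results page
        page = FakeSearch(f.read())

    p = censysparser.parser(page)
    print(p.search_hostnames())     # hostnames found in <tt> tags
    print(p.search_ipaddresses())   # IPs taken from the result title links
    print(p.search_numberofpages())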
discovery/censys.py (new normal file): 71 additions

@@ -0,0 +1,71 @@
+import httplib
+import sys
+import random
+import requests
+import censysparser
+
+
+class search_censys:
+
+    def __init__(self, word):
+        self.word = word
+        self.url = ""
+        self.page = ""
+        self.results = ""
+        self.total_results = ""
+        self.server = "censys.io"
+        # pool of User-Agent strings to rotate through on each request
+        self.userAgent = ["Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
+                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
+                          ("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
+                           "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
+                          ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
+                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254"),
+                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42",
+                          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
+                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52",
+                          "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+                          "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
+                          "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]
+
+    def do_search(self):
+        try:
+            headers = {'user-agent': random.choice(self.userAgent), 'Accept': '*/*', 'Referer': self.url}
+            response = requests.get(self.url, headers=headers)
+            self.results = response.content
+            self.total_results += self.results
+        except Exception as e:
+            print e
+
+    def process(self, morepage=None):
+        try:
+            if morepage is not None:
+                self.page = str(morepage)
+                # rebuild from the base search URL so "&page=" parameters do not accumulate across calls
+                self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word + "&page=" + self.page
+            else:
+                self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word
+            self.do_search()
+            print "\tSearching Censys results.."
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_hostnames(self):
+        try:
+            hostnames = censysparser.parser(self)
+            return hostnames.search_hostnames()
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_ipaddresses(self):
+        try:
+            ips = censysparser.parser(self)
+            return ips.search_ipaddresses()
+        except Exception as e:
+            print("Error occurred: " + str(e))
+
+    def get_totalnumberofpages(self):
+        try:
+            pages = censysparser.parser(self)
+            return pages.search_numberofpages()
+        except Exception as e:
+            print("Error occurred: " + str(e))
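A minimal sketch of driving the new module on its own (not part of the commit), assuming Python 2 and a working directory of the theHarvester root so that censys.py's top-level "import censysparser" resolves; example.com is a placeholder domain. The "or []" guards cover the getters returning None after a swallowed exception.

    # Sketch only, assuming Python 2 and the theHarvester root on sys.path.
    from discovery import censys

    search = censys.search_censys("example.com")  # placeholder target domain
    search.process()                              # fetch the first results page
    all_hosts = search.get_hostnames() or []
    all_ip = search.get_ipaddresses() or []

    totalpages = search.get_totalnumberofpages() or 0
    pagecounter = 1
    while pagecounter < totalpages and pagecounter < 5:  # cap at 4 pages, as theHarvester does
        pagecounter += 1
        search.process(pagecounter)
        all_ip.extend(search.get_ipaddresses() or [])
        all_hosts.extend(search.get_hostnames() or [])

    print "%d hostnames, %d IP addresses" % (len(set(all_hosts)), len(set(all_ip)))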
@@ -1 +1,2 @@
 requests==2.18.4
+bs4==0.0.1
@@ -17,6 +17,7 @@
 sys.exit()

 from discovery import *
+from discovery import censys
 from lib import htmlExport
 from lib import hostchecker
@@ -94,6 +95,8 @@ def start(argv):
     takeover_check = False
     google_dorking = False
     limit = 500
+    all_emails = []
+    all_hosts = []
     dnsserver = ""
     for opt, arg in opts:
         if opt == '-l':
@@ -122,7 +125,7 @@ def start(argv):
             dnstld = True
         elif opt == '-b':
             engines = set(arg.split(','))
-            supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","all"])
+            supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","censys","all"])
             if set(engines).issubset(supportedengines):
                 print "found supported engines"
                 print "[-] Starting harvesting process for domain: " + word + "\n"
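With the engine registered above, it would be selected like any other source (hypothetical invocations, assuming theHarvester's usual -d domain and -b source flags):

    python theHarvester.py -d example.com -b censys
    python theHarvester.py -d example.com -b all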
@@ -149,6 +152,28 @@ def start(argv):
                 db=stash.stash_manager()
                 db.store_all(word,all_hosts,'host','netcraft')

+            if engineitem == "censys":
+                db=stash.stash_manager()
+                print "[-] Searching in Censys:"
+                search = censys.search_censys(word)
+                search.process()
+                all_ip = search.get_ipaddresses()
+                all_hosts = search.get_hostnames()
+                db.store_all(word,all_ip,'ipaddress','censys')
+                db.store_all(word,all_hosts,'hostname','censys')
+                totalnumberofpages = search.get_totalnumberofpages()  # as returned by Censys at the initial search
+                pagecounter = 1  # pagecounter limits how many pages to query
+                while pagecounter < totalnumberofpages and pagecounter < 5:  # pagecounter < 5: search 4 pages = 100 results
+                    pagecounter += 1
+                    search.process(pagecounter)
+                    moreips = search.get_ipaddresses()
+                    for moreipitem in moreips:
+                        db.store(word,moreipitem,'ipaddress','censys')
+                        all_ip.append(moreipitem)
+                    morehostnames = search.get_hostnames()
+                    for morehostnameitem in morehostnames:
+                        db.store(word,morehostnameitem,'hostname','censys')
+                        all_hosts.append(morehostnameitem)
+
             if engineitem == "threatcrowd":
                 print "[-] Searching in Threatcrowd:"
@@ -359,11 +384,32 @@ def start(argv):
         all_emails.extend(emails)
         #Clean up email list, sort and uniq
         all_emails=sorted(set(all_emails))

+        print "[-] Searching in Censys:"
+        search = censys.search_censys(word)
+        search.process()
+        all_ip = search.get_ipaddresses()
+        all_hosts = search.get_hostnames()
+        db.store_all(word,all_ip,'ipaddress','censys')
+        db.store_all(word,all_hosts,'hostname','censys')
+        totalnumberofpages = search.get_totalnumberofpages()  # as returned by Censys at the initial search
+        pagecounter = 1  # pagecounter limits how many pages to query
+        while pagecounter < totalnumberofpages and pagecounter < 5:  # pagecounter < 5: search 4 pages = 100 results
+            pagecounter += 1
+            search.process(pagecounter)
+            moreips = search.get_ipaddresses()
+            for moreipitem in moreips:
+                db.store(word,moreipitem,'ipaddress','censys')
+                all_ip.append(moreipitem)
+            morehostnames = search.get_hostnames()
+            for morehostnameitem in morehostnames:
+                db.store(word,morehostnameitem,'hostname','censys')
+                all_hosts.append(morehostnameitem)
+
     else:
         #if engine not in ("baidu", "bing", "crtsh","bingapi","dogpile","google", "googleCSE","virustotal","threatcrowd", "googleplus", "google-profiles","linkedin", "pgp", "twitter", "vhost", "yahoo","netcraft","all"):
         usage()
-        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, all"
+        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, censys, all"
         sys.exit()
         #else:
         # pass
@@ -625,5 +671,7 @@ def start(argv):
     start(sys.argv[1:])
 except KeyboardInterrupt:
     print "Search interrupted by user.."
+except Exception as e:
+    print e
 except:
     sys.exit()