Mirror of https://github.com/laramies/theHarvester.git

Censys implementation

commit 6cf3d18902 (parent 86db14ea32)
5 changed files with 164 additions and 4 deletions
.gitignore (vendored)

@@ -3,4 +3,5 @@
 tests/myparser.py
 stash.sqlite
 *.sqlite
 .vscode
+venv
censysparser.py (new file, 39 lines)

from bs4 import BeautifulSoup
import re


class parser:

    def __init__(self, results):
        self.results = results
        self.ipaddresses = []
        self.soup = BeautifulSoup(results.results, features="html.parser")
        self.hostnames = []
        self.numberofpages = 0

    def search_hostnames(self):
        try:
            hostnamelist = self.soup.findAll('tt')
            for hostnameitem in hostnamelist:
                self.hostnames.append(hostnameitem.text)
            return self.hostnames
        except Exception as e:
            print("Error occurred: " + str(e))

    def search_ipaddresses(self):
        try:
            ipaddresslist = self.soup.findAll('a', 'SearchResult__title-text')
            for ipaddressitem in ipaddresslist:
                self.ipaddresses.append(ipaddressitem.text.strip())
            return self.ipaddresses
        except Exception as e:
            print("Error occurred: " + str(e))

    def search_numberofpages(self):
        try:
            items = self.soup.findAll(href=re.compile("page"))
            for item in items:
                if item.text != 'next':  # skip the pagination "next" link
                    self.numberofpages += 1
            return self.numberofpages
        except Exception as e:
            print("Error occurred: " + str(e))
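The parser never fetches anything itself: it only needs an object exposing a results attribute that holds Censys search-page HTML. A minimal standalone check of the three scrapers is sketched below; the HTML snippet and the FakeSearch class are made up for illustration, while the tt tag and SearchResult__title-text class are the selectors the parser actually uses.

# Hypothetical smoke test for censysparser.py; SAMPLE_HTML stands in for a
# real censys.io results page, trimmed to just the tags the parser reads.
import censysparser

SAMPLE_HTML = """
<a class="SearchResult__title-text" href="/ipv4/93.184.216.34"> 93.184.216.34 </a>
<tt>www.example.com</tt>
<a href="?page=2">2</a>
<a href="?page=2">next</a>
"""

class FakeSearch:                    # stands in for search_censys below
    results = SAMPLE_HTML

p = censysparser.parser(FakeSearch())
print(p.search_ipaddresses())        # ['93.184.216.34']
print(p.search_hostnames())          # ['www.example.com']
print(p.search_numberofpages())      # 1 -- the 'next' link is filtered out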
discovery/censys.py (new file, 71 lines)

import httplib
import sys
import random
import requests
import censysparser


class search_censys:

    def __init__(self, word):
        self.word = word
        self.url = ""
        self.page = ""
        self.results = ""
        self.total_results = ""
        self.server = "censys.io"
        self.userAgent = ["Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
                          "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36",
                          "Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254",
                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]

    def do_search(self):
        try:
            headers = {'user-agent': random.choice(self.userAgent), 'Accept': '*/*', 'Referer': self.url}
            response = requests.get(self.url, headers=headers)
            self.results = response.content
            self.total_results += self.results
        except Exception as e:
            print e

    def process(self, morepage=None):
        try:
            # Rebuild the base URL on every call so repeated pagination
            # requests do not keep appending "&page=" parameters.
            self.url = "https://" + self.server + "/ipv4/_search?q=" + self.word
            if morepage is not None:
                self.page = str(morepage)
                self.url = self.url + "&page=" + self.page
            self.do_search()
            print "\tSearching Censys results.."
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_hostnames(self):
        try:
            hostnames = censysparser.parser(self)
            return hostnames.search_hostnames()
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_ipaddresses(self):
        try:
            ips = censysparser.parser(self)
            return ips.search_ipaddresses()
        except Exception as e:
            print("Error occurred: " + str(e))

    def get_totalnumberofpages(self):
        try:
            pages = censysparser.parser(self)
            return pages.search_numberofpages()
        except Exception as e:
            print("Error occurred: " + str(e))
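Taken together: do_search() rotates a random desktop/mobile User-Agent per request and stores the raw HTML in self.results, which the censysparser-backed getters then scrape. A minimal standalone driver is sketched below; it is hypothetical (not part of the commit), assumes the repo layout on the import path, and depends on censys.io still serving the HTML layout the parser expects.

# Hypothetical standalone driver for discovery/censys.py.
from discovery import censys

search = censys.search_censys("example.com")   # word: the domain to look up
search.process()                               # fetch the first results page
print search.get_ipaddresses()                 # IPs scraped from the page
print search.get_hostnames()                   # hostnames from the <tt> tags
print search.get_totalnumberofpages()          # page links found by the parser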
requirements.txt

@@ -1 +1,2 @@
 requests==2.18.4
+bs4==0.0.1
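A side note on the new pin (my observation, not from the commit): the bs4 distribution on PyPI is a tiny shim whose only effect is to install beautifulsoup4, so the pin works, but depending on beautifulsoup4 directly would name the real dependency.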
theHarvester.py

@@ -17,6 +17,7 @@
     sys.exit()

 from discovery import *
+from discovery import censys
 from lib import htmlExport
 from lib import hostchecker
@@ -94,6 +95,8 @@ def start(argv):
     takeover_check = False
     google_dorking = False
     limit = 500
+    all_emails = []
+    all_hosts = []
     dnsserver = ""
     for opt, arg in opts:
         if opt == '-l':
@@ -122,7 +125,7 @@ def start(argv):
         dnstld = True
     elif opt == '-b':
         engines = set(arg.split(','))
-        supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","all"])
+        supportedengines = set(["baidu","bing","crtsh","bingapi","dogpile","google","googleCSE","virustotal","threatcrowd","googleplus","google-profiles","linkedin","pgp","twitter","vhost","yahoo","netcraft","censys","all"])
         if set(engines).issubset(supportedengines):
             print "found supported engines"
             print "[-] Starting harvesting process for domain: " + word + "\n"
@@ -148,7 +151,29 @@ def start(argv):
                 all_emails = []
                 db=stash.stash_manager()
                 db.store_all(word,all_hosts,'host','netcraft')

+            if engineitem == "censys":
+                db=stash.stash_manager()
+                print "[-] Searching in Censys:"
+                search = censys.search_censys(word)
+                search.process()
+                all_ip = search.get_ipaddresses()
+                all_hosts = search.get_hostnames()
+                db.store_all(word,all_ip,'ipaddress','censys')
+                db.store_all(word,all_hosts,'hostname','censys')
+                totalnumberofpages = search.get_totalnumberofpages()  # as reported by Censys for the initial search
+                pagecounter = 1  # bounds how many result pages are queried
+                while pagecounter < totalnumberofpages and pagecounter < 5:  # fetch up to 4 more pages (pages 2-5, ~25 results each)
+                    pagecounter += 1
+                    search.process(pagecounter)
+                    moreips = search.get_ipaddresses()
+                    for moreipitem in moreips:
+                        db.store(word,moreipitem,'ipaddress','censys')
+                        all_ip.append(moreipitem)
+                    morehostnames = search.get_hostnames()
+                    for morehostnameitem in morehostnames:
+                        db.store(word,morehostnameitem,'hostname','censys')
+                        all_hosts.append(morehostnameitem)

             if engineitem == "threatcrowd":
                 print "[-] Searching in Threatcrowd:"
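The loop's bound is easy to misread because the counter is incremented before the fetch. A quick worked trace in plain Python (with a hypothetical page total) shows the commit fetches at most pages 2 through 5 on top of the initial page, i.e. up to five pages in all:

# Trace of the paging bound used above; 12 is a hypothetical page count.
totalnumberofpages = 12
pagecounter = 1
fetched = [1]                        # page 1 came from the initial process()
while pagecounter < totalnumberofpages and pagecounter < 5:
    pagecounter += 1                 # increment first...
    fetched.append(pagecounter)      # ...then fetch, so pages 2-5 get queried
print(fetched)                       # [1, 2, 3, 4, 5] -> at most 5 pages total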
@@ -359,11 +384,32 @@ def start(argv):
         all_emails.extend(emails)
         #Clean up email list, sort and uniq
         all_emails=sorted(set(all_emails))

+        print "[-] Searching in Censys:"
+        search = censys.search_censys(word)
+        search.process()
+        all_ip = search.get_ipaddresses()
+        all_hosts = search.get_hostnames()
+        db.store_all(word,all_ip,'ipaddress','censys')
+        db.store_all(word,all_hosts,'hostname','censys')
+        totalnumberofpages = search.get_totalnumberofpages()  # as reported by Censys for the initial search
+        pagecounter = 1  # bounds how many result pages are queried
+        while pagecounter < totalnumberofpages and pagecounter < 5:  # fetch up to 4 more pages (pages 2-5, ~25 results each)
+            pagecounter += 1
+            search.process(pagecounter)
+            moreips = search.get_ipaddresses()
+            for moreipitem in moreips:
+                db.store(word,moreipitem,'ipaddress','censys')
+                all_ip.append(moreipitem)
+            morehostnames = search.get_hostnames()
+            for morehostnameitem in morehostnames:
+                db.store(word,morehostnameitem,'hostname','censys')
+                all_hosts.append(morehostnameitem)
     else:

         #if engine not in ("baidu", "bing", "crtsh","bingapi","dogpile","google", "googleCSE","virustotal","threatcrowd", "googleplus", "google-profiles","linkedin", "pgp", "twitter", "vhost", "yahoo","netcraft","all"):
         usage()
-        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, all"
+        print "Invalid search engine, try with: baidu, bing, bingapi, crtsh, dogpile, google, googleCSE, virustotal, netcraft, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, censys, all"
         sys.exit()
         #else:
         #    pass
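This block is a verbatim copy of the per-engine Censys block added earlier in the same function. A small helper along these lines (hypothetical, not part of the commit) would let both call sites share it; it uses only calls that appear in the diff (search_censys and the stash manager's store/store_all):

# Hypothetical refactor of the duplicated Censys block above.
def harvest_censys(word, db, max_pages=5):
    search = censys.search_censys(word)
    search.process()                           # initial query, page 1
    all_ip = search.get_ipaddresses()
    all_hosts = search.get_hostnames()
    db.store_all(word, all_ip, 'ipaddress', 'censys')
    db.store_all(word, all_hosts, 'hostname', 'censys')
    totalpages = search.get_totalnumberofpages()  # taken from page 1 only
    pagecounter = 1
    while pagecounter < totalpages and pagecounter < max_pages:
        pagecounter += 1
        search.process(pagecounter)            # fetch the next results page
        for ip in search.get_ipaddresses():
            db.store(word, ip, 'ipaddress', 'censys')
            all_ip.append(ip)
        for hostname in search.get_hostnames():
            db.store(word, hostname, 'hostname', 'censys')
            all_hosts.append(hostname)
    return all_ip, all_hosts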
@@ -625,5 +671,7 @@ def start(argv):
         start(sys.argv[1:])
     except KeyboardInterrupt:
         print "Search interrupted by user.."
+    except Exception as e:
+        print e
     except:
         sys.exit()