Synching with master.

2024-11-10 17:13:07 +08:00 · 2018-12-15 22:01:39 -05:00 · 2018-12-15 22:01:39 -05:00 · b1d979d335
commit b1d979d335
parent 7f1addee25 8953b4d100
4 changed files with 51 additions and 11 deletions
--- a/censysparser.py
+++ b/censysparser.py
@@ -6,14 +6,15 @@ class parser:
    def __init__(self, results):
        self.results = results
        self.ipaddresses = []
-        self.soup = BeautifulSoup(results.results,features="html.parser")
+        self.soup = BeautifulSoup(results.results, features="html.parser")
        self.hostnames = []
        self.urls = []
        self.numberofpages = 0

-    def search_hostnames(self):
+    def search_hostnames(self, totalresults):
        try:
-            hostnamelist = self.soup.findAll('tt')
+            hostnamelist = BeautifulSoup(totalresults, 'html.parser').findAll('tt')
+            #hostnamelist = self.soup.findAll('tt')
            for hostnameitem in hostnamelist:
                self.hostnames.append(hostnameitem.text)
            return self.hostnames
--- a/discovery/censys.py
+++ b/discovery/censys.py
@@ -1,16 +1,16 @@
 import random
 import requests
+import time
 import censysparser

 class search_censys:

-    def __init__(self, word):
+    def __init__(self, word, limit):
        self.word = word
-        self.url = ""
-        self.page = ""
+        self.limit = int(limit)
        self.results = ""
        self.total_results = ""
-        self.server = "censys.io"
+        self.server = "https://censys.io/"
        self.userAgent = ["(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
          ,("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
@@ -26,8 +26,10 @@ def __init__(self, word):
        
    def do_search(self):
        try:
-            headers = {'user-agent': random.choice(self.userAgent),'Accept':'*/*','Referer':self.url}
+            self.url = self.server + 'ipv4/_search?q=' + self.word
+            headers = {'user-agent': random.choice(self.userAgent),'Accept':'*/*','Referer': self.url}
            response = requests.get(self.url, headers=headers)
+<<<<<<< HEAD
            self.results = response.content
            print ('-')
            self.total_results += self.results
@@ -50,11 +52,38 @@ def process(self):
            except Exception as e:
                print("Error occurred: " + str(e))
            self.counter+=1
+=======
+            print("\tSearching Censys results..")
+            self.results = response.text
+            self.total_results += self.results
+            pageLimit = self.get_pageLimit(self.total_results)
+            if pageLimit != -1:
+                for i in range(2, pageLimit+1):
+                    try:
+                        url = self.server + 'ipv4?q=' + self.word + '&page=' + str(i)
+                        headers = {'user-agent': random.choice(self.userAgent), 'Accept': '*/*', 'Referer': url}
+                        time.sleep(.5)
+                        response = requests.get(url, headers=headers)
+                        self.results = response.text
+                        self.total_results += self.results
+                    except Exception:
+                        continue
+        except Exception as e:
+            print(e)
+
+    def get_pageLimit(self, first_page_text):
+        for line in str(first_page_text).strip().splitlines():
+            if 'Page:' in line:
+                line = line[18:] #where format is Page:1/# / is at index 18 and want everything after /
+                return int(line)
+        return -1
+
+>>>>>>> 8953b4d1006153c1c82cea52d4776c1f87cd42da

    def get_hostnames(self):
        try:
            hostnames = censysparser.parser(self)
-            return hostnames.search_hostnames()
+            return hostnames.search_hostnames(self.total_results)
        except Exception as e:
            print("Error occurred: " + str(e))

--- a/discovery/threatcrowd.py
+++ b/discovery/threatcrowd.py
@@ -28,7 +28,7 @@ def do_search(self):
        self.totalresults += self.results

    def get_hostnames(self):
-        rawres = myparser.parser(self.results, self.word)
+        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.hostnames()

    def process(self):
--- a/theHarvester.py
+++ b/theHarvester.py
@@ -317,8 +317,13 @@ def start(argv):
                        print("[-] Searching in Censys:")
                        from discovery import censys
                        #import locally or won't work
+<<<<<<< HEAD
                        search = censys.search_censys(word)
                        search.process()
+=======
+                        search = censys.search_censys(word, limit)
+                        search.do_search()
+>>>>>>> 8953b4d1006153c1c82cea52d4776c1f87cd42da
                        all_emails = []
                        all_ip = search.get_ipaddresses()
                        all_hosts = search.get_hostnames()
@@ -495,14 +500,19 @@ def start(argv):
        print('No hosts found as all_hosts is not defined.')
        sys.exit()

+<<<<<<< HEAD
    if all_emails == []:
+=======
+
+    if all_emails == [] or all_emails is None:
+>>>>>>> 8953b4d1006153c1c82cea52d4776c1f87cd42da
        print("No emails found")
    else:
        print(("\n".join(all_emails)))

    print("\033[1;33;40m \n[+] Hosts found in search engines:")
    print("------------------------------------")
-    if all_hosts == [] or all_emails is None:
+    if all_hosts == [] or all_hosts is None:
        print("No hosts found")
    else:
        total = len(all_hosts)