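Removed the separate google_dork class (its dork handling moved into search_google), added more dorks to the wordlist, and added a boolean flag (google_dorking) indicating whether the user wants Google dorks used in the Google search.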

2025-02-24 06:22:57 +08:00 · 2018-10-23 16:54:59 -04:00 · 2018-10-23 16:54:59 -04:00 · 568ace4419
commit 568ace4419
parent c2f86f7c54
4 changed files with 86 additions and 125 deletions
--- a/discovery/googledork.py
+++ b/discovery/googledork.py
@ -1,91 +0,0 @@
-import myparser
-import time
-import requests
-import random
-
-class google_dork:
-
-    def __init__(self, word, limit, start):
-        self.word = word
-        self.results = ""
-        self.totalresults = ""
-        self.dorks = []
-        self.links = []
-        self.database = "https://www.google.com/search?q="
-        #create list of userAgents to shuffle through
-        self.userAgent = ["(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
-        ,("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
-         "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
-         ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
-         "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254")]
-        self.quantity = "100"
-        self.limit = limit
-        self.counter = start
-
-    def append_dorks(self):
-        try: #wrap in try-except incase filepaths are messed up
-            with open('../theHarvester/wordlists/dorks.txt',mode='r') as fp:
-                self.dorks = [dork.strip() for dork in fp]
-        except IOError as error:
-            print(error)
-
-    def construct_dorks(self):
-        #format is: site:targetwebsite.com + space + inurl:admindork
-        colon = "%3A"
-        plus = "%2B"
-        space = '+'
-        period = "%2E"
-        double_quote = "%22"
-        asterick = "%2A"
-        left_bracket = "%5B"
-        right_bracket = "%5D"
-        question_mark = "%3F"
-        slash = "%2F"
-        single_quote = "%27"
-        ampersand = "%26"
-        left_peren = "%28"
-        right_peren = "%29"
-        #populate links list required that we need correct html encoding to work properly
-        self.links = [self.database + space + self.word + space +
-                      str(dork).replace(':', colon).replace('+', plus).replace('.', period).replace('"', double_quote)
-                      .replace("*", asterick).replace('[', left_bracket).replace(']', right_bracket)
-                      .replace('?', question_mark).replace(' ', space).replace('/', slash).replace("'", single_quote)
-                      .replace("&", ampersand).replace('(', left_peren).replace(')', right_peren)
-                      for dork in self.dorks]
-
-    def do_search(self):
-        for link in self.links:
-              try:
-                  params = {
-                    'User-Agent': random.choice(self.userAgent) #grab random User-Agent to avoid google blocking ip
-                  }
-                  req = requests.get(link, params=params)
-                  time.sleep(0.2)
-                  self.results = req.content
-                  self.totalresults += self.results
-              except Exception: #if something happens just continue
-                  continue
-
-    def get_emails(self):
-        rawres = myparser.parser(self.totalresults, self.word)
-        return rawres.emails()
-
-    def get_hostnames(self):
-        rawres = myparser.parser(self.totalresults, self.word)
-        return rawres.hostnames()
-
-    def get_files(self):
-        rawres = myparser.parser(self.totalresults, self.word)
-        return rawres.fileurls(self.files)
-
-    def get_profiles(self):
-        rawres = myparser.parser(self.totalresults, self.word)
-        return rawres.profiles()
-
-    def process(self):
-        while self.counter <= self.limit and self.counter <= 1000:
-            self.do_search()
-            time.sleep(0.8)
-            print "\tSearching " + str(self.counter) + " results..."
-            self.counter += 100
--- a/discovery/googlesearch.py
+++ b/discovery/googlesearch.py
@ -4,32 +4,52 @@
 import re
 import time
 import requests
-
+import random

 class search_google:

-    def __init__(self, word, limit, start):
+    def __init__(self, word, limit, start,google_dorking):
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.server = "www.google.com"
-        self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
+        self.dorks = []
+        self.links = []
+        self.database = "https://www.google.com/search?q="
+        self.userAgent = ["(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
+          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
+            , ("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
+            "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
+            ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
+            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254")]
        self.quantity = "100"
        self.limit = limit
        self.counter = start
-  
-    def do_search(self):
-        try:
-            urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=%40\"" + self.word + "\""
-        except Exception, e:
-            print e
-        try:
-            r=requests.get(urly)
-        except Exception,e:
-            print e
-        self.results = r.content 
-        self.totalresults += self.results
+        self.google_dorking = google_dorking

+    def do_search(self):
+        if self.google_dorking == False:
+            try:
+                urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=%40\"" + self.word + "\""
+            except Exception, e:
+                print e
+            try:
+                r=requests.get(urly)
+            except Exception,e:
+                print e
+            self.results = r.content
+            self.totalresults += self.results
+        else: #google_dorking is true
+            for link in self.links:
+                try:
+                    params = {'User-Agent': random.choice(self.userAgent)}
+                    #grab random User-Agent to avoid google blocking ip
+                    req = requests.get(link, params=params)
+                    time.sleep(0.2)
+                    self.results = req.content
+                    self.totalresults += self.results
+                except Exception:  #if something happens just continue
+                    continue

    def do_search_profiles(self):
        try:
@ -76,3 +96,34 @@ def process_profiles(self):
            time.sleep(0.3)
            self.counter += 100
            print "\tSearching " + str(self.counter) + " results..."
+
+    def append_dorks(self):
+        try:  # wrap in try-except incase filepaths are messed up
+            with open('../theHarvester/wordlists/dorks.txt', mode='r') as fp:
+                self.dorks = [dork.strip() for dork in fp]
+        except IOError as error:
+            print(error)
+
+    def construct_dorks(self):
+        #format is: site:targetwebsite.com + space + inurl:admindork
+        colon = "%3A"
+        plus = "%2B"
+        space = '+'
+        period = "%2E"
+        double_quote = "%22"
+        asterick = "%2A"
+        left_bracket = "%5B"
+        right_bracket = "%5D"
+        question_mark = "%3F"
+        slash = "%2F"
+        single_quote = "%27"
+        ampersand = "%26"
+        left_peren = "%28"
+        right_peren = "%29"
+        #populate links list required that we need correct html encoding to work properly
+        self.links = [self.database + space + self.word + space +
+                      str(dork).replace(':', colon).replace('+', plus).replace('.', period).replace('"', double_quote)
+                      .replace("*", asterick).replace('[', left_bracket).replace(']', right_bracket)
+                      .replace('?', question_mark).replace(' ', space).replace('/', slash).replace("'", single_quote)
+                      .replace("&", ampersand).replace('(', left_peren).replace(')', right_peren)
+                      for dork in self.dorks]
--- a/theHarvester.py
+++ b/theHarvester.py
@ -8,7 +8,6 @@
 import re
 import getopt
 import stash
-from discovery import googledork

 try:
    import requests
@ -46,7 +45,6 @@ def usage():
    print """       -b: data source: baidu, bing, bingapi, dogpile, google, googleCSE,
                        googleplus, google-profiles, linkedin, pgp, twitter, vhost, 
                        virustotal, threatcrowd, crtsh, netcraft, yahoo, all\n"""
-    print "       -g: perform google dorking"
    print "       -s: start in result number X (default: 0)"
    print "       -v: verify host name via dns resolution and search for virtual hosts"
    print "       -f: save the results into an HTML and XML file (both)"
@ -58,6 +56,7 @@ def usage():
    print "       -l: limit the number of results to work with(bing goes from 50 to 50 results,"
    print "            google 100 to 100, and pgp doesn't use this option)"
    print "       -h: use SHODAN database to query discovered hosts"
+    print "       -g: perform google dorking"
    print "\nExamples:"
    print "        " + comm + " -d microsoft.com -l 500 -b google -h myresults.html"
    print "        " + comm + " -d microsoft.com -b pgp"
@ -70,7 +69,7 @@ def start(argv):
        usage()
        sys.exit()
    try:
-        opts, args = getopt.getopt(argv, "l:d:b:s:vf:nghcpte:")
+        opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcgpte:")
    except getopt.GetoptError:
        usage()
        sys.exit()
@ -100,7 +99,6 @@ def start(argv):
        elif opt == '-d':
            word = arg
        elif opt == '-g':
-            print 'google dorking is true'
            google_dorking = True
        elif opt == '-s':
            start = int(arg)
@ -131,7 +129,7 @@ def start(argv):
    print "[-] Starting harvesting process for domain: " + word +  "\n" 
    if engine == "google":
        print "[-] Searching in Google:"
-        search = googlesearch.search_google(word, limit, start)
+        search = googlesearch.search_google(word, limit, start, google_dorking)
        search.process()
        all_emails = search.get_emails()
        all_hosts = search.get_hostnames()
@ -532,19 +530,6 @@ def start(argv):
    else:
        pass

-    # Google Dorking####################################################
-    if google_dorking == True:
-        print "Starting Google Dorking: "
-        search = googledork.google_dork(word,limit,start)
-        search.append_dorks()
-        search.construct_dorks()
-        emails = search.get_emails()
-        hosts = search.get_hostnames()
-        print emails
-        print hosts
-    else:
-        pass
-
    #Reporting#######################################################
    if filename != "":
        try:
--- a/wordlists/dorks.txt
+++ b/wordlists/dorks.txt
@ -31,4 +31,20 @@ administrator/account.%XT%
 Database_Administration/
 phpSQLiteAdmin/
 0admin/
-super_login%XT%
+super_login%XT%
+/spid/lang/lang.php?lang_path=
+allinurl:.br/index.php?loc=
+PSUser/
+secrets/
+login_admin%XT%
+loginerror/
+superuser%XT%
+authenticate.%XT%
+authentication.%XT%
+auth.%XT%
+authuser.%XT%
+authadmin.%XT%
+cp.%XT%
+modelsearch/login.%XT%
+moderator.%XT%
+moderator/