From c2f86f7c5446794adb600dd43545402fc100636c Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Fri, 19 Oct 2018 16:23:49 -0400 Subject: [PATCH] Modified google dork to correctly generate urls and added user agent list. Also had to import it for some reason, may be an IDE issue, have to test later to confirm. --- discovery/googledork.py | 71 ++++++++++++++++++++++++++++++----------- theHarvester.py | 12 ++++--- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/discovery/googledork.py b/discovery/googledork.py index 8c5e04f3..09363661 100644 --- a/discovery/googledork.py +++ b/discovery/googledork.py @@ -1,20 +1,24 @@ -import string -import sys import myparser -import re import time import requests +import random class google_dork: - def __init__(self, target, limit, start): - self.target = target + def __init__(self, word, limit, start): + self.word = word self.results = "" self.totalresults = "" self.dorks = [] self.links = [] self.database = "https://www.google.com/search?q=" - self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6" + #create list of userAgents to shuffle through + self.userAgent = ["(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36" + ,("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"), + ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254")] self.quantity = "100" self.limit = limit self.counter = start @@ -22,8 +26,7 @@ def __init__(self, target, limit, start): def append_dorks(self): try: #wrap in try-except incase filepaths are messed up with open('../theHarvester/wordlists/dorks.txt',mode='r') as fp: - for dork in fp: - self.dorks.append(dork) + self.dorks = [dork.strip() for dork in fp] except IOError as error: print(error) @@ -32,25 +35,57 @@ def construct_dorks(self): colon = "%3A" plus = "%2B" space = '+' - #populate links list - self.links = [self.database + space + str(dork).replace(':',colon).replace('+',plus) for dork in self.dorks] - - def temp(self): - for link in self.links: - print('link is: link') + period = "%2E" + double_quote = "%22" + asterick = "%2A" + left_bracket = "%5B" + right_bracket = "%5D" + question_mark = "%3F" + slash = "%2F" + single_quote = "%27" + ampersand = "%26" + left_peren = "%28" + right_peren = "%29" + #populate links list required that we need correct html encoding to work properly + self.links = [self.database + space + self.word + space + + str(dork).replace(':', colon).replace('+', plus).replace('.', period).replace('"', double_quote) + .replace("*", asterick).replace('[', left_bracket).replace(']', right_bracket) + .replace('?', question_mark).replace(' ', space).replace('/', slash).replace("'", single_quote) + .replace("&", ampersand).replace('(', left_peren).replace(')', right_peren) + for dork in self.dorks] def do_search(self): for link in self.links: try: - req = requests.get(link) + params = { + 'User-Agent': random.choice(self.userAgent) #grab random User-Agent to avoid google blocking ip + } + req = requests.get(link, params=params) time.sleep(0.2) self.results = req.content self.totalresults += self.results - except Exception: #if something happens + except Exception: #if something happens just continue continue def get_emails(self): - pass + rawres = myparser.parser(self.totalresults, self.word) + return rawres.emails() + + def get_hostnames(self): + rawres = myparser.parser(self.totalresults, self.word) + return rawres.hostnames() def get_files(self): - pass + rawres = myparser.parser(self.totalresults, self.word) + return rawres.fileurls(self.files) + + def get_profiles(self): + rawres = myparser.parser(self.totalresults, self.word) + return rawres.profiles() + + def process(self): + while self.counter <= self.limit and self.counter <= 1000: + self.do_search() + time.sleep(0.8) + print "\tSearching " + str(self.counter) + " results..." + self.counter += 100 diff --git a/theHarvester.py b/theHarvester.py index 4fff5a72..3e38557e 100644 --- a/theHarvester.py +++ b/theHarvester.py @@ -8,6 +8,7 @@ import re import getopt import stash +from discovery import googledork try: import requests @@ -99,6 +100,7 @@ def start(argv): elif opt == '-d': word = arg elif opt == '-g': + print 'google dorking is true' google_dorking = True elif opt == '-s': start = int(arg) @@ -531,14 +533,15 @@ def start(argv): pass # Google Dorking#################################################### - info_found = [] if google_dorking == True: print "Starting Google Dorking: " - search = googledork(target='www.microsoft.com', - limit=10, start=0) + search = googledork.google_dork(word,limit,start) search.append_dorks() search.construct_dorks() - search.temp() + emails = search.get_emails() + hosts = search.get_hostnames() + print emails + print hosts else: pass @@ -619,6 +622,7 @@ def start(argv): print "Error saving XML file: " + er sys.exit() + if __name__ == "__main__": try: start(sys.argv[1:])