from parsers import myparser
import time
import requests
from discovery.constants import *


class search_google:
    """Scrape Google result pages for a target word and parse the raw HTML.

    Raw response bodies are accumulated in ``totalresults``; the ``get_*``
    methods hand that text to ``myparser.parser`` for extraction.

    ``googleUA``, ``getDelay`` and ``search`` are expected to come from the
    star-import of ``discovery.constants``.
    """

    # Single-pass escape table for construct_dorks().  A literal '+' becomes
    # %2B while a space becomes '+', exactly as the former chained
    # str.replace() calls produced.
    _DORK_ESCAPES = str.maketrans({
        ':': '%3A',
        '+': '%2B',
        '.': '%2E',
        '"': '%22',
        '*': '%2A',
        '[': '%5B',
        ']': '%5D',
        '?': '%3F',
        ' ': '+',
        '/': '%2F',
        "'": '%27',
        '&': '%26',
        '(': '%28',
        ')': '%29',
        '|': '%7C',
    })

    def __init__(self, word, limit, start):
        """Store the search parameters.

        Args:
            word: Target string appended to every query.
            limit: Upper bound for the result offset (``counter``).
            start: Initial result offset.
        """
        self.word = word
        self.results = ""        # body of the most recent response
        self.totalresults = ""   # concatenation of every response body
        self.server = "www.google.com"
        self.dorks = []          # raw dork lines read from the wordlist
        self.links = []          # fully built dork query URLs
        self.database = "https://www.google.com/search?q="
        self.quantity = "100"    # value of the num= query parameter
        self.limit = limit
        self.counter = start
        # get_files() reads this attribute, but it was never initialised, so
        # the call always raised AttributeError.
        # NOTE(review): presumably a file-type filter for
        # myparser.fileurls() -- confirm the intended default.
        self.files = ""

    def _fetch(self, url):
        """GET ``url``, record the body, and throttle.

        On success the body is stored in ``results`` and appended to
        ``totalresults``.  The pause is five times longer when
        ``search(body)`` is truthy.

        Returns:
            True if a response was received, False if the request raised.
        """
        headers = {'User-Agent': googleUA}
        try:
            response = requests.get(url, headers=headers)
        except Exception as e:
            # Best effort: report and skip this page instead of falling
            # through to an unbound response object.
            print(e)
            return False
        self.results = response.text
        if search(self.results):
            time.sleep(getDelay() * 5)  # sleep for a longer time
        else:
            time.sleep(getDelay())
        self.totalresults += self.results
        return True

    def do_search(self):
        """Fetch one result page for ``@"<word>"`` at the current offset."""
        url = ("http://" + self.server + "/search?num=" + self.quantity
               + "&start=" + str(self.counter)
               + '&hl=en&meta=&q=%40"' + self.word + '"')
        self._fetch(url)

    def do_search_profiles(self):
        """Fetch one page of the Google-Profile query for the target word."""
        url = ("http://" + self.server + "/search?num=" + self.quantity
               + "&start=" + str(self.counter)
               + '&hl=en&meta=&q=site:www.google.com%20intitle:'
                 '"Google%20Profile"%20"Companies%20I%27ve%20worked%20for"'
                 '%20"at%20' + self.word + '"')
        self._fetch(url)

    def get_emails(self):
        """Return ``myparser``'s e-mail extraction over all collected text."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.emails()

    def get_hostnames(self):
        """Return ``myparser``'s hostname extraction over all collected text."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.hostnames()

    def get_files(self):
        """Return ``myparser``'s file-URL extraction, passing ``self.files``."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.fileurls(self.files)

    def get_profiles(self):
        """Return ``myparser``'s profile extraction over all collected text."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.profiles()

    def process(self, google_dorking):
        """Run the search loop.

        Args:
            google_dorking: When exactly ``False``, do normal scraping in
                steps of 100 up to ``limit`` (capped at 1000).  Any other
                value resets the counter and runs the dork queries.
        """
        if google_dorking is False:
            while self.counter <= self.limit and self.counter <= 1000:
                self.do_search()
                print("\tSearching " + str(self.counter) + " results...")
                self.counter += 100
        else:  # google dorking is true
            self.counter = 0  # reset counter
            print('\n')
            print("[-] Searching with Google Dorks: ")
            # only 200 dorks in list
            while self.counter <= self.limit and self.counter <= 200:
                self.googledork()
                print("\tSearching " + str(self.counter) + " results...")
                self.counter += 100

    def process_profiles(self):
        """Run the profile search in steps of 100 while ``counter < limit``."""
        while self.counter < self.limit:
            self.do_search_profiles()
            time.sleep(getDelay())
            self.counter += 100
            print("\tSearching " + str(self.counter) + " results...")

    def append_dorks(self):
        """Load the dork list from ``wordlists/dorks.txt`` into ``dorks``.

        A missing file is reported and leaves ``dorks`` unchanged.
        """
        try:  # the relative path depends on the working directory
            with open('wordlists/dorks.txt', mode='r') as fp:
                self.dorks = [dork.strip() for dork in fp]
        except FileNotFoundError as error:
            print(error)

    def construct_dorks(self):
        """Build ``links``: one query URL per entry in ``dorks``.

        Each link is ``database + '+' + word + '+' + <escaped dork>``.  Only
        the dork text is escaped; ``word`` is inserted verbatim.
        """
        space = '+'
        prefix = self.database + space + self.word + space
        self.links = [prefix + str(dork).translate(self._DORK_ESCAPES)
                      for dork in self.dorks]

    def googledork(self):
        """Send the batch of dork queries selected by the current counter.

        Counter in [0, 100) sends links 0-99 and counter in [100, 200) sends
        links 100-199.  Anything else sends nothing: only 200 dorks are used,
        to reduce the chance of Google blocking the IP.
        """
        self.append_dorks()
        self.construct_dorks()
        # Half-open ranges: previously counter == 100 re-sent the first batch
        # and link index 100 was never requested.
        if 0 <= self.counter < 100:
            self.send_dork(start=0, end=100)
        elif 100 <= self.counter < 200:
            self.send_dork(start=100, end=200)

    def send_dork(self, start, end):
        """Request ``links[start:end]``, skipping any link that fails.

        Slicing tolerates a list shorter than ``end``, so no iterations are
        wasted on out-of-range indices.
        """
        for link in self.links[start:end]:
            try:
                self._fetch(link)
            except Exception as e:
                # Keep going on per-link failures, but let KeyboardInterrupt
                # and SystemExit propagate.
                print(e)