import myparser
import time
import requests
import random


class search_google:
    def __init__(self, word, limit, start):
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.server = "www.google.com"
        self.dorks = []
        self.links = []
        self.database = "https://www.google.com/search?q="
        self.userAgent = ["Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
                          ("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
                           "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
                          ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254"),
                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52",
                          "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
                          "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]
        self.quantity = "100"
        self.limit = limit
        self.counter = start

    def do_search(self):
        try:  # do normal scraping
            urly = "http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=%40\"" + self.word + "\""
        except Exception as e:
            print(e)
            return
        try:
            headers = {'User-Agent': random.choice(self.userAgent)}  # select a random user agent
            r = requests.get(urly, headers=headers)  # the user agent belongs in the request headers
        except Exception as e:
            print(e)
            return
        self.results = r.text  # .text gives a str, so the pages can be concatenated
        self.totalresults += self.results

    def do_search_profiles(self):
        try:
            urly = "http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20" + self.word + "\""
        except Exception as e:
            print(e)
            return
        try:
            r = requests.get(urly)
        except Exception as e:
            print(e)
            return
        self.results = r.text
        self.totalresults += self.results

    def get_emails(self):
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.emails()

    def get_hostnames(self):
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.hostnames()

    def get_files(self):
        # note: self.files is never set in __init__, so the caller must set it before using this
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.fileurls(self.files)

    def get_profiles(self):
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.profiles()

    def process(self, google_dorking):
        if not google_dorking:
            while self.counter <= self.limit and self.counter <= 1000:
                self.do_search()
                # more = self.check_next()
                time.sleep(1)
                print("\tSearching " + str(self.counter) + " results...")
                self.counter += 100
        else:  # google dorking is enabled
            self.counter = 0  # reset counter
            print('\n')
            print("[-] Searching with Google Dorks: ")
            while self.counter <= self.limit and self.counter <= 200:  # only 200 dorks in the list
                self.googledork()  # run the google dorking search the user asked for
                # more = self.check_next()
                time.sleep(.1)
                print("\tSearching " + str(self.counter) + " results...")
                self.counter += 100

    def process_profiles(self):
        while self.counter < self.limit:
            self.do_search_profiles()
            time.sleep(0.2)
            self.counter += 100
            print("\tSearching " + str(self.counter) + " results...")

    def append_dorks(self):
        try:  # wrap in try-except in case the file path is wrong
            with open('wordlists/dorks.txt', mode='r') as fp:
                self.dorks = [dork.strip() for dork in fp]
        except IOError as error:
            print(error)

    def construct_dorks(self):
        # format is: site:targetwebsite.com + space + inurl:admindork
        colon = "%3A"
        plus = "%2B"
        space = '+'
        period = "%2E"
        double_quote = "%22"
        asterisk = "%2A"
        left_bracket = "%5B"
        right_bracket = "%5D"
        question_mark = "%3F"
        slash = "%2F"
        single_quote = "%27"
        ampersand = "%26"
        left_peren = "%28"
        right_peren = "%29"
        pipe = '%7C'
        # percent-encode the reserved characters in each dork before building the links
        self.links = [self.database + space + self.word + space +
                      str(dork).replace(':', colon).replace('+', plus).replace('.', period).replace('"', double_quote)
                      .replace("*", asterisk).replace('[', left_bracket).replace(']', right_bracket)
                      .replace('?', question_mark).replace(' ', space).replace('/', slash).replace("'", single_quote)
                      .replace("&", ampersand).replace('(', left_peren).replace(')', right_peren).replace('|', pipe)
                      for dork in self.dorks]

    def googledork(self):
        self.append_dorks()  # load the dork wordlist
        self.construct_dorks()  # build the search links from it
        if 0 <= self.counter <= 100:
            self.send_dork(start=0, end=100)
        elif 100 < self.counter <= 200:
            self.send_dork(start=101, end=200)
        else:  # only 200 dorks are used, to keep google from blocking the ip
            pass

    def send_dork(self, start, end):  # helper that avoids duplicating the request loop
        headers = {'User-Agent': random.choice(self.userAgent)}
        # pick a random user agent to try to keep google from blocking the ip
        for i in range(start, end):
            try:
                link = self.links[i]  # get a link from the dork list
                req = requests.get(link, headers=headers)
                time.sleep(.2)  # sleep for a short time between requests
                self.results = req.text
                self.totalresults += self.results
            except Exception:
                continue
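

# A minimal usage sketch (an assumption for illustration, not part of the original
# module): drive the class defined above against a hypothetical target domain.
if __name__ == '__main__':
    search = search_google("example.com", limit=100, start=0)
    search.process(google_dorking=False)  # plain scraping, no dork wordlist
    print(search.get_emails())
    print(search.get_hostnames())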