Merge pull request #29 from NotoriousRebel/dev

Ported rest of modules to use aiohttp
Matt 2020-01-04 00:02:53 -05:00 committed by GitHub
commit ac791355e2
4 changed files with 91 additions and 77 deletions
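
The port follows one pattern in all four files: each blocking requests.get/time.sleep pair becomes an awaited AsyncFetcher.fetch_all call plus asyncio.sleep, and every method in the call chain turns into a coroutine. The diffs consume fetch_all as a coroutine that returns one response body per URL (resp[0] for a single URL). A minimal aiohttp sketch of a fetcher with that shape — an illustrative stand-in, not theHarvester's actual lib.core implementation:

import asyncio
import aiohttp

async def fetch_all(urls, headers=None):
    # Fetch every URL concurrently; return one response body per URL, in
    # order, which is how the modules below index into the result.
    async with aiohttp.ClientSession(headers=headers) as session:
        async def fetch(url):
            async with session.get(url) as response:
                return await response.text()
        return await asyncio.gather(*(fetch(url) for url in urls))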

View file

@@ -1,8 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchGoogle:
@@ -18,85 +16,91 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         # Do normal scraping.
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=%40\"' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        searched = await search(self.results)
+        if searched:
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
+                print('self.results: ', self.results)
+                p.pprint(self.results, indent=4)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
-            except Exception:
+            except Exception as e:
+                print(e)
+                import traceback as t
+                t.print_exc()
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def do_search_profiles(self):
+    async def do_search_profiles(self):
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        if await search(self.results):
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
             except Exception:
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.emails()
+        return await rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.hostnames()
+        return await rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.fileurls(self.files)

-    def get_profiles(self):
+    async def get_profiles(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.profiles()

-    def process(self, google_dorking):
+    async def process(self, google_dorking):
         if google_dorking is False:
             while self.counter <= self.limit and self.counter <= 1000:
-                self.do_search()
+                await self.do_search()
                 print(f'\tSearching {self.counter} results.')
                 self.counter += 100
         else:  # Google dorking is true.
             self.counter = 0  # Reset counter.
             print('\n')
             print('[-] Searching with Google Dorks: ')
-            self.googledork()  # Call Google dorking method if user wanted it!
+            await self.googledork()  # Call Google dorking method if user wanted it!

-    def process_profiles(self):
+    async def process_profiles(self):
         while self.counter < self.limit:
-            self.do_search_profiles()
-            time.sleep(getDelay())
+            await self.do_search_profiles()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')

-    def append_dorks(self):
+    async def append_dorks(self):
         # Wrap in try-except incase filepaths are messed up.
         try:
             with open('wordlists/dorks.txt', mode='r') as fp:
@@ -104,7 +108,7 @@ def append_dorks(self):
         except FileNotFoundError as error:
             print(error)

-    def construct_dorks(self):
+    async def construct_dorks(self):
         # Format is: site:targetwebsite.com + space + inurl:admindork
         colon = '%3A'
         plus = '%2B'
@@ -128,12 +132,12 @@ def construct_dorks(self):
             .replace('&', ampersand).replace('(', left_peren).replace(')', right_peren).replace('|', pipe) + space + self.word
             for dork in self.dorks)

-    def googledork(self):
-        self.append_dorks()  # Call functions to create list.
-        self.construct_dorks()
-        self.send_dorks()
+    async def googledork(self):
+        await self.append_dorks()  # Call functions to create list.
+        await self.construct_dorks()
+        await self.send_dorks()

-    def send_dorks(self):  # Helper function to minimize code reusability.
+    async def send_dorks(self):  # Helper function to minimize code reusability.
         headers = {'User-Agent': googleUA}
         # Get random user agent to try and prevent google from blocking IP.
         for num in range(len(self.links)):
@@ -141,18 +145,18 @@ def send_dorks(self):  # Helper function to minimize code reusability.
                 if num % 10 == 0 and num > 0:
                     print(f'\tSearching through {num} results')
                 link = self.links[num]
-                req = requests.get(link, headers=headers)
-                self.results = req.text
-                if search(self.results):
+                req = await AsyncFetcher.fetch_all([link], headers=headers)
+                self.results = req[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(link)
+                        self.results = await google_workaround(link)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception:
                         # google blocked, no useful result
                         return
-                time.sleep(getDelay())
+                await asyncio.sleep(getDelay())
                 self.totalresults += self.results
             except Exception as e:
                 print(f'\tException Occurred {e}')
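
Every public method of SearchGoogle is now a coroutine, so callers have to drive it from an event loop rather than invoking process() directly. A usage sketch — constructor arguments taken from __init__(self, word, limit, start) above, values illustrative:

import asyncio

async def run_google(word, limit, start):
    engine = SearchGoogle(word, limit, start)   # class patched above
    await engine.process(google_dorking=False)  # paged scraping with awaited delays
    return await engine.get_emails(), await engine.get_hostnames()

# asyncio.run(run_google('example.com', 500, 0))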

View file

@@ -1,8 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchLinkedin:
@@ -16,15 +15,15 @@ def __init__(self, word, limit):
         self.limit = int(limit)
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         urly = 'http://' + self.server + '/search?num=100&start=' + str(self.counter) + '&hl=en&meta=&q=site%3Alinkedin.com/in%20' + self.word
         try:
             headers = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(urly, headers=headers)
-            self.results = r.text
-            if search(self.results):
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
+            self.results = resp[0]
+            if await search(self.results):
                 try:
-                    self.results = google_workaround(urly)
+                    self.results = await google_workaround(urly)
                     if isinstance(self.results, bool):
                         print('Google is blocking your ip and the workaround, returning')
                         return
@@ -33,20 +32,20 @@ def do_search(self):
                     return
         except Exception as e:
             print(e)
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_people(self):
+    async def get_people(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.people_linkedin()
+        return await rawres.people_linkedin()

-    def get_links(self):
+    async def get_links(self):
         links = myparser.Parser(self.totalresults, self.word)
-        return splitter(links.links_linkedin())
+        return splitter(await links.links_linkedin())

-    def process(self):
+    async def process(self):
         while self.counter < self.limit:
-            self.do_search()
-            time.sleep(getDelay())
+            await self.do_search()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')
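
The payoff of an async process() in every module is that engines can share one event loop instead of each blocking on its own sleeps. A sketch — this gather orchestration is an assumption for illustration, not part of this commit:

import asyncio

async def run_both(word, limit):
    linkedin = SearchLinkedin(word, limit)
    trello = SearchTrello(word)
    # Awaited sleeps in one engine yield the loop to the other, so the two
    # searches interleave instead of running serially as with requests.
    await asyncio.gather(linkedin.process(), trello.process())

# asyncio.run(run_both('example.com', 100))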

View file

@@ -2,7 +2,7 @@
 from theHarvester.parsers import myparser
 import requests
 import random
-import time
+import asyncio


 class SearchTrello:
@@ -18,54 +18,54 @@ def __init__(self, word):
         self.hostnames = []
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search?num=300&start=xx&hl=en&q=site%3Atrello.com%20{self.word}'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 20) if num <= self.limit]
         # limit is 20 as that is the most results google will show per num
         headers = {'User-Agent': googleUA}
         for url in urls:
             try:
-                resp = requests.get(url, headers=headers)
-                self.results = resp.text
-                if search(self.results):
+                resp = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = resp[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(base_url)
+                        self.results = await google_workaround(base_url)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception as e:
                         print(e)
                 self.totalresults += self.results
-                time.sleep(getDelay() - .5)
+                await asyncio.sleep(getDelay() - .5)
             except Exception as e:
                 print(f'An exception has occurred in trello: {e}')

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.emails()

-    def get_urls(self):
+    async def get_urls(self):
         try:
             rawres = myparser.Parser(self.totalresults, 'trello.com')
-            self.trello_urls = set(rawres.urls())
+            self.trello_urls = set(await rawres.urls())
             self.totalresults = ''
             # reset what totalresults as before it was just google results now it is trello results
             headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])}
             # do not change the headers
-            req = (grequests.get(url, headers=headers, timeout=4) for url in self.trello_urls)
-            responses = grequests.imap(req, size=8)
+            print('fetching trello urls')
+            responses = await AsyncFetcher.fetch_all(self.trello_urls, headers=headers)
             for response in responses:
-                self.totalresults += response.content.decode('UTF-8')
+                self.totalresults += response
             rawres = myparser.Parser(self.totalresults, self.word)
-            self.hostnames = rawres.hostnames()
+            self.hostnames = await rawres.hostnames()
         except Exception as e:
             print(f'Error occurred: {e}')

-    def process(self):
-        self.do_search()
-        self.get_urls()
+    async def process(self):
+        await self.do_search()
+        await self.get_urls()
         print(f'\tSearching {self.counter} results.')

-    def get_results(self) -> tuple:
-        return self.get_emails(), self.hostnames, self.trello_urls
+    async def get_results(self) -> tuple:
+        return await self.get_emails(), self.hostnames, self.trello_urls
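
One behavioural change worth noting: the old grequests.imap(req, size=8) capped Trello fetches at eight in-flight requests with a 4-second timeout, while AsyncFetcher.fetch_all, as called here, exposes no such bound. If a cap is needed, a semaphore restores it; a hypothetical sketch, not part of this commit:

import asyncio
import aiohttp

async def fetch_bounded(urls, headers=None, limit=8):
    # At most `limit` requests in flight, mirroring grequests.imap(..., size=8),
    # with the same 4-second total timeout the old code passed per request.
    sem = asyncio.Semaphore(limit)
    timeout = aiohttp.ClientTimeout(total=4)
    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
        async def fetch(url):
            async with sem:
                async with session.get(url) as resp:
                    return await resp.text()
        return await asyncio.gather(*(fetch(url) for url in urls))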

View file

@@ -1,3 +1,4 @@
+from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
@@ -19,9 +20,19 @@ async def do_search(self):
         headers = {'User-Agent': Core.get_user_agent()}
         try:
             urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
-            responses = await AsyncFetcher.fetch_all(urls, headers=headers)
-            for response in responses:
-                self.totalresults += response
+            for url in urls:
+                response = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = response[0]
+                if await search(self.results):
+                    try:
+                        self.results = await google_workaround(url)
+                        if isinstance(self.results, bool):
+                            print('Google is blocking your ip and the workaround, returning')
+                            return
+                    except Exception:
+                        # google blocked, no useful result
+                        return
+                self.totalresults += self.results
         except Exception as error:
             print(error)
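
This last hunk reverses the earlier batch fetch: each page is fetched alone so it can be screened for a Google block before its text is kept. Distilled, the guard shared by all four modules looks like this — search() and google_workaround() are the coroutines star-imported from theHarvester.discovery.constants above, simplified here:

async def screened_fetch(url, headers):
    response = await AsyncFetcher.fetch_all([url], headers=headers)
    results = response[0]
    if await search(results):                   # Google returned a block/CAPTCHA page
        results = await google_workaround(url)  # fall back to the workaround endpoint
        if isinstance(results, bool):           # workaround was blocked as well
            return None                         # no useful result for this page
    return results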