mirror of https://github.com/laramies/theHarvester.git
synced 2025-02-24 14:32:57 +08:00

Merge pull request #29 from NotoriousRebel/dev
Ported rest of modules to use aiohttp

This commit is contained in commit ac791355e2
4 changed files with 91 additions and 77 deletions
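Every hunk below follows the same pattern: a blocking `requests.get(url, headers=headers)` call becomes `await AsyncFetcher.fetch_all([url], headers=headers)`, which takes a list of URLs and returns a list of response bodies (hence the `resp[0]` indexing), and the methods that perform or await network I/O become coroutines. The `AsyncFetcher` helper lives in `theHarvester.lib.core` and its implementation is not part of this diff; below is a minimal sketch of what such an aiohttp-based fetcher could look like — the class shape, signatures, and error handling are assumptions, not the project's actual code:

```python
# Hypothetical sketch of an aiohttp-based fetch helper; the real
# theHarvester.lib.core.AsyncFetcher is outside this diff and may differ.
import asyncio
import aiohttp


class AsyncFetcher:

    @staticmethod
    async def fetch(session: aiohttp.ClientSession, url: str) -> str:
        # Return the response body as text; swallow errors so one bad
        # URL does not abort the whole batch.
        try:
            async with session.get(url) as response:
                return await response.text()
        except Exception:
            return ''

    @classmethod
    async def fetch_all(cls, urls, headers=None) -> list:
        # One shared session per batch; all requests run concurrently.
        async with aiohttp.ClientSession(headers=headers) as session:
            return await asyncio.gather(*(cls.fetch(session, url) for url in urls))
```

Returning plain strings rather than response objects is what lets the callers keep accumulating `self.totalresults += self.results` without decoding bytes themselves.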
@@ -1,8 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchGoogle:
@@ -18,85 +16,91 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         # Do normal scraping.
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=%40\"' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        searched = await search(self.results)
+        if searched:
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
+                print('self.results: ', self.results)
+                p.pprint(self.results, indent=4)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
-            except Exception:
+            except Exception as e:
+                print(e)
+                import traceback as t
+                t.print_exc()
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def do_search_profiles(self):
+    async def do_search_profiles(self):
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        if await search(self.results):
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
             except Exception:
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.emails()
+        return await rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.hostnames()
+        return await rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.fileurls(self.files)

-    def get_profiles(self):
+    async def get_profiles(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.profiles()

-    def process(self, google_dorking):
+    async def process(self, google_dorking):
         if google_dorking is False:
             while self.counter <= self.limit and self.counter <= 1000:
-                self.do_search()
+                await self.do_search()
                 print(f'\tSearching {self.counter} results.')
                 self.counter += 100
         else:  # Google dorking is true.
             self.counter = 0  # Reset counter.
             print('\n')
             print('[-] Searching with Google Dorks: ')
-            self.googledork()  # Call Google dorking method if user wanted it!
+            await self.googledork()  # Call Google dorking method if user wanted it!

-    def process_profiles(self):
+    async def process_profiles(self):
         while self.counter < self.limit:
-            self.do_search_profiles()
-            time.sleep(getDelay())
+            await self.do_search_profiles()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')

-    def append_dorks(self):
+    async def append_dorks(self):
         # Wrap in try-except incase filepaths are messed up.
         try:
             with open('wordlists/dorks.txt', mode='r') as fp:
@@ -104,7 +108,7 @@ def append_dorks(self):
         except FileNotFoundError as error:
             print(error)

-    def construct_dorks(self):
+    async def construct_dorks(self):
         # Format is: site:targetwebsite.com + space + inurl:admindork
         colon = '%3A'
         plus = '%2B'
@@ -128,12 +132,12 @@ def construct_dorks(self):
             .replace('&', ampersand).replace('(', left_peren).replace(')', right_peren).replace('|', pipe) + space + self.word
             for dork in self.dorks)

-    def googledork(self):
-        self.append_dorks()  # Call functions to create list.
-        self.construct_dorks()
-        self.send_dorks()
+    async def googledork(self):
+        await self.append_dorks()  # Call functions to create list.
+        await self.construct_dorks()
+        await self.send_dorks()

-    def send_dorks(self):  # Helper function to minimize code reusability.
+    async def send_dorks(self):  # Helper function to minimize code reusability.
         headers = {'User-Agent': googleUA}
         # Get random user agent to try and prevent google from blocking IP.
         for num in range(len(self.links)):

@@ -141,18 +145,18 @@ def send_dorks(self):  # Helper function to minimize code reusability.
                 if num % 10 == 0 and num > 0:
                     print(f'\tSearching through {num} results')
                 link = self.links[num]
-                req = requests.get(link, headers=headers)
-                self.results = req.text
-                if search(self.results):
+                req = await AsyncFetcher.fetch_all([link], headers=headers)
+                self.results = req[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(link)
+                        self.results = await google_workaround(link)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception:
                         # google blocked, no useful result
                         return
-                time.sleep(getDelay())
+                await asyncio.sleep(getDelay())
                 self.totalresults += self.results
             except Exception as e:
                 print(f'\tException Occurred {e}')
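With `process()` and the getters now coroutines, callers can no longer invoke them directly; they have to be scheduled on an event loop, e.g. with `asyncio.run()` on Python 3.7+. A usage sketch — the module path and constructor values are illustrative assumptions; only the class and method names come from the diff above:

```python
# Illustrative driver for the now-async SearchGoogle; module path and
# argument values are assumptions made for the example.
import asyncio

from theHarvester.discovery import googlesearch


async def main():
    engine = googlesearch.SearchGoogle('example.com', limit=500, start=0)
    await engine.process(google_dorking=False)
    print(await engine.get_emails())
    print(await engine.get_hostnames())

asyncio.run(main())
```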
@@ -1,8 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchLinkedin:
@@ -16,15 +15,15 @@ def __init__(self, word, limit):
         self.limit = int(limit)
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         urly = 'http://' + self.server + '/search?num=100&start=' + str(self.counter) + '&hl=en&meta=&q=site%3Alinkedin.com/in%20' + self.word
         try:
             headers = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(urly, headers=headers)
-            self.results = r.text
-            if search(self.results):
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
+            self.results = resp[0]
+            if await search(self.results):
                 try:
-                    self.results = google_workaround(urly)
+                    self.results = await google_workaround(urly)
                     if isinstance(self.results, bool):
                         print('Google is blocking your ip and the workaround, returning')
                         return
@@ -33,20 +32,20 @@ def do_search(self):
                 return
         except Exception as e:
             print(e)
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_people(self):
+    async def get_people(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.people_linkedin()
+        return await rawres.people_linkedin()

-    def get_links(self):
+    async def get_links(self):
         links = myparser.Parser(self.totalresults, self.word)
-        return splitter(links.links_linkedin())
+        return splitter(await links.links_linkedin())

-    def process(self):
+    async def process(self):
         while self.counter < self.limit:
-            self.do_search()
-            time.sleep(getDelay())
+            await self.do_search()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')
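The `time.sleep(getDelay())` → `await asyncio.sleep(getDelay())` swap in these loops is more than cosmetic: `time.sleep` blocks the entire event loop and stalls every other coroutine, while `await asyncio.sleep` suspends only the current one. A small self-contained demonstration of the difference, independent of theHarvester:

```python
import asyncio
import time


async def ticker():
    # Prints roughly twice per second -- but only while the event loop is free.
    for _ in range(6):
        print('tick', time.strftime('%X'))
        await asyncio.sleep(0.5)


async def polite_delay():
    await asyncio.sleep(2)   # yields control: the ticker keeps ticking


async def rude_delay():
    time.sleep(2)            # blocks the loop: the ticker stalls for 2 seconds


async def main():
    # Swap polite_delay() for rude_delay() to watch the ticker freeze.
    await asyncio.gather(ticker(), polite_delay())

asyncio.run(main())
```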
@@ -2,7 +2,7 @@
 from theHarvester.parsers import myparser
-import requests
 import random
-import time
+import asyncio


 class SearchTrello:
@@ -18,54 +18,54 @@ def __init__(self, word):
         self.hostnames = []
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search?num=300&start=xx&hl=en&q=site%3Atrello.com%20{self.word}'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 20) if num <= self.limit]
         # limit is 20 as that is the most results google will show per num
         headers = {'User-Agent': googleUA}
         for url in urls:
             try:
-                resp = requests.get(url, headers=headers)
-                self.results = resp.text
-                if search(self.results):
+                resp = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = resp[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(base_url)
+                        self.results = await google_workaround(base_url)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception as e:
                         print(e)
                 self.totalresults += self.results
-                time.sleep(getDelay() - .5)
+                await asyncio.sleep(getDelay() - .5)
             except Exception as e:
                 print(f'An exception has occurred in trello: {e}')

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.emails()

-    def get_urls(self):
+    async def get_urls(self):
         try:
             rawres = myparser.Parser(self.totalresults, 'trello.com')
-            self.trello_urls = set(rawres.urls())
+            self.trello_urls = set(await rawres.urls())
             self.totalresults = ''
             # reset what totalresults as before it was just google results now it is trello results
             headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])}
             # do not change the headers
-            req = (grequests.get(url, headers=headers, timeout=4) for url in self.trello_urls)
-            responses = grequests.imap(req, size=8)
+            print('fetching trello urls')
+            responses = await AsyncFetcher.fetch_all(self.trello_urls, headers=headers)
             for response in responses:
-                self.totalresults += response.content.decode('UTF-8')
+                self.totalresults += response

             rawres = myparser.Parser(self.totalresults, self.word)
-            self.hostnames = rawres.hostnames()
+            self.hostnames = await rawres.hostnames()
         except Exception as e:
             print(f'Error occurred: {e}')

-    def process(self):
-        self.do_search()
-        self.get_urls()
+    async def process(self):
+        await self.do_search()
+        await self.get_urls()
         print(f'\tSearching {self.counter} results.')

-    def get_results(self) -> tuple:
-        return self.get_emails(), self.hostnames, self.trello_urls
+    async def get_results(self) -> tuple:
+        return await self.get_emails(), self.hostnames, self.trello_urls
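One behavioral difference worth noting in this file: the old Trello path throttled itself with `grequests.imap(req, size=8)`, i.e. at most eight requests in flight, while `AsyncFetcher.fetch_all` as used here issues every request at once. If the helper ever needs the same cap, an `asyncio.Semaphore` is the usual asyncio tool. A sketch under the assumption of aiohttp, mirroring the old `size=8` and `timeout=4` settings; the name and signature are invented for illustration, not the project's actual code:

```python
# Hypothetical bounded-concurrency variant of fetch_all; names are assumptions.
import asyncio
import aiohttp


async def fetch_all_bounded(urls, headers=None, limit: int = 8) -> list:
    # At most `limit` requests in flight, like grequests.imap(..., size=8).
    semaphore = asyncio.Semaphore(limit)

    async def fetch(session: aiohttp.ClientSession, url: str) -> str:
        async with semaphore:
            try:
                timeout = aiohttp.ClientTimeout(total=4)  # old grequests timeout
                async with session.get(url, timeout=timeout) as response:
                    return await response.text()
            except Exception:
                return ''

    async with aiohttp.ClientSession(headers=headers) as session:
        return await asyncio.gather(*(fetch(session, url) for url in urls))
```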
@@ -1,3 +1,4 @@
 from theHarvester.discovery.constants import *
+from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
@@ -19,9 +20,19 @@ async def do_search(self):
         headers = {'User-Agent': Core.get_user_agent()}
         try:
             urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
-            responses = await AsyncFetcher.fetch_all(urls, headers=headers)
-            for response in responses:
-                self.totalresults += response
+            for url in urls:
+                response = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = response[0]
+                if await search(self.results):
+                    try:
+                        self.results = await google_workaround(url)
+                        if isinstance(self.results, bool):
+                            print('Google is blocking your ip and the workaround, returning')
+                            return
+                    except Exception:
+                        # google blocked, no useful result
+                        return
+                self.totalresults += self.results
         except Exception as error:
             print(error)
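This last hunk trades the batched `fetch_all(urls, ...)` for a per-URL loop so that every page can be screened by the awaited `search()` block detector (with the `google_workaround` fallback) before its text is kept — slower, but a block on page one no longer poisons the whole batch. The detector itself is defined in `theHarvester.discovery.constants`, outside this diff; conceptually it is a substring test along these lines (an assumed stand-in, not the real code):

```python
# Hypothetical stand-in for theHarvester.discovery.constants.search();
# the real detector is outside this diff and may differ.
async def search(text: str) -> bool:
    # True when the response looks like a Google block or captcha page.
    markers = ('our systems have detected unusual traffic', 'captcha')
    return any(marker in text.lower() for marker in markers)
```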