Merge pull request #14 from NotoriousRebel/dev

ported hunter & exalead
Matt 2019-12-26 00:02:32 -05:00 committed by GitHub
commit 407b3f01d3
7 changed files with 44 additions and 55 deletions
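For orientation: this port replaces the blocking grequests/requests calls in the hunter and exalead modules with the shared async_fetcher helper, turning their public methods into coroutines. A minimal driver sketch follows (hypothetical code: SearchHunter, its constructor arguments, and the coroutine names come from the diff below; the module path and sample values are assumptions):

import asyncio
from theHarvester.discovery import huntersearch  # module path assumed for this sketch

async def demo() -> None:
    # word, limit, start mirror the __init__ signature shown in the diff;
    # hunter needs an API key configured, the MissingKey handling in the first hunk covers that case
    search = huntersearch.SearchHunter('example.com', 100, 0)
    await search.process()              # process() now awaits do_search() once
    emails = await search.get_emails()  # parser helpers are coroutines after the port
    hosts = await search.get_hostnames()
    print(emails, hosts)

if __name__ == '__main__':
    asyncio.run(demo())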


@@ -103,6 +103,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
db_stash.store_all(word, all_hosts, 'host', source)
if store_emails:
email_list = filter(await search_engine.get_emails())
all_emails.extend(email_list)
db_stash.store_all(word, email_list, 'email', source)
if store_ip:
ips_list = await search_engine.get_ips()
@@ -175,7 +176,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
if isinstance(e, MissingKey):
print(e)
else:
pass
print(e)
elif engineitem == 'certspotter':
print('\033[94m[*] Searching CertSpotter. \033[0m')
@@ -669,4 +670,6 @@ async def entry_point():
if __name__ == '__main__':
#import uvloop
#uvloop.install()
asyncio.run(main=entry_point())


@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
# import grequests
from theHarvester.lib.core import async_fetcher
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
self.counter = start
async def do_search(self):
print('hello from bing do search')
headers = {
'Host': self.hostname,
'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',


@@ -1,10 +1,7 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import re
import time
import grequests
import requests
import asyncio
class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
self.limit = limit
self.counter = start
def do_search(self):
async def do_search(self):
base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
headers = {
'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
'User-agent': Core.get_user_agent()
}
urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
req = []
for url in urls:
req.append(grequests.get(url, headers=headers, timeout=5))
time.sleep(3)
responses = grequests.imap(tuple(req), size=3)
responses = await async_fetcher.fetch_all(urls, headers=headers)
for response in responses:
# TODO: if the decoded content mentions a captcha, print a message telling the user to visit the website
# TODO: and solve it, or to use a VPN, since the blocking appears to be IP based
self.total_results += response.content.decode('UTF-8')
self.total_results += response
def do_search_files(self, files):
async def do_search_files(self, files):
url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
f'=50&start_index={self.counter} '
f'=50&start_index={self.counter} '
headers = {
'Host': self.hostname,
'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
'User-agent': Core.get_user_agent()
}
h = requests.get(url=url, headers=headers)
self.results = h.text
responses = await async_fetcher.fetch_all([url], headers=headers)
self.results = responses[0]
self.total_results += self.results
def check_next(self):
async def check_next(self):
renext = re.compile('topNextUrl')
nextres = renext.findall(self.results)
if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
nexty = '0'
return nexty
def get_emails(self):
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.emails()
def get_hostnames(self):
async def get_hostnames(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.hostnames()
def get_files(self):
async def get_files(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.fileurls(self.files)
def process(self):
async def process(self):
print('Searching results')
self.do_search()
await self.do_search()
def process_files(self, files):
async def process_files(self, files):
while self.counter < self.limit:
self.do_search_files(files)
time.sleep(getDelay())
await self.do_search_files(files)
more = await self.check_next()
await asyncio.sleep(2)
if more == '1':
self.counter += 50
else:


@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import grequests
class SearchHunter:
@@ -17,22 +16,21 @@ def __init__(self, word, limit, start):
self.counter = start
self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}'
def do_search(self):
request = grequests.get(self.database)
response = grequests.map([request])
self.total_results = response[0].content.decode('UTF-8')
async def do_search(self):
responses = await async_fetcher.fetch_all([self.database], headers={'User-Agent': Core.get_user_agent()})
self.total_results += responses[0]
def process(self):
self.do_search() # Only need to do it once.
async def process(self):
await self.do_search() # Only need to do it once.
def get_emails(self):
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.emails()
def get_hostnames(self):
async def get_hostnames(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.hostnames()
def get_profiles(self):
async def get_profiles(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.profiles()


@@ -1,6 +1,5 @@
from theHarvester.lib.core import *
from bs4 import BeautifulSoup
import requests
import aiohttp
import asyncio
@@ -17,7 +16,7 @@ def __init__(self, word: str):
async def request(self, url, params):
headers = {'User-Agent': Core.get_user_agent()}
data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
timeout = aiohttp.ClientTimeout(total=360)
timeout = aiohttp.ClientTimeout(total=720)
# aiohttp's default timeout is 5 minutes; we raise it to 12 minutes here
# Depending on the domain and how many subdomains it has, you may want to tweak it
# The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
hosts: list = str(soup.find('pre')).splitlines()
await self.clean_hosts(hosts)
except Exception as e:
print('An exception has occurred: ', e)
import traceback as t
t.print_exc()
print(f'An exception has occurred: {e}')
async def get_hostnames(self) -> set:
return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
if host[0] == '.':
self.totalhosts.add(host[1:])
else:
self.totalhosts.add(host)
self.totalhosts.add(host)
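The timeout comment in the hunk above invites tuning per target domain; a minimal sketch of making that value configurable (only aiohttp.ClientTimeout and the 720-second total come from the diff, the environment-variable override is purely hypothetical):

import os
import aiohttp

# 720 seconds matches the total used above; the env var name is an assumption for illustration
total_seconds = int(os.environ.get('THEHARVESTER_TIMEOUT', '720'))
timeout = aiohttp.ClientTimeout(total=total_seconds)
# The timeout object would then be passed to the session, e.g. aiohttp.ClientSession(timeout=timeout)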


@@ -379,14 +379,17 @@ class async_fetcher:
async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
# This fetch method solely focuses on get requests
# TODO determine if method for post requests is necessary
if len(params) == 0:
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
try:
if params != '':
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
except Exception:
return ''
@staticmethod
async def fetch_all(urls, headers='', params='') -> list:
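The try/except added above means a failed GET now comes back as an empty string instead of propagating an exception. Assuming fetch_all() simply dispatches each URL through fetch() (its body is not shown here), a minimal consumption sketch with placeholder URLs:

import asyncio
from theHarvester.lib.core import async_fetcher, Core

async def demo() -> None:
    headers = {'User-Agent': Core.get_user_agent()}
    urls = ['https://example.com/a', 'https://example.com/b']  # placeholder URLs
    # fetch_all returns one response body per URL; per the wrapper above,
    # a request that raises yields '' rather than failing the whole batch
    responses = await async_fetcher.fetch_all(urls, headers=headers)
    for url, body in zip(urls, responses):
        print(url, len(body))

if __name__ == '__main__':
    asyncio.run(demo())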


@@ -9,7 +9,6 @@
except Exception as error:
print(f'{error}')
class GraphGenerator:
def __init__(self, domain):
@@ -92,4 +91,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
output_type='div')
return scatterchartcode
except Exception as e:
print(f'Error generating HTML for the historical graph for domain: {e}')
print(f'Error generating HTML for the historical graph for domain: {e}')