Commit 407b3f01d3 (https://github.com/laramies/theHarvester.git)
7 changed files with 44 additions and 55 deletions
@@ -103,6 +103,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
            db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(await search_engine.get_emails())
            all_emails.extend(email_list)
            db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = await search_engine.get_ips()
@@ -175,7 +176,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                    if isinstance(e, MissingKey):
                        print(e)
                    else:
-                        pass
+                        print(e)

            elif engineitem == 'certspotter':
                print('\033[94m[*] Searching CertSpotter. \033[0m')
@@ -669,4 +670,6 @@ async def entry_point():


if __name__ == '__main__':
+    #import uvloop
+    #uvloop.install()
    asyncio.run(main=entry_point())
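The two commented-out lines added above point at uvloop as an optional drop-in event loop. Below is a minimal hedged sketch of what enabling it would look like; uvloop is a third-party package and the stub entry_point() here stands in for theHarvester's real coroutine, so this snippet is illustrative rather than part of the commit.

# Illustrative only: enabling uvloop as hinted by the commented-out lines above.
# uvloop is an optional dependency (pip install uvloop); theHarvester runs fine
# on the default asyncio event loop without it.
import asyncio

async def entry_point():
    # Stand-in for theHarvester's real entry_point() coroutine.
    print('running on', type(asyncio.get_running_loop()).__name__)

if __name__ == '__main__':
    try:
        import uvloop
        uvloop.install()  # swap in uvloop's event loop policy before asyncio.run()
    except ImportError:
        pass  # fall back to the stock asyncio event loop
    asyncio.run(main=entry_point())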
@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
-# import grequests
from theHarvester.lib.core import async_fetcher

@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
        self.counter = start

    async def do_search(self):
-        print('hello from bing do search')
        headers = {
            'Host': self.hostname,
            'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
@@ -1,10 +1,7 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import re
-import time
-import grequests
-import requests
import asyncio


class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
        self.limit = limit
        self.counter = start

-    def do_search(self):
+    async def do_search(self):
        base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
        headers = {
            'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
            'User-agent': Core.get_user_agent()
        }
        urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
-        req = []
-        for url in urls:
-            req.append(grequests.get(url, headers=headers, timeout=5))
-            time.sleep(3)
-        responses = grequests.imap(tuple(req), size=3)
+        responses = await async_fetcher.fetch_all(urls, headers=headers)
        for response in responses:
            # TODO if decoded content contains information about solving captcha print message to user to visit website
            # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += response.content.decode('UTF-8')
+            self.total_results += response

-    def do_search_files(self, files):
+    async def do_search_files(self, files):
        url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
              f'=50&start_index={self.counter} '
        headers = {
            'Host': self.hostname,
            'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
            'User-agent': Core.get_user_agent()
        }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
+        responses = await async_fetcher.fetch_all([url], headers=headers)
+        self.results = responses[0]
        self.total_results += self.results

-    def check_next(self):
+    async def check_next(self):
        renext = re.compile('topNextUrl')
        nextres = renext.findall(self.results)
        if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
            nexty = '0'
        return nexty

-    def get_emails(self):
+    async def get_emails(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.fileurls(self.files)

-    def process(self):
+    async def process(self):
        print('Searching results')
-        self.do_search()
+        await self.do_search()

-    def process_files(self, files):
+    async def process_files(self, files):
        while self.counter < self.limit:
-            self.do_search_files(files)
-            time.sleep(getDelay())
+            await self.do_search_files(files)
            more = self.check_next()
+            await asyncio.sleep(2)
            if more == '1':
                self.counter += 50
            else:
@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
-import grequests


class SearchHunter:
@@ -17,22 +16,21 @@ def __init__(self, word, limit, start):
        self.counter = start
        self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}'

-    def do_search(self):
-        request = grequests.get(self.database)
-        response = grequests.map([request])
-        self.total_results = response[0].content.decode('UTF-8')
+    async def do_search(self):
+        responses = await async_fetcher.fetch_all([self.database], headers={'User-Agent': Core.get_user_agent()})
+        self.total_results += responses[0]

-    def process(self):
-        self.do_search()  # Only need to do it once.
+    async def process(self):
+        await self.do_search()  # Only need to do it once.

-    def get_emails(self):
+    async def get_emails(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.hostnames()

-    def get_profiles(self):
+    async def get_profiles(self):
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.profiles()
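With do_search, process, and the getters all converted to coroutines, SearchHunter can only be driven from async code. A hedged usage sketch consistent with the signatures shown above; the concrete limit and start values are assumptions, and a hunter.io API key must already be configured for the module.

# Illustrative only: driving the now-async SearchHunter from a coroutine.
# Constructor arguments follow the __init__(self, word, limit, start) signature
# in the hunk header; the values used here are made up.
import asyncio
from theHarvester.discovery import huntersearch

async def run_hunter(domain: str):
    search = huntersearch.SearchHunter(domain, 10, 0)   # requires a hunter.io API key
    await search.process()                              # performs the single GET internally
    emails = await search.get_emails()
    hosts = await search.get_hostnames()
    return emails, hosts

if __name__ == '__main__':
    print(asyncio.run(run_hunter('example.com')))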
@@ -1,6 +1,5 @@
from theHarvester.lib.core import *
from bs4 import BeautifulSoup
-import requests
import aiohttp
import asyncio

@@ -17,7 +16,7 @@ def __init__(self, word: str):
    async def request(self, url, params):
        headers = {'User-Agent': Core.get_user_agent()}
        data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=360)
+        timeout = aiohttp.ClientTimeout(total=720)
        # by default timeout is 5 minutes we will change that to 6 minutes
        # Depending on the domain and if it has a lot of subdomains you may want to tweak it
        # The results are well worth the wait :)
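The only change in this hunk doubles the aiohttp client timeout from 360 to 720 seconds (the inline comment about six minutes is now out of date; the new value is twelve minutes). For reference, a small hedged sketch of how a ClientTimeout is typically threaded through an aiohttp POST like the request() coroutine above; the target URL and form fields below are placeholders, not the module's real endpoint.

# Illustrative only: applying an aiohttp.ClientTimeout to a POST, mirroring the
# headers/data/timeout variables built in the hunk above.
import asyncio
import aiohttp

async def submit(word: str) -> str:
    headers = {'User-Agent': 'Mozilla/5.0'}
    data = {'url': word.replace('www.', ''), 'Submit1': 'Submit'}
    timeout = aiohttp.ClientTimeout(total=720)  # 12 minutes end to end
    async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
        async with session.post('https://example.com/submit', data=data) as response:
            return await response.text()

if __name__ == '__main__':
    asyncio.run(submit('www.example.com'))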
@@ -51,9 +50,7 @@ async def do_search(self):
            hosts: list = str(soup.find('pre')).splitlines()
            await self.clean_hosts(hosts)
        except Exception as e:
-            print('An exception has occurred: ', e)
-            import traceback as t
-            t.print_exc()
+            print(f'An exception has occurred: {e}')

    async def get_hostnames(self) -> set:
        return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
            if host[0] == '.':
                self.totalhosts.add(host[1:])
            else:
                self.totalhosts.add(host)
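The last two hunks for this module collapse the exception handling to a single f-string and touch the clean_hosts() step that normalizes hostnames scraped from the page's <pre> block. A hedged sketch of that cleaning logic in isolation; the sample HTML below is made up.

# Illustrative only: the leading-dot normalization clean_hosts() applies to
# hostnames pulled from str(soup.find('pre')).splitlines(). Sample input is fake.
from bs4 import BeautifulSoup

html = '<pre>\n.mail.example.com\nwww.example.com\n</pre>'
hosts = str(BeautifulSoup(html, 'html.parser').find('pre')).splitlines()

totalhosts = set()
for host in hosts:
    host = host.strip()
    if not host or host.startswith('<'):          # skip the <pre>/</pre> markup lines
        continue
    totalhosts.add(host[1:] if host[0] == '.' else host)

print(totalhosts)  # {'mail.example.com', 'www.example.com'}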
@@ -379,14 +379,17 @@ class async_fetcher:
    async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
        # This fetch method solely focuses on get requests
        # TODO determine if method for post requests is necessary
-        if len(params) == 0:
-            async with session.get(url, params=params) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
-        else:
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
+        try:
+            if params != '':
+                async with session.get(url, params=params) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+        except Exception:
+            return ''

    @staticmethod
    async def fetch_all(urls, headers='', params='') -> list:
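fetch() now swallows request errors and returns an empty string, so engine modules such as the Exalead and Hunter classes above can concatenate whatever comes back without extra guards. The body of fetch_all() is not part of this diff; the sketch below is one plausible way it could fan URLs out over a shared aiohttp session with asyncio.gather, and is an assumption rather than the repository's actual implementation.

# Hedged sketch only: a plausible fetch_all() built on top of the fetch() shown
# above. The real implementation is not included in this commit's diff.
import asyncio
import aiohttp

async def fetch(session, url, params='', json=False):
    try:
        if params != '':
            async with session.get(url, params=params) as response:
                return await response.text() if json is False else await response.json()
        else:
            async with session.get(url) as response:
                return await response.text() if json is False else await response.json()
    except Exception:
        return ''

async def fetch_all(urls, headers='', params='') -> list:
    # Assumed behaviour: one shared session, concurrent GETs, results returned
    # in the same order as the input URLs.
    headers = headers if headers else {}
    async with aiohttp.ClientSession(headers=headers) as session:
        return await asyncio.gather(*(fetch(session, url, params) for url in urls))

if __name__ == '__main__':
    pages = asyncio.run(fetch_all(['https://example.com'], headers={'User-Agent': 'Mozilla/5.0'}))
    print(len(pages[0]))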
@@ -9,7 +9,6 @@
except Exception as error:
    print(f'{error}')


class GraphGenerator:

    def __init__(self, domain):
@@ -92,4 +91,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
                                                 output_type='div')
            return scatterchartcode
        except Exception as e:
            print(f'Error generating HTML for the historical graph for domain: {e}')