Removed old code, ported exalead to use aiohttp.

NotoriousRebel 2019-12-25 17:54:32 -05:00
parent 9fe27a0379
commit 90ed486184
4 changed files with 31 additions and 42 deletions
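For context, the port swaps grequests' mapped requests for a single shared aiohttp session whose requests are gathered with asyncio. A minimal sketch of that pattern, assuming a hypothetical fetch_many helper rather than the real async_fetcher internals:

import asyncio
import aiohttp

async def fetch_one(session: aiohttp.ClientSession, url: str) -> str:
    # Return the response body as text, or '' on any failure (mirrors the diff's error handling).
    try:
        async with session.get(url) as response:
            return await response.text()
    except Exception:
        return ''

async def fetch_many(urls, headers=None) -> list:
    # Hypothetical helper: one shared session, all GET requests issued concurrently.
    async with aiohttp.ClientSession(headers=headers) as session:
        return await asyncio.gather(*(fetch_one(session, url) for url in urls))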

View file

@@ -1,7 +1,6 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
# import grequests
from theHarvester.lib.core import async_fetcher
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
self.counter = start
async def do_search(self):
print('hello from bing do search')
headers = {
'Host': self.hostname,
'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',

View file

@@ -1,10 +1,7 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import re
import time
import grequests
import requests
import asyncio
class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
self.limit = limit
self.counter = start
def do_search(self):
async def do_search(self):
base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
headers = {
'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
'User-agent': Core.get_user_agent()
}
urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
req = []
for url in urls:
req.append(grequests.get(url, headers=headers, timeout=5))
time.sleep(3)
responses = grequests.imap(tuple(req), size=3)
responses = await async_fetcher.fetch_all(urls, headers=headers)
for response in responses:
# TODO: if the decoded content mentions a captcha, print a message telling the user to visit the website
# TODO: and solve it, or to use a VPN, as the block appears to be IP based
self.total_results += response.content.decode('UTF-8')
self.total_results += response
def do_search_files(self, files):
async def do_search_files(self, files):
url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
f'=50&start_index={self.counter} '
f'=50&start_index={self.counter} '
headers = {
'Host': self.hostname,
'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
'User-agent': Core.get_user_agent()
}
h = requests.get(url=url, headers=headers)
self.results = h.text
responses = await async_fetcher.fetch_all(url, headers=headers)
self.results = responses[0]
self.total_results += self.results
def check_next(self):
async def check_next(self):
renext = re.compile('topNextUrl')
nextres = renext.findall(self.results)
if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
nexty = '0'
return nexty
def get_emails(self):
async def get_emails(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.emails()
def get_hostnames(self):
async def get_hostnames(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.hostnames()
def get_files(self):
async def get_files(self):
rawres = myparser.Parser(self.total_results, self.word)
return rawres.fileurls(self.files)
def process(self):
async def process(self):
print('Searching results')
self.do_search()
await self.do_search()
def process_files(self, files):
async def process_files(self, files):
while self.counter < self.limit:
self.do_search_files(files)
time.sleep(getDelay())
await self.do_search_files(files)
more = self.check_next()
await asyncio.sleep(2)
if more == '1':
self.counter += 50
else:
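With this change the exalead entry points are coroutines, so callers must await them from an event loop. A minimal usage sketch, assuming the constructor arguments shown above and a module named exaleadsearch (the file name is not shown in this view):

import asyncio
from theHarvester.discovery import exaleadsearch  # module name assumed

async def run() -> None:
    # word, limit, start as in the constructor shown in the diff
    search = exaleadsearch.SearchExalead('example.com', 100, 0)
    await search.process()
    print(await search.get_emails())
    print(await search.get_hostnames())

asyncio.run(run())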

View file

@@ -1,6 +1,5 @@
from theHarvester.lib.core import *
from bs4 import BeautifulSoup
import requests
import aiohttp
import asyncio
@@ -17,7 +16,7 @@ def __init__(self, word: str):
async def request(self, url, params):
headers = {'User-Agent': Core.get_user_agent()}
data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
timeout = aiohttp.ClientTimeout(total=360)
timeout = aiohttp.ClientTimeout(total=720)
# The default timeout is 5 minutes; we raise it to 12 minutes here.
# Depending on the domain and how many subdomains it has, you may want to tweak this.
# The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
hosts: list = str(soup.find('pre')).splitlines()
await self.clean_hosts(hosts)
except Exception as e:
print('An exception has occurred: ', e)
import traceback as t
t.print_exc()
print(f'An exception has occurred: {e}')
async def get_hostnames(self) -> set:
return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
if host[0] == '.':
self.totalhosts.add(host[1:])
else:
self.totalhosts.add(host)
self.totalhosts.add(host)
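The timeout bump above relies on aiohttp.ClientTimeout being attached to the session or request. A minimal sketch of that mechanism, with a placeholder URL and payload rather than the module's real endpoint:

import aiohttp

async def post_with_long_timeout(url: str, data: dict) -> str:
    # total=720 caps the whole request (connect + read) at 12 minutes.
    timeout = aiohttp.ClientTimeout(total=720)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(url, data=data) as response:
            return await response.text()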

View file

@@ -378,14 +378,17 @@ class async_fetcher:
async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
# This fetch method solely focuses on GET requests
# TODO: determine if a method for POST requests is necessary
if len(params) == 0:
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
try:
if params != '':
async with session.get(url, params=params) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
else:
async with session.get(url) as response:
await asyncio.sleep(2)
return await response.text() if json is False else await response.json()
except Exception:
return ''
@staticmethod
async def fetch_all(urls, headers='', params='') -> list:
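The hunk ends before the body of fetch_all. One plausible shape for it, reusing the fetch method above and sharing a single session across all URLs; this is a sketch under that assumption, not the file's actual implementation (it presumes aiohttp and asyncio are imported at module level):

    @staticmethod
    async def fetch_all(urls, headers='', params='') -> list:
        # Fall back to a random User-Agent when no headers are supplied (assumed behaviour).
        custom_headers = headers if headers else {'User-Agent': Core.get_user_agent()}
        async with aiohttp.ClientSession(headers=custom_headers) as session:
            # Issue every GET concurrently and collect the bodies in request order.
            return await asyncio.gather(*(async_fetcher.fetch(session, url, params) for url in urls))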