Removed old code, ported exalead to use aiohttp.

This commit is contained in:
NotoriousRebel 2019-12-25 17:54:32 -05:00
parent 9fe27a0379
commit 90ed486184
4 changed files with 31 additions and 42 deletions

View file

@@ -1,7 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-# import grequests
 from theHarvester.lib.core import async_fetcher
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
         self.counter = start

     async def do_search(self):
-        print('hello from bing do search')
         headers = {
             'Host': self.hostname,
             'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',

View file

@@ -1,10 +1,7 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
-import time
-import grequests
-import requests
+import asyncio


 class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
         headers = {
             'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
             'User-agent': Core.get_user_agent()
         }
         urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
-        req = []
-        for url in urls:
-            req.append(grequests.get(url, headers=headers, timeout=5))
-        time.sleep(3)
-        responses = grequests.imap(tuple(req), size=3)
+        responses = await async_fetcher.fetch_all(urls, headers=headers)
         for response in responses:
-            # TODO if decoded content contains information about solving captcha print message to user to visit website
-            # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += response.content.decode('UTF-8')
+            self.total_results += response

-    def do_search_files(self, files):
+    async def do_search_files(self, files):
         url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
               f'=50&start_index={self.counter} '
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
+        responses = await async_fetcher.fetch_all(url, headers=headers)
+        self.results = responses[0]
         self.total_results += self.results

-    def check_next(self):
+    async def check_next(self):
         renext = re.compile('topNextUrl')
         nextres = renext.findall(self.results)
         if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
             nexty = '0'
         return nexty

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.fileurls(self.files)

-    def process(self):
+    async def process(self):
         print('Searching results')
-        self.do_search()
+        await self.do_search()

-    def process_files(self, files):
+    async def process_files(self, files):
         while self.counter < self.limit:
-            self.do_search_files(files)
-            time.sleep(getDelay())
+            await self.do_search_files(files)
             more = self.check_next()
+            await asyncio.sleep(2)
             if more == '1':
                 self.counter += 50
             else:
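
With every method on SearchExalead now a coroutine, callers have to drive it from an event loop instead of calling it synchronously. A minimal usage sketch, assuming the constructor signature shown above and a module path of theHarvester.discovery.exaleadsearch (the file name is not shown on this page):

# Illustrative driver only; module path and argument values are assumptions.
import asyncio
from theHarvester.discovery import exaleadsearch

async def main():
    search = exaleadsearch.SearchExalead(word='example.com', limit=100, start=0)
    await search.process()              # runs do_search() over aiohttp
    emails = await search.get_emails()  # the get_* helpers are coroutines now
    hosts = await search.get_hostnames()
    print(emails, hosts)

if __name__ == '__main__':
    asyncio.run(main())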

View file

@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 from bs4 import BeautifulSoup
-import requests
 import aiohttp
 import asyncio
@@ -17,7 +16,7 @@ def __init__(self, word: str):
     async def request(self, url, params):
         headers = {'User-Agent': Core.get_user_agent()}
         data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=360)
+        timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes we will change that to 6 minutes
         # Depending on the domain and if it has a lot of subdomains you may want to tweak it
         # The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
             hosts: list = str(soup.find('pre')).splitlines()
             await self.clean_hosts(hosts)
         except Exception as e:
-            print('An exception has occurred: ', e)
-            import traceback as t
-            t.print_exc()
+            print(f'An exception has occurred: {e}')

     async def get_hostnames(self) -> set:
         return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
             if host[0] == '.':
                 self.totalhosts.add(host[1:])
             else:
                 self.totalhosts.add(host)
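
The request coroutine above raises the total aiohttp.ClientTimeout from 360 to 720 seconds; that timeout and the form payload built alongside it are handed to the aiohttp session (the actual call falls outside this hunk). A minimal sketch of the pattern, with an illustrative URL, payload, and function name rather than this module's real ones:

# Sketch: pass a ClientTimeout and POST form data to aiohttp, as the coroutine above does.
import asyncio
import aiohttp

async def post_form(url: str, data: dict) -> str:
    timeout = aiohttp.ClientTimeout(total=720)  # overall budget of 720 s for the request
    headers = {'User-Agent': 'example-agent'}
    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
        async with session.post(url, data=data) as response:
            return await response.text()

if __name__ == '__main__':
    body = asyncio.run(post_form('https://example.com/submit', {'url': 'example.com', 'Submit1': 'Submit'}))
    print(len(body))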

View file

@@ -378,14 +378,17 @@ class async_fetcher:
     async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         # This fetch method solely focuses on get requests
        # TODO determine if method for post requests is necessary
-        if len(params) == 0:
-            async with session.get(url, params=params) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
-        else:
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
+        try:
+            if params != '':
+                async with session.get(url, params=params) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+        except Exception:
+            return ''

     @staticmethod
     async def fetch_all(urls, headers='', params='') -> list:
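
This hunk ends at the fetch_all signature; its body is unchanged and not shown here. For context, a companion helper that opens one shared ClientSession and gathers the per-URL fetch calls could look like the sketch below (an assumption about its shape, not the code from this commit):

# Sketch only: a fetch_all-style wrapper around a fetch() like the one shown above.
# theHarvester's real implementation is not displayed in this diff.
import asyncio
import aiohttp

async def fetch(session, url, params='', json=False):
    try:
        async with session.get(url, params=params if params != '' else None) as response:
            return await response.text() if json is False else await response.json()
    except Exception:
        return ''

async def fetch_all(urls, headers='', params=''):
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(headers=headers if headers != '' else {}, timeout=timeout) as session:
        # Schedule every URL concurrently and return the bodies in order.
        return await asyncio.gather(*(fetch(session, url, params) for url in urls))

if __name__ == '__main__':
    print(asyncio.run(fetch_all(['https://example.com'])))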