Mirror of https://github.com/laramies/theHarvester.git
Synced 2024-09-22 00:06:30 +08:00

Commit 90ed486184 (parent 9fe27a0379)

    Removed old code, ported exalead to use aiohttp.
File paths are not preserved in this capture. The first file is evidently the Bing module (note the removed debug print); it drops a commented-out grequests import and a stray print statement.

@@ -1,7 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-# import grequests
 from theHarvester.lib.core import async_fetcher


@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
         self.counter = start

     async def do_search(self):
-        print('hello from bing do search')
         headers = {
             'Host': self.hostname,
             'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
The second file is the exalead module itself (class SearchExalead), the subject of the commit message: time, grequests, and requests go away in favour of asyncio and the aiohttp-based helper.

@@ -1,10 +1,7 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
-import time
-import grequests
-import requests
+import asyncio


 class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
         headers = {
             'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
             'User-agent': Core.get_user_agent()
         }
         urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
-        req = []
-        for url in urls:
-            req.append(grequests.get(url, headers=headers, timeout=5))
-        time.sleep(3)
-        responses = grequests.imap(tuple(req), size=3)
+        responses = await async_fetcher.fetch_all(urls, headers=headers)
         for response in responses:
-            # TODO if decoded content contains information about solving captcha print message to user to visit website
-            # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += response.content.decode('UTF-8')
+            self.total_results += response

-    def do_search_files(self, files):
+    async def do_search_files(self, files):
         url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
               f'=50&start_index={self.counter} '
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
+        responses = await async_fetcher.fetch_all(url, headers=headers)
+        self.results = responses[0]
         self.total_results += self.results

-    def check_next(self):
+    async def check_next(self):
         renext = re.compile('topNextUrl')
         nextres = renext.findall(self.results)
         if nextres != []:
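This hunk is the heart of the port: the grequests batch (build a request list, sleep three seconds, drain grequests.imap) collapses into one awaited async_fetcher.fetch_all call that yields already-decoded bodies. One caveat worth flagging: do_search hands fetch_all the list urls, while do_search_files hands it the bare string url; if fetch_all iterates its first argument, a string would be fanned out character by character, so [url] was likely intended. A minimal sketch of the pattern, assuming nothing about fetch_all beyond what the diff shows (fetch_pages and get_text are illustrative names, not theHarvester code):

    # Illustrative only: fetch a list of page URLs concurrently with
    # aiohttp, returning decoded text like the loop above expects.
    import asyncio
    import aiohttp

    async def fetch_pages(urls: list, headers: dict) -> list:
        async with aiohttp.ClientSession(headers=headers) as session:
            async def get_text(url: str) -> str:
                async with session.get(url) as response:
                    return await response.text()
            # gather() preserves input order, so results line up with urls
            return await asyncio.gather(*(get_text(u) for u in urls))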
@@ -59,27 +50,27 @@ def check_next(self):
             nexty = '0'
         return nexty

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.fileurls(self.files)

-    def process(self):
+    async def process(self):
         print('Searching results')
-        self.do_search()
+        await self.do_search()

-    def process_files(self, files):
+    async def process_files(self, files):
         while self.counter < self.limit:
-            self.do_search_files(files)
-            time.sleep(getDelay())
+            await self.do_search_files(files)
             more = self.check_next()
+            await asyncio.sleep(2)
             if more == '1':
                 self.counter += 50
             else:
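With every public method now a coroutine, callers have to drive the class from an event loop. Note also that process_files still assigns more = self.check_next() without awaiting it, even though check_next became async in this same commit; more would then be a coroutine object and more == '1' could never be true, so an await appears to be missing there. A hypothetical driver (the domain and limits are placeholders, not from the diff):

    import asyncio

    async def main():
        search = SearchExalead('example.com', 50, 0)  # word, limit, start
        await search.process()                 # runs do_search()
        print(await search.get_emails())       # parsing helpers are async too
        print(await search.get_hostnames())

    asyncio.run(main())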
The third file (its path is likewise lost in this capture) posts a form and scrapes hostnames out of a <pre> block; it had already been ported to aiohttp, so only a leftover requests import goes.

@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 from bs4 import BeautifulSoup
-import requests
 import aiohttp
 import asyncio

@@ -17,7 +16,7 @@ def __init__(self, word: str):
     async def request(self, url, params):
         headers = {'User-Agent': Core.get_user_agent()}
         data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=360)
+        timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes we will change that to 6 minutes
         # Depending on the domain and if it has a lot of subdomains you may want to tweak it
        # The results are well worth the wait :)
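The total timeout doubles from 360 s to 720 s, but the context comment below still says "6 minutes"; after this commit it is 12, so the comment is stale. For reference, this is how an aiohttp.ClientTimeout is normally attached; the session/POST split in the sketch is an assumption, only the ClientTimeout line comes from the diff:

    import aiohttp

    timeout = aiohttp.ClientTimeout(total=720)  # 12 minutes overall

    async def post_form(url: str, data: dict, headers: dict) -> str:
        # The timeout object is handed to the session (or to an individual
        # request); all operations on that session share the budget.
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            async with session.post(url, data=data) as response:
                return await response.text()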
@@ -51,9 +50,7 @@ async def do_search(self):
             hosts: list = str(soup.find('pre')).splitlines()
             await self.clean_hosts(hosts)
         except Exception as e:
-            print('An exception has occurred: ', e)
-            import traceback as t
-            t.print_exc()
+            print(f'An exception has occurred: {e}')

     async def get_hostnames(self) -> set:
         return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
             if host[0] == '.':
                 self.totalhosts.add(host[1:])
             else:
-                self.totalhosts.add(host)
+                self.totalhosts.add(host)

(The final pair differs only in whitespace or a trailing newline, which this capture cannot show.)
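Only the tail of clean_hosts is visible, but together with do_search's str(soup.find('pre')).splitlines() the intent is clear: each line of the <pre> block is a hostname, and a leading dot is trimmed before the host joins the set. A rough reconstruction, purely illustrative:

    async def clean_hosts(self, soup_hosts):
        # Sketch: the real method may filter or split lines differently.
        for host in soup_hosts:
            host = host.strip()
            if not host:
                continue
            # '.sub.example.com' and 'sub.example.com' become one entry,
            # as the visible branch in the hunk does.
            self.totalhosts.add(host[1:] if host[0] == '.' else host)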
The final hunk is in the async_fetcher helper (theHarvester.lib.core) that the ported modules call. Besides wrapping the request in a try/except, it fixes an inverted branch: the old code passed params precisely when params was empty.

@@ -378,14 +378,17 @@ class async_fetcher:
     async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         # This fetch method solely focuses on get requests
         # TODO determine if method for post requests is necessary
-        if len(params) == 0:
-            async with session.get(url, params=params) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
-        else:
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
+        try:
+            if params != '':
+                async with session.get(url, params=params) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+        except Exception:
+            return ''

     @staticmethod
     async def fetch_all(urls, headers='', params='') -> list:
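The hunk stops at fetch_all's signature, so its body is not part of this commit. Given that fetch takes a session and that the exalead module now awaits fetch_all(urls, headers=headers), a plausible shape is sketched below; treat it as an assumption about the surrounding code, not the file's actual contents (the fetch shown here is a simplified stand-in for the one in the hunk):

    import asyncio
    import aiohttp

    class async_fetcher:
        @staticmethod
        async def fetch(session, url, params='', json=False):
            # Simplified stand-in for the fetch() in the hunk above.
            async with session.get(url, params=params or None) as response:
                return await response.json() if json else await response.text()

        @staticmethod
        async def fetch_all(urls, headers='', params='') -> list:
            # Assumed body: share one session, fan out with gather().
            # headers='' (falsy) means "no custom headers".
            async with aiohttp.ClientSession(headers=headers or None) as session:
                return await asyncio.gather(
                    *(async_fetcher.fetch(session, url, params) for url in urls))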