mirror of
https://github.com/laramies/theHarvester.git
synced 2024-11-11 18:03:10 +08:00
Ported netcraft to use aiohttp.
This commit is contained in:
parent
989a062ba9
commit
82e1b23f74
1 changed files with 50 additions and 27 deletions
|
@ -1,9 +1,10 @@
|
|||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import myparser
|
||||
import requests
|
||||
import hashlib
|
||||
import urllib.parse as urllib
|
||||
import re
|
||||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
|
||||
class SearchNetcraft:
|
||||
|
@ -14,30 +15,36 @@ def __init__(self, word):
|
|||
self.totalresults = ""
|
||||
self.server = 'netcraft.com'
|
||||
self.base_url = f'https://searchdns.netcraft.com/?restriction=site+ends+with&host={word}'
|
||||
self.session = requests.session()
|
||||
self.session = None
|
||||
self.headers = {
|
||||
'User-Agent': Core.get_user_agent()
|
||||
}
|
||||
self.timeout = 25
|
||||
self.timeout = aiohttp.ClientTimeout(total=25)
|
||||
self.domain = f"https://searchdns.netcraft.com/?restriction=site+ends+with&host={self.word}"
|
||||
|
||||
async def request(self, url, first=False):
    """Perform an HTTP GET against Netcraft.

    :param url: the URL to fetch.
    :param first: when True, open a one-off session and return the raw
        response headers (used once to harvest the anti-bot cookie);
        otherwise use the long-lived ``self.session`` (created by
        ``do_search`` with the cookies attached) and return the body text.
    :return: a headers mapping when ``first`` is True, the body string
        otherwise, or ``None`` on any failure.
    """
    try:
        if first:
            # Throwaway session: we only need the Set-Cookie headers here.
            async with aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) as sess:
                async with sess.get(url) as resp:
                    # Small delay to avoid hammering the endpoint.
                    await asyncio.sleep(3)
                    return resp.headers
        # Normal page fetch over the cookie-carrying session.
        async with self.session.get(url) as resp:
            await asyncio.sleep(2)
            return await resp.text()
    except Exception as e:
        # Network-level failure: report it and signal the caller with None.
        print(e)
        return None
|
||||
|
||||
def get_next(self, resp):
|
||||
async def get_next(self, resp):
|
||||
link_regx = re.compile('<A href="(.*?)"><b>Next page</b></a>')
|
||||
link = link_regx.findall(resp)
|
||||
link = re.sub(f'host=.*?{self.word}', f'host={self.domain}', link[0])
|
||||
url = f'https://searchdns.netcraft.com{link.replace(" ", "%20")}'
|
||||
return url
|
||||
|
||||
def create_cookies(self, cookie):
|
||||
async def create_cookies(self, cookie):
|
||||
cookies = dict()
|
||||
cookies_list = cookie[0:cookie.find(';')].split("=")
|
||||
cookies[cookies_list[0]] = cookies_list[1]
|
||||
|
@ -46,27 +53,43 @@ def create_cookies(self, cookie):
|
|||
urllib.unquote(cookies_list[1]).encode('utf-8')).hexdigest()
|
||||
return cookies
|
||||
|
||||
def get_cookies(self, headers):
|
||||
if 'set-cookie' in headers:
|
||||
cookies = self.create_cookies(headers['set-cookie'])
|
||||
else:
|
||||
cookies = {}
|
||||
async def get_cookies(self, headers):
|
||||
try:
|
||||
if headers is None:
|
||||
return {}
|
||||
elif 'set-cookie' in headers:
|
||||
cookies = await self.create_cookies(headers['set-cookie'])
|
||||
else:
|
||||
cookies = {}
|
||||
except Exception as e:
|
||||
return {}
|
||||
return cookies
|
||||
|
||||
async def do_search(self):
    """Run the paginated Netcraft search, accumulating results.

    Fetches the first page to harvest the anti-bot cookie, opens a
    long-lived session with that cookie, then walks 'Next page' links
    until none remain. Results are appended to ``self.totalresults``.
    The session is always closed, even on failure.
    """
    try:
        # First request retrieves the headers carrying the cookie we need.
        resp = await self.request(self.base_url, first=True)
        cookies = await self.get_cookies(resp)
        self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout, cookies=cookies)
        while True:
            resp = await self.request(self.base_url)
            if not isinstance(resp, str):
                # Request failed (returned None): stop instead of
                # looping forever, which the previous version did.
                break
            self.totalresults += resp
            if 'Next page' not in resp:
                break
            self.base_url = await self.get_next(resp)
    except Exception as e:
        # Surface the error instead of swallowing it silently.
        print(e)
    finally:
        # Guaranteed cleanup on every path (success, failure, early break).
        if self.session is not None:
            try:
                await self.session.close()
            except Exception:
                pass
|
||||
|
||||
async def get_hostnames(self):
    """Parse the accumulated raw results and return discovered hostnames."""
    parser = myparser.Parser(self.totalresults, self.word)
    return parser.hostnames()
|
||||
|
||||
async def process(self):
    """Public entry point: execute the full Netcraft search."""
    await self.do_search()
|
||||
|
||||
|
|
Loading…
Reference in a new issue