Ported netcraft to use aiohttp.

NotoriousRebel 2019-12-30 22:56:54 -05:00
parent 989a062ba9
commit 82e1b23f74
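
The change follows the standard requests-to-aiohttp port: the blocking session.get() becomes an awaited get() on an aiohttp.ClientSession, and the plain integer timeout becomes an aiohttp.ClientTimeout. A minimal sketch of that pattern, separate from the commit itself (fetch and its URL parameter are illustrative names, not code from this diff):

    import asyncio
    import aiohttp

    # sketch of the sync-to-async pattern this commit applies
    async def fetch(url: str) -> str:
        timeout = aiohttp.ClientTimeout(total=25)  # replaces requests' integer timeout
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as resp:   # awaited instead of blocking
                return await resp.text()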


@@ -1,9 +1,10 @@
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
 import hashlib
 import urllib.parse as urllib
 import re
+import aiohttp
+import asyncio


 class SearchNetcraft:
@@ -14,30 +15,36 @@ def __init__(self, word):
         self.totalresults = ""
         self.server = 'netcraft.com'
         self.base_url = f'https://searchdns.netcraft.com/?restriction=site+ends+with&host={word}'
-        self.session = requests.session()
+        self.session = None
         self.headers = {
             'User-Agent': Core.get_user_agent()
         }
-        self.timeout = 25
+        self.timeout = aiohttp.ClientTimeout(total=25)
         self.domain = f"https://searchdns.netcraft.com/?restriction=site+ends+with&host={self.word}"

-    def request(self, url, cookies=None):
-        cookies = cookies or {}
+    async def request(self, url, first=False):
         try:
-            resp = self.session.get(url, headers=self.headers, timeout=self.timeout, cookies=cookies)
+            if first:
+                async with aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) as sess:
+                    async with sess.get(url) as resp:
+                        await asyncio.sleep(3)
+                        return resp.headers
+            else:
+                async with self.session.get(url) as sess:
+                    await asyncio.sleep(2)
+                    return await sess.text()
         except Exception as e:
             print(e)
             resp = None
         return resp

-    def get_next(self, resp):
+    async def get_next(self, resp):
         link_regx = re.compile('<A href="(.*?)"><b>Next page</b></a>')
         link = link_regx.findall(resp)
         link = re.sub(f'host=.*?{self.word}', f'host={self.domain}', link[0])
         url = f'https://searchdns.netcraft.com{link.replace(" ", "%20")}'
         return url

-    def create_cookies(self, cookie):
+    async def create_cookies(self, cookie):
         cookies = dict()
         cookies_list = cookie[0:cookie.find(';')].split("=")
         cookies[cookies_list[0]] = cookies_list[1]
@@ -46,27 +53,43 @@ def create_cookies(self, cookie):
             urllib.unquote(cookies_list[1]).encode('utf-8')).hexdigest()
         return cookies

-    def get_cookies(self, headers):
-        if 'set-cookie' in headers:
-            cookies = self.create_cookies(headers['set-cookie'])
-        else:
-            cookies = {}
+    async def get_cookies(self, headers):
+        try:
+            if headers is None:
+                return {}
+            elif 'set-cookie' in headers:
+                cookies = await self.create_cookies(headers['set-cookie'])
+            else:
+                cookies = {}
+        except Exception:
+            return {}
         return cookies
-    def do_search(self):
-        start_url = self.base_url
-        resp = self.request(start_url)
-        cookies = self.get_cookies(resp.headers)
-        while True:
-            resp = self.request(self.base_url, cookies).text
-            self.totalresults += resp
-            if 'Next page' not in resp or resp is None:
-                break
-            self.base_url = self.get_next(resp)
+    async def do_search(self):
+        try:
+            start_url = self.base_url
+            resp = await self.request(start_url, first=True)
+            # first=True indicates this is the start_url, requested to retrieve the cookie we need
+            cookies = await self.get_cookies(resp)
+            self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout, cookies=cookies)
+            while True:
+                resp = await self.request(self.base_url)
+                if isinstance(resp, str):
+                    self.totalresults += resp
+                    if 'Next page' not in resp:
+                        await self.session.close()
+                        break
+                    self.base_url = await self.get_next(resp)
+                else:
+                    # request() returned None on failure; stop instead of paging on
+                    await self.session.close()
+                    break
+        except Exception:
+            try:
+                await self.session.close()
+            except Exception:
+                pass

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.hostnames()

-    def process(self):
-        self.do_search()
+    async def process(self):
+        await self.do_search()
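
Since process() is now a coroutine, callers must drive it from an event loop rather than calling it directly. A minimal usage sketch, assuming the theHarvester.discovery.netcraft module path and using example.com as a placeholder domain:

    import asyncio
    from theHarvester.discovery import netcraft  # module path assumed

    async def main():
        search = netcraft.SearchNetcraft('example.com')  # placeholder domain
        await search.process()
        print(await search.get_hostnames())

    asyncio.run(main())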