Merge pull request #62 from NotoriousRebel/master

Ported intelx to be asynchronous
This commit is contained in:
J.Townsend 2019-12-31 01:34:47 +00:00 committed by GitHub
commit 483dcafb4f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 26 deletions

View file

@ -1,8 +1,7 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import intelxparser
import requests
import time
import asyncio
class SearchIntelx:
@ -18,39 +17,33 @@ def __init__(self, word, limit):
self.info = ()
self.limit = limit
def do_search(self):
async def do_search(self):
try:
user_agent = Core.get_user_agent()
headers = {'User-Agent': user_agent, 'x-key': self.key}
# data is json that corresponds to what we are searching for, sort:2 means sort by most relevant
data = f'{{"term": "{self.word}", "maxresults": {self.limit}, "media": 0, "sort": 2 , "terminate": []}}'
r = requests.post(f'{self.database}phonebook/search', data=data, headers=headers)
if r.status_code == 400:
raise Exception('Invalid json was passed in.')
time.sleep(1)
resp = await AsyncFetcher.post_fetch(url=f'{self.database}phonebook/search', headers=headers, data=data,
json=True)
uuid = resp['id']
# grab uuid to send get request to fetch data
uuid = r.json()['id']
await asyncio.sleep(2)
url = f'{self.database}phonebook/search/result?id={uuid}&offset=0&limit={self.limit}'
r = requests.get(url, headers=headers)
time.sleep(1)
# TODO: add in future grab status from r.text and check if more results can be gathered
if r.status_code != 200:
raise Exception('Error occurred while searching intelx.')
self.results = r.json()
resp = await AsyncFetcher.fetch_all([url], headers=headers, json=True)
resp = resp[0]
# TODO: Check if more results can be gathered depending on status
self.results = resp
except Exception as e:
print(f'An exception has occurred: {e}')
def process(self):
self.do_search()
async def process(self):
await self.do_search()
intelx_parser = intelxparser.Parser()
self.info = intelx_parser.parse_dictionaries(self.results)
self.info = await intelx_parser.parse_dictionaries(self.results)
# Create parser and set self.info to tuple returned from parsing text.
def get_emails(self):
async def get_emails(self):
return self.info[0]
def get_hostnames(self):
async def get_hostnames(self):
return self.info[1]

View file

@ -415,7 +415,7 @@ async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
return ''
@staticmethod
async def fetch_all(urls, headers='', params='') -> list:
async def fetch_all(urls, headers='', params='', json=False) -> list:
# By default timeout is 5 minutes, 30 seconds should suffice
timeout = aiohttp.ClientTimeout(total=30)
@ -423,10 +423,10 @@ async def fetch_all(urls, headers='', params='') -> list:
headers = {'User-Agent': Core.get_user_agent()}
if len(params) == 0:
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url) for url in urls])
texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
return texts
else:
# Indicates the request has certain params
async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params) for url in urls])
texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
return texts

View file

@ -4,7 +4,7 @@ def __init__(self):
self.emails = set()
self.hosts = set()
def parse_dictionaries(self, results: dict) -> tuple:
async def parse_dictionaries(self, results: dict) -> tuple:
"""
Parse method to parse json results
:param results: Dictionary containing a list of dictionaries known as selectors