improvement: bump Spyse API version & add pagination support

This commit is contained in:
r-romanov 2021-07-13 13:41:42 +03:00
parent ddacfcfb71
commit bb8894ebde
2 changed files with 47 additions and 13 deletions

View file

@ -421,7 +421,7 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
elif engineitem == 'spyse':
from theHarvester.discovery import spyse
try:
spyse_search = spyse.SearchSpyse(word)
spyse_search = spyse.SearchSpyse(word, limit)
stor_lst.append(store(spyse_search, engineitem, store_host=True, store_ip=True))
except Exception as e:
print(e)

View file

@ -1,10 +1,11 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
import json
class SearchSpyse:
def __init__(self, word):
def __init__(self, word, limit):
self.ips = set()
self.word = word
self.key = Core.spyse_key()
@ -13,19 +14,52 @@ def __init__(self, word):
self.results = ''
self.hosts = set()
self.proxy = False
self.limit = limit
async def do_search(self):
    """Collect subdomains of ``self.word`` from the Spyse v4 domain search API.

    Results are requested page by page and every returned ``name`` is added
    to ``self.hosts``. Paging stops when ``self.limit`` results have been
    requested, when the API returns an empty page, when the maximum offset
    is passed, or when a request fails. Hosts gathered before a failure are
    kept.
    """
    # Spyse returns at most 100 results per request.
    max_limit = 100
    # Spyse "search" methods only expose the first 10 000 results.
    max_offset = 9900

    # These values do not change between pages, so build them once.
    base_url = 'https://api.spyse.com/v4/data/domain/search'
    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {self.key}',
    }
    search_params = [
        {
            'name': {
                'operator': 'ends',
                'value': '.' + self.word,
            }
        }
    ]

    offset = 0
    # The try wraps the whole loop: a failing request ends pagination
    # instead of being retried forever with the same offset.
    try:
        while offset <= max_offset and offset < self.limit:
            # Ask only for what is still missing, so a limit that is not a
            # multiple of 100 (e.g. 150) is honoured exactly.
            page_size = min(max_limit, self.limit - offset)
            query = {
                'search_params': search_params,
                'limit': page_size,
                'offset': offset,
            }
            results = await AsyncFetcher.post_fetch(
                base_url, json=True, headers=headers, data=json.dumps(query)
            )
            items = results['data']['items']
            if not items:
                # An empty page means there is nothing left to fetch.
                break
            self.hosts.update(domain['name'] for domain in items)
            offset += page_size
    except Exception as e:
        print(f'An exception has occurred: {e}')
async def get_hostnames(self):
    """Return the set of host names stored on this instance (``self.hosts``)."""
    found_hosts = self.hosts
    return found_hosts