mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 16:26:34 +08:00
Merge pull request #270 from L1ghtn1ng/dev
Fixes for bugs and updated deps plus other misc things
This commit is contained in:
commit
951f567bab
|
@ -9,7 +9,7 @@ before_install:
|
|||
install:
|
||||
- python setup.py test
|
||||
script:
|
||||
- python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,crtsh,hunter -b all
|
||||
- python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,hunter -b all
|
||||
- pytest
|
||||
notifications:
|
||||
email: false
|
||||
|
|
|
@ -44,8 +44,6 @@ Passive:
|
|||
|
||||
* google: Google search engine (Optional Google dorking.) - www.google.com
|
||||
|
||||
* google-certificates: Google Certificate Transparency report
|
||||
|
||||
* hunter: Hunter search engine (Requires API key, see below.) - www.hunter.io
|
||||
|
||||
* intelx: Intelx search engine (Requires API key, see below.) - www.intelx.io
|
||||
|
|
|
@ -6,13 +6,13 @@ apikeys:
|
|||
key:
|
||||
|
||||
hunter:
|
||||
key:
|
||||
key:
|
||||
|
||||
intelx:
|
||||
key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace
|
||||
|
||||
securityTrails:
|
||||
key:
|
||||
key:
|
||||
|
||||
shodan:
|
||||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
__all__ = ['baidusearch',
|
||||
'bingsearch',
|
||||
'censys',
|
||||
'crtsh',
|
||||
'cymon',
|
||||
'dnssearch',
|
||||
'dogpilesearch',
|
||||
'duckduckgosearch',
|
||||
'exaleadsearch',
|
||||
'googlecertificates',
|
||||
'googlesearch',
|
||||
'huntersearch',
|
||||
'intelxsearch',
|
||||
'linkedinsearch',
|
||||
'netcraft',
|
||||
'port_scanner',
|
||||
'securitytrailssearch',
|
||||
'shodansearch',
|
||||
'takeover',
|
||||
'threatcrowd',
|
||||
'trello',
|
||||
'twittersearch',
|
||||
'virustotal',
|
||||
'yahoosearch',
|
||||
'yandexsearch']
|
|
@ -1,8 +1,9 @@
|
|||
beautifulsoup4>=4.7.1
|
||||
beautifulsoup4==4.8.0
|
||||
censys==0.0.8
|
||||
plotly==3.10.0
|
||||
pytest>=4.6.3
|
||||
PyYaml>=5.1.1
|
||||
requests>=2.22.0
|
||||
shodan>=1.13.0
|
||||
texttable>=1.6.1
|
||||
chart-studio==1.0.0
|
||||
plotly==4.0.0
|
||||
pytest==5.0.1
|
||||
PyYaml==5.1.1
|
||||
requests==2.22.0
|
||||
shodan==1.14.0
|
||||
texttable==1.6.2
|
|
@ -54,7 +54,7 @@ def start():
|
|||
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
||||
dogpile, duckduckgo, github-code, google,
|
||||
google-certificates, hunter, intelx,
|
||||
hunter, intelx,
|
||||
linkedin, netcraft, securityTrails, threatcrowd,
|
||||
trello, twitter, vhost, virustotal, yahoo, all''')
|
||||
parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str)
|
||||
|
@ -147,7 +147,7 @@ def start():
|
|||
print('\033[94m[*] Searching CRT.sh. \033[0m')
|
||||
search = crtsh.SearchCrtsh(word)
|
||||
search.process()
|
||||
hosts = filter(search.get_hostnames())
|
||||
hosts = filter(search.get_data())
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'CRTsh')
|
||||
|
@ -223,15 +223,6 @@ def start():
|
|||
db.store_all(word, all_hosts, 'host', 'google')
|
||||
db.store_all(word, all_emails, 'email', 'google')
|
||||
|
||||
elif engineitem == 'google-certificates':
|
||||
print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
|
||||
search = googlecertificates.SearchGoogleCertificates(word, limit, start)
|
||||
search.process()
|
||||
hosts = filter(search.get_domains())
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'google-certificates')
|
||||
|
||||
elif engineitem == 'hunter':
|
||||
print('\033[94m[*] Searching Hunter. \033[0m')
|
||||
from theHarvester.discovery import huntersearch
|
||||
|
@ -287,7 +278,6 @@ def start():
|
|||
print('---------------------')
|
||||
for user in sorted(list(set(people))):
|
||||
print(user)
|
||||
sys.exit(0)
|
||||
|
||||
elif engineitem == 'netcraft':
|
||||
print('\033[94m[*] Searching Netcraft. \033[0m')
|
||||
|
@ -434,7 +424,7 @@ def start():
|
|||
print('\033[94m[*] Searching CRT.sh. \033[0m')
|
||||
search = crtsh.SearchCrtsh(word)
|
||||
search.process()
|
||||
hosts = filter(search.get_hostnames())
|
||||
hosts = filter(search.get_data())
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'CRTsh')
|
||||
|
@ -489,14 +479,6 @@ def start():
|
|||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'google')
|
||||
|
||||
print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
|
||||
search = googlecertificates.SearchGoogleCertificates(word, limit, start)
|
||||
search.process()
|
||||
domains = filter(search.get_domains())
|
||||
all_hosts.extend(domains)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'google-certificates')
|
||||
|
||||
print('\033[94m[*] Searching Hunter. \033[0m')
|
||||
from theHarvester.discovery import huntersearch
|
||||
# Import locally.
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
'dogpilesearch',
|
||||
'duckduckgosearch',
|
||||
'exaleadsearch',
|
||||
'googlecertificates',
|
||||
'googlesearch',
|
||||
'huntersearch',
|
||||
'intelxsearch',
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
from theHarvester.parsers import censysparser
|
||||
import requests
|
||||
|
||||
# TODO rewrite this module to use the censys api as the current way does not work
|
||||
# TODO The current implementation is also not really maintainable as it stands
|
||||
|
||||
|
||||
class SearchCensys:
|
||||
|
||||
|
|
|
@ -1,67 +1,26 @@
|
|||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import myparser
|
||||
import requests
|
||||
import time
|
||||
|
||||
import urllib3
|
||||
|
||||
class SearchCrtsh:
|
||||
|
||||
def __init__(self, word):
|
||||
self.word = word.replace(' ', '%20')
|
||||
self.results = ""
|
||||
self.totalresults = ""
|
||||
self.server = 'https://crt.sh/?q='
|
||||
self.quantity = '100'
|
||||
self.counter = 0
|
||||
|
||||
self.word = word
|
||||
self.data = set()
|
||||
|
||||
def do_search(self):
|
||||
try:
|
||||
urly = self.server + self.word
|
||||
except Exception as e:
|
||||
print(e)
|
||||
try:
|
||||
params = {'User-Agent': Core.get_user_agent()}
|
||||
r = requests.get(urly, headers=params)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
links = self.get_info(r.text)
|
||||
for link in links:
|
||||
params = {'User-Agent': Core.get_user_agent()}
|
||||
r = requests.get(link, headers=params)
|
||||
time.sleep(getDelay())
|
||||
self.results = r.text
|
||||
self.totalresults += self.results
|
||||
|
||||
"""
|
||||
Function goes through text from base request and parses it for links
|
||||
@param text requests text
|
||||
@return list of links
|
||||
"""
|
||||
def get_info(self, text):
|
||||
lines = []
|
||||
for line in str(text).splitlines():
|
||||
line = line.strip()
|
||||
if 'id=' in line:
|
||||
lines.append(line)
|
||||
links = []
|
||||
for i in range(len(lines)):
|
||||
if i % 2 == 0: # Way html is formatted only care about every other one.
|
||||
current = lines[i]
|
||||
current = current[43:] # 43 is not an arbitrary number, the id number always starts at 43rd index.
|
||||
link = ''
|
||||
for ch in current:
|
||||
if ch == '"':
|
||||
break
|
||||
else:
|
||||
link += ch
|
||||
links.append(('https://crt.sh?id=' + str(link)))
|
||||
return links
|
||||
|
||||
def get_hostnames(self):
|
||||
rawres = myparser.Parser(self.totalresults, self.word)
|
||||
return rawres.hostnames()
|
||||
url = f'https://crt.sh/?q=%25.{self.word}&output=json'
|
||||
headers = {'User-Agent': Core.get_user_agent()}
|
||||
request = requests.get(url, params=headers, timeout=30)
|
||||
if request.ok:
|
||||
content = request.json()
|
||||
data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content])
|
||||
return data
|
||||
|
||||
def process(self):
|
||||
self.do_search()
|
||||
print('\tSearching results.')
|
||||
data = self.do_search()
|
||||
self.data = data
|
||||
|
||||
def get_data(self):
|
||||
return self.data
|
||||
|
|
|
@ -39,4 +39,4 @@ def process(self):
|
|||
self.do_search()
|
||||
time.sleep(getDelay())
|
||||
self.counter += 100
|
||||
print(f'\tSearching {self.counter} results.')
|
||||
print(f'\tSearching {self.counter} results.')
|
||||
|
|
|
@ -74,7 +74,6 @@ def get_supportedengines():
|
|||
'duckduckgo',
|
||||
'github-code',
|
||||
'google',
|
||||
'google-certificates',
|
||||
'hunter',
|
||||
'intelx',
|
||||
'linkedin',
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from datetime import datetime
|
||||
import plotly
|
||||
import plotly.graph_objs as go
|
||||
import plotly.plotly as py
|
||||
import chart_studio.plotly as py
|
||||
|
||||
try:
|
||||
db = stash.stash_manager()
|
||||
|
@ -92,5 +92,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
|
|||
except Exception as e:
|
||||
print(f'Error generating HTML for the historical graph for domain: {e}')
|
||||
|
||||
except Exception as e:
|
||||
print(f'Error in the reportgraph module: {e}')
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
login.html
|
||||
administrator/login.%XT%
|
||||
admin_area/login.%XT%
|
||||
intext:@
|
||||
inurl:
|
||||
intitle:
|
||||
intext:
|
||||
|
|
Loading…
Reference in a new issue