Merge pull request #270 from L1ghtn1ng/dev

Fixes for bugs and updated deps plus other misc things
Commit 951f567bab by J.Townsend, 2019-08-09 00:16:04 +01:00 (committed by GitHub)
13 changed files with 37 additions and 121 deletions

@@ -9,7 +9,7 @@ before_install:
 install:
   - python setup.py test
 script:
-  - python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,crtsh,hunter -b all
+  - python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,hunter -b all
   - pytest
 notifications:
   email: false
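
crtsh is rewritten later in this commit (see the crtsh.py hunk below), so it no longer needs to be excluded from the CI run. For reference, a crtsh-only run would look like this (domain illustrative):

    python theHarvester.py -d example.com -b crtsh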

@@ -44,8 +44,6 @@ Passive:
 * google: Google search engine (Optional Google dorking.) - www.google.com
-* google-certificates: Google Certificate Transparency report
 * hunter: Hunter search engine (Requires API key, see below.) - www.hunter.io
 * intelx: Intelx search engine (Requires API key, see below.) - www.intelx.io

@@ -6,13 +6,13 @@ apikeys:
     key:
   hunter:
-    key:
+    key:
   intelx:
     key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace
   securityTrails:
-    key:
+    key:
   shodan:
     key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
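
The file keeps its nested apikeys / engine / key layout, so every module can read its key the same way. A minimal sketch of such a lookup with the PyYaml pinned in requirements.txt (file path and nesting assumed from this hunk):

    import yaml

    # Load the nested apikeys mapping shown above.
    with open('api-keys.yaml') as handle:
        keys = yaml.safe_load(handle)

    shodan_key = keys['apikeys']['shodan']['key']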

@@ -1,25 +0,0 @@
-__all__ = ['baidusearch',
-           'bingsearch',
-           'censys',
-           'crtsh',
-           'cymon',
-           'dnssearch',
-           'dogpilesearch',
-           'duckduckgosearch',
-           'exaleadsearch',
-           'googlecertificates',
-           'googlesearch',
-           'huntersearch',
-           'intelxsearch',
-           'linkedinsearch',
-           'netcraft',
-           'port_scanner',
-           'securitytrailssearch',
-           'shodansearch',
-           'takeover',
-           'threatcrowd',
-           'trello',
-           'twittersearch',
-           'virustotal',
-           'yahoosearch',
-           'yandexsearch']

@@ -1,8 +1,9 @@
-beautifulsoup4>=4.7.1
+beautifulsoup4==4.8.0
 censys==0.0.8
-plotly==3.10.0
-pytest>=4.6.3
-PyYaml>=5.1.1
-requests>=2.22.0
-shodan>=1.13.0
-texttable>=1.6.1
+chart-studio==1.0.0
+plotly==4.0.0
+pytest==5.0.1
+PyYaml==5.1.1
+requests==2.22.0
+shodan==1.14.0
+texttable==1.6.2
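
Every dependency is now pinned exactly (==), which keeps CI installs reproducible. chart-studio is new because plotly 4.0 moved its Chart Studio client out of the core package; the reportgraph.py hunk below switches the import to match. Installing the pinned set:

    pip install -r requirements.txt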

@@ -54,7 +54,7 @@ def start():
     parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
     parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
                         dogpile, duckduckgo, github-code, google,
-                        google-certificates, hunter, intelx,
+                        hunter, intelx,
                         linkedin, netcraft, securityTrails, threatcrowd,
                         trello, twitter, vhost, virustotal, yahoo, all''')
     parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str)
@@ -147,7 +147,7 @@ def start():
             print('\033[94m[*] Searching CRT.sh. \033[0m')
             search = crtsh.SearchCrtsh(word)
             search.process()
-            hosts = filter(search.get_hostnames())
+            hosts = filter(search.get_data())
             all_hosts.extend(hosts)
             db = stash.stash_manager()
             db.store_all(word, all_hosts, 'host', 'CRTsh')
@@ -223,15 +223,6 @@ def start():
             db.store_all(word, all_hosts, 'host', 'google')
             db.store_all(word, all_emails, 'email', 'google')
-        elif engineitem == 'google-certificates':
-            print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
-            search = googlecertificates.SearchGoogleCertificates(word, limit, start)
-            search.process()
-            hosts = filter(search.get_domains())
-            all_hosts.extend(hosts)
-            db = stash.stash_manager()
-            db.store_all(word, all_hosts, 'host', 'google-certificates')
         elif engineitem == 'hunter':
             print('\033[94m[*] Searching Hunter. \033[0m')
             from theHarvester.discovery import huntersearch
@@ -287,7 +278,6 @@ def start():
             print('---------------------')
             for user in sorted(list(set(people))):
                 print(user)
-            sys.exit(0)
         elif engineitem == 'netcraft':
             print('\033[94m[*] Searching Netcraft. \033[0m')
@@ -434,7 +424,7 @@ def start():
         print('\033[94m[*] Searching CRT.sh. \033[0m')
         search = crtsh.SearchCrtsh(word)
         search.process()
-        hosts = filter(search.get_hostnames())
+        hosts = filter(search.get_data())
         all_hosts.extend(hosts)
         db = stash.stash_manager()
         db.store_all(word, all_hosts, 'host', 'CRTsh')
@@ -489,14 +479,6 @@ def start():
         db = stash.stash_manager()
         db.store_all(word, all_hosts, 'host', 'google')
-        print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
-        search = googlecertificates.SearchGoogleCertificates(word, limit, start)
-        search.process()
-        domains = filter(search.get_domains())
-        all_hosts.extend(domains)
-        db = stash.stash_manager()
-        db.store_all(word, all_hosts, 'host', 'google-certificates')
         print('\033[94m[*] Searching Hunter. \033[0m')
         from theHarvester.discovery import huntersearch
         # Import locally.
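
Note that filter(...) in the crtsh branches above is not the Python builtin (which takes a predicate as its first argument); it is theHarvester's own hostname-cleaning helper, star-imported from theHarvester.discovery.constants. A rough sketch of the idea, not the exact helper:

    def filter(lst):
        # Deduplicate and keep only plausible hostname strings.
        return [item.lower() for item in set(lst or []) if item and item[0].isalnum()]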

@@ -6,7 +6,6 @@
            'dogpilesearch',
            'duckduckgosearch',
            'exaleadsearch',
-           'googlecertificates',
            'googlesearch',
            'huntersearch',
            'intelxsearch',

@@ -2,6 +2,9 @@
 from theHarvester.parsers import censysparser
 import requests
+# TODO rewrite this module to use the censys api as the current way does not work
+# TODO and not really that maintainable as it currently stands
 class SearchCensys:
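
A hedged sketch of where that TODO could go, using the censys package pinned in requirements.txt (credentials, query, and field names are illustrative, not part of this commit):

    from censys.certificates import CensysCertificates

    api = CensysCertificates(api_id='YOUR_UID', api_secret='YOUR_SECRET')
    # Iterate certificates whose parsed names mention the target domain.
    for cert in api.search('parsed.names: example.com', fields=['parsed.names']):
        print(cert['parsed.names'])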

@@ -1,67 +1,26 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
-from theHarvester.parsers import myparser
 import requests
-import time
-import urllib3
 class SearchCrtsh:
     def __init__(self, word):
-        self.word = word.replace(' ', '%20')
-        self.results = ""
-        self.totalresults = ""
-        self.server = 'https://crt.sh/?q='
-        self.quantity = '100'
-        self.counter = 0
+        self.word = word
+        self.data = set()
     def do_search(self):
-        try:
-            urly = self.server + self.word
-        except Exception as e:
-            print(e)
-        try:
-            params = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(urly, headers=params)
-        except Exception as e:
-            print(e)
-        links = self.get_info(r.text)
-        for link in links:
-            params = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(link, headers=params)
-            time.sleep(getDelay())
-            self.results = r.text
-            self.totalresults += self.results
-    """
-    Function goes through text from base request and parses it for links
-    @param text requests text
-    @return list of links
-    """
-    def get_info(self, text):
-        lines = []
-        for line in str(text).splitlines():
-            line = line.strip()
-            if 'id=' in line:
-                lines.append(line)
-        links = []
-        for i in range(len(lines)):
-            if i % 2 == 0:  # Way html is formatted only care about every other one.
-                current = lines[i]
-                current = current[43:]  # 43 is not an arbitrary number, the id number always starts at 43rd index.
-                link = ''
-                for ch in current:
-                    if ch == '"':
-                        break
-                    else:
-                        link += ch
-                links.append(('https://crt.sh?id=' + str(link)))
-        return links
-    def get_hostnames(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.hostnames()
+        url = f'https://crt.sh/?q=%25.{self.word}&output=json'
+        headers = {'User-Agent': Core.get_user_agent()}
+        request = requests.get(url, params=headers, timeout=30)
+        if request.ok:
+            content = request.json()
+            data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content])
+            return data
     def process(self):
-        self.do_search()
         print('\tSearching results.')
+        data = self.do_search()
+        self.data = data
+    def get_data(self):
+        return self.data
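
The rewrite drops HTML scraping in favour of crt.sh's JSON output. A standalone sketch of the same lookup (example.com is a placeholder; a plain requests call would normally pass the User-Agent dict as headers= rather than params=):

    import requests

    # %25 is a URL-encoded '%', so the query matches every subdomain.
    url = 'https://crt.sh/?q=%25.example.com&output=json'
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
    if response.ok:
        # Strip the leading '*.' from wildcard entries, as the module above does.
        hosts = {entry['name_value'][2:] if entry['name_value'].startswith('*.') else entry['name_value']
                 for entry in response.json()}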

@@ -39,4 +39,4 @@ def process(self):
         self.do_search()
         time.sleep(getDelay())
         self.counter += 100
-        print(f'\tSearching {self.counter} results.')
+        print(f'\tSearching {self.counter} results.')

@@ -74,7 +74,6 @@ def get_supportedengines():
         'duckduckgo',
         'github-code',
         'google',
-        'google-certificates',
         'hunter',
         'intelx',
         'linkedin',

@@ -2,7 +2,7 @@
 from datetime import datetime
 import plotly
 import plotly.graph_objs as go
-import plotly.plotly as py
+import chart_studio.plotly as py
 try:
     db = stash.stash_manager()
@@ -92,5 +92,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
         except Exception as e:
             print(f'Error generating HTML for the historical graph for domain: {e}')
     except Exception as e:
         print(f'Error in the reportgraph module: {e}')
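
plotly 4.0 split the Chart Studio client into the separate chart-studio package pinned in requirements.txt above, with plotly.plotly becoming chart_studio.plotly. A version-tolerant import shim, as a sketch:

    try:
        import chart_studio.plotly as py  # plotly >= 4.0: Chart Studio ships separately
    except ImportError:
        import plotly.plotly as py  # plotly < 4.0 bundled it in the core package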

@@ -1,6 +1,7 @@
 login.html
 administrator/login.%XT%
 admin_area/login.%XT%
+intext:@
 inurl:
 intitle:
 intext:
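
The new entries are bare Google dork operators; in practice they are combined with a target domain, e.g. (illustrative query):

    site:example.com intext:@example.com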