Merge branch 'dev' into dev

Commit 40eaadfd2b, authored by Matt on 2019-10-02 15:47:08 -04:00, committed via GitHub.
17 changed files with 88 additions and 1942 deletions


@@ -35,10 +35,6 @@ jobs:
run: |
python theHarvester.py -d metasploit.com -b bing
- name: Run theHarvester module censys
run: |
python theHarvester.py -d metasploit.com -b censys
- name: Run theHarvester module crtsh
run: |
python theHarvester.py -d metasploit.com -b crtsh
@@ -112,6 +108,6 @@ jobs:
- name: Test with pytest
run: |
pytest
# - name: Check static type checking with mypy
# - name: Static type checking with mypy
# run: |
# mypy *.py
# mypy --pretty *.py

.gitignore

@@ -9,4 +9,4 @@ debug_results.txt
tests/myparser.py
venv
.mypy_cache
.pytest_cache
.pytest_cache


@@ -14,10 +14,10 @@ before_install:
install:
- python setup.py test
script:
- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,otx,intelx,threatcrowd,trello,twitter,virustotal,yahoo -l 200
- python theHarvester.py -d metasploit.com -b baidu,bing,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,otx,intelx,threatcrowd,trello,twitter,virustotal,yahoo -l 200
- pytest
- flake8 . --count --show-source --statistics
#- mypy *.py
#- mypy --pretty *.py
notifications:
email: false
slack:


@@ -19,8 +19,6 @@ Passive:
* bingapi: Microsoft search engine, through the API (Requires API key, see below.)
* censys: Censys.io search engine - www.censys.io
* crtsh: Comodo Certificate search - www.crt.sh
* dnsdumpster: DNSdumpster search engine - dnsdumpster.com
@@ -51,6 +49,8 @@ Passive:
* shodan: Shodan search engine, will search for ports and banners from discovered<br>
hosts - www.shodanhq.com
* Spyse: Web research tools for professionals (Requires an API key) - https://spyse.com/
* threatcrowd: Open source threat intelligence - www.threatcrowd.org
* trello: Search trello boards (Uses Google search.)
@@ -79,6 +79,7 @@ Add your keys to api-keys.yaml
* intelx
* securityTrails
* shodan
* spyse
Dependencies:
-------------
@@ -89,14 +90,16 @@ Dependencies:
Comments, bugs, or requests?
----------------------------
* [![Twitter Follow](https://img.shields.io/twitter/follow/laramies.svg?style=social&label=Follow)](https://twitter.com/laramies) Christian Martorella @laramies
* cmartorella@edge-security.com
cmartorella@edge-security.com
* [![Twitter Follow](https://img.shields.io/twitter/follow/NotoriousRebel1.svg?style=social&label=Follow)](https://twitter.com/NotoriousRebel1) Matthew Brown @NotoriousRebel1
* [![Twitter Follow](https://img.shields.io/twitter/follow/jay_townsend1.svg?style=social&label=Follow)](https://twitter.com/jay_townsend1) Jay "L1ghtn1ng" Townsend @jay_townsend1
Main contributors:
------------------
* [![Twitter Follow](https://img.shields.io/twitter/follow/NotoriousRebel1.svg?style=social&label=Follow)](https://twitter.com/NotoriousRebel1) Matthew Brown @NotoriousRebel1
* [![Twitter Follow](https://img.shields.io/twitter/follow/jay_townsend1.svg?style=social&label=Follow)](https://twitter.com/jay_townsend1) Jay "L1ghtn1ng" Townsend @jay_townsend1
* [![LinkedIn](https://static.licdn.com/scds/common/u/img/webpromo/btn_viewmy_160x25.png)](https://www.linkedin.com/in/janoszold/) Janos Zold
* [![Twitter Follow](https://img.shields.io/twitter/follow/discoverscripts.svg?style=social&label=Follow)](https://twitter.com/discoverscripts) Lee Baird @discoverscripts
* [![LinkedIn](https://static.licdn.com/scds/common/u/img/webpromo/btn_viewmy_160x25.png)](https://www.linkedin.com/in/janoszold/) Janos Zold
Thanks:
-------


@@ -16,3 +16,6 @@ apikeys:
shodan:
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
spyse:
key:


@@ -1,3 +1,4 @@
[mypy]
ignore_missing_imports = True
show_traceback = True
show_traceback = True
show_error_codes = True


@@ -1,14 +1,13 @@
aiodns==2.0.0
beautifulsoup4==4.8.0
censys==0.0.8
dnspython==1.16.0
flake8==3.7.8
grequests==0.4.0
mypy==0.720
mypy==0.730
netaddr==0.7.19
plotly==4.1.1
pytest==5.1.3
pytest==5.2.0
PyYaml==5.1.2
requests==2.22.0
shodan==1.17.0
shodan==1.19.0
texttable==1.6.2


@@ -1,3 +1,2 @@
[flake8]
ignore = E501, F405, F403, E402
exclude = theHarvester/discovery/IPy.py,theHarvester/discovery/s3_scanner.py
ignore = E501, F405, F403, E402


@@ -32,10 +32,10 @@ def start():
parser.add_argument('-n', '--dns-lookup', help='enable DNS server lookup, default False', default=False, action='store_true')
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, crtsh, dnsdumpster,
dogpile, duckduckgo, github-code, google,
hunter, intelx,
linkedin, linkedin_links, netcraft, otx, securityTrails, threatcrowd,
linkedin, linkedin_links, netcraft, otx, securityTrails, spyse, threatcrowd,
trello, twitter, vhost, virustotal, yahoo''')
args = parser.parse_args()
@@ -112,19 +112,6 @@ def start():
else:
pass
elif engineitem == 'censys':
print('\033[94m[*] Searching Censys. \033[0m')
from theHarvester.discovery import censys
# Import locally or won't work
censys_search = censys.SearchCensys(word, limit)
censys_search.process()
all_ip = censys_search.get_ipaddresses()
hosts = filter(censys_search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'censys')
db.store_all(word, all_ip, 'ip', 'censys')
elif engineitem == 'crtsh':
try:
print('\033[94m[*] Searching CRT.sh. \033[0m')
@@ -356,6 +343,22 @@ def start():
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'suip')
elif engineitem == 'spyse':
print('\033[94m[*] Searching Spyse. \033[0m')
from theHarvester.discovery import spyse
try:
spysesearch_search = spyse.SearchSpyse(word)
spysesearch_search.process()
hosts = filter(spysesearch_search.get_hostnames())
all_hosts.extend(list(hosts))
# ips = filter(spysesearch_search.get_ips())
# all_ip.extend(list(ips))
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'spyse')
# db.store_all(word, all_ip, 'ip', 'spyse')
except Exception as e:
print(e)
@@ -615,8 +618,7 @@ def start():
# Here we need to add explosion mode.
# We have to take out the TLDs to do this.
recursion = False
if recursion:
if args.dns_tld is not False:
counter = 0
for word in vhost:
search = googlesearch.SearchGoogle(word, limit, counter)

File diff suppressed because it is too large.


@@ -1,6 +1,5 @@
__all__ = ['baidusearch',
'bingsearch',
'censys',
'crtsh',
'dnssearch',
'dogpilesearch',
@@ -16,6 +15,7 @@
'port_scanner',
'securitytrailssearch',
'shodansearch',
'spyse',
'takeover',
'threatcrowd',
'trello',


@@ -1,133 +0,0 @@
from theHarvester.lib.core import *
from theHarvester.parsers import censysparser
import requests
# TODO rewrite this module to use the censys api as the current way does not work
# TODO And not really that maintainable as it currently stands
class SearchCensys:
def __init__(self, word, limit):
self.word = word
self.urlhost = ""
self.urlcert = ""
self.page = ""
self.resultshosts = ""
self.resultcerts = ""
self.total_resultshosts = ""
self.total_resultscerts = ""
self.server = 'censys.io'
self.ips = []
self.hostnamesall = []
self.limit = limit
def do_searchhosturl(self):
try:
headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlhost}
responsehost = requests.get(self.urlhost, headers=headers)
self.resultshosts = responsehost.text
self.total_resultshosts += self.resultshosts
except Exception as e:
print(f'Error occurred in the Censys module downloading pages from Censys - IP search: + {e}')
def do_searchcertificateurl(self):
try:
headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlcert}
responsecert = requests.get(self.urlcert, headers=headers)
self.resultcerts = responsecert.text
self.total_resultscerts += self.resultcerts
except Exception as e:
print(f'Error occurred in the Censys module downloading pages from Censys - certificates search: {e}')
def process(self):
try:
self.urlhost = 'https://' + self.server + '/ipv4/_search?q=' + str(self.word) + '&page=1'
self.urlcert = 'https://' + self.server + '/certificates/_search?q=' + str(self.word) + '&page=1'
self.do_searchhosturl()
self.do_searchcertificateurl()
counter = 2
pages = censysparser.Parser(self)
totalpages = pages.search_totalpageshosts()
pagestosearch = int(self.limit / 25) # 25 results/page
if totalpages is None:
totalpages = 0
if totalpages <= pagestosearch:
while counter <= totalpages:
try:
self.page = str(counter)
self.urlhost = 'https://' + self.server + '/ipv4/_search?q=' + str(self.word) + '&page=' + str(
self.page)
print('\tSearching IP results page ' + self.page + '.')
self.do_searchhosturl()
counter += 1
except Exception as e:
print(f'Error occurred in the Censys module requesting the pages: {e}')
else:
while counter <= pagestosearch:
try:
self.page = str(counter)
self.urlhost = 'https://' + self.server + '/ipv4/_search?q=' + str(self.word) + '&page=' + str(
self.page)
print(f'\tSearching results page {self.page}.')
self.do_searchhosturl()
counter += 1
except Exception as e:
print(f'Error occurred in the Censys module requesting the pages: {e}')
counter = 2
totalpages = pages.search_totalpagescerts()
if totalpages is None:
totalpages = 0
if totalpages <= pagestosearch:
while counter <= totalpages:
try:
self.page = str(counter)
self.urlhost = 'https://' + self.server + '/certificates/_search?q=' + str(
self.word) + '&page=' + str(self.page)
print(f'\tSearching certificates results page {self.page}.')
self.do_searchcertificateurl()
counter += 1
except Exception as e:
print(f'Error occurred in the Censys module requesting the pages: {e}')
else:
while counter <= pagestosearch:
try:
self.page = str(counter)
self.urlhost = 'https://' + self.server + '/ipv4/_search?q=' + str(self.word) + '&page=' + str(
self.page)
print('\tSearching IP results page ' + self.page + '.')
self.do_searchhosturl()
counter += 1
except Exception as e:
print(f'Error occurred in the Censys module requesting the pages: {e}')
except Exception as e:
print(f'Error occurred in the main Censys module: {e}')
def get_hostnames(self):
try:
ips = self.get_ipaddresses()
headers = {'user-agent': Core.get_user_agent(), 'Accept': '*/*', 'Referer': self.urlcert}
response = requests.post('https://censys.io/ipv4/getdns', json={'ips': ips}, headers=headers)
responsejson = response.json()
domainsfromcensys = []
for key, jdata in responsejson.items():
if jdata is not None:
domainsfromcensys.append(jdata)
else:
pass
matchingdomains = [s for s in domainsfromcensys if str(self.word) in s]
self.hostnamesall.extend(matchingdomains)
hostnamesfromcerts = censysparser.Parser(self)
self.hostnamesall.extend(hostnamesfromcerts.search_hostnamesfromcerts())
return self.hostnamesall
except Exception as e:
print(f'Error occurred in the Censys module - hostname search: {e}')
def get_ipaddresses(self):
try:
ips = censysparser.Parser(self)
self.ips = ips.search_ipaddresses()
return self.ips
except Exception as e:
print(f'Error occurred in the main Censys module - IP address search: {e}')
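
The TODO at the top of this deleted module notes that the scraping approach no longer works and that any replacement should go through the Censys API instead. Purely as an illustration (not part of this commit), a minimal API-based lookup might look like the sketch below; it assumes the Censys Search API v1 endpoint, HTTP basic auth with an API ID/secret pair, and a JSON response carrying a results list.

# Hypothetical sketch of an API-based replacement for the removed scraper.
# The endpoint, auth scheme, and response shape are assumptions, not part of this commit.
import requests

CENSYS_SEARCH_URL = 'https://censys.io/api/v1/search/ipv4'  # assumed v1 endpoint

def censys_api_search(word, api_id, api_secret, pages=2):
    ips = []
    for page in range(1, pages + 1):
        resp = requests.post(
            CENSYS_SEARCH_URL,
            auth=(api_id, api_secret),            # basic auth with API ID/secret
            json={'query': word, 'page': page, 'fields': ['ip']},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
        # Each hit is expected to carry the requested fields.
        ips.extend(hit.get('ip') for hit in data.get('results', []))
    return ips

A real rewrite would also need to honour the page counts and rate limits reported by the API rather than hard-coding a page range.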


@@ -20,12 +20,10 @@ def __init__(self, word, limit):
self.limit = limit
def do_search(self):
try: # Do normal scraping.
url = self.api.replace('x', self.word)
headers = {'User-Agent': googleUA}
r = requests.get(url, headers=headers)
except Exception as e:
print(e)
# Do normal scraping.
url = self.api.replace('x', self.word)
headers = {'User-Agent': googleUA}
r = requests.get(url, headers=headers)
time.sleep(getDelay())
self.results = r.text
self.totalresults += self.results
@@ -46,8 +44,8 @@ def crawl(self, text):
urls = set()
try:
load = json.loads(text)
for key in load.keys(): # Iterate through keys of dict.
val = load.get(key)
for keys in load.keys(): # Iterate through keys of dict.
val = load.get(keys)
if isinstance(val, int) or isinstance(val, dict) or val is None:
continue
if isinstance(val, list):


@@ -1,46 +0,0 @@
import re
import requests
class s3_scanner:
def __init__(self, host):
self.host = host
self.results = ""
self.totalresults = ""
self.fingerprints = ['www.herokucdn.com/error-pages/no-such-app.html', '<title>Squarespace - No Such Account</title>', "<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>", "<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>", "<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>", "<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>"]
def __check_http(self, bucket_url):
check_response = self.session.head(
S3_URL, timeout=3, headers={'Host': bucket_url})
# if not ARGS.ignore_rate_limiting\
# and (check_response.status_code == 503 and check_response.reason == 'Slow Down'):
# self.q.rate_limited = True
# Add it back to the bucket for re-processing.
# self.q.put(bucket_url)
if check_response.status_code == 307: # valid bucket, lets check if its public
new_bucket_url = check_response.headers['Location']
bucket_response = requests.request(
'GET' if ARGS.only_interesting else 'HEAD', new_bucket_url, timeout=3)
if bucket_response.status_code == 200\
and (not ARGS.only_interesting or
(ARGS.only_interesting and any(keyword in bucket_response.text for keyword in KEYWORDS))):
print(f"Found bucket '{new_bucket_url}'")
self.__log(new_bucket_url)
def do_s3(self):
try:
print('\t Searching takeovers for ' + self.host)
r = requests.get('https://' + self.host, verify=False)
for x in self.fingerprints:
take_reg = re.compile(x)
self.temp = take_reg.findall(r.text)
if self.temp != []:
print('\t\033[91m Takeover detected! - ' + self.host + '\033[1;32;40m')
except Exception as e:
print(e)
def process(self):
self.do_take()


@@ -0,0 +1,34 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
import requests
from pprint import pprint
class SearchSpyse:
def __init__(self, word):
self.word = word
self.key = Core.spyse_key()
if self.key is None:
raise MissingKey(True)
self.results = ''
self.totalresults = ''
def do_search(self):
try:
base_url = f'https://api.spyse.com/v1/subdomains?domain={self.word}&api_token={self.key}&page=2'
headers = {'User-Agent': Core.get_user_agent()}
request = requests.get(base_url, headers=headers)
self.results = request.json()
pprint(self.results)
# self.totalresults += self.results
except Exception as e:
print(f'An exception has occurred: {e}')
def get_hostnames(self):
return self.totalresults
def process(self):
self.do_search()
print('\tSearching results.')
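
For context, the new module follows the same minimal interface as the other discovery modules: construct it with the target word, call process(), then read the results. A rough usage sketch, assuming a spyse key has been added to api-keys.yaml as shown in the hunk above ('example.com' is a placeholder domain):

# Hypothetical usage of the new module, not part of this commit.
from theHarvester.discovery import spyse

search = spyse.SearchSpyse('example.com')  # raises MissingKey if no key is configured
search.process()                           # performs the API request and pprints the JSON
hosts = search.get_hostnames()             # note: still returns '' because
                                           # totalresults is never populated here
print(hosts)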


@@ -46,6 +46,12 @@ def shodan_key() -> str:
keys = yaml.safe_load(api_keys)
return keys['apikeys']['shodan']['key']
@staticmethod
def spyse_key() -> str:
with open('api-keys.yaml', 'r') as api_keys:
keys = yaml.safe_load(api_keys)
return keys['apikeys']['spyse']['key']
@staticmethod
def banner() -> None:
print('\n\033[93m*******************************************************************')
@@ -67,7 +73,6 @@ def get_supportedengines() -> Set[Union[str, Any]]:
supportedengines = {'baidu',
'bing',
'bingapi',
'censys',
'crtsh',
'dnsdumpster',
'dogpile',
@@ -83,6 +88,7 @@ def get_supportedengines() -> Set[Union[str, Any]]:
'otx',
'securityTrails',
'suip',
'spyse',
'threatcrowd',
'trello',
'twitter',


@@ -1,67 +0,0 @@
from bs4 import BeautifulSoup
import re
class Parser:
def __init__(self, resultstoparse):
self.ipaddresses = []
self.souphosts = BeautifulSoup(resultstoparse.total_resultshosts, features='html.parser')
self.soupcerts = BeautifulSoup(resultstoparse.total_resultscerts, features='html.parser')
self.hostnames = []
self.hostnamesfromcerts = []
self.urls = []
self.numberofpageshosts = 0
self.numberofpagescerts = 0
self.domain = resultstoparse.word
def search_hostnamesfromcerts(self):
try:
hostnamelist = self.soupcerts.findAll('i', 'fa fa-fw fa-home')
for hostnameitem in hostnamelist:
hostitems = hostnameitem.next_sibling
hostnames = str(hostitems)
hostnamesclean = re.sub(r'[ \'\[\]]', '', hostnames)
hostnamesclean = re.sub(r'\.\.\.', r'', hostnamesclean)
self.hostnamesfromcerts.extend(hostnamesclean.split(','))
self.hostnamesfromcerts = list(filter(None, self.hostnamesfromcerts))
matchingdomains = [s for s in self.hostnamesfromcerts if str(self.domain) in s] # filter out domains issued to other sites
self.hostnamesfromcerts = matchingdomains
return self.hostnamesfromcerts
except Exception as e:
print('Error occurred in the Censys module: certificate hostname parser: ' + str(e))
def search_ipaddresses(self):
try:
ipaddresslist = self.souphosts.findAll('a', 'SearchResult__title-text')
for ipaddressitem in ipaddresslist:
self.ipaddresses.append(ipaddressitem.text.strip())
return self.ipaddresses
except Exception as e:
print('Error occurred in the Censys module: IP address parser: ' + str(e))
def search_totalpageshosts(self):
try:
items = self.souphosts.findAll('span', 'SearchResultSectionHeader__statistic')
if items == [] or items is None:
self.numberofpageshosts = 0
return self.numberofpageshosts
numbers = re.findall(r"/\d*", items[0].text)
pagenumber = numbers[0].replace('/', '')
self.numberofpageshosts = int(pagenumber)
return self.numberofpageshosts
except Exception as e:
print('Error occurred in the Censys module IP search: page parser: ' + str(e))
def search_totalpagescerts(self):
try:
items = self.soupcerts.findAll('span', 'SearchResultSectionHeader__statistic')
if items == [] or items is None:
self.numberofpageshosts = 0
return self.numberofpageshosts
numbers = re.findall(r"/\d*", items[0].text)
pagenumber = numbers[0].replace('/', '')
self.numberofpagescerts = int(pagenumber)
return self.numberofpagescerts
except Exception as e:
print('Error occurred in the Censys module IP search: page parser: ' + str(e))