# theHarvester/theHarvester.py

2018-12-18 00:05:11 +08:00
#!/usr/bin/env python

import getopt
import os
import re
import stash
import sys
import time

# Third-party dependencies are checked explicitly so the user gets a clear
# message instead of a raw traceback. Catch ImportError only (a bare except
# would also swallow KeyboardInterrupt/SystemExit) and exit non-zero so
# callers/scripts can detect the failure.
try:
    import bs4
except ImportError:
    print("\nBeautifulSoup library not found, please install before proceeding.\n\n")
    sys.exit(1)

try:
    import requests
except ImportError:
    print("Requests library not found, please install before proceeding.\n\n")
    sys.exit(1)

from discovery import *
from discovery.constants import *
from lib import hostchecker
from lib import htmlExport
2011-05-04 23:07:06 +08:00
2018-11-30 05:28:37 +08:00
# Startup ASCII banner: open in green (\033[92m), close switching the
# terminal to blue (\033[94m) for the output that follows.
_BANNER = (
    "\n\033[92m*******************************************************************",
    "* *",
    "* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *",
    "* | __| '_ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *",
    "* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *",
    "* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *",
    "* *",
    "* theHarvester Ver. 3.0.6 v65 *",
    "* Coded by Christian Martorella *",
    "* Edge-Security Research *",
    "* cmartorella@edge-security.com *",
    "*******************************************************************\033[94m\n\n",
)
print("\n".join(_BANNER))
2011-05-04 23:07:06 +08:00
2018-12-20 03:39:33 +08:00
2011-05-04 23:07:06 +08:00
def usage():
    """Print the command-line help text, including usage examples.

    Uses ``./`` prefix for the program name when it is being run from its
    own directory, so the printed examples are copy-pasteable.
    """
    comm = os.path.basename(sys.argv[0])
    if os.path.dirname(sys.argv[0]) == os.getcwd():
        comm = "./" + comm

    print("Usage: theHarvester.py <options> \n")
    print("       -d: company name or domain to search")
    print("""       -b: source: baidu, bing, bingapi, censys, crtsh, cymon, dogpile, google,
                   googleCSE, googleplus, google-certificates, google-profiles,
                   hunter, linkedin, netcraft, pgp, securityTrails, threatcrowd, trello, twitter,
                   vhost, virustotal, yahoo, all""")
    print("       -g: use Google Dorking instead of normal Google search")
    print("       -s: start with result number X (default: 0)")
    print("       -v: verify host name via DNS resolution and search for virtual hosts")
    print("       -f: save the results into an HTML and/or XML file")
    print("       -n: perform a DNS reverse query on all ranges discovered")
    # NOTE: a leftover merge-conflict marker ("<<<<<<< HEAD") that sat between
    # the -n and -c lines in the previous revision has been removed.
    print("       -c: perform a DNS brute force on the domain")
    print("       -t: perform a DNS TLD expansion discovery")
    print("       -e: specify DNS server")
    print("       -p: port scan the detected hosts and check for Takeovers (21,22,80,443,8080)")
    print("       -l: limit the number of results (Bing goes from 50 to 50 results,")
    print("            Google 100 to 100, and PGP doesn't use this option)")
    print("       -h: use Shodan to query discovered hosts")
    print("\nExamples:")
    print(("        " + comm + " -d acme.com -l 500 -b google -f myresults.html"))
    print(("        " + comm + " -d acme.com -b pgp, virustotal"))
    print(("        " + comm + " -d acme -l 200 -b linkedin"))
    print(("        " + comm + " -d acme.com -l 200 -g -b google"))
    print(("        " + comm + " -d acme.com -b googleCSE -l 500 -s 300"))
    print(("        " + comm + " -d acme.edu -l 100 -b bing -h \n"))
2011-05-04 23:07:06 +08:00
2018-12-20 03:39:33 +08:00
2011-05-04 23:07:06 +08:00
def start(argv):
    """Entry point for the harvester run.

    Parses command-line options, runs the selected passive data sources
    (-b), prints aggregated IPs/emails/hosts, then runs the optional
    active stages (DNS brute force, port scan + takeover check, DNS
    reverse lookup, TLD expansion, virtual-host search, Shodan lookups)
    and finally writes HTML and XML reports when -f was given.

    NOTE(review): the name ``filter`` used throughout is presumably the
    result-cleanup helper star-imported from discovery.constants, not the
    builtin -- confirm against discovery/constants.py.
    """
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)
    try:
        opts, args = getopt.getopt(argv, "l:d:b:s:u:vf:nhcgpte:")
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    # Best-effort local results-DB bootstrap; any failure is deliberately
    # ignored so a broken DB never blocks a harvest.
    try:
        db = stash.stash_manager()
        db.do_init()
    except Exception as e:
        pass
    start = 0  # NOTE(review): shadows this function's own name (harmless here; only used as an int offset below).
    host_ip = []
    all_hosts = []
    all_emails = []
    filename = ""
    bingapi = "yes"
    dnslookup = False
    dnsbrute = False
    dnstld = False
    shodan = False
    vhost = []
    virtual = False
    ports_scanning = False
    takeover_check = False
    google_dorking = False
    limit = 500
    all_ip = []
    full = []
    trello_info = ([], False)  # (trello URLs, "user selected trello" flag)
    dnsserver = ""
    # NOTE(review): `for opt, arg in opts:` would be the idiomatic form.
    for value in enumerate(opts):
        opt = value[1][0]
        arg = value[1][1]
        opt = str(opt)
        arg = str(arg)
        if opt == '-l':
            limit = int(arg)
        elif opt == '-d':
            word = arg  # target domain/company; -d is effectively mandatory (NameError below otherwise)
        elif opt == '-g':
            google_dorking = True
        elif opt == '-s':
            start = int(arg)
        elif opt == '-v':
            virtual = "basic"
        elif opt == '-f':
            filename = arg
        elif opt == '-n':
            dnslookup = True
        elif opt == '-c':
            dnsbrute = True
        elif opt == '-h':
            shodan = True
        elif opt == '-e':
            dnsserver = arg
        elif opt == '-p':
            ports_scanning = True
        elif opt == '-t':
            dnstld = True
        elif opt == '-b':
            engines = set(arg.split(','))
            supportedengines = set(["baidu", "bing", "bingapi", "censys", "crtsh", "cymon", "dogpile", "google", "googleCSE", "googleplus",'google-certificates', "google-profiles", "hunter", "linkedin", "netcraft", "pgp", "securityTrails", "threatcrowd", "trello", "twitter", "vhost", "virustotal", "yahoo", "all"])
            if set(engines).issubset(supportedengines):
                print("found supported engines")
                print(("[-] Starting harvesting process for domain: " + word + "\n"))
                for engineitem in engines:
                    if engineitem == "baidu":
                        print("[-] Searching in Baidu.")
                        try:
                            search = baidusearch.search_baidu(word, limit)
                            search.process()
                            # NOTE(review): assignment (not extend) discards emails
                            # collected by any engine processed earlier in this loop.
                            all_emails = filter(search.get_emails())
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'baidu')
                            db.store_all(word, all_emails, 'email', 'baidu')
                        except Exception:
                            pass

                    elif engineitem == "bing" or engineitem == "bingapi":
                        print("[-] Searching in Bing.")
                        try:
                            search = bingsearch.search_bing(word, limit, start)
                            if engineitem == "bingapi":
                                bingapi = "yes"
                            else:
                                bingapi = "no"
                            search.process(bingapi)
                            all_emails = filter(search.get_emails())
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            # NOTE(review): stores all_hosts under the 'email'
                            # resource type -- looks like it should be all_emails.
                            db.store_all(word, all_hosts, 'email', 'bing')
                            db.store_all(word, all_hosts, 'host', 'bing')
                        except Exception as e:
                            if isinstance(e, MissingKey):  # Sanity check.
                                print(e)
                            else:
                                pass

                    elif engineitem == "censys":
                        print("[-] Searching in Censys.")
                        from discovery import censys
                        # Import locally or won't work.
                        search = censys.search_censys(word)
                        search.process()
                        all_ip = search.get_ipaddresses()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'censys')
                        db.store_all(word, all_ip, 'ip', 'censys')

                    elif engineitem == "crtsh":
                        print("[-] Searching in CRT.sh.")
                        search = crtsh.search_crtsh(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'CRTsh')

                    elif engineitem == "cymon":
                        print("[-] Searching in Cymon.")
                        from discovery import cymon
                        # Import locally or won't work.
                        search = cymon.search_cymon(word)
                        search.process()
                        all_ip = search.get_ipaddresses()
                        db = stash.stash_manager()
                        db.store_all(word, all_ip, 'ip', 'cymon')
                    elif engineitem == "dogpile":
                        print("[-] Searching in Dogpilesearch.")
                        search = dogpilesearch.search_dogpile(word, limit)
                        search.process()
                        # NOTE(review): both assignments overwrite the accumulators,
                        # and all_hosts is stored under the 'email' type below.
                        all_emails = filter(search.get_emails())
                        all_hosts = filter(search.get_hostnames())
                        db.store_all(word, all_hosts, 'email', 'dogpile')
                        db.store_all(word, all_hosts, 'host', 'dogpile')

                    elif engineitem == "google":
                        print("[-] Searching in Google.")
                        search = googlesearch.search_google(word, limit, start)
                        search.process(google_dorking)
                        emails = filter(search.get_emails())
                        all_emails.extend(emails)
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'google')
                        db.store_all(word, all_emails, 'email', 'google')

                    elif engineitem == "googleCSE":
                        print("[-] Searching in Google Custom Search.")
                        try:
                            search = googleCSE.search_googleCSE(word, limit, start)
                            search.process()
                            search.store_results()
                            all_emails = filter(search.get_emails())
                            db = stash.stash_manager()
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            # NOTE(review): hosts stored under the 'email' type.
                            db.store_all(word, all_hosts, 'email', 'googleCSE')
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'googleCSE')
                        except Exception as e:
                            if isinstance(e, MissingKey):  # Sanity check.
                                print(e)
                            else:
                                pass
                    elif engineitem == "googleplus":
                        print("[-] Searching in Google+.")
                        search = googleplussearch.search_googleplus(word, limit)
                        search.process()
                        people = search.get_people()
                        print("\nUsers from Google+:")
                        print("===================")
                        db = stash.stash_manager()
                        db.store_all(word, people, 'name', 'googleplus')
                        for user in people:
                            print(user)
                        # People-only source: print and terminate the run.
                        sys.exit()

                    elif engineitem == "google-certificates":
                        print("[-] Searching in Google Certificate transparency report.")
                        search = googlecertificates.search_googlecertificates(word, limit, start)
                        search.process()
                        hosts = filter(search.get_domains())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'google-certificates')
                    elif engineitem == "google-profiles":
                        print("[-] Searching in Google profiles.")
                        search = googlesearch.search_google(word, limit, start)
                        search.process_profiles()
                        people = search.get_profiles()
                        db = stash.stash_manager()
                        db.store_all(word, people, 'name', 'google-profile')
                        print("\nUsers from Google profiles:")
                        print("---------------------------")
                        for users in people:
                            print(users)
                        sys.exit()

                    elif engineitem == "hunter":
                        print("[-] Searching in Hunter.")
                        from discovery import huntersearch
                        # Import locally or won't work
                        try:
                            search = huntersearch.search_hunter(word, limit, start)
                            search.process()
                            emails = filter(search.get_emails())
                            all_emails.extend(emails)
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'hunter')
                            db.store_all(word, all_emails, 'email', 'hunter')
                        except Exception as e:
                            if isinstance(e, MissingKey):  # Sanity check.
                                print(e)
                            else:
                                pass

                    elif engineitem == "linkedin":
                        print("[-] Searching in Linkedin.")
                        search = linkedinsearch.search_linkedin(word, limit)
                        search.process()
                        people = search.get_people()
                        db = stash.stash_manager()
                        db.store_all(word, people, 'name', 'linkedin')
                        print("\nUsers from Linkedin:")
                        print("-------------------")
                        for user in people:
                            print(user)
                        sys.exit()
                    elif engineitem == "netcraft":
                        print("[-] Searching in Netcraft.")
                        search = netcraft.search_netcraft(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'netcraft')

                    elif engineitem == "pgp":
                        print("[-] Searching in PGP key server.")
                        try:
                            search = pgpsearch.search_pgp(word)
                            search.process()
                            # NOTE(review): overwrites previously accumulated emails.
                            all_emails = filter(search.get_emails())
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'pgp')
                            db.store_all(word, all_emails, 'email', 'pgp')
                        except Exception:
                            pass

                    elif engineitem == 'securityTrails':
                        print("[-] Searching in SecurityTrails.")
                        from discovery import securitytrailssearch
                        try:
                            search = securitytrailssearch.search_securitytrail(word)
                            search.process()
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, hosts, 'host', 'securityTrails')
                            ips = search.get_ips()
                            all_ip.extend(ips)
                            db = stash.stash_manager()
                            db.store_all(word, ips, 'ip', 'securityTrails')
                        except Exception as e:
                            if isinstance(e, MissingKey):  # Sanity check.
                                print(e)
                            else:
                                pass

                    elif engineitem == "threatcrowd":
                        print("[-] Searching in Threatcrowd.")
                        try:
                            search = threatcrowd.search_threatcrowd(word)
                            search.process()
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'threatcrowd')
                        except Exception:
                            pass

                    elif engineitem == "trello":
                        print("[-] Searching in Trello.")
                        from discovery import trello
                        # Import locally or won't work.
                        search = trello.search_trello(word, limit)
                        search.process()
                        emails = filter(search.get_emails())
                        all_emails.extend(emails)
                        info = search.get_urls()
                        hosts = filter(info[0])
                        # Flag True so the Trello-URL section is printed later.
                        trello_info = (filter(info[1]), True)
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, hosts, 'host', 'trello')
                        db.store_all(word, emails, 'email', 'trello')

                    elif engineitem == "twitter":
                        print("[-] Searching in Twitter.")
                        search = twittersearch.search_twitter(word, limit)
                        search.process()
                        people = search.get_people()
                        db = stash.stash_manager()
                        db.store_all(word, people, 'name', 'twitter')
                        print("\nUsers from Twitter:")
                        print("-------------------")
                        for user in people:
                            print(user)
                        sys.exit()

                    # vhost

                    elif engineitem == "virustotal":
                        print("[-] Searching in VirusTotal.")
                        search = virustotal.search_virustotal(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'virustotal')

                    elif engineitem == "yahoo":
                        print("[-] Searching in Yahoo.")
                        search = yahoosearch.search_yahoo(word, limit)
                        search.process()
                        # NOTE(review): this branch never calls get_hostnames()/
                        # get_emails(); `hosts` here is leftover from a previous
                        # engine (NameError if yahoo runs first) and the stored
                        # emails are whatever earlier engines produced.
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'yahoo')
                        db.store_all(word, all_emails, 'email', 'yahoo')
                    elif engineitem == "all":
                        # Composite run: every non-interactive engine in sequence.
                        print(("Full harvest on " + word))
                        all_emails = []
                        all_hosts = []

                        # baidu
                        print("[-] Searching in Bing.")
                        bingapi = "no"
                        search = bingsearch.search_bing(word, limit, start)
                        search.process(bingapi)
                        emails = filter(search.get_emails())
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'bing')
                        all_emails.extend(emails)
                        all_emails = sorted(set(all_emails))
                        db.store_all(word, all_emails, 'email', 'bing')

                        print("[-] Searching in Censys.")
                        from discovery import censys
                        search = censys.search_censys(word)
                        search.process()
                        ips = search.get_ipaddresses()
                        setips = set(ips)
                        uniqueips = list(setips)  # Remove duplicates.
                        all_ip.extend(uniqueips)
                        hosts = filter(search.get_hostnames())
                        sethosts = set(hosts)
                        uniquehosts = list(sethosts)  # Remove duplicates.
                        all_hosts.extend(uniquehosts)
                        db = stash.stash_manager()
                        db.store_all(word, uniquehosts, 'host', 'censys')
                        db.store_all(word, uniqueips, 'ip', 'censys')

                        print("[-] Searching in CRTSH server.")
                        search = crtsh.search_crtsh(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'CRTsh')

                        # cymon

                        # dogpile
                        print("[-] Searching in Google.")
                        search = googlesearch.search_google(word, limit, start)
                        search.process(google_dorking)
                        emails = filter(search.get_emails())
                        hosts = filter(search.get_hostnames())
                        all_emails.extend(emails)
                        db = stash.stash_manager()
                        db.store_all(word, all_emails, 'email', 'google')
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'google')

                        print("[-] Searching in Google Certificate transparency report.")
                        search = googlecertificates.search_googlecertificates(word, limit, start)
                        search.process()
                        domains = filter(search.get_domains())
                        all_hosts.extend(domains)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'google-certificates')
                        # googleplus
                        # google-certificates
                        # google-profiles

                        print("[-] Searching in Hunter.")
                        from discovery import huntersearch
                        # Import locally.
                        try:
                            search = huntersearch.search_hunter(word, limit, start)
                            search.process()
                            emails = filter(search.get_emails())
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, hosts, 'host', 'hunter')
                            all_emails.extend(emails)
                            all_emails = sorted(set(all_emails))
                            db.store_all(word, all_emails, 'email', 'hunter')
                        except Exception as e:
                            if isinstance(e, MissingKey):  # Sanity check.
                                print(e)
                            else:
                                pass

                        # linkedin
                        print("[-] Searching in Netcraft server.")
                        search = netcraft.search_netcraft(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'netcraft')

                        print("[-] Searching in PGP key server.")
                        try:
                            search = pgpsearch.search_pgp(word)
                            search.process()
                            emails = filter(search.get_emails())
                            hosts = filter(search.get_hostnames())
                            sethosts = set(hosts)
                            uniquehosts = list(sethosts)  # Remove duplicates.
                            all_hosts.extend(uniquehosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'PGP')
                            all_emails.extend(emails)
                            db = stash.stash_manager()
                            db.store_all(word, all_emails, 'email', 'PGP')
                        except Exception:
                            pass

                        print("[-] Searching in ThreatCrowd server.")
                        try:
                            search = threatcrowd.search_threatcrowd(word)
                            search.process()
                            hosts = filter(search.get_hostnames())
                            all_hosts.extend(hosts)
                            db = stash.stash_manager()
                            db.store_all(word, all_hosts, 'host', 'threatcrowd')
                        except Exception:
                            pass

                        print("[-] Searching in Trello.")
                        from discovery import trello
                        # Import locally or won't work.
                        search = trello.search_trello(word, limit)
                        search.process()
                        emails = filter(search.get_emails())
                        all_emails.extend(emails)
                        info = search.get_urls()
                        hosts = filter(info[0])
                        trello_info = (filter(info[1]), True)
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, hosts, 'host', 'trello')
                        db.store_all(word, emails, 'email', 'trello')

                        # twitter
                        # vhost
                        print("[-] Searching in VirusTotal server.")
                        search = virustotal.search_virustotal(word)
                        search.process()
                        hosts = filter(search.get_hostnames())
                        all_hosts.extend(hosts)
                        db = stash.stash_manager()
                        db.store_all(word, all_hosts, 'host', 'virustotal')
                        # yahoo
            else:
                usage()
                print("Invalid search engine, try using: baidu, bing, bingapi, censys, crtsh, cymon, dogpile, google, googleCSE, googleplus, google-certificates, google-profiles, hunter, linkedin, netcraft, pgp, securityTrails, threatcrowd, trello, twitter, vhost, virustotal, yahoo, all")
                sys.exit(1)

    # Results ############################################################
    print("\n\033[1;32;40mHarvesting results")
    if len(all_ip) == 0:
        print("No IP addresses found.")
    else:
        print("\033[1;33;40m \n[+] IP addresses found in search engines:")
        print("----------------------------------------")
        print("Total IP addresses: " + str(len(all_ip)) + "\n")
        for ip in sorted(list(set(all_ip))):
            print(ip)
    print("\n\n[+] Emails found:")
    print("----------------")
    # Sanity check to see if all_emails and all_hosts are defined.
    try:
        all_emails
    except NameError:
        print('No emails found as all_emails is not defined.')
        sys.exit(1)
    try:
        all_hosts
    except NameError:
        print('No hosts found as all_hosts is not defined.')
        sys.exit(1)
    if all_emails == []:
        print("No emails found.")
    else:
        print("Total emails: " + str(len(all_emails)) + "\n")
        print(("\n".join(sorted(list(set(all_emails))))))

    print("\033[1;33;40m \n[+] Hosts found in search engines:")
    print("----------------------------------")
    if all_hosts == [] or all_emails is None:
        print("No hosts found.")
    else:
        total = len(all_hosts)
        print(("\nTotal hosts: " + str(total) + "\n"))
        all_hosts = sorted(list(set(all_hosts)))
        for host in all_hosts:
            print(host)
        # Resolve every discovered hostname; each entry of `full` is
        # "hostname:ip" (ip may be the literal string "empty").
        print("\033[94m[-] Resolving hostnames to IPs.\033[1;33;40m \n ")
        full_host = hostchecker.Checker(all_hosts)
        full = full_host.check()
        for host in full:
            ip = host.split(':')[1]
            print(host)
            if ip != "empty":
                if host_ip.count(ip.lower()):
                    pass
                else:
                    host_ip.append(ip.lower())

    db = stash.stash_manager()
    db.store_all(word, host_ip, 'ip', 'DNS-resolver')

    if trello_info[1] == True:  # Indicates user selected Trello.
        print("\033[1;33;40m \n[+] URLs found from Trello:")
        print("--------------------------")
        trello_urls = trello_info[0]
        if trello_urls == []:
            print('\nNo Trello URLs found.')
        else:
            total = len(trello_urls)
            print(("\nTotal URLs: " + str(total) + "\n"))
            for url in sorted(list(set(trello_urls))):
                print(url)

    # DNS Brute force ################################################
    dnsres = []
    if dnsbrute == True:
        print("\n\033[94m[-] Starting DNS brute force. \033[1;33;40m")
        a = dnssearch.dns_force(word, dnsserver, verbose=True)
        res = a.process()
        print("\n\033[94m[-] Hosts found after DNS brute force:")
        print("-------------------------------------")
        for y in res:
            print(y)
            dnsres.append(y.split(':')[0])
            if y not in full:
                full.append(y)
        db = stash.stash_manager()
        db.store_all(word, dnsres, 'host', 'dns_bruteforce')

    # Port Scanning #################################################
    if ports_scanning == True:
        print("\n\n\033[1;32;40m[-] Scanning ports (active).\n")
        for x in full:
            host = x.split(':')[1]
            domain = x.split(':')[0]
            if host != "empty":
                print(("- Scanning " + host))
                ports = [21, 22, 80, 443, 8080]
                try:
                    scan = port_scanner.port_scan(host, ports)
                    openports = scan.process()
                    # NOTE(review): `> 1` means a host with exactly one open
                    # port is never reported or takeover-checked -- confirm
                    # whether `> 0` was intended.
                    if len(openports) > 1:
                        print(("\t\033[91m Detected open ports: " + ','.join(
                            str(e) for e in openports) + "\033[1;32;40m"))
                        takeover_check = 'True'  # NOTE(review): string, though it was initialised as bool False.
                    if takeover_check == 'True':
                        if len(openports) > 0:
                            search_take = takeover.take_over(domain)
                            search_take.process()
                except Exception as e:
                    print(e)

    # DNS reverse lookup ################################################
    dnsrev = []
    if dnslookup == True:
        print("\n[+] Starting active queries.")
        analyzed_ranges = []
        for x in host_ip:
            print(x)
            ip = x.split(":")[0]
            # Build the /24 network of the resolved IP, e.g. 1.2.3.0/24.
            range = ip.split(".")  # NOTE(review): shadows the builtin `range`.
            range[3] = "0/24"
            s = '.'
            range = s.join(range)
            if not analyzed_ranges.count(range):
                print(("\033[94m[-] Performing reverse lookup in " + range + "\033[1;33;40m"))
                a = dnssearch.dns_reverse(range, True)
                a.list()
                res = a.process()
                analyzed_ranges.append(range)
            else:
                continue
            # Keep only reverse-lookup hits that mention the target word.
            for x in res:
                if x.count(word):
                    dnsrev.append(x)
                    if x not in full:
                        full.append(x)
        print("Hosts found after reverse lookup (in target domain):")
        print("----------------------------------------------------")
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion #################################################
    dnstldres = []
    if dnstld == True:
        print("[-] Starting DNS TLD expansion.")
        a = dnssearch.dns_tld(word, dnsserver, verbose=True)
        res = a.process()
        print("\n[+] Hosts found after DNS TLD expansion:")
        print("----------------------------------------")
        for y in res:
            print(y)
            dnstldres.append(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search ##############################################
    if virtual == "basic":
        print("\n[+] Virtual hosts:")
        print("------------------")
        for l in host_ip:
            search = bingsearch.search_bing(l, limit, start)
            search.process_vhost()
            res = search.get_allhostnames()
            for x in res:
                # Strip HTML tag remnants from Bing vhost results.
                x = re.sub(r'[[\<\/?]*[\w]*>]*', '', x)
                x = re.sub('<', '', x)
                x = re.sub('>', '', x)
                print((l + "\t" + x))
                vhost.append(l + ":" + x)
                full.append(l + ":" + x)
        vhost = sorted(set(vhost))
    else:
        pass

    # Shodan search ####################################################
    shodanres = []
    shodanvisited = []
    if shodan == True:
        print("\n\n\033[1;32;40m[-] Shodan DB search (passive):\n")
        if full == []:
            print('No host to search, exiting.')
            sys.exit(1)
        for x in full:
            try:
                ip = x.split(":")[1]
                if not shodanvisited.count(ip):
                    print(("\tSearching for: " + ip))
                    a = shodansearch.search_shodan(ip)
                    shodanvisited.append(ip)
                    results = a.run()
                    for res in results['data']:
                        # NOTE(review): five %s placeholders but only four
                        # values -- this raises TypeError on every hit, which
                        # the bare `except ... pass` below silently swallows,
                        # so shodanres stays empty.
                        shodanres.append(
                            str("%s:%s - %s - %s - %s," % (res['ip_str'], res['port'], res['os'], res['isp'])))
            except Exception as e:
                pass
        print("\n [+] Shodan results:")
        print("-------------------")
        for x in shodanres:
            print(x)
    else:
        pass
    ###################################################################

    # Here we need to add explosion mode.
    # We need to extract the TLDs to do this.
    recursion = None  # NOTE(review): always falsy, so the branch below is dead code.
    if recursion:
        start = 0
        for word in vhost:
            search = googlesearch.search_google(word, limit, start)
            search.process(google_dorking)
            emails = search.get_emails()
            hosts = search.get_hostnames()
            print(emails)
            print(hosts)
    else:
        pass

    # Reporting #######################################################
    if filename != "":
        try:
            print("NEW REPORTING BEGINS.")
            db = stash.stash_manager()
            scanboarddata = db.getscanboarddata()
            latestscanresults = db.getlatestscanresults(word)
            previousscanresults = db.getlatestscanresults(word, previousday=True)
            latestscanchartdata = db.latestscanchartdata(word)
            scanhistorydomain = db.getscanhistorydomain(word)
            pluginscanstatistics = db.getpluginscanstatistics()
            from lib import statichtmlgenerator
            generator = statichtmlgenerator.htmlgenerator(word)
            HTMLcode = generator.beginhtml()
            HTMLcode += generator.generatelatestscanresults(latestscanresults)
            HTMLcode += generator.generatepreviousscanresults(previousscanresults)
            from lib import reportgraph
            import datetime
            graph = reportgraph.graphgenerator(word)
            HTMLcode += graph.drawlatestscangraph(word, latestscanchartdata)
            HTMLcode += graph.drawscattergraphscanhistory(word, scanhistorydomain)
            HTMLcode += generator.generatepluginscanstatistics(pluginscanstatistics)
            HTMLcode += generator.generatedashboardcode(scanboarddata)
            HTMLcode += '<p><span style="color: #000000;">Report generated on ' + str(
                datetime.datetime.now()) + '</span></p>'
            HTMLcode += '''
            </body>
            </html>
            '''
            # NOTE(review): the "new" dashboard report always goes to
            # report.html regardless of the -f filename.
            Html_file = open("report.html", "w")
            Html_file.write(HTMLcode)
            Html_file.close()
            print("NEW REPORTING FINISHED!")
            print("[+] Saving files.")
            html = htmlExport.htmlExport(
                all_emails,
                full,
                vhost,
                dnsres,
                dnsrev,
                filename,
                word,
                shodanres,
                dnstldres)
            save = html.writehtml()
        except Exception as e:
            print(e)
            print("Error creating the file.")
        # Legacy XML export alongside the HTML report.
        try:
            filename = filename.split(".")[0] + ".xml"
            file = open(filename, 'w')  # NOTE(review): shadows the builtin `file` on Py2; plain name reuse on Py3.
            file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
            for x in all_emails:
                file.write('<email>' + x + '</email>')
            for x in full:
                x = x.split(":")
                if len(x) == 2:
                    file.write('<host>' + '<ip>' + x[1] + '</ip><hostname>' + x[0] + '</hostname>' + '</host>')
                else:
                    file.write('<host>' + x + '</host>')
            for x in vhost:
                x = x.split(":")
                if len(x) == 2:
                    file.write('<vhost>' + '<ip>' + x[1] + '</ip><hostname>' + x[0] + '</hostname>' + '</vhost>')
                else:
                    file.write('<vhost>' + x + '</vhost>')
            if shodanres != []:
                shodanalysis = []
                for x in shodanres:
                    # NOTE(review): entries are formatted with " - " separators
                    # above, not "SAPO", so this split yields one element and
                    # res[2]/res[1] below would raise IndexError -- presumably
                    # stale code from an older Shodan format; confirm.
                    res = x.split("SAPO")
                    file.write('<shodan>')
                    file.write('<host>' + res[0] + '</host>')
                    file.write('<port>' + res[2] + '</port>')
                    file.write('<banner><!--' + res[1] + '--></banner>')
                    reg_server = re.compile('Server:.*')
                    temp = reg_server.findall(res[1])
                    if temp != []:
                        shodanalysis.append(res[0] + ":" + temp[0])
                    file.write('</shodan>')
                if shodanalysis != []:
                    shodanalysis = sorted(set(shodanalysis))
                    file.write('<servers>')
                    for x in shodanalysis:
                        file.write('<server>' + x + '</server>')
                    file.write('</servers>')
            file.write('</theHarvester>')
            file.flush()
            file.close()
            print("Files saved!")
        except Exception as er:
            print(("Error saving XML file: " + str(er)))
        sys.exit()
2018-12-16 11:07:37 +08:00
if __name__ == "__main__":
    # Script entry point: hand argv (minus the program name) to start().
    try:
        start(sys.argv[1:])
    except KeyboardInterrupt:
        print("[*] Search interrupted by user.")
    except Exception:
        import traceback
        # Bug fix: traceback.print_exc() already prints the traceback and
        # returns None, so wrapping it in print() emitted a spurious "None".
        traceback.print_exc()
        sys.exit()
2018-12-27 15:43:32 +08:00