Added filtering method in constants.py and applied it to hostnames and emails in theHarvester.py

This commit is contained in:
NotoriousRebel 2018-12-23 19:54:56 -05:00
parent f8ba4ebb2c
commit b994a5d2f4
2 changed files with 57 additions and 43 deletions

View file

@ -254,6 +254,20 @@
]
def filter(lst):
    """
    Deduplicate a list of strings and drop entries with a bad first character.

    An entry is kept only when its first character is a letter or a digit.
    Empty strings and ``None`` entries are discarded instead of raising.

    NOTE: this name shadows the ``filter`` builtin in any module that
    imports it; the name is kept because existing callers rely on it.

    :param lst: iterable of strings to be filtered (``None`` is treated as empty)
    :return: new list of the unique kept entries, in first-seen order
    """
    if not lst:
        return []
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic (a plain set() would yield an arbitrary order).
    return [
        item
        for item in dict.fromkeys(lst)
        # Falsy entries ('' or None) have no first character to inspect.
        if item and (item[0].isalpha() or item[0].isdigit())
    ]
def getDelay():
    """
    Pick a random delay value.

    :return: a random integer from 1 to 3 (inclusive) minus one half,
             i.e. one of 0.5, 1.5 or 2.5
    """
    whole_part = random.randint(1, 3)
    return whole_part - 0.5

View file

@ -146,8 +146,8 @@ def start(argv):
try:
search = baidusearch.search_baidu(word, limit)
search.process()
all_emails = search.get_emails()
hosts = search.get_hostnames()
all_emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'baidu')
@ -164,8 +164,8 @@ def start(argv):
else:
bingapi = "no"
search.process(bingapi)
all_emails = search.get_emails()
hosts = search.get_hostnames()
all_emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'email', 'bing')
@ -183,7 +183,7 @@ def start(argv):
search = censys.search_censys(word)
search.process()
all_ip = search.get_ipaddresses()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'censys')
@ -193,7 +193,7 @@ def start(argv):
print("[-] Searching in CRT.sh:")
search = crtsh.search_crtsh(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'CRTsh')
@ -212,8 +212,8 @@ def start(argv):
print("[-] Searching in Dogpilesearch..")
search = dogpilesearch.search_dogpile(word, limit)
search.process()
all_emails = search.get_emails()
all_hosts = search.get_hostnames()
all_emails = filter(search.get_emails())
all_hosts = filter(search.get_hostnames())
db.store_all(word, all_hosts, 'email', 'dogpile')
db.store_all(word, all_hosts, 'host', 'dogpile')
@ -221,9 +221,9 @@ def start(argv):
print("[-] Searching in Google:")
search = googlesearch.search_google(word, limit, start)
search.process(google_dorking)
emails = search.get_emails()
emails = filter(search.get_emails())
all_emails.extend(emails)
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'google')
@ -235,9 +235,9 @@ def start(argv):
search = googleCSE.search_googleCSE(word, limit, start)
search.process()
search.store_results()
all_emails = search.get_emails()
all_emails = filter(search.get_emails())
db = stash.stash_manager()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db.store_all(word, all_hosts, 'email', 'googleCSE')
db = stash.stash_manager()
@ -265,7 +265,7 @@ def start(argv):
print("[-] Searching in Google Certificate transparency report..")
search = googlecertificates.search_googlecertificates(word, limit, start)
search.process()
hosts = search.get_domains()
hosts = filter(search.get_domains())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'google-certificates')
@ -290,9 +290,9 @@ def start(argv):
try:
search = huntersearch.search_hunter(word, limit, start)
search.process()
emails = search.get_emails()
emails = filter(search.get_emails())
all_emails.extend(emails)
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'hunter')
@ -320,7 +320,7 @@ def start(argv):
print("[-] Searching in Netcraft:")
search = netcraft.search_netcraft(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'netcraft')
@ -330,8 +330,8 @@ def start(argv):
try:
search = pgpsearch.search_pgp(word)
search.process()
all_emails = search.get_emails()
hosts = search.get_hostnames()
all_emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'pgp')
@ -345,7 +345,7 @@ def start(argv):
try:
search = securitytrailssearch.search_securitytrail(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, hosts, 'host', 'securityTrails')
@ -364,7 +364,7 @@ def start(argv):
try:
search = threatcrowd.search_threatcrowd(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'threatcrowd')
@ -377,11 +377,11 @@ def start(argv):
# import locally or won't work
search = trello.search_trello(word, limit)
search.process()
emails = search.get_emails()
emails = filter(search.get_emails())
all_emails.extend(emails)
info = search.get_urls()
hosts = info[0]
trello_info = (info[1], True)
hosts = filter(info[0])
trello_info = (filter(info[1]), True)
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, hosts, 'host', 'trello')
@ -406,7 +406,7 @@ def start(argv):
print("[-] Searching in Virustotal:")
search = virustotal.search_virustotal(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'virustotal')
@ -415,8 +415,8 @@ def start(argv):
print("[-] Searching in Yahoo..")
search = yahoosearch.search_yahoo(word, limit)
search.process()
all_emails = search.get_emails()
hosts = search.get_hostnames()
all_emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'yahoo')
@ -433,8 +433,8 @@ def start(argv):
bingapi = "no"
search = bingsearch.search_bing(word, limit, start)
search.process(bingapi)
emails = search.get_emails()
hosts = search.get_hostnames()
emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'bing')
@ -450,7 +450,7 @@ def start(argv):
setips = set(ips)
uniqueips = list(setips) # remove duplicates
all_ip.extend(uniqueips)
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
sethosts = set(hosts)
uniquehosts = list(sethosts) # remove duplicates
all_hosts.extend(uniquehosts)
@ -461,7 +461,7 @@ def start(argv):
print("[-] Searching in CRTSH server..")
search = crtsh.search_crtsh(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'CRTsh')
@ -473,8 +473,8 @@ def start(argv):
print("[-] Searching in Google..")
search = googlesearch.search_google(word, limit, start)
search.process(google_dorking)
emails = search.get_emails()
hosts = search.get_hostnames()
emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_emails.extend(emails)
db = stash.stash_manager()
db.store_all(word, all_emails, 'email', 'google')
@ -485,7 +485,7 @@ def start(argv):
print("[-] Searching in Google Certificate transparency report..")
search = googlecertificates.search_googlecertificates(word, limit, start)
search.process()
domains = search.get_domains()
domains = filter(search.get_domains())
all_hosts.extend(domains)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'google-certificates')
@ -502,8 +502,8 @@ def start(argv):
try:
search = huntersearch.search_hunter(word, limit, start)
search.process()
emails = search.get_emails()
hosts = search.get_hostnames()
emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, hosts, 'host', 'hunter')
@ -520,7 +520,7 @@ def start(argv):
print("[-] Searching in Netcraft server..")
search = netcraft.search_netcraft(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'netcraft')
@ -529,8 +529,8 @@ def start(argv):
try:
search = pgpsearch.search_pgp(word)
search.process()
emails = search.get_emails()
hosts = search.get_hostnames()
emails = filter(search.get_emails())
hosts = filter(search.get_hostnames())
sethosts = set(hosts)
uniquehosts = list(sethosts) # remove duplicates
all_hosts.extend(uniquehosts)
@ -546,7 +546,7 @@ def start(argv):
try:
search = threatcrowd.search_threatcrowd(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'threatcrowd')
@ -560,11 +560,11 @@ def start(argv):
# import locally or won't work
search = trello.search_trello(word, limit)
search.process()
emails = search.get_emails()
emails = filter(search.get_emails())
all_emails.extend(emails)
info = search.get_urls()
hosts = info[0]
trello_info = (info[1], True)
hosts = filter(info[0])
trello_info = (filter(info[1]), True)
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, hosts, 'host', 'trello')
@ -577,7 +577,7 @@ def start(argv):
print("[-] Searching in Virustotal server..")
search = virustotal.search_virustotal(word)
search.process()
hosts = search.get_hostnames()
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'virustotal')