mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-20 23:37:00 +08:00
Added method to get crt.sh ids and crawl them.
This commit is contained in:
parent
20f22b57bd
commit
f3788c6b41
|
@ -7,8 +7,7 @@ def __init__(self, word):
|
||||||
self.word = word.replace(' ', '%20')
|
self.word = word.replace(' ', '%20')
|
||||||
self.results = ""
|
self.results = ""
|
||||||
self.totalresults = ""
|
self.totalresults = ""
|
||||||
self.server = "www.google.com"
|
self.server = "https://crt.sh/?q="
|
||||||
self.hostname = "www.google.com"
|
|
||||||
self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100116 Firefox/3.7"
|
self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100116 Firefox/3.7"
|
||||||
self.quantity = "100"
|
self.quantity = "100"
|
||||||
self.counter = 0
|
self.counter = 0
|
||||||
|
@ -16,7 +15,7 @@ def __init__(self, word):
|
||||||
|
|
||||||
def do_search(self):
|
def do_search(self):
|
||||||
try:
|
try:
|
||||||
urly = "https://crt.sh/?q=%25" + self.word
|
urly = self.server + self.word
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
|
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
|
||||||
|
@ -24,8 +23,32 @@ def do_search(self):
|
||||||
r=requests.get(urly,headers=headers)
|
r=requests.get(urly,headers=headers)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
self.results = r.text
|
links = self.get_info(r.text)
|
||||||
self.totalresults += self.results
|
for link in links:
|
||||||
|
r = requests.get(link, headers=headers)
|
||||||
|
self.results = r.text
|
||||||
|
self.totalresults += self.results
|
||||||
|
|
||||||
|
def get_info(self, text):
    """Build crt.sh certificate links from the text of a search-result page.

    The page text is split into lines; every stripped line that contains
    the substring ``'id='`` is kept as a candidate. From the candidates at
    even positions (0, 2, 4, ...) the characters between a fixed offset and
    the next double quote are taken as the certificate id.

    :param text: page content; it is passed through ``str()`` before parsing.
    :return: list of URLs of the form ``https://crt.sh?id=<id>``, in the
        order the ids appear in ``text``. Empty when nothing matches.
    """
    # Number of leading characters dropped from a candidate line before the
    # id is read.
    # NOTE(review): presumably the length of the markup that precedes the id
    # value on a crt.sh result row -- confirm against the live page.
    id_offset = 43

    candidates = [
        stripped
        for stripped in (raw.strip() for raw in str(text).splitlines())
        if 'id=' in stripped
    ]

    # Only every other candidate is used.
    # NOTE(review): presumably the odd-positioned matches are a second
    # 'id='-bearing line per result row -- confirm.
    # partition('"')[0] yields everything before the first double quote, or
    # the whole remainder when there is no quote.
    return [
        'https://crt.sh?id=' + row[id_offset:].partition('"')[0]
        for row in candidates[::2]
    ]
|
||||||
|
|
||||||
|
|
||||||
def get_hostnames(self):
|
def get_hostnames(self):
|
||||||
rawres = myparser.parser(self.results, self.word)
|
rawres = myparser.parser(self.results, self.word)
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import string
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -337,13 +337,13 @@ def start(argv):
|
||||||
db.store_all(word,all_hosts,'host','threatcrowd')
|
db.store_all(word,all_hosts,'host','threatcrowd')
|
||||||
except Exception: pass
|
except Exception: pass
|
||||||
|
|
||||||
"""print("[-] Searching in CRTSH server..")
|
print("[-] Searching in CRTSH server..")
|
||||||
search = crtsh.search_crtsh(word)
|
search = crtsh.search_crtsh(word)
|
||||||
search.process()
|
search.process()
|
||||||
hosts = search.get_hostnames()
|
hosts = search.get_hostnames()
|
||||||
all_hosts.extend(hosts)
|
all_hosts.extend(hosts)
|
||||||
db=stash.stash_manager()
|
db=stash.stash_manager()
|
||||||
db.store_all(word,all_hosts,'host','CRTsh')"""
|
db.store_all(word,all_hosts,'host','CRTsh')
|
||||||
|
|
||||||
print("[-] Searching in Virustotal server..")
|
print("[-] Searching in Virustotal server..")
|
||||||
search = virustotal.search_virustotal(word)
|
search = virustotal.search_virustotal(word)
|
||||||
|
|
Loading…
Reference in a new issue