theHarvester/discovery/crtsh.py

66 lines
2.1 KiB
Python
Raw Normal View History

import re
import time

import requests

import myparser
from discovery.constants import *
class search_crtsh:
    """Search crt.sh for a term and collect the text of the certificate pages.

    Typical use: construct with the search term, call ``process()`` to run
    the crawl, then ``get_hostnames()`` to parse what was downloaded.
    """

    # Certificate detail links look like href="?id=<digits>".  Issuer links
    # use "?caid=" and element id attributes have no leading "?", so neither
    # matches this pattern.
    _CERT_LINK_RE = re.compile(r'href="\?id=(\d+)"')

    def __init__(self, word):
        """Store the search term (spaces percent-encoded) and reset state.

        @param word search term to look up on crt.sh
        """
        self.word = word.replace(' ', '%20')
        self.results = ""        # text of the most recently fetched page
        self.totalresults = ""   # concatenated text of every fetched page
        self.server = "https://crt.sh/?q="
        self.quantity = "100"    # not used in this class; kept for callers
        self.counter = 0         # not used in this class; kept for callers

    def do_search(self):
        """Fetch the crt.sh result page, then every certificate it links to.

        The text of each certificate page is appended to
        ``self.totalresults``; ``self.results`` holds the last page fetched.
        Network errors are printed rather than raised: a failure on the
        initial query ends the search, a failure on a single certificate
        page skips only that page.
        """
        urly = self.server + self.word
        try:
            r = requests.get(urly, headers={'User-Agent': getUserAgent()})
        except requests.exceptions.RequestException as e:
            print(e)
            # Without the index page there are no links to follow.
            return

        pages = []
        for link in self.get_info(r.text):
            try:
                r = requests.get(link, headers={'User-Agent': getUserAgent()})
            except requests.exceptions.RequestException as e:
                print(e)
            else:
                self.results = r.text
                pages.append(self.results)
            # Pause after every attempt, successful or not, to throttle
            # the crawl.
            time.sleep(getDelay())
        self.totalresults += "".join(pages)

    def get_info(self, text):
        """Parse the text of the base request for certificate links.

        Each distinct ``href="?id=<number>"`` found in the text yields one
        link, in order of first appearance.

        @param text requests text
        @return list of links of the form 'https://crt.sh?id=<number>'
        """
        cert_ids = self._CERT_LINK_RE.findall(str(text))
        # dict.fromkeys drops repeated ids while preserving order, so the
        # same certificate page is never requested twice.
        return ['https://crt.sh?id=' + cert_id
                for cert_id in dict.fromkeys(cert_ids)]

    def get_hostnames(self):
        """Return the hostnames myparser extracts from the collected text."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.hostnames()

    def process(self):
        """Announce the search and run it."""
        # Print the progress message before the (slow) crawl starts.
        print("\tSearching CRT.sh results..")
        self.do_search()