mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-26 23:43:02 +08:00
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
|
class parser:
    """Line-oriented extractor for IPs and subdomains from JSON-like text.

    The input is scanned line by line rather than decoded as JSON, so it may
    be a concatenation of several pretty-printed documents.  Two keys are
    recognised:

    * ``"ip": "<value>"``   -- the quoted value is collected as an IP.
    * ``"subdomains": [..]`` -- every entry of the array is joined with the
      target domain (``<entry>.<domain>``) and collected as a hostname.

    Attributes:
        word: target domain the subdomain labels are appended to.
        text: raw text to scan (converted with ``str()`` when parsed).
        hostnames: set of fully qualified hostnames found so far.
        urls: list initialised empty; not populated by this class.
        ips: set of IP strings found so far.
    """

    _IP_KEY = '"ip":'
    _SUBDOMAINS_KEY = '"subdomains":'
    _WWW_PREFIX = 'www.'

    def __init__(self, word, text):
        self.word = word
        self.text = text
        self.hostnames = set()
        self.urls = []
        self.ips = set()

    def parse_text(self):
        """Scan ``self.text`` and return the collected IPs and hostnames.

        The method does not modify ``self.text`` or ``self.word``, so it can
        be called repeatedly with the same result.

        Returns:
            A ``(ips, hostnames)`` tuple of two lists.  Both are built from
            sets, so entries are unique and their order is unspecified.
        """
        domain = self._base_domain()
        in_subdomains = False  # True while inside a multi-line subdomain array

        for raw_line in str(self.text).splitlines():
            line = raw_line.strip()
            if self._IP_KEY in line:
                self._add_ip(line)
            elif self._SUBDOMAINS_KEY in line:
                # Entries may already start (or the array may even close) on
                # the same line as the key, e.g. '"subdomains": [],'.
                _, _, rest = line.partition(self._SUBDOMAINS_KEY)
                _, _, inline = rest.partition('[')
                in_subdomains = self._consume_subdomains(inline, domain)
            elif in_subdomains:
                in_subdomains = self._consume_subdomains(line, domain)

        return list(self.ips), list(self.hostnames)

    def _base_domain(self):
        """Return the target domain without a leading ``www.`` prefix."""
        domain = str(self.word)
        # Only strip the prefix: a "www" elsewhere in the name (for example
        # "awwwards.com") is part of the domain and must be kept.
        if domain.startswith(self._WWW_PREFIX):
            domain = domain[len(self._WWW_PREFIX):]
        return domain

    def _add_ip(self, line):
        """Collect the quoted string value that follows the ``"ip":`` key."""
        _, _, rest = line.partition(self._IP_KEY)
        rest = rest.lstrip()
        if not rest.startswith('"'):
            # Non-string value such as null: there is nothing to collect.
            return
        value = rest[1:].partition('"')[0]
        if value:
            self.ips.add(value)

    def _consume_subdomains(self, fragment, domain):
        """Add the array entries in *fragment*; return True if the array is still open."""
        body, closing, _ = fragment.partition(']')
        for item in body.split(','):
            label = item.replace('"', '').replace('[', '').strip()
            if label:
                self.hostnames.add(label + '.' + domain)
        return not closing
|