mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 00:06:30 +08:00
Rename the variables in myparser.py
This commit is contained in:
parent
1821ce48a1
commit
30ba3e5b86
|
@ -13,13 +13,13 @@ def genericClean(self):
|
|||
.replace('%2f', '').replace('%3a', '').replace('<strong>', '').replace('</strong>', '')\
|
||||
.replace('<wbr>', '').replace('</wbr>', '')
|
||||
|
||||
for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
|
||||
self.results = self.results.replace(e, ' ')
|
||||
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
|
||||
self.results = self.results.replace(search, ' ')
|
||||
|
||||
def urlClean(self):
|
||||
self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
|
||||
for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
|
||||
self.results = self.results.replace(e, ' ')
|
||||
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
|
||||
self.results = self.results.replace(search, ' ')
|
||||
|
||||
def emails(self):
|
||||
self.genericClean()
|
||||
|
@ -38,11 +38,11 @@ def fileurls(self, file):
|
|||
reg_urls = re.compile('<a href="(.*?)"')
|
||||
self.temp = reg_urls.findall(self.results)
|
||||
allurls = self.unique()
|
||||
for x in allurls:
|
||||
if x.count('webcache') or x.count('google.com') or x.count('search?hl'):
|
||||
for iteration in allurls:
|
||||
if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
|
||||
pass
|
||||
else:
|
||||
urls.append(x)
|
||||
urls.append(iteration)
|
||||
return urls
|
||||
|
||||
def hostnames(self):
|
||||
|
@ -61,24 +61,24 @@ def people_googleplus(self):
|
|||
reg_people = re.compile(r'>[a-zA-Z0-9._ ]* - Google\+')
|
||||
self.temp = reg_people.findall(self.results)
|
||||
resul = []
|
||||
for x in self.temp:
|
||||
y = x.replace(' | LinkedIn', '')
|
||||
y = y.replace(' profiles ', '')
|
||||
y = y.replace('LinkedIn', '')
|
||||
y = y.replace('"', '')
|
||||
y = y.replace('>', '')
|
||||
if y != " ":
|
||||
resul.append(y)
|
||||
for iteration in self.temp:
|
||||
delete = iteration.replace(' | LinkedIn', '')
|
||||
delete = delete.replace(' profiles ', '')
|
||||
delete = delete.replace('LinkedIn', '')
|
||||
delete = delete.replace('"', '')
|
||||
delete = delete.replace('>', '')
|
||||
if delete != " ":
|
||||
resul.append(delete)
|
||||
return resul
|
||||
|
||||
def hostnames_all(self):
|
||||
reg_hosts = re.compile('<cite>(.*?)</cite>')
|
||||
temp = reg_hosts.findall(self.results)
|
||||
for x in temp:
|
||||
if x.count(':'):
|
||||
res = x.split(':')[1].split('/')[2]
|
||||
for iteration in temp:
|
||||
if iteration.count(':'):
|
||||
res = iteration.split(':')[1].split('/')[2]
|
||||
else:
|
||||
res = x.split('/')[0]
|
||||
res = iteration.split('/')[0]
|
||||
self.temp.append(res)
|
||||
hostnames = self.unique()
|
||||
return hostnames
|
||||
|
@ -96,14 +96,14 @@ def people_linkedin(self):
|
|||
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* \| LinkedIn')
|
||||
self.temp = reg_people.findall(self.results)
|
||||
resul = []
|
||||
for x in (self.temp):
|
||||
y = x.replace(' | LinkedIn', '')
|
||||
y = y.replace(' profiles ', '')
|
||||
y = y.replace('LinkedIn', '')
|
||||
y = y.replace('"', '')
|
||||
y = y.replace('>', '')
|
||||
if y != " ":
|
||||
resul.append(y)
|
||||
for iteration in (self.temp):
|
||||
delete = iteration.replace(' | LinkedIn', '')
|
||||
delete = delete.replace(' profiles ', '')
|
||||
delete = delete.replace('LinkedIn', '')
|
||||
delete = delete.replace('"', '')
|
||||
delete = delete.replace('>', '')
|
||||
if delete != " ":
|
||||
resul.append(delete)
|
||||
return resul
|
||||
|
||||
def people_twitter(self):
|
||||
|
@ -111,36 +111,36 @@ def people_twitter(self):
|
|||
self.temp = reg_people.findall(self.results)
|
||||
users = self.unique()
|
||||
resul = []
|
||||
for x in users:
|
||||
y = x.replace(' | LinkedIn', '')
|
||||
y = y.replace(' profiles ', '')
|
||||
y = y.replace('LinkedIn', '')
|
||||
y = y.replace('"', '')
|
||||
y = y.replace('>', '')
|
||||
if y != " ":
|
||||
resul.append(y)
|
||||
for iteration in users:
|
||||
delete = iteration.replace(' | LinkedIn', '')
|
||||
delete = delete.replace(' profiles ', '')
|
||||
delete = delete.replace('LinkedIn', '')
|
||||
delete = delete.replace('"', '')
|
||||
delete = delete.replace('>', '')
|
||||
if delete != " ":
|
||||
resul.append(delete)
|
||||
return resul
|
||||
|
||||
def profiles(self):
|
||||
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* - <em>Google Profile</em>')
|
||||
self.temp = reg_people.findall(self.results)
|
||||
resul = []
|
||||
for x in self.temp:
|
||||
y = x.replace(' <em>Google Profile</em>', '')
|
||||
y = y.replace('-', '')
|
||||
y = y.replace('">', '')
|
||||
if y != " ":
|
||||
resul.append(y)
|
||||
for iteration in self.temp:
|
||||
delete = iteration.replace(' <em>Google Profile</em>', '')
|
||||
delete = delete.replace('-', '')
|
||||
delete = delete.replace('">', '')
|
||||
if delete != " ":
|
||||
resul.append(delete)
|
||||
return resul
|
||||
|
||||
def set(self):
|
||||
reg_sets = re.compile(r'>[a-zA-Z0-9]*</a></font>')
|
||||
self.temp = reg_sets.findall(self.results)
|
||||
sets = []
|
||||
for x in self.temp:
|
||||
y = x.replace('>', '')
|
||||
y = y.replace('</a</font', '')
|
||||
sets.append(y)
|
||||
for iteration in self.temp:
|
||||
delete = iteration.replace('>', '')
|
||||
delete = delete.replace('</a</font', '')
|
||||
sets.append(delete)
|
||||
return sets
|
||||
|
||||
def urls(self):
|
||||
|
@ -150,7 +150,7 @@ def urls(self):
|
|||
|
||||
def unique(self) -> list:
|
||||
self.new = []
|
||||
for x in self.temp:
|
||||
if x not in self.new:
|
||||
self.new.append(x)
|
||||
for iteration in self.temp:
|
||||
if iteration not in self.new:
|
||||
self.new.append(iteration)
|
||||
return self.new
|
||||
|
|
Loading…
Reference in a new issue