mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 08:16:35 +08:00
Rename the variables in myparser.py
This commit is contained in:
parent
1821ce48a1
commit
30ba3e5b86
|
@ -13,13 +13,13 @@ def genericClean(self):
|
||||||
.replace('%2f', '').replace('%3a', '').replace('<strong>', '').replace('</strong>', '')\
|
.replace('%2f', '').replace('%3a', '').replace('<strong>', '').replace('</strong>', '')\
|
||||||
.replace('<wbr>', '').replace('</wbr>', '')
|
.replace('<wbr>', '').replace('</wbr>', '')
|
||||||
|
|
||||||
for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
|
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
|
||||||
self.results = self.results.replace(e, ' ')
|
self.results = self.results.replace(search, ' ')
|
||||||
|
|
||||||
def urlClean(self):
    """Strip emphasis markup and URL-encoded separators from self.results.

    '<em>'/'</em>', '%2f' and '%3a' are removed outright; each remaining
    separator character is then replaced with a single space, in place.
    """
    cleaned = self.results
    # Removals first, in the same order as the original chained replaces.
    for fragment in ('<em>', '</em>', '%2f', '%3a'):
        cleaned = cleaned.replace(fragment, '')
    # Separators collapse to spaces rather than disappearing entirely.
    for separator in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
        cleaned = cleaned.replace(separator, ' ')
    self.results = cleaned
||||||
|
|
||||||
def emails(self):
|
def emails(self):
|
||||||
self.genericClean()
|
self.genericClean()
|
||||||
|
@ -38,11 +38,11 @@ def fileurls(self, file):
|
||||||
reg_urls = re.compile('<a href="(.*?)"')
|
reg_urls = re.compile('<a href="(.*?)"')
|
||||||
self.temp = reg_urls.findall(self.results)
|
self.temp = reg_urls.findall(self.results)
|
||||||
allurls = self.unique()
|
allurls = self.unique()
|
||||||
for x in allurls:
|
for iteration in allurls:
|
||||||
if x.count('webcache') or x.count('google.com') or x.count('search?hl'):
|
if iteration.count('webcache') or iteration.count('google.com') or iteration.count('search?hl'):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
urls.append(x)
|
urls.append(iteration)
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
def hostnames(self):
|
def hostnames(self):
|
||||||
|
@ -61,24 +61,24 @@ def people_googleplus(self):
|
||||||
reg_people = re.compile(r'>[a-zA-Z0-9._ ]* - Google\+')
|
reg_people = re.compile(r'>[a-zA-Z0-9._ ]* - Google\+')
|
||||||
self.temp = reg_people.findall(self.results)
|
self.temp = reg_people.findall(self.results)
|
||||||
resul = []
|
resul = []
|
||||||
for x in self.temp:
|
for iteration in self.temp:
|
||||||
y = x.replace(' | LinkedIn', '')
|
delete = iteration.replace(' | LinkedIn', '')
|
||||||
y = y.replace(' profiles ', '')
|
delete = delete.replace(' profiles ', '')
|
||||||
y = y.replace('LinkedIn', '')
|
delete = delete.replace('LinkedIn', '')
|
||||||
y = y.replace('"', '')
|
delete = delete.replace('"', '')
|
||||||
y = y.replace('>', '')
|
delete = delete.replace('>', '')
|
||||||
if y != " ":
|
if delete != " ":
|
||||||
resul.append(y)
|
resul.append(delete)
|
||||||
return resul
|
return resul
|
||||||
|
|
||||||
def hostnames_all(self):
    """Collect hostnames from every <cite>...</cite> span in self.results.

    Entries containing ':' are treated as scheme-qualified URLs and reduced
    to the host component; otherwise everything before the first '/' is
    taken. Hosts are appended to self.temp and the de-duplicated list from
    self.unique() is returned.
    """
    cite_pattern = re.compile('<cite>(.*?)</cite>')
    for entry in cite_pattern.findall(self.results):
        if ':' in entry:
            # e.g. 'http://host/path' -> '//host/path' -> 'host'
            host = entry.split(':')[1].split('/')[2]
        else:
            host = entry.split('/')[0]
        self.temp.append(host)
    return self.unique()
||||||
|
@ -96,14 +96,14 @@ def people_linkedin(self):
|
||||||
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* \| LinkedIn')
|
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* \| LinkedIn')
|
||||||
self.temp = reg_people.findall(self.results)
|
self.temp = reg_people.findall(self.results)
|
||||||
resul = []
|
resul = []
|
||||||
for x in (self.temp):
|
for iteration in (self.temp):
|
||||||
y = x.replace(' | LinkedIn', '')
|
delete = iteration.replace(' | LinkedIn', '')
|
||||||
y = y.replace(' profiles ', '')
|
delete = delete.replace(' profiles ', '')
|
||||||
y = y.replace('LinkedIn', '')
|
delete = delete.replace('LinkedIn', '')
|
||||||
y = y.replace('"', '')
|
delete = delete.replace('"', '')
|
||||||
y = y.replace('>', '')
|
delete = delete.replace('>', '')
|
||||||
if y != " ":
|
if delete != " ":
|
||||||
resul.append(y)
|
resul.append(delete)
|
||||||
return resul
|
return resul
|
||||||
|
|
||||||
def people_twitter(self):
|
def people_twitter(self):
|
||||||
|
@ -111,36 +111,36 @@ def people_twitter(self):
|
||||||
self.temp = reg_people.findall(self.results)
|
self.temp = reg_people.findall(self.results)
|
||||||
users = self.unique()
|
users = self.unique()
|
||||||
resul = []
|
resul = []
|
||||||
for x in users:
|
for iteration in users:
|
||||||
y = x.replace(' | LinkedIn', '')
|
delete = iteration.replace(' | LinkedIn', '')
|
||||||
y = y.replace(' profiles ', '')
|
delete = delete.replace(' profiles ', '')
|
||||||
y = y.replace('LinkedIn', '')
|
delete = delete.replace('LinkedIn', '')
|
||||||
y = y.replace('"', '')
|
delete = delete.replace('"', '')
|
||||||
y = y.replace('>', '')
|
delete = delete.replace('>', '')
|
||||||
if y != " ":
|
if delete != " ":
|
||||||
resul.append(y)
|
resul.append(delete)
|
||||||
return resul
|
return resul
|
||||||
|
|
||||||
def profiles(self):
    """Extract names advertised as Google Profiles in self.results.

    Matches '">Name - <em>Google Profile</em>' snippets, strips the markup
    and hyphens, and returns the cleaned names; a leftover single space is
    discarded. Side effect: self.temp holds the raw regex matches.
    """
    people_pattern = re.compile(r'">[a-zA-Z0-9._ -]* - <em>Google Profile</em>')
    self.temp = people_pattern.findall(self.results)
    found = []
    for raw in self.temp:
        # Same replacement order as before: markup, hyphens, leading '">'.
        name = raw.replace(' <em>Google Profile</em>', '').replace('-', '').replace('">', '')
        if name != " ":
            found.append(name)
    return found
||||||
|
|
||||||
def set(self):
    """Return names rendered as '>name</a></font>' in self.results.

    NOTE: the method name shadows the builtin ``set`` but is kept because
    it is the public interface. Side effect: self.temp holds the raw
    regex matches.
    """
    sets_pattern = re.compile(r'>[a-zA-Z0-9]*</a></font>')
    self.temp = sets_pattern.findall(self.results)
    # Dropping every '>' first collapses '</a></font>' to '</a</font',
    # which the second replace then removes — same two-step as before.
    return [match.replace('>', '').replace('</a</font', '') for match in self.temp]
||||||
|
|
||||||
def urls(self):
|
def urls(self):
|
||||||
|
@ -150,7 +150,7 @@ def urls(self):
|
||||||
|
|
||||||
def unique(self) -> list:
|
def unique(self) -> list:
|
||||||
self.new = []
|
self.new = []
|
||||||
for x in self.temp:
|
for iteration in self.temp:
|
||||||
if x not in self.new:
|
if iteration not in self.new:
|
||||||
self.new.append(x)
|
self.new.append(iteration)
|
||||||
return self.new
|
return self.new
|
||||||
|
|
Loading…
Reference in a new issue