mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-24 06:22:57 +08:00
Code review.
This commit is contained in:
parent
c1d5fb2094
commit
c4c0c11ff7
2 changed files with 6 additions and 13 deletions
|
@ -1,5 +1,6 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class parser:
|
||||
|
||||
|
@ -11,8 +12,9 @@ def __init__(self, results):
|
|||
def search_ipaddresses(self):
|
||||
try:
|
||||
tags = self.soup.findAll('td')
|
||||
allip = re.findall(r'[0-9]+(?:\.[0-9]+){3}',str(tags))
|
||||
allip = re.findall(r'[0-9]+(?:\.[0-9]+){3}', str(tags))
|
||||
self.ipaddresses = set(allip)
|
||||
return self.ipaddresses
|
||||
except Exception as e:
|
||||
print("Error occurred: " + str(e))
|
||||
|
||||
|
|
13
myparser.py
13
myparser.py
|
@ -35,7 +35,7 @@ def emails(self):
|
|||
self.genericClean()
|
||||
reg_emails = re.compile(
|
||||
# Local part is required, charset is flexible
|
||||
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly )
|
||||
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
|
||||
'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' +
|
||||
'@' +
|
||||
'[a-zA-Z0-9.-]*' +
|
||||
|
@ -91,16 +91,6 @@ def hostnames_all(self):
|
|||
hostnames = self.unique()
|
||||
return hostnames
|
||||
|
||||
def people_jigsaw(self):
|
||||
res = []
|
||||
reg_people = re.compile(
|
||||
"href=javascript:showContact\('[0-9]*'\)>[a-zA-Z0-9., ]*</a></span>")
|
||||
self.temp = reg_people.findall(self.results)
|
||||
for x in self.temp:
|
||||
a = x.split('>')[1].replace("</a", "")
|
||||
res.append(a)
|
||||
return res
|
||||
|
||||
def people_linkedin(self):
|
||||
reg_people = re.compile('">[a-zA-Z0-9._ -]* \| LinkedIn')
|
||||
self.temp = reg_people.findall(self.results)
|
||||
|
@ -165,3 +155,4 @@ def unique(self):
|
|||
if x not in self.new:
|
||||
self.new.append(x)
|
||||
return self.new
|
||||
|
||||
|
|
Loading…
Reference in a new issue