mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-25 06:53:05 +08:00
Improved parser to fix improper emails.
This commit is contained in:
parent
560c5ad094
commit
53562a25bc
1 changed files with 10 additions and 1 deletions
|
@ -33,7 +33,16 @@ def process(self):
|
|||
|
||||
def get_emails(self):
    """Return the unique email addresses parsed from the collected results.

    The raw results in ``self.total_results`` are handed to
    ``myparser.Parser`` together with ``self.word``.  Each address it
    reports is converted to ``str`` and, when it looks like
    ``xxx-xxx-xxxemail@host.tld`` (starts with a digit and has a dash
    within the first ten characters), the leading run of digits and
    dashes is removed.

    Returns:
        A list of unique address strings.  The list is built from a set,
        so its order is not defined.
    """
    rawres = myparser.Parser(self.total_results, self.word)
    emails = set()
    # strip out numbers and dashes for emails that look like
    # xxx-xxx-xxxemail@host.tld
    for email in rawres.emails():
        email = str(email)
        if '-' in email and email[0].isdigit() and email.index('-') <= 9:
            # Guard on emptiness: an entry made up solely of digits and
            # dashes would otherwise be sliced down to '' and then
            # indexed, raising IndexError.
            while email and (email[0] == '-' or email[0].isdigit()):
                email = email[1:]
            if not email:
                # Nothing is left after stripping, so there is no
                # address to report.
                continue
        emails.add(email)
    return list(emails)
|
||||
|
||||
def get_hostnames(self):
|
||||
rawres = myparser.Parser(self.total_results, self.word)
|
||||
|
|
Loading…
Reference in a new issue