diff --git a/discovery/yahoosearch.py b/discovery/yahoosearch.py
index 4c2b5fd3..72931b13 100644
--- a/discovery/yahoosearch.py
+++ b/discovery/yahoosearch.py
@@ -33,7 +33,28 @@ def process(self):
 
     def get_emails(self):
+        """Return the unique e-mail addresses parsed from the search results.
+
+        Addresses prefixed with a phone-number-like run of digits and dashes
+        (for example ``555-123-4567user@host.tld``) have that prefix removed
+        before de-duplication. The order of the returned list is unspecified.
+        """
         rawres = myparser.Parser(self.total_results, self.word)
-        return rawres.emails()
+        emails = set()
+        for email in rawres.emails():
+            email = str(email)
+            # Treat the prefix as a phone number only when the address starts
+            # with a digit and its first dash is among the first ten characters.
+            if '-' in email and email[0].isdigit() and email.index('-') <= 9:
+                # Guard on `email` being non-empty: a value made up solely of
+                # digits and dashes would otherwise be stripped to '' and
+                # email[0] would raise IndexError.
+                while email and (email[0] == '-' or email[0].isdigit()):
+                    email = email[1:]
+                if not email:
+                    # Nothing but the numeric prefix was present; not an address.
+                    continue
+            emails.add(email)
+        return list(emails)
 
     def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)