#!/usr/bin/env python3
# encoding: UTF-8

"""
This file is part of EmailHarvester
Copyright (C) 2016 @maldevel
https://github.com/maldevel/EmailHarvester

EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

For more information, see the file 'LICENSE' for copying permission.
"""

__author__ = "maldevel"
__copyright__ = "Copyright (c) 2016 @maldevel"
__credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"]
__license__ = "GPLv3"
__version__ = "1.2.7"
__maintainer__ = "maldevel"

################################

import argparse
import os
import re
import sys
import time

import requests

from argparse import RawTextHelpFormatter
from sys import platform as _platform
from termcolor import colored
from urllib.parse import urlparse

################################

# colorama is only needed to make ANSI colors work on Windows consoles.
if _platform == 'win32':
    import colorama
    colorama.init()


class myparser:

    def __init__(self, results, word):
        self.results = results
        self.word = word
        self.temp = []

    def genericClean(self):
        # Strip HTML word-break tags, then replace URL-encoded and
        # reserved characters with spaces so addresses stand alone.
        for e in '''<wbr> </wbr>'''.split():
            self.results = self.results.replace(e, '')

        for e in '%2f %3a %3A %3C %3D & / : ; < = > \\'.split():
            self.results = self.results.replace(e, ' ')

    def emails(self):
        self.genericClean()
        # Match a local part, '@', and any host that ends with the target
        # domain. re.escape keeps the dots in the domain from matching
        # arbitrary characters.
        reg_emails = re.compile(
            r"[a-zA-Z0-9.\-_+#~!$&',;=:]+" + '@' +
            '[a-zA-Z0-9.-]*' + re.escape(self.word))
        self.temp = reg_emails.findall(self.results)
        emails = self.unique()
        return emails

    def unique(self):
        self.new = list(set(self.temp))
        return self.new
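
# ------------------------------------------------------------------
# A minimal usage sketch of the parser above (illustrative only; the
# sample text and addresses are made up):
#
#     raw = 'Reach us: admin<wbr>@example.com or sales@mail.example.com'
#     found = myparser(raw, 'example.com').emails()
#     # found -> ['admin@example.com', 'sales@mail.example.com']
#     # (in arbitrary order, since unique() deduplicates via a set)
# ------------------------------------------------------------------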
###################################################################

class EmailHarvester(object):

    def __init__(self, userAgent, proxy):
        self.plugins = {}
        self.proxy = proxy
        self.userAgent = userAgent

        # Import every .py file in plugins/; each module registers itself
        # by instantiating its Plugin class with this app, which calls
        # register_plugin() below.
        path = "plugins/"
        sys.path.insert(0, path)
        for f in os.listdir(path):
            fname, ext = os.path.splitext(f)
            if ext == '.py':
                mod = __import__(fname)
                mod.Plugin(self)

    def register_plugin(self, search_method, functions):
        self.plugins[search_method] = functions

    def get_plugins(self):
        return self.plugins

    def show_message(self, msg):
        print(green(msg))

    def init_search(self, urlPattern, word, limit, counterInit, counterStep):
        self.results = ""
        self.totalresults = ""
        self.limit = int(limit)
        self.counter = int(counterInit)
        self.urlPattern = urlPattern
        self.step = int(counterStep)
        self.word = word

    def do_search(self):
        try:
            urly = self.urlPattern.format(counter=str(self.counter), word=self.word)
            headers = {'User-Agent': self.userAgent}
            if self.proxy:
                proxies = {self.proxy.scheme: "http://" + self.proxy.netloc}
                r = requests.get(urly, headers=headers, proxies=proxies)
            else:
                r = requests.get(urly, headers=headers)
        except Exception as e:
            print(e)
            sys.exit(4)

        # r.text lets requests fall back to a guessed charset when the
        # server declares none, instead of crashing on decode(None).
        self.results = r.text
        self.totalresults += self.results

    def process(self):
        while self.counter < self.limit:
            self.do_search()
            time.sleep(1)  # pause between requests to avoid hammering the engine
            self.counter += self.step
            print("\tSearching " + str(self.counter) + " results...")

    def get_emails(self):
        rawres = myparser(self.totalresults, self.word)
        return rawres.emails()
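
# ------------------------------------------------------------------
# Illustrative sketch of a module the loader above expects to find in
# plugins/ (inferred from register_plugin() and the call sites in
# __main__; the engine name and URL pattern are placeholders, not a
# real plugin):
#
#     # plugins/myengine.py
#     class Plugin:
#         def __init__(self, app):
#             self.app = app
#             # Expose a 'search' callable under this engine's name.
#             app.register_plugin('myengine', {'search': self.search})
#
#         def search(self, domain, limit):
#             self.app.init_search('http://search.example/?q=%40{word}&start={counter}',
#                                  domain, limit, 0, 10)
#             self.app.process()
#             # Callers unpack a (message, emails) pair.
#             return '[+] Searching in MyEngine..', self.app.get_emails()
# ------------------------------------------------------------------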
###################################################################

def yellow(text):
    return colored(text, 'yellow', attrs=['bold'])


def green(text):
    return colored(text, 'green', attrs=['bold'])


def red(text):
    return colored(text, 'red', attrs=['bold'])


def unique(data):
    return list(set(data))

###################################################################

def checkProxyUrl(url):
    url_checked = urlparse(url)
    if url_checked.scheme not in ('http', 'https') or url_checked.netloc == '':
        raise argparse.ArgumentTypeError('Invalid {} Proxy URL (example: http://127.0.0.1:8080).'.format(url))
    return url_checked


def limit_type(x):
    x = int(x)
    if x > 0:
        return x
    raise argparse.ArgumentTypeError("Minimum results limit is 1.")


def engine_type(engine):
    engines = 'all ask bing google yahoo'.split()
    if engine in engines:
        return engine
    raise argparse.ArgumentTypeError("Invalid search engine, try with: {}.".format(', '.join(engines)))

###################################################################

if __name__ == '__main__':

    parser = argparse.ArgumentParser(description=r"""
 _____                     _  _  _   _                                 _
|  ___|                   (_)| || | | |                                | |
| |__   _ __ ___    __ _  _ | || |_| |  __ _  _ __ __   __  ___  ___ | |_   ___  _ __
|  __| | '_ ` _ \  / _` || || ||  _  | / _` || '__|\ \ / / / _ \/ __|| __| / _ \| '__|
| |___ | | | | | || (_| || || || | | || (_| || |    \ V / |  __/\__ \| |_ |  __/| |
\____/ |_| |_| |_| \__,_||_||_|\_| |_/ \__,_||_|     \_/   \___||___/ \__| \___||_|

A tool to retrieve Domain email addresses from Search Engines | @maldevel
{}: {}
""".format(red('Version'), yellow(__version__)),
                                     formatter_class=RawTextHelpFormatter)

    parser.add_argument("-d", '--domain', action="store", metavar='DOMAIN', dest='domain',
                        default=None, type=str, help="Domain to search.")
    parser.add_argument("-s", '--save', action="store", metavar='FILE', dest='filename',
                        default=None, type=str, help="Save the results into a TXT and XML file (both).")
    # TODO: scan the plugins folder to list the available search engines
    # instead of hard-coding (google, bing, yahoo, ask, all).
    parser.add_argument("-e", '--engine', action="store", metavar='ENGINE', dest='engine',
                        default="all", type=engine_type,
                        help="Select search engine plugin (google, bing, yahoo, ask, all).")
    parser.add_argument("-l", '--limit', action="store", metavar='LIMIT', dest='limit',
                        type=limit_type, default=100, help="Limit the number of results.")
    parser.add_argument('-u', '--user-agent', action="store", metavar='USER-AGENT', dest='uagent',
                        type=str, help="Set the User-Agent request header.")
    parser.add_argument('-x', '--proxy', action="store", metavar='PROXY', dest='proxy',
                        default=None, type=checkProxyUrl,
                        help='Set up a proxy server (example: http://127.0.0.1:8080).')
    parser.add_argument('--noprint', action='store_true', default=False,
                        help='Do not print the discovered emails to the terminal.')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit()

    args = parser.parse_args()

    if not args.domain:
        print(red("[-] Please specify a domain name to search."))
        sys.exit(2)
    domain = args.domain

    userAgent = (args.uagent or
                 "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1")

    print("User-Agent in use: {}".format(yellow(userAgent)))

    if args.proxy:
        print("Proxy server in use: {}".format(yellow(args.proxy.scheme + "://" + args.proxy.netloc)))

    filename = args.filename or ""
    limit = args.limit
    engine = args.engine

    app = EmailHarvester(userAgent, args.proxy)
    plugins = app.get_plugins()

    all_emails = []
    if engine == "all":
        print(green("[+] Searching everywhere.."))
        for search_engine in plugins:
            # Each plugin's search() returns a (message, emails) pair,
            # matching the single-engine branch below.
            msg, emails = plugins[search_engine]['search'](domain, limit)
            print(green(msg))
            all_emails += emails
    elif engine not in plugins:
        print(red("Search engine plugin not found"))
        sys.exit(3)
    else:
        msg, all_emails = plugins[engine]['search'](domain, limit)
        print(green(msg))

    all_emails = unique(all_emails)

    if not all_emails:
        print(red("No emails found"))
        sys.exit(4)

    msg = "\n\n[+] {} emails found:".format(len(all_emails))
    print(green(msg))
    print(green("-" * len(msg)))

    if not args.noprint:
        for email in all_emails:
            print(email)

    if filename:
        try:
            print(green("\n[+] Saving files..."))
            with open(filename, 'w') as out_file:
                for email in all_emails:
                    try:
                        out_file.write(email + "\n")
                    except Exception:
                        print(red("Exception " + email))
        except Exception as e:
            print(red("Error saving TXT file: " + str(e)))

        try:
            # Reuse the base name of the TXT file for the XML report.
            filename = os.path.splitext(filename)[0] + ".xml"
            with open(filename, 'w') as out_file:
                out_file.write('<?xml version="1.0" encoding="UTF-8"?><EmailHarvester>')
                for email in all_emails:
                    out_file.write('<email>{}</email>'.format(email))
                out_file.write('</EmailHarvester>')
            print(green("Files saved!"))
        except Exception as er:
            print(red("Error saving XML file: " + str(er)))
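
# ------------------------------------------------------------------
# Example invocations (illustrative; results depend on the plugins
# present in plugins/ and on what the search engines return):
#
#     python3 EmailHarvester.py -d example.com
#     python3 EmailHarvester.py -d example.com -e google -l 200 -s results.txt
#     python3 EmailHarvester.py -d example.com -x http://127.0.0.1:8080
# ------------------------------------------------------------------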