#!/usr/bin/env python3
# encoding: UTF-8
"""
This file is part of EmailHarvester
Copyright (C) 2016 @maldevel
https://github.com/maldevel/EmailHarvester
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
For more see the file 'LICENSE' for copying permission.
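Example usage (flags as defined by the argument parser at the bottom of this
file; example.com is a placeholder target domain):

    python3 EmailHarvester.py -d example.com -e google -l 200 -s emails.txt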
"""
__author__ = "maldevel"
__copyright__ = "Copyright (c) 2016 @maldevel"
__credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"]
__license__ = "GPLv3"
__version__ = "1.2.7"
__maintainer__ = "maldevel"
################################
import argparse
import sys
import time
import requests
import re
import os
from termcolor import colored
from argparse import RawTextHelpFormatter
from sys import platform as _platform
from urllib.parse import urlparse
################################
if _platform == 'win32':
import colorama
colorama.init()
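# myparser extracts candidate email addresses for the target domain from the
# raw response text collected from the search engines.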
class myparser:
def __init__(self, results, word):
self.results = results
self.word = word
self.temp = []
    def genericClean(self):
        # Strip common HTML tags that search engines wrap around results so
        # that addresses split across markup are rejoined before matching.
        # (Tag list reconstructed; the original literal was lost to HTML stripping.)
        for e in '''<em> </em> <b> </b> <strong> </strong> <wbr> </wbr>'''.split():
            self.results = self.results.replace(e, '')
        # Replace URL-encoded characters and separators with spaces so they
        # cannot glue two addresses together.
        for e in '%2f %3a %3A %3C %3D & / : ; < = > \\'.split():
            self.results = self.results.replace(e, ' ')
    def emails(self):
        self.genericClean()
        # Match mailbox characters, then '@', then any host labels ending in
        # the target domain (escaped so its dots match literally).
        reg_emails = re.compile(
            r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' +
            '@' +
            '[a-zA-Z0-9.-]*' +
            re.escape(self.word))
        self.temp = reg_emails.findall(self.results)
        return self.unique()
def unique(self):
self.new = list(set(self.temp))
return self.new
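# For example, myparser('contact admin@example.com for info', 'example.com').emails()
# returns ['admin@example.com'].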
###################################################################
class EmailHarvester(object):
def __init__(self, userAgent, proxy):
self.plugins = {}
self.proxy = proxy
self.userAgent = userAgent
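        # Plugin discovery: import every .py file under plugins/; each module
        # must expose a Plugin class that takes this app instance and is
        # expected to register its search function via register_plugin().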
path = "plugins/"
plugins = {}
sys.path.insert(0, path)
for f in os.listdir(path):
fname, ext = os.path.splitext(f)
if ext == '.py':
mod = __import__(fname)
plugins[fname] = mod.Plugin(self)
def register_plugin(self, search_method, functions):
self.plugins[search_method] = functions
def get_plugins(self):
return self.plugins
def show_message(self, msg):
print(green(msg))
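    # urlPattern is a format string with {counter} and {word} placeholders
    # supplied by a plugin, e.g. something like
    # 'https://search.example/?q=%40{word}&first={counter}' (hypothetical URL).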
def init_search(self, urlPattern, word, limit, counterInit, counterStep):
self.results = ""
self.totalresults = ""
self.limit = int(limit)
self.counter = int(counterInit)
self.urlPattern = urlPattern
self.step = int(counterStep)
self.word = word
    def do_search(self):
        try:
            urly = self.urlPattern.format(counter=str(self.counter), word=self.word)
            headers = {'User-Agent': self.userAgent}
            if self.proxy:
                proxies = {self.proxy.scheme: "http://" + self.proxy.netloc}
                r = requests.get(urly, headers=headers, proxies=proxies)
            else:
                r = requests.get(urly, headers=headers)
        except Exception as e:
            print(e)
            sys.exit(4)
        # r.text decodes using the detected charset; decoding r.content with
        # r.encoding directly fails when the server omits the charset header.
        self.results = r.text
        self.totalresults += self.results
    def process(self):
        while self.counter < self.limit:
            self.do_search()
            time.sleep(1)  # throttle requests so the search engine does not block us
            self.counter += self.step
            print("\tSearching {} results...".format(self.counter))
def get_emails(self):
rawres = myparser(self.totalresults, self.word)
return rawres.emails()
###################################################################
def yellow(text):
return colored(text, 'yellow', attrs=['bold'])
def green(text):
return colored(text, 'green', attrs=['bold'])
def red(text):
return colored(text, 'red', attrs=['bold'])
def unique(data):
return list(set(data))
###################################################################
def checkProxyUrl(url):
    url_checked = urlparse(url)
    if url_checked.scheme not in ('http', 'https') or url_checked.netloc == '':
        raise argparse.ArgumentTypeError('Invalid {} Proxy URL (example: http://127.0.0.1:8080).'.format(url))
    return url_checked
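# For example, checkProxyUrl('http://127.0.0.1:8080') returns a ParseResult with
# scheme='http' and netloc='127.0.0.1:8080', which do_search() turns into a
# requests proxies mapping.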
def limit_type(x):
x = int(x)
if x > 0:
return x
raise argparse.ArgumentTypeError("Minimum results limit is 1.")
def engine_type(engine):
engines = 'all ask bing google yahoo'.split()
if engine in engines:
return engine
raise argparse.ArgumentTypeError("Invalid search engine, try with: {}.".format(', '.join(engines)))
###################################################################
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="""
_____ _ _ _ _ _
| ___| (_)| | | | | | | |
| |__ _ __ ___ __ _ _ | | | |_| | __ _ _ __ __ __ ___ ___ | |_ ___ _ __
| __|| '_ ` _ \ / _` || || | | _ | / _` || '__|\ \ / // _ \/ __|| __|/ _ \| '__|
| |___| | | | | || (_| || || | | | | || (_| || | \ V /| __/\__ \| |_| __/| |
\____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
A tool to retrieve Domain email addresses from Search Engines | @maldevel
{}: {}
""".format(red('Version'), yellow(__version__)),
formatter_class=RawTextHelpFormatter)
parser.add_argument("-d", '--domain', action="store", metavar='DOMAIN', dest='domain',
default=None, type=str, help="Domain to search.")
parser.add_argument("-s", '--save', action="store", metavar='FILE', dest='filename',
default=None, type=str, help="Save the results into a TXT and XML file (both).")
    # TODO: scan the plugins folder to build the list of available search
    # engines instead of hard-coding (google, bing, yahoo, ask, all).
    parser.add_argument("-e", '--engine', action="store", metavar='ENGINE', dest='engine',
                        default="all", type=engine_type, help="Select search engine plugin (google, bing, yahoo, ask, all).")
parser.add_argument("-l", '--limit', action="store", metavar='LIMIT', dest='limit',
type=limit_type, default=100, help="Limit the number of results.")
parser.add_argument('-u', '--user-agent', action="store", metavar='USER-AGENT', dest='uagent',
type=str, help="Set the User-Agent request header.")
    parser.add_argument('-x', '--proxy', action="store", metavar='PROXY', dest='proxy',
                        default=None, type=checkProxyUrl, help='Set up a proxy server (example: http://127.0.0.1:8080).')
    parser.add_argument('--noprint', action='store_true', default=False,
                        help='Do not print the discovered email addresses to the terminal.')
    if len(sys.argv) == 1:
parser.print_help()
sys.exit()
args = parser.parse_args()
if not args.domain:
print(red("[-] Please specify a domain name to search."))
sys.exit(2)
domain = args.domain
userAgent = (args.uagent or
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1")
print("User-Agent in use: {}".format(yellow(userAgent)))
if args.proxy:
print("Proxy server in use: {}".format(yellow(args.proxy.scheme + "://" + args.proxy.netloc)))
filename = args.filename or ""
limit = args.limit
engine = args.engine
app = EmailHarvester(userAgent, args.proxy)
plugins = app.get_plugins()
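    # The plugin registry maps each engine name to its callables, so a search
    # runs as plugins['google']['search'](domain, limit) (engine names depend
    # on what the plugins folder actually registers).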
all_emails = []
    if engine == "all":
        print(green("[+] Searching everywhere.."))
        for search_engine in plugins:
            all_emails += plugins[search_engine]['search'](domain, limit)
    elif engine not in plugins:
        print(red("Search engine plugin not found"))
        sys.exit(3)
    else:
        # Plugins report their own status via show_message(), so search() is
        # treated as returning just the list of harvested addresses, matching
        # what the "all" branch above already assumes.
        all_emails = plugins[engine]['search'](domain, limit)
all_emails = unique(all_emails)
if not all_emails:
print(red("No emails found"))
sys.exit(4)
msg = "\n\n[+] {} emails found:".format(len(all_emails))
print(green(msg))
print(green("-" * len(msg)))
    if not args.noprint:
        for email in all_emails:
            print(email)
    if filename:
        try:
            print(green("\n[+] Saving files..."))
            with open(filename, 'w') as out_file:
                for email in all_emails:
                    try:
                        out_file.write(email + "\n")
                    except Exception:
                        print(red("Exception " + email))
        except Exception as e:
            print(red("Error saving TXT file: " + str(e)))
        try:
            # Mirror the results into an XML file next to the TXT output.
            # (The XML element names below are assumptions; the original
            # markup was lost.)
            filename = os.path.splitext(filename)[0] + ".xml"
            with open(filename, 'w') as out_file:
                out_file.write('<?xml version="1.0" encoding="UTF-8"?><EmailHarvester>')
                for email in all_emails:
                    out_file.write('<email>{}</email>'.format(email))
                out_file.write('</EmailHarvester>')
            print(green("Files saved!"))
        except Exception as er:
            print(red("Error saving XML file: " + str(er)))