From 932ca0518ca0aa6199197ebc81f54502074006a1 Mon Sep 17 00:00:00 2001 From: maldevel Date: Fri, 29 Apr 2016 10:41:09 +0300 Subject: [PATCH] validate domain+instagram+reddit release 1.3.1 validate domain instagram plugin added reddit plugin added validators required --- EmailHarvester.py | 14 +++++++-- README.md | 7 +++-- plugins/instagram.py | 73 ++++++++++++++++++++++++++++++++++++++++++++ plugins/reddit.py | 73 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 +- 5 files changed, 164 insertions(+), 6 deletions(-) create mode 100644 plugins/instagram.py create mode 100644 plugins/reddit.py diff --git a/EmailHarvester.py b/EmailHarvester.py index 560987c..4540831 100644 --- a/EmailHarvester.py +++ b/EmailHarvester.py @@ -28,23 +28,27 @@ __author__ = "maldevel" __copyright__ = "Copyright (c) 2016 @maldevel" __credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"] __license__ = "GPLv3" -__version__ = "1.3.0" +__version__ = "1.3.1" __maintainer__ = "maldevel" ################################ + import argparse import sys import time import requests import re import os +import validators from termcolor import colored from argparse import RawTextHelpFormatter from sys import platform as _platform from urllib.parse import urlparse + ################################ + if _platform == 'win32': import colorama colorama.init() @@ -173,6 +177,12 @@ def limit_type(x): return x raise argparse.ArgumentTypeError("Minimum results limit is 1.") +def checkDomain(value): + domain_checked = validators.domain(value) + if not domain_checked: + raise argparse.ArgumentTypeError('Invalid {} domain.'.format(value)) + return value + ################################################################### if __name__ == '__main__': @@ -192,7 +202,7 @@ if __name__ == '__main__': formatter_class=RawTextHelpFormatter) parser.add_argument("-d", '--domain', action="store", metavar='DOMAIN', dest='domain', - default=None, type=str, help="Domain to search.") + 
default=None, type=checkDomain, help="Domain to search.") parser.add_argument("-s", '--save', action="store", metavar='FILE', dest='filename', default=None, type=str, help="Save the results into a TXT and XML file (both).") diff --git a/README.md b/README.md index 408c504..65f9473 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,18 @@ Requirements * termcolor * colorama * requests +* validators Features ===== -* Retrieve Domain email addresses from Search Engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead). +* Retrieve Domain email addresses from popular Search engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead). * Export results to txt and xml files. * Limit search results. * Define your own User-Agent string. * Use proxy server. * Plugins system. -* Search in popular web sites using Search engines (Twitter, LinkedIn, Google+, Github). +* Search in popular web sites using Search engines (Twitter, LinkedIn, Google+, Github, Instagram, Reddit). Download/Installation @@ -47,7 +48,7 @@ usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT] \____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_| A tool to retrieve Domain email addresses from Search Engines | @maldevel - Version: 1.3.0 + Version: 1.3.1 optional arguments: -h, --help show this help message and exit diff --git a/plugins/instagram.py b/plugins/instagram.py new file mode 100644 index 0000000..3ff0360 --- /dev/null +++ b/plugins/instagram.py @@ -0,0 +1,73 @@ +""" + This file is part of EmailHarvester + Copyright (C) 2016 @maldevel + https://github.com/maldevel/EmailHarvester + + EmailHarvester - A tool to retrieve Domain email addresses from Search Engines. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + For more see the file 'LICENSE' for copying permission. +""" + +#config = None +app_emailharvester = None + + +def search(domain, limit): + all_emails = [] + app_emailharvester.show_message("\n[+] Searching in Instagram..\n") + + app_emailharvester.show_message("\n[+] Searching in Yahoo + Instagram..\n") + yahooUrl = "http://search.yahoo.com/search?p=site%3Ainstagram.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}" + app_emailharvester.init_search(yahooUrl, domain, limit, 1, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Bing + Instagram..\n") + bingUrl = "http://www.bing.com/search?q=site%3Ainstagram.com+%40{word}&count=50&first={counter}" + app_emailharvester.init_search(bingUrl, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Google + Instagram..\n") + googleUrl = 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Ainstagram.com+"%40{word}"' + app_emailharvester.init_search(googleUrl, domain, limit, 0, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Baidu + Instagram..\n") + url = 'http://www.baidu.com/search/s?wd=site%3Ainstagram.com+"%40{word}"&pn={counter}' + app_emailharvester.init_search(url, domain, limit, 0, 10) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + 
app_emailharvester.show_message("\n[+] Searching in Exalead + Instagram..\n") + url = "http://www.exalead.com/search/web/results/?q=site%3Ainstagram.com+%40{word}&elements_per_page=10&start_index={counter}" + app_emailharvester.init_search(url, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + #dogpile seems to not support site: + + return all_emails + + +class Plugin: + def __init__(self, app, conf):# + global app_emailharvester, config + #config = conf + app.register_plugin('instagram', {'search': search}) + app_emailharvester = app + \ No newline at end of file diff --git a/plugins/reddit.py b/plugins/reddit.py new file mode 100644 index 0000000..2705251 --- /dev/null +++ b/plugins/reddit.py @@ -0,0 +1,73 @@ +""" + This file is part of EmailHarvester + Copyright (C) 2016 @maldevel + https://github.com/maldevel/EmailHarvester + + EmailHarvester - A tool to retrieve Domain email addresses from Search Engines. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + For more see the file 'LICENSE' for copying permission. 
+""" + +#config = None +app_emailharvester = None + + +def search(domain, limit): + all_emails = [] + app_emailharvester.show_message("\n[+] Searching in Reddit..\n") + + app_emailharvester.show_message("\n[+] Searching in Yahoo + Reddit..\n") + yahooUrl = "http://search.yahoo.com/search?p=site%3Areddit.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}" + app_emailharvester.init_search(yahooUrl, domain, limit, 1, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Bing + Reddit..\n") + bingUrl = "http://www.bing.com/search?q=site%3Areddit.com+%40{word}&count=50&first={counter}" + app_emailharvester.init_search(bingUrl, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Google + Reddit..\n") + googleUrl = 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Areddit.com+"%40{word}"' + app_emailharvester.init_search(googleUrl, domain, limit, 0, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Baidu + Reddit..\n") + url = 'http://www.baidu.com/search/s?wd=site%3Areddit.com+"%40{word}"&pn={counter}' + app_emailharvester.init_search(url, domain, limit, 0, 10) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Exalead + Reddit..\n") + url = "http://www.exalead.com/search/web/results/?q=site%3Areddit.com+%40{word}&elements_per_page=10&start_index={counter}" + app_emailharvester.init_search(url, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + #dogpile seems to not support site: + + return all_emails + + +class Plugin: + def __init__(self, app, conf):# 
+ global app_emailharvester, config + #config = conf + app.register_plugin('reddit', {'search': search}) + app_emailharvester = app + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 63a54a1..529240e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ termcolor colorama -requests \ No newline at end of file +requests +validators