From 932ca0518ca0aa6199197ebc81f54502074006a1 Mon Sep 17 00:00:00 2001
From: maldevel <maldevel@mail.com>
Date: Fri, 29 Apr 2016 10:41:09 +0300
Subject: [PATCH] validate domain+instagram+reddit

release 1.3.1
validate domain
instagram plugin added
reddit plugin added
the validators package is now required
---
 EmailHarvester.py    | 14 +++++++--
 README.md            |  7 +++--
 plugins/instagram.py | 73 ++++++++++++++++++++++++++++++++++++++++++++
 plugins/reddit.py    | 73 ++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt     |  3 +-
 5 files changed, 164 insertions(+), 6 deletions(-)
 create mode 100644 plugins/instagram.py
 create mode 100644 plugins/reddit.py

diff --git a/EmailHarvester.py b/EmailHarvester.py
index 560987c..4540831 100644
--- a/EmailHarvester.py
+++ b/EmailHarvester.py
@@ -28,23 +28,27 @@ __author__ = "maldevel"
 __copyright__ = "Copyright (c) 2016 @maldevel"
 __credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"]
 __license__ = "GPLv3"
-__version__ = "1.3.0"
+__version__ = "1.3.1"
 __maintainer__ = "maldevel"
 
 ################################
+
 import argparse
 import sys
 import time
 import requests
 import re
 import os
+import validators
 
 from termcolor import colored
 from argparse import RawTextHelpFormatter
 from sys import platform as _platform
 from urllib.parse import urlparse
+
 ################################
 
+
 if _platform == 'win32':
     import colorama
     colorama.init()
@@ -173,6 +177,12 @@ def limit_type(x):
         return x
     raise argparse.ArgumentTypeError("Minimum results limit is 1.")
 
+def checkDomain(value):
+    """argparse ``type=`` hook: return ``value`` if validators accepts it as a domain, else raise ArgumentTypeError."""
+    if validators.domain(value):
+        return value
+    raise argparse.ArgumentTypeError('Invalid {} domain.'.format(value))
+
 ###################################################################
 
 if __name__ == '__main__':
@@ -192,7 +202,7 @@ if __name__ == '__main__':
                                      formatter_class=RawTextHelpFormatter)
     
     parser.add_argument("-d", '--domain', action="store", metavar='DOMAIN', dest='domain', 
-                        default=None, type=str, help="Domain to search.")
+                        default=None, type=checkDomain, help="Domain to search.")
     parser.add_argument("-s", '--save', action="store", metavar='FILE', dest='filename', 
                         default=None, type=str, help="Save the results into a TXT and XML file (both).")
     
diff --git a/README.md b/README.md
index 408c504..65f9473 100644
--- a/README.md
+++ b/README.md
@@ -14,17 +14,18 @@ Requirements
 * termcolor
 * colorama
 * requests
+* validators
 
 
 Features
 =====
-* Retrieve Domain email addresses from Search Engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead).
+* Retrieve Domain email addresses from popular search engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead).
 * Export results to txt and xml files.
 * Limit search results.
 * Define your own User-Agent string.
 * Use proxy server.
 * Plugins system.
-* Search in popular web sites using Search engines (Twitter, LinkedIn, Google+, Github).
+* Search in popular websites using search engines (Twitter, LinkedIn, Google+, GitHub, Instagram, Reddit).
 
 
 Download/Installation
@@ -47,7 +48,7 @@ usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT]
 \____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_|     \_/  \___||___/ \__|\___||_|
 
     A tool to retrieve Domain email addresses from Search Engines | @maldevel
-                                Version: 1.3.0
+                                Version: 1.3.1
 
 optional arguments:
   -h, --help            show this help message and exit
diff --git a/plugins/instagram.py b/plugins/instagram.py
new file mode 100644
index 0000000..3ff0360
--- /dev/null
+++ b/plugins/instagram.py
@@ -0,0 +1,73 @@
+"""
+    This file is part of EmailHarvester
+    Copyright (C) 2016 @maldevel
+    https://github.com/maldevel/EmailHarvester
+    
+    EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    
+    For more see the file 'LICENSE' for copying permission.
+"""
+
+#config = None
+app_emailharvester = None
+
+
+def search(domain, limit):
+    """Harvest e-mail addresses for ``domain`` from indexed Instagram pages.
+
+    Instagram itself is never contacted: every search engine listed below is
+    queried for ``site:instagram.com`` results matching ``@{word}`` and the
+    list returned by ``get_emails()`` after each run is accumulated.
+
+    ``limit`` is handed to ``init_search`` unchanged.  Returns a single list
+    with the per-engine results concatenated; duplicates are not removed here.
+    """
+    # Each entry: (engine label, URL template, 4th and 5th init_search args).
+    # NOTE(review): the two numbers look like the first {counter} value and
+    # the per-page increment -- confirm against init_search.
+    engines = (
+        ("Yahoo", "http://search.yahoo.com/search?p=site%3Ainstagram.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", 1, 100),
+        ("Bing", "http://www.bing.com/search?q=site%3Ainstagram.com+%40{word}&count=50&first={counter}", 0, 50),
+        ("Google", 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Ainstagram.com+"%40{word}"', 0, 100),
+        ("Baidu", 'http://www.baidu.com/search/s?wd=site%3Ainstagram.com+"%40{word}"&pn={counter}', 0, 10),
+        # The Exalead URL requests 10 results per page, so the last number is
+        # 10 as well; the earlier 50 disagreed with elements_per_page=10 and
+        # presumably skipped results between pages.
+        ("Exalead", "http://www.exalead.com/search/web/results/?q=site%3Ainstagram.com+%40{word}&elements_per_page=10&start_index={counter}", 0, 10),
+        # dogpile seems to not support site:
+    )
+
+    all_emails = []
+    app_emailharvester.show_message("\n[+] Searching in Instagram..\n")
+
+    # One full search run per engine, in the order listed above.
+    for label, url, start, step in engines:
+        app_emailharvester.show_message(
+            "\n[+] Searching in {} + Instagram..\n".format(label))
+        app_emailharvester.init_search(url, domain, limit, start, step)
+        app_emailharvester.process()
+        all_emails += app_emailharvester.get_emails()
+
+    return all_emails
+
+
+class Plugin:
+    """Hook the Instagram ``search`` function into the application."""
+    def __init__(self, app, conf):
+        global app_emailharvester  # ``conf`` is accepted but unused here
+        app.register_plugin('instagram', {'search': search})
+        app_emailharvester = app
+        
\ No newline at end of file
diff --git a/plugins/reddit.py b/plugins/reddit.py
new file mode 100644
index 0000000..2705251
--- /dev/null
+++ b/plugins/reddit.py
@@ -0,0 +1,73 @@
+"""
+    This file is part of EmailHarvester
+    Copyright (C) 2016 @maldevel
+    https://github.com/maldevel/EmailHarvester
+    
+    EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    
+    For more see the file 'LICENSE' for copying permission.
+"""
+
+#config = None
+app_emailharvester = None
+
+
+def search(domain, limit):
+    """Harvest e-mail addresses for ``domain`` from indexed Reddit pages.
+
+    Reddit itself is never contacted: every search engine listed below is
+    queried for ``site:reddit.com`` results matching ``@{word}`` and the
+    list returned by ``get_emails()`` after each run is accumulated.
+
+    ``limit`` is handed to ``init_search`` unchanged.  Returns a single list
+    with the per-engine results concatenated; duplicates are not removed here.
+    """
+    # Each entry: (engine label, URL template, 4th and 5th init_search args).
+    # NOTE(review): the two numbers look like the first {counter} value and
+    # the per-page increment -- confirm against init_search.
+    engines = (
+        ("Yahoo", "http://search.yahoo.com/search?p=site%3Areddit.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", 1, 100),
+        ("Bing", "http://www.bing.com/search?q=site%3Areddit.com+%40{word}&count=50&first={counter}", 0, 50),
+        ("Google", 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Areddit.com+"%40{word}"', 0, 100),
+        ("Baidu", 'http://www.baidu.com/search/s?wd=site%3Areddit.com+"%40{word}"&pn={counter}', 0, 10),
+        # The Exalead URL requests 10 results per page, so the last number is
+        # 10 as well; the earlier 50 disagreed with elements_per_page=10 and
+        # presumably skipped results between pages.
+        ("Exalead", "http://www.exalead.com/search/web/results/?q=site%3Areddit.com+%40{word}&elements_per_page=10&start_index={counter}", 0, 10),
+        # dogpile seems to not support site:
+    )
+
+    all_emails = []
+    app_emailharvester.show_message("\n[+] Searching in Reddit..\n")
+
+    # One full search run per engine, in the order listed above.
+    for label, url, start, step in engines:
+        app_emailharvester.show_message(
+            "\n[+] Searching in {} + Reddit..\n".format(label))
+        app_emailharvester.init_search(url, domain, limit, start, step)
+        app_emailharvester.process()
+        all_emails += app_emailharvester.get_emails()
+
+    return all_emails
+
+
+class Plugin:
+    """Hook the Reddit ``search`` function into the application."""
+    def __init__(self, app, conf):
+        global app_emailharvester  # ``conf`` is accepted but unused here
+        app.register_plugin('reddit', {'search': search})
+        app_emailharvester = app
+        
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 63a54a1..529240e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 termcolor
 colorama
-requests
\ No newline at end of file
+requests
+validators