Merge pull request #270 from L1ghtn1ng/dev

Fixes for bugs and updated deps plus other misc things
Commit 951f567bab by J.Townsend, 2019-08-09 00:16:04 +01:00 (committed by GitHub)
13 changed files with 37 additions and 121 deletions

@@ -9,7 +9,7 @@ before_install:
 install:
   - python setup.py test
 script:
-  - python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,crtsh,hunter -b all
+  - python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,hunter -b all
   - pytest
 notifications:
   email: false
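
crtsh is rewritten later in this commit (see the crtsh.py hunk below), so it no longer needs to be excluded from the CI run. For reference, a crtsh-only run would look like this (domain illustrative):

    python theHarvester.py -d example.com -b crtsh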

@@ -44,8 +44,6 @@ Passive:
 * google: Google search engine (Optional Google dorking.) - www.google.com
-* google-certificates: Google Certificate Transparency report
 * hunter: Hunter search engine (Requires API key, see below.) - www.hunter.io
 * intelx: Intelx search engine (Requires API key, see below.) - www.intelx.io

@@ -6,13 +6,13 @@ apikeys:
     key:
   hunter:
-    key:
+    key:
   intelx:
     key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace
   securityTrails:
-    key:
+    key:
   shodan:
     key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
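
The file keeps its nested apikeys / engine / key layout, so every module can read its key the same way. A minimal sketch of such a lookup with the PyYaml pinned in requirements.txt (file path and nesting assumed from this hunk):

    import yaml

    # Load the nested apikeys mapping shown above.
    with open('api-keys.yaml') as handle:
        keys = yaml.safe_load(handle)

    shodan_key = keys['apikeys']['shodan']['key']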

@@ -1,25 +0,0 @@
-__all__ = ['baidusearch',
-           'bingsearch',
-           'censys',
-           'crtsh',
-           'cymon',
-           'dnssearch',
-           'dogpilesearch',
-           'duckduckgosearch',
-           'exaleadsearch',
-           'googlecertificates',
-           'googlesearch',
-           'huntersearch',
-           'intelxsearch',
-           'linkedinsearch',
-           'netcraft',
-           'port_scanner',
-           'securitytrailssearch',
-           'shodansearch',
-           'takeover',
-           'threatcrowd',
-           'trello',
-           'twittersearch',
-           'virustotal',
-           'yahoosearch',
-           'yandexsearch']

@@ -1,8 +1,9 @@
-beautifulsoup4>=4.7.1
+beautifulsoup4==4.8.0
 censys==0.0.8
-plotly==3.10.0
-pytest>=4.6.3
-PyYaml>=5.1.1
-requests>=2.22.0
-shodan>=1.13.0
-texttable>=1.6.1
+chart-studio==1.0.0
+plotly==4.0.0
+pytest==5.0.1
+PyYaml==5.1.1
+requests==2.22.0
+shodan==1.14.0
+texttable==1.6.2
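
Every dependency is now pinned exactly (==), which keeps CI installs reproducible. chart-studio is new because plotly 4.0 moved its Chart Studio client out of the core package; the reportgraph.py hunk below switches the import to match. Installing the pinned set:

    pip install -r requirements.txt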

@@ -54,7 +54,7 @@ def start():
     parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
     parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
                         dogpile, duckduckgo, github-code, google,
-                        google-certificates, hunter, intelx,
+                        hunter, intelx,
                         linkedin, netcraft, securityTrails, threatcrowd,
                         trello, twitter, vhost, virustotal, yahoo, all''')
     parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str)
@@ -147,7 +147,7 @@ def start():
             print('\033[94m[*] Searching CRT.sh. \033[0m')
             search = crtsh.SearchCrtsh(word)
             search.process()
-            hosts = filter(search.get_hostnames())
+            hosts = filter(search.get_data())
             all_hosts.extend(hosts)
             db = stash.stash_manager()
             db.store_all(word, all_hosts, 'host', 'CRTsh')
@@ -223,15 +223,6 @@ def start():
             db.store_all(word, all_hosts, 'host', 'google')
             db.store_all(word, all_emails, 'email', 'google')
-        elif engineitem == 'google-certificates':
-            print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
-            search = googlecertificates.SearchGoogleCertificates(word, limit, start)
-            search.process()
-            hosts = filter(search.get_domains())
-            all_hosts.extend(hosts)
-            db = stash.stash_manager()
-            db.store_all(word, all_hosts, 'host', 'google-certificates')
         elif engineitem == 'hunter':
             print('\033[94m[*] Searching Hunter. \033[0m')
             from theHarvester.discovery import huntersearch
@@ -287,7 +278,6 @@ def start():
             print('---------------------')
             for user in sorted(list(set(people))):
                 print(user)
-            sys.exit(0)
         elif engineitem == 'netcraft':
             print('\033[94m[*] Searching Netcraft. \033[0m')
@@ -434,7 +424,7 @@ def start():
         print('\033[94m[*] Searching CRT.sh. \033[0m')
         search = crtsh.SearchCrtsh(word)
         search.process()
-        hosts = filter(search.get_hostnames())
+        hosts = filter(search.get_data())
         all_hosts.extend(hosts)
         db = stash.stash_manager()
         db.store_all(word, all_hosts, 'host', 'CRTsh')
@@ -489,14 +479,6 @@ def start():
         db = stash.stash_manager()
         db.store_all(word, all_hosts, 'host', 'google')
-        print('\033[94m[*] Searching Google Certificate transparency report. \033[0m')
-        search = googlecertificates.SearchGoogleCertificates(word, limit, start)
-        search.process()
-        domains = filter(search.get_domains())
-        all_hosts.extend(domains)
-        db = stash.stash_manager()
-        db.store_all(word, all_hosts, 'host', 'google-certificates')
         print('\033[94m[*] Searching Hunter. \033[0m')
         from theHarvester.discovery import huntersearch
         # Import locally.
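
Note that filter(...) in the crtsh branches above is not the Python builtin (which takes a predicate as its first argument); it is theHarvester's own hostname-cleaning helper, star-imported from theHarvester.discovery.constants. A rough sketch of the idea, not the exact helper:

    def filter(lst):
        # Deduplicate and keep only plausible hostname strings.
        return [item.lower() for item in set(lst or []) if item and item[0].isalnum()]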

@@ -6,7 +6,6 @@
            'dogpilesearch',
            'duckduckgosearch',
            'exaleadsearch',
-           'googlecertificates',
            'googlesearch',
            'huntersearch',
            'intelxsearch',

@@ -2,6 +2,9 @@
 from theHarvester.parsers import censysparser
 import requests
+# TODO rewrite this module to use the censys api as the current way does not work
+# TODO and not really that maintainable as it currently stands
 class SearchCensys:
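
A hedged sketch of where that TODO could go, using the censys package pinned in requirements.txt (credentials, query, and field names are illustrative, not part of this commit):

    from censys.certificates import CensysCertificates

    api = CensysCertificates(api_id='YOUR_UID', api_secret='YOUR_SECRET')
    # Iterate certificates whose parsed names mention the target domain.
    for cert in api.search('parsed.names: example.com', fields=['parsed.names']):
        print(cert['parsed.names'])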

@@ -1,67 +1,26 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
-from theHarvester.parsers import myparser
 import requests
-import time
-import urllib3
 class SearchCrtsh:
     def __init__(self, word):
-        self.word = word.replace(' ', '%20')
-        self.results = ""
-        self.totalresults = ""
-        self.server = 'https://crt.sh/?q='
-        self.quantity = '100'
-        self.counter = 0
+        self.word = word
+        self.data = set()
     def do_search(self):
-        try:
-            urly = self.server + self.word
-        except Exception as e:
-            print(e)
-        try:
-            params = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(urly, headers=params)
-        except Exception as e:
-            print(e)
-        links = self.get_info(r.text)
-        for link in links:
-            params = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(link, headers=params)
-            time.sleep(getDelay())
-            self.results = r.text
-            self.totalresults += self.results
-    """
-    Function goes through text from base request and parses it for links
-    @param text requests text
-    @return list of links
-    """
-    def get_info(self, text):
-        lines = []
-        for line in str(text).splitlines():
-            line = line.strip()
-            if 'id=' in line:
-                lines.append(line)
-        links = []
-        for i in range(len(lines)):
-            if i % 2 == 0:  # Way html is formatted only care about every other one.
-                current = lines[i]
-                current = current[43:]  # 43 is not an arbitrary number, the id number always starts at 43rd index.
-                link = ''
-                for ch in current:
-                    if ch == '"':
-                        break
-                    else:
-                        link += ch
-                links.append(('https://crt.sh?id=' + str(link)))
-        return links
-    def get_hostnames(self):
-        rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.hostnames()
+        url = f'https://crt.sh/?q=%25.{self.word}&output=json'
+        headers = {'User-Agent': Core.get_user_agent()}
+        request = requests.get(url, params=headers, timeout=30)
+        if request.ok:
+            content = request.json()
+            data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content])
+            return data
     def process(self):
-        self.do_search()
         print('\tSearching results.')
+        data = self.do_search()
+        self.data = data
+    def get_data(self):
+        return self.data
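
The rewrite drops HTML scraping in favour of crt.sh's JSON output. A standalone sketch of the same lookup (example.com is a placeholder; a plain requests call would normally pass the User-Agent dict as headers= rather than params=):

    import requests

    # %25 is a URL-encoded '%', so the query matches every subdomain.
    url = 'https://crt.sh/?q=%25.example.com&output=json'
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
    if response.ok:
        # Strip the leading '*.' from wildcard entries, as the module above does.
        hosts = {entry['name_value'][2:] if entry['name_value'].startswith('*.') else entry['name_value']
                 for entry in response.json()}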

@@ -39,4 +39,4 @@ def process(self):
         self.do_search()
         time.sleep(getDelay())
         self.counter += 100
-        print(f'\tSearching {self.counter} results.')
+        print(f'\tSearching {self.counter} results.')

@@ -74,7 +74,6 @@ def get_supportedengines():
         'duckduckgo',
         'github-code',
         'google',
-        'google-certificates',
         'hunter',
         'intelx',
         'linkedin',

@@ -2,7 +2,7 @@
 from datetime import datetime
 import plotly
 import plotly.graph_objs as go
-import plotly.plotly as py
+import chart_studio.plotly as py
 try:
     db = stash.stash_manager()
@@ -92,5 +92,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain):
         except Exception as e:
             print(f'Error generating HTML for the historical graph for domain: {e}')
     except Exception as e:
         print(f'Error in the reportgraph module: {e}')
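
plotly 4.0 split the Chart Studio client into the separate chart-studio package pinned in requirements.txt above, with plotly.plotly becoming chart_studio.plotly. A version-tolerant import shim, as a sketch:

    try:
        import chart_studio.plotly as py  # plotly >= 4.0: Chart Studio ships separately
    except ImportError:
        import plotly.plotly as py  # plotly < 4.0 bundled it in the core package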

@@ -1,6 +1,7 @@
 login.html
 administrator/login.%XT%
 admin_area/login.%XT%
+intext:@
 inurl:
 intitle:
 intext:
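
The new entries are bare Google dork operators; in practice they are combined with a target domain, e.g. (illustrative query):

    site:example.com intext:@example.com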