diff --git a/.travis.yml b/.travis.yml index f1dc5228..2b313ae7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ before_install: install: - python setup.py test script: -- python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,crtsh,hunter -b all +- python theHarvester.py -d metasploit.com -x trello,google,intelx,bingapi,hunter -b all - pytest notifications: email: false diff --git a/README.md b/README.md index 86040d68..ffd87303 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,6 @@ Passive: * google: Google search engine (Optional Google dorking.) - www.google.com -* google-certificates: Google Certificate Transparency report - * hunter: Hunter search engine (Requires API key, see below.) - www.hunter.io * intelx: Intelx search engine (Requires API key, see below.) - www.intelx.io diff --git a/api-keys.yaml b/api-keys.yaml index 02c5138e..343587ba 100644 --- a/api-keys.yaml +++ b/api-keys.yaml @@ -6,13 +6,13 @@ apikeys: key: hunter: - key: + key: intelx: key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace securityTrails: - key: + key: shodan: key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt diff --git a/discovery/__init__.py b/discovery/__init__.py deleted file mode 100644 index f775d5e3..00000000 --- a/discovery/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -__all__ = ['baidusearch', - 'bingsearch', - 'censys', - 'crtsh', - 'cymon', - 'dnssearch', - 'dogpilesearch', - 'duckduckgosearch', - 'exaleadsearch', - 'googlecertificates', - 'googlesearch', - 'huntersearch', - 'intelxsearch', - 'linkedinsearch', - 'netcraft', - 'port_scanner', - 'securitytrailssearch', - 'shodansearch', - 'takeover', - 'threatcrowd', - 'trello', - 'twittersearch', - 'virustotal', - 'yahoosearch', - 'yandexsearch'] diff --git a/requirements.txt b/requirements.txt index bbdea08f..235c5e61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,9 @@ -beautifulsoup4>=4.7.1 +beautifulsoup4==4.8.0 censys==0.0.8 -plotly==3.10.0 -pytest>=4.6.3 -PyYaml>=5.1.1 -requests>=2.22.0 
-shodan>=1.13.0 -texttable>=1.6.1 +chart-studio==1.0.0 +plotly==4.0.0 +pytest==5.0.1 +PyYaml==5.1.1 +requests==2.22.0 +shodan==1.14.0 +texttable==1.6.2 \ No newline at end of file diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index 8fec020e..fb43e147 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -54,7 +54,7 @@ def start(): parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str) parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster, dogpile, duckduckgo, github-code, google, - google-certificates, hunter, intelx, + hunter, intelx, linkedin, netcraft, securityTrails, threatcrowd, trello, twitter, vhost, virustotal, yahoo, all''') parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str) @@ -147,7 +147,7 @@ def start(): print('\033[94m[*] Searching CRT.sh. \033[0m') search = crtsh.SearchCrtsh(word) search.process() - hosts = filter(search.get_hostnames()) + hosts = filter(search.get_data()) all_hosts.extend(hosts) db = stash.stash_manager() db.store_all(word, all_hosts, 'host', 'CRTsh') @@ -223,15 +223,6 @@ def start(): db.store_all(word, all_hosts, 'host', 'google') db.store_all(word, all_emails, 'email', 'google') - elif engineitem == 'google-certificates': - print('\033[94m[*] Searching Google Certificate transparency report. \033[0m') - search = googlecertificates.SearchGoogleCertificates(word, limit, start) - search.process() - hosts = filter(search.get_domains()) - all_hosts.extend(hosts) - db = stash.stash_manager() - db.store_all(word, all_hosts, 'host', 'google-certificates') - elif engineitem == 'hunter': print('\033[94m[*] Searching Hunter. 
\033[0m') from theHarvester.discovery import huntersearch @@ -287,7 +278,6 @@ def start(): print('---------------------') for user in sorted(list(set(people))): print(user) - sys.exit(0) elif engineitem == 'netcraft': print('\033[94m[*] Searching Netcraft. \033[0m') @@ -434,7 +424,7 @@ def start(): print('\033[94m[*] Searching CRT.sh. \033[0m') search = crtsh.SearchCrtsh(word) search.process() - hosts = filter(search.get_hostnames()) + hosts = filter(search.get_data()) all_hosts.extend(hosts) db = stash.stash_manager() db.store_all(word, all_hosts, 'host', 'CRTsh') @@ -489,14 +479,6 @@ def start(): db = stash.stash_manager() db.store_all(word, all_hosts, 'host', 'google') - print('\033[94m[*] Searching Google Certificate transparency report. \033[0m') - search = googlecertificates.SearchGoogleCertificates(word, limit, start) - search.process() - domains = filter(search.get_domains()) - all_hosts.extend(domains) - db = stash.stash_manager() - db.store_all(word, all_hosts, 'host', 'google-certificates') - print('\033[94m[*] Searching Hunter. \033[0m') from theHarvester.discovery import huntersearch # Import locally. 
diff --git a/theHarvester/discovery/__init__.py b/theHarvester/discovery/__init__.py index ffe63379..5dd6f419 100644 --- a/theHarvester/discovery/__init__.py +++ b/theHarvester/discovery/__init__.py @@ -6,7 +6,6 @@ 'dogpilesearch', 'duckduckgosearch', 'exaleadsearch', - 'googlecertificates', 'googlesearch', 'huntersearch', 'intelxsearch', diff --git a/theHarvester/discovery/censys.py b/theHarvester/discovery/censys.py index f0275cd5..697de756 100644 --- a/theHarvester/discovery/censys.py +++ b/theHarvester/discovery/censys.py @@ -2,6 +2,9 @@ from theHarvester.parsers import censysparser import requests + # TODO rewrite this module to use the censys api as the current way does not work + # TODO And not really that maintainable as it currently stands + class SearchCensys: diff --git a/theHarvester/discovery/crtsh.py b/theHarvester/discovery/crtsh.py index 5f33e2ff..d5dc5b2d 100644 --- a/theHarvester/discovery/crtsh.py +++ b/theHarvester/discovery/crtsh.py @@ -1,67 +1,26 @@ -from theHarvester.discovery.constants import * from theHarvester.lib.core import * -from theHarvester.parsers import myparser import requests -import time - +import urllib3 class SearchCrtsh: def __init__(self, word): - self.word = word.replace(' ', '%20') - self.results = "" - self.totalresults = "" - self.server = 'https://crt.sh/?q=' - self.quantity = '100' - self.counter = 0 - + self.word = word + self.data = set() + def do_search(self): - try: - urly = self.server + self.word - except Exception as e: - print(e) - try: - params = {'User-Agent': Core.get_user_agent()} - r = requests.get(urly, headers=params) - except Exception as e: - print(e) - links = self.get_info(r.text) - for link in links: - params = {'User-Agent': Core.get_user_agent()} - r = requests.get(link, headers=params) - time.sleep(getDelay()) - self.results = r.text - self.totalresults += self.results - - """ - Function goes through text from base request and parses it for links - @param text requests text - @return list of links
- """ - def get_info(self, text): - lines = [] - for line in str(text).splitlines(): - line = line.strip() - if 'id=' in line: - lines.append(line) - links = [] - for i in range(len(lines)): - if i % 2 == 0: # Way html is formatted only care about every other one. - current = lines[i] - current = current[43:] # 43 is not an arbitrary number, the id number always starts at 43rd index. - link = '' - for ch in current: - if ch == '"': - break - else: - link += ch - links.append(('https://crt.sh?id=' + str(link))) - return links - - def get_hostnames(self): - rawres = myparser.Parser(self.totalresults, self.word) - return rawres.hostnames() + url = f'https://crt.sh/?q=%25.{self.word}&output=json' + headers = {'User-Agent': Core.get_user_agent()} + request = requests.get(url, params=headers, timeout=30) + if request.ok: + content = request.json() + data = set([dct['name_value'][2:] if '*.' == dct['name_value'][:2] else dct['name_value'] for dct in content]) + return data def process(self): - self.do_search() print('\tSearching results.') + data = self.do_search() + self.data = data + + def get_data(self): + return self.data diff --git a/theHarvester/discovery/linkedinsearch.py b/theHarvester/discovery/linkedinsearch.py index 4f8e9b94..d59d14fd 100644 --- a/theHarvester/discovery/linkedinsearch.py +++ b/theHarvester/discovery/linkedinsearch.py @@ -39,4 +39,4 @@ def process(self): self.do_search() time.sleep(getDelay()) self.counter += 100 - print(f'\tSearching {self.counter} results.') + print(f'\tSearching {self.counter} results.') diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py index 497b96fa..1038a560 100644 --- a/theHarvester/lib/core.py +++ b/theHarvester/lib/core.py @@ -74,7 +74,6 @@ def get_supportedengines(): 'duckduckgo', 'github-code', 'google', - 'google-certificates', 'hunter', 'intelx', 'linkedin', diff --git a/theHarvester/lib/reportgraph.py b/theHarvester/lib/reportgraph.py index 255ff85e..7ee2e443 100644 --- 
a/theHarvester/lib/reportgraph.py +++ b/theHarvester/lib/reportgraph.py @@ -2,7 +2,7 @@ from datetime import datetime import plotly import plotly.graph_objs as go -import plotly.plotly as py +import chart_studio.plotly as py try: db = stash.stash_manager() @@ -92,5 +92,4 @@ def drawscattergraphscanhistory(self, domain, scanhistorydomain): except Exception as e: print(f'Error generating HTML for the historical graph for domain: {e}') -except Exception as e: - print(f'Error in the reportgraph module: {e}') + diff --git a/wordlists/dorks.txt b/wordlists/dorks.txt index 0c56e244..e40b574a 100644 --- a/wordlists/dorks.txt +++ b/wordlists/dorks.txt @@ -1,6 +1,7 @@ login.html administrator/login.%XT% admin_area/login.%XT% +intext:@ inurl: intitle: intext: