From a04f2b130628d914adcfb526598d05603da352f3 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 14:48:26 -0500 Subject: [PATCH 1/6] Added intelx.io search and fixed misc. --- api-keys.yaml | 7 ++- lib/core.py | 7 +++ parsers/intelxparser.py | 21 ++++++++ theHarvester.py | 111 +++++++++++++++++++++++++++++++--------- 4 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 parsers/intelxparser.py diff --git a/api-keys.yaml b/api-keys.yaml index e687912d..e894b2c8 100644 --- a/api-keys.yaml +++ b/api-keys.yaml @@ -7,10 +7,13 @@ apikeys: id: hunter: - key: + key: 2723f6b795e30b04a3b5de2ebd79d809b8729cd2 + + intelx: + key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace securityTrails: - key: + key: H2fgSNA1FflWFMcxpP9YpNmkupFyV9pL shodan: key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt diff --git a/lib/core.py b/lib/core.py index 2aaf0341..af2bca72 100644 --- a/lib/core.py +++ b/lib/core.py @@ -35,6 +35,12 @@ def shodan_key(): keys = yaml.safe_load(api_keys) return keys['apikeys']['shodan']['key'] + @staticmethod + def intelx_key(): + with open('api-keys.yaml', 'r') as api_keys: + keys = yaml.safe_load(api_keys) + return keys['apikeys']['intelx']['key'] + @staticmethod def banner(): print('\n\033[93m*******************************************************************') @@ -66,6 +72,7 @@ def get_supportedengines(): 'google-certificates', 'google-profiles', 'hunter', + 'intelx', 'linkedin', 'netcraft', 'pgp', diff --git a/parsers/intelxparser.py b/parsers/intelxparser.py new file mode 100644 index 00000000..8448ffe9 --- /dev/null +++ b/parsers/intelxparser.py @@ -0,0 +1,21 @@ +class Parser: + + def __init__(self): + self.emails = set() + self.hosts = set() + + def parse_dictionaries(self, results): + """ + Parse method to parse json results + :param results: Dictionary containing a list of dictionaries known as selectors + :return: tuple of emails and hosts + """ + if results is not None: + for dictionary in results["selectors"]: + field = dictionary['selectorvalue'] + if '@' in field: + self.emails.add(field) + else: + self.hosts.add(str(field).replace(')', '')) + return self.emails, self.hosts + return None, None diff --git a/theHarvester.py b/theHarvester.py index 50f6d180..6e56b76b 100755 --- a/theHarvester.py +++ b/theHarvester.py @@ -47,7 +47,7 @@ def start(): parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str) parser.add_argument('-b', '--source', help='''source: baidu, bing, bingapi, censys, crtsh, cymon, dogpile, duckduckgo, google, googleCSE, - google-certificates, google-profiles, hunter, + google-certificates, google-profiles, hunter, intelx, linkedin, netcraft, pgp, securityTrails, threatcrowd, trello, twitter, vhost, virustotal, yahoo, all''') args = parser.parse_args() @@ -256,6 +256,26 @@ def start(): else: pass + elif engineitem == 'intelx': + print('\033[94m[*] Searching Intelx. \033[0m') + from discovery import intelxsearch + # Import locally or won't work. + try: + search = intelxsearch.search_intelx(word, limit) + search.process() + emails = filter(search.get_emails()) + all_emails.extend(emails) + hosts = filter(search.get_hostnames()) + all_hosts.extend(hosts) + db = stash.stash_manager() + db.store_all(word, all_hosts, 'host', 'intelx') + db.store_all(word, all_emails, 'email', 'intelx') + except Exception as e: + if isinstance(e, MissingKey): + print(e) + else: + print(e) + elif engineitem == 'linkedin': print('\033[94m[*] Searching Linkedin. \033[0m') search = linkedinsearch.SearchLinkedin(word, limit) @@ -397,19 +417,21 @@ def start(): db.store_all(word, all_emails, 'email', 'baidu') except Exception: pass - - print('\033[94m[*] Searching Bing. \033[0m') - bingapi = 'no' - search = bingsearch.SearchBing(word, limit, start) - search.process(bingapi) - emails = filter(search.get_emails()) - hosts = filter(search.get_hostnames()) - all_hosts.extend(hosts) - db = stash.stash_manager() - db.store_all(word, all_hosts, 'host', 'bing') - all_emails.extend(emails) - all_emails = sorted(set(all_emails)) - db.store_all(word, all_emails, 'email', 'bing') + try: + print('\033[94m[*] Searching Bing. \033[0m') + bingapi = 'no' + search = bingsearch.SearchBing(word, limit, start) + search.process(bingapi) + emails = filter(search.get_emails()) + hosts = filter(search.get_hostnames()) + all_hosts.extend(hosts) + db = stash.stash_manager() + db.store_all(word, all_hosts, 'host', 'bing') + all_emails.extend(emails) + all_emails = sorted(set(all_emails)) + db.store_all(word, all_emails, 'email', 'bing') + except Exception: + pass print('\033[94m[*] Searching Censys. \033[0m') from discovery import censys @@ -522,6 +544,25 @@ def start(): else: pass + print('\033[94m[*] Searching Intelx. \033[0m') + from discovery import intelxsearch + # Import locally or won't work. + try: + search = intelxsearch.search_intelx(word, limit) + search.process() + emails = filter(search.get_emails()) + all_emails.extend(emails) + hosts = filter(search.get_hostnames()) + all_hosts.extend(hosts) + db = stash.stash_manager() + db.store_all(word, all_hosts, 'host', 'intelx') + db.store_all(word, all_emails, 'email', 'intelx') + except Exception as e: + if isinstance(e, MissingKey): + print(e) + else: + print(e) + print('\033[94m[*] Searching Linkedin. \033[0m') search = linkedinsearch.SearchLinkedin(word, limit) search.process() @@ -562,6 +603,25 @@ def start(): except Exception: pass + print('\033[94m[*] Searching SecurityTrails. \033[0m') + from discovery import securitytrailssearch + try: + search = securitytrailssearch.search_securitytrail(word) + search.process() + hosts = filter(search.get_hostnames()) + all_hosts.extend(hosts) + db = stash.stash_manager() + db.store_all(word, hosts, 'host', 'securityTrails') + ips = search.get_ips() + all_ip.extend(ips) + db = stash.stash_manager() + db.store_all(word, ips, 'ip', 'securityTrails') + except Exception as e: + if isinstance(e, MissingKey): + print(e) + else: + pass + print('\033[94m[*] Searching Threatcrowd. \033[0m') try: search = threatcrowd.search_threatcrowd(word) @@ -625,16 +685,19 @@ def start(): db = stash.stash_manager() db.store_all(word, all_hosts, 'host', 'virustotal') - print('\033[94m[*] Searching Yahoo. \033[0m') - search = yahoosearch.search_yahoo(word, limit) - search.process() - hosts = search.get_hostnames() - emails = search.get_emails() - all_hosts.extend(filter(hosts)) - all_emails.extend(filter(emails)) - db = stash.stash_manager() - db.store_all(word, all_hosts, 'host', 'yahoo') - db.store_all(word, all_emails, 'email', 'yahoo') + try: + print('\033[94m[*] Searching Yahoo. \033[0m') + search = yahoosearch.search_yahoo(word, limit) + search.process() + hosts = search.get_hostnames() + emails = search.get_emails() + all_hosts.extend(filter(hosts)) + all_emails.extend(filter(emails)) + db = stash.stash_manager() + db.store_all(word, all_hosts, 'host', 'yahoo') + db.store_all(word, all_emails, 'email', 'yahoo') + except Exception as e: + print(f'An exception occurred in yahoo: {e}') else: print('\033[93m[!] Invalid source.\n\n \033[0m') sys.exit(1) From d7d7d6a61e6c562a8f4924e4c4c32c16870218b5 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 15:05:29 -0500 Subject: [PATCH 2/6] Fixed impromper space. --- discovery/intelxsearch.py | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 discovery/intelxsearch.py diff --git a/discovery/intelxsearch.py b/discovery/intelxsearch.py new file mode 100644 index 00000000..ddabeffc --- /dev/null +++ b/discovery/intelxsearch.py @@ -0,0 +1,54 @@ +from discovery.constants import * +from lib.core import * +from parsers import intelxparser +import requests +import time + +class search_intelx: + + def __init__(self, word, limit): + self.word = word + # default key is public key + self.key = Core.intelx_key() + if self.key is None: + raise MissingKey(True) + self.database = 'https://public.intelx.io/' + self.results = None + self.info = () + self.limit = limit + + def do_search(self): + try: + user_agent = Core.get_user_agent() + headers = {'User-Agent': user_agent, 'x-key': self.key} + data = f'{{"term": "{self.word}", "maxresults": {self.limit}, "media": 0, "sort": 2 , "terminate": []}}' + # data is json that corresponds to what we are searching for, sort:2 means sort by most relevant + r = requests.post(f'{self.database}phonebook/search', data=data, headers=headers) + if r.status_code == 400: + raise Exception('Invalid json was passed in.') + time.sleep(1) + # grab uuid to send get request to fetch data + uuid = r.json()['id'] + url = f'{self.database}phonebook/search/result?id={uuid}&offset=0&limit={self.limit}' + r = requests.get(url, headers=headers) + # to add in future grab status from r.text and check if more results can be gathered + if r.status_code != 200: + raise Exception('Error occurred while searching intelx.') + self.results = r.json() + except Exception as e: + print(f'An exception has occurred: {e}') + + def process(self): + print('\t Processing Results') + self.do_search() + intelx_parser = intelxparser.Parser() + self.info = intelx_parser.parse_dictionaries(self.results) + # Create parser and set self.info to tuple returned from parsing text. + print('\t Done Searching Results') + + def get_emails(self): + return self.info[0] + + def get_hostnames(self): + return self.info[1] + From eb83fb744883804c3272712026067ec2e79d91df Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 15:09:26 -0500 Subject: [PATCH 3/6] Removed debugging statements. --- discovery/intelxsearch.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/discovery/intelxsearch.py b/discovery/intelxsearch.py index ddabeffc..7eb7502e 100644 --- a/discovery/intelxsearch.py +++ b/discovery/intelxsearch.py @@ -39,12 +39,10 @@ def do_search(self): print(f'An exception has occurred: {e}') def process(self): - print('\t Processing Results') self.do_search() intelx_parser = intelxparser.Parser() self.info = intelx_parser.parse_dictionaries(self.results) # Create parser and set self.info to tuple returned from parsing text. - print('\t Done Searching Results') def get_emails(self): return self.info[0] From fbe30dd197c450066e31bcd4abe5821fb93cfd53 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 15:21:16 -0500 Subject: [PATCH 4/6] Updated parser and added 1 sec sleep. --- discovery/intelxsearch.py | 1 + parsers/intelxparser.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/discovery/intelxsearch.py b/discovery/intelxsearch.py index 7eb7502e..03d9fb93 100644 --- a/discovery/intelxsearch.py +++ b/discovery/intelxsearch.py @@ -31,6 +31,7 @@ def do_search(self): uuid = r.json()['id'] url = f'{self.database}phonebook/search/result?id={uuid}&offset=0&limit={self.limit}' r = requests.get(url, headers=headers) + time.sleep(1) # to add in future grab status from r.text and check if more results can be gathered if r.status_code != 200: raise Exception('Error occurred while searching intelx.') diff --git a/parsers/intelxparser.py b/parsers/intelxparser.py index 8448ffe9..8d0dbb39 100644 --- a/parsers/intelxparser.py +++ b/parsers/intelxparser.py @@ -16,6 +16,12 @@ def parse_dictionaries(self, results): if '@' in field: self.emails.add(field) else: - self.hosts.add(str(field).replace(')', '')) + field = str(field) + if 'http' in field or 'https' in field: + if field[:5] == 'https': + field = field[8:] + else: + field = field[7:] + self.hosts.add(field.replace(')', '').replace(',', '')) return self.emails, self.hosts return None, None From 1ae9ab67efb3e2f59e8a02ccd27f27b643abb9d6 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 16:13:37 -0500 Subject: [PATCH 5/6] Updated README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index a9987715..94cadcad 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,8 @@ Passive: * hunter: Hunter search engine (Requires API key, see below.) - www.hunter.io +* intelx: Intelx search engine (Requires API key, see below.) - www.intelx.io + * linkedin: Google search engine, specific search for Linkedin users * netcraft: Netcraft Data Mining @@ -84,6 +86,7 @@ Add your keys to api-keys.yaml * googleCSE: API key and CSE ID * hunter: API key +* intelx: API key * securityTrails: API key * shodan: API key From 8e6360d0a5f2a7c04df7bde6a1a28c11da7bf3eb Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Thu, 31 Jan 2019 16:16:34 -0500 Subject: [PATCH 6/6] Updated version number. --- README.md | 2 +- lib/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 94cadcad..7a17b90a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ * | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | * * \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| * * * -* theHarvester 3.0.6 v247 * +* theHarvester 3.0.6 v260 * * Coded by Christian Martorella * * Edge-Security Research * * cmartorella@edge-security.com * diff --git a/lib/core.py b/lib/core.py index c4f6efd0..6c9450cd 100644 --- a/lib/core.py +++ b/lib/core.py @@ -50,7 +50,7 @@ def banner(): print("* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *") print("* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *") print('* *') - print('* theHarvester 3.0.6 v247 *') + print('* theHarvester 3.0.6 v260 *') print('* Coded by Christian Martorella *') print('* Edge-Security Research *') print('* cmartorella@edge-security.com *')