From babb2bd206dec17ade1cbbca7c45a6d1cd2ee832 Mon Sep 17 00:00:00 2001
From: NotoriousRebel
Date: Sun, 12 Jan 2020 17:41:46 -0500
Subject: [PATCH] Made the takeover module asynchronous and added a flag for
 users who wish to perform subdomain takeover checks.

---
 theHarvester/__main__.py           | 14 ++++--
 theHarvester/discovery/takeover.py | 81 ++++++++++++++++++------------
 theHarvester/lib/core.py           | 23 ++++++++-
 3 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py
index 1e7dabd8..a45a99c9 100644
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -36,6 +36,8 @@ async def start():
     parser.add_argument('-t', '--dns-tld', help='perform a DNS TLD expansion discovery, default False', default=False)
     parser.add_argument('-n', '--dns-lookup', help='enable DNS server lookup, default False', default=False,
                         action='store_true')
+    parser.add_argument('-r', '--take-over', help='Check for takeovers', default=False,
+                        action='store_true')
     parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False,
                         action='store_true')
     parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
@@ -73,6 +75,7 @@ async def start():
     vhost: list = []
     virtual = args.virtual_host
     word: str = args.domain
+    takeover_status = args.take_over

     async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False,
                     store_emails: bool = False, store_ip: bool = False, store_people: bool = False,
@@ -464,8 +467,7 @@ async def handler(lst):
     if ports_scanning:
         print('\n\n[*] Scanning ports (active).\n')
         for x in full:
-            host = x.split(':')[1]
-            domain = x.split(':')[0]
+            domain, host = x.split(':')
             if host != 'empty':
                 print(('[*] Scanning ' + host))
                 ports = [21, 22, 80, 443, 8080]
@@ -476,10 +478,14 @@ async def handler(lst):
                         print(('\t[*] Detected open ports: ' + ','.join(str(e) for e in openports)))
                     takeover_check = 'True'
                     if takeover_check == 'True' and len(openports) > 0:
-                        search_take = takeover.TakeOver(domain)
-                        search_take.process()
+                        search_take = takeover.TakeOver([domain])
+                        await search_take.process()
                 except Exception as e:
                     print(e)
+    if takeover_status:
+        print('Performing takeover check')
+        search_take = takeover.TakeOver(all_hosts)
+        await search_take.process()

     # DNS reverse lookup
     dnsrev = []
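The -r/--take-over option above follows argparse's store_true pattern: the attribute defaults to False and flips to True only when the flag is present on the command line, which is what gates the new takeover block. A minimal standalone sketch of that pattern (only the option itself comes from the patch; the rest is illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-r', '--take-over', help='Check for takeovers', default=False,
                    action='store_true')
args = parser.parse_args(['-r'])  # simulate a user passing the flag

takeover_status = args.take_over  # argparse exposes --take-over as args.take_over
if takeover_status:
    print('Performing takeover check')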

diff --git a/theHarvester/discovery/takeover.py b/theHarvester/discovery/takeover.py
index 6fd6a489..0e6e29b8 100644
--- a/theHarvester/discovery/takeover.py
+++ b/theHarvester/discovery/takeover.py
@@ -1,44 +1,61 @@
+from theHarvester.lib.core import *
 import re
-import requests


 class TakeOver:

-    def __init__(self, host):
-        self.host = host
+    def __init__(self, hosts):
+        self.hosts = hosts
         self.results = ""
         self.totalresults = ""
-        self.fingerprints = ["Squarespace - Domain Not Claimed",
-                             'www.herokucdn.com/error-pages/no-such-app.html',
-                             'Squarespace - No Such Account',
-                             "If you're trying to publish one, read the full documentation to learn how to set up GitHub Pages for your repository, organization, or user account.",
-                             "If you\'re trying to publish one, read the full documentation to learn how to set up GitHub Pages for your repository, organization, or user account.",
-                             "Bummer. It looks like the help center that you are trying to reach no longer exists.",
-                             " The page you\'re looking for could not be found (404) ",
-                             'The specified bucket does not exist',
-                             'Bad Request: ERROR: The request could not be satisfied',
-                             'Fastly error: unknown domain:',
-                             "There isn't a Github Pages site here.",
-                             'No such app',
-                             'Unrecognized domain',
-                             'Sorry, this shop is currently unavailable.',
-                             "Whatever you were looking for doesn't currently exist at this address",
-                             'The requested URL was not found on this server.',
-                             'This UserVoice subdomain is currently available!',
-                             'Do you want to register *.wordpress.com?',
-                             'Help Center Closed']
-
-    def do_take(self):
+        # Thank you to https://github.com/EdOverflow/can-i-take-over-xyz for these fingerprints
+        self.fingerprints = {"'Trying to access your account?'": 'Campaign Monitor',
+                             '404 Not Found': 'Fly.io',
+                             '404 error unknown site!': 'Pantheon',
+                             'Do you want to register *.wordpress.com?': 'Wordpress',
+                             'Domain uses DO name serves with no records in DO.': 'Digital Ocean',
+                             "It looks like you may have taken a wrong turn somewhere. Don't worry...it happens to all of us.": 'LaunchRock',
+                             'No Site For Domain': 'Kinsta',
+                             'No settings were found for this company:': 'Help Scout',
+                             'Project doesnt exist... yet!': 'Readme.io',
+                             'Repository not found': 'Bitbucket',
+                             'The feed has not been found.': 'Feedpress',
+                             'No such app': 'Heroku',
+                             'The specified bucket does not exist': 'AWS/S3',
+                             'The thing you were looking for is no longer here, or never was': 'Ghost',
+                             "There isn't a Github Pages site here.": 'Github',
+                             'This UserVoice subdomain is currently available!': 'UserVoice',
+                             "Uh oh. That page doesn't exist.": 'Intercom',
+                             "We could not find what you're looking for.": 'Help Juice',
+                             "Whatever you were looking for doesn't currently exist at this address": 'Tumblr',
+                             'is not a registered InCloud YouTrack': 'JetBrains',
+                             'page not found': 'Uptimerobot',
+                             'project not found': 'Surge.sh'}
+
+    async def check(self, url, resp):
+        # Takes a response body and checks whether any takeover fingerprint appears in it
+        # If a fingerprint matches, figure out which service it maps to and print it
+        regex = re.compile("(?=(" + "|".join(map(re.escape, list(self.fingerprints.keys()))) + "))")
+        # re.escape above sanitizes each fingerprint before it is joined into the alternation
+        matches = re.findall(regex, resp)
+        for match in matches:
+            print(f'\t\033[91m Takeover detected: {url}\033[1;32;40m')
+            if match in self.fingerprints.keys():
+                # Sanity check so an unexpected match cannot raise a KeyError
+                print(f'\t\033[91m Type of takeover is: {self.fingerprints[match]}\033[1;32;40m')
+
+    async def do_take(self):
         try:
-            print(f'\t Searching takeovers for {self.host}')
-            r = requests.get(f'https://{self.host}', verify=False)
-            for x in self.fingerprints:
-                take_reg = re.compile(x)
-                self.temp = take_reg.findall(r.text)
-                if self.temp != []:
-                    print(f'\t\033[91m Takeover detected! - {self.host}\033[1;32;40m')
+            tup_resps: list = await AsyncFetcher.fetch_all(self.hosts, takeover=True)
+            # fetch_all returns a list of tuples in the format: (url, response)
+            tup_resps = [tup for tup in tup_resps if tup[1] != '']
+            # Filter out entries whose response is an empty string (the request errored)
+            for url, resp in tup_resps:
+                await self.check(url, resp)
+
         except Exception as e:
             print(e)

-    def process(self):
-        self.do_take()
+    async def process(self):
+        await self.do_take()

diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py
index 55cc88b9..6939a03b 100644
--- a/theHarvester/lib/core.py
+++ b/theHarvester/lib/core.py
@@ -1,7 +1,7 @@
 # coding=utf-8
 import random
-from typing import Set, Union, Any
+from typing import Set, Union, Any, Tuple
 import yaml
 import asyncio
 import aiohttp
@@ -415,12 +415,31 @@ async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         return ''

     @staticmethod
-    async def fetch_all(urls, headers='', params='', json=False) -> list:
+    async def takeover_fetch(session, url) -> Tuple[Any, Any]:
+        # This fetch method solely performs GET requests
+        # TODO determine if a method for POST requests is necessary
+        try:
+            # Wrapped in try/except because binary responses (0x89 png/jpg) cannot be decoded as text
+            url = f'http://{url}' if not str(url).startswith(('http:', 'https:')) else url
+            # Prepend a schema to bare hostnames so aiohttp can request them
+            async with session.get(url) as response:
+                await asyncio.sleep(2)
+                return url, await response.text()
+        except Exception:
+            return url, ''
+
+    @staticmethod
+    async def fetch_all(urls, headers='', params='', json=False, takeover=False) -> list:
         # By default timeout is 5 minutes; 30 seconds should suffice
         timeout = aiohttp.ClientTimeout(total=30)
         if len(headers) == 0:
             headers = {'User-Agent': Core.get_user_agent()}
+        if takeover:
+            async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
+                tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
+                return tuples
         if len(params) == 0:
             async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
                 texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
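Two patterns in this patch are worth illustrating. First, TakeOver.check() compiles every fingerprint into a single alternation wrapped in a zero-width lookahead, so one pass over the response body reports every fingerprint that occurs, even when candidate matches would overlap, and each hit maps back to its service through the dict. A self-contained sketch of that technique (the two fingerprints are a subset of the table in the patch; the body string is illustrative):

import re

fingerprints = {'No such app': 'Heroku',
                'Repository not found': 'Bitbucket'}
# Escape each fingerprint, join into one alternation, wrap in a lookahead
regex = re.compile('(?=(' + '|'.join(map(re.escape, fingerprints)) + '))')

body = '<html><body>No such app</body></html>'
for match in re.findall(regex, body):
    print(f'Fingerprint {match!r} indicates a possible {fingerprints[match]} takeover')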
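Second, the core.py additions follow a standard aiohttp fan-out: one ClientSession is shared by the whole batch, takeover_fetch produces one coroutine per host, and asyncio.gather runs them concurrently, with any failure reduced to (url, '') so TakeOver.do_take() can filter it out. A self-contained sketch of that pattern (the function names mirror the patch; the timeout value and example hosts are illustrative):

import asyncio
import aiohttp


async def takeover_fetch(session, url):
    # Normalize bare hostnames into a requestable URL
    url = url if str(url).startswith(('http:', 'https:')) else f'http://{url}'
    try:
        async with session.get(url) as response:
            return url, await response.text()
    except Exception:
        return url, ''  # errors collapse to an empty body the caller can filter out


async def fetch_all(urls):
    timeout = aiohttp.ClientTimeout(total=15)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        # One coroutine per URL, executed concurrently on the shared session
        return await asyncio.gather(*(takeover_fetch(session, url) for url in urls))


if __name__ == '__main__':
    for url, body in asyncio.run(fetch_all(['example.com', 'example.org'])):
        print(url, 'errored' if body == '' else f'{len(body)} chars received')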