Mirror of https://github.com/laramies/theHarvester.git, synced 2024-09-20 15:26:31 +08:00
Added proxies.yaml: initial commit for the integration of proxies.
This commit is contained in:
parent 62f3bc6f93
commit c08b0f3982
proxies.yaml (new file, +4)
@@ -0,0 +1,4 @@
+http:
+- ip:port
+https:
+- ip:port
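The ip:port entries above are placeholders for real proxy addresses. As a minimal sketch (not part of the commit), this is how a filled-in proxies.yaml gets turned into proxy URLs, mirroring the Core.proxy_list() helper added further down; the 127.0.0.1:8080 address is purely illustrative:

# Sketch only: parse a user-supplied proxies.yaml the way Core.proxy_list() does.
import yaml

with open('proxies.yaml') as fh:
    keys = yaml.safe_load(fh)  # e.g. {'http': ['127.0.0.1:8080'], 'https': None}

http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
print(http_list + https_list)  # ['http://127.0.0.1:8080'] for the example above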
@@ -26,7 +26,7 @@ async def start():
     parser.add_argument('-S', '--start', help='start with result number X, default=0', default=0, type=int)
     parser.add_argument('-g', '--google-dork', help='use Google Dorks for Google search', default=False,
                         action='store_true')
-    parser.add_argument('-p', '--port-scan', help='scan the detected hosts and check for Takeovers (21,22,80,443,8080)',
+    parser.add_argument('-p', '--proxies', help='use proxies for requests, enter proxies in proxies.yaml',
                         default=False, action='store_true')
     parser.add_argument('-s', '--shodan', help='use Shodan to query discovered hosts', default=False,
                         action='store_true')
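With this hunk the -p short flag now belongs to --proxies instead of --port-scan, so opting into proxies from the command line would presumably look something like: theHarvester -d example.com -b google -p. That invocation is shown only as an illustration and is not part of the commit; the proxies themselves still come from proxies.yaml.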
@@ -68,7 +68,6 @@ async def start():
     google_dorking = args.google_dork
     host_ip: list = []
     limit: int = args.limit
-    ports_scanning = args.port_scan
     shodan = args.shodan
     start: int = args.start
     all_urls: list = []
@@ -458,27 +457,11 @@ async def handler(lst):
     # db = stash.stash_manager()
     # db.store_all(word, dnsres, 'host', 'dns_bruteforce')

-    # Port scanning
-    if ports_scanning:
-        print('\n\n[*] Scanning ports (active).\n')
-        for x in full:
-            domain, host = x.split(':')
-            if host != 'empty':
-                print(('[*] Scanning ' + host))
-                ports = [21, 22, 80, 443, 8080]
-                try:
-                    scan = port_scanner.PortScan(host, ports)
-                    openports = scan.process()
-                    if len(openports) > 1:
-                        print(('\t[*] Detected open ports: ' + ','.join(str(e) for e in openports)))
-                        takeover_check = 'True'
-                        if takeover_check == 'True' and len(openports) > 0:
-                            search_take = takeover.TakeOver([domain])
-                            await search_take.process()
-                except Exception as e:
-                    print(e)
     # TakeOver Checking

     if takeover_status:
-        print('Performing takeover check')
+        print('\n[*] Performing subdomain takeover check')
+        print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
         search_take = takeover.TakeOver(all_hosts)
         await search_take.process()
@@ -13,7 +13,6 @@
     'linkedinsearch',
     'netcraft',
     'otxsearch',
-    'port_scanner',
    'securitytrailssearch',
     'shodansearch',
     'spyse',
@@ -2,14 +2,13 @@
 from typing import Union
 import random
 import aiohttp
 import re
 from bs4 import BeautifulSoup
 import asyncio

 googleUA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 ' \
            'Safari/537.36 '


-def splitter(links):
+async def splitter(links):
     """
     Method that tries to remove duplicates
     LinkedinLists pulls a lot of profiles with the same name.
@@ -73,7 +72,6 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
     :param visit_url: Url to scrape
     :return: Correct html that can be parsed by BS4
     """
-    return True
     url = 'https://websniffer.cc/'
     data = {
         'Cookie': '',
@@ -82,15 +80,10 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
         'type': 'GET&http=1.1',
         'uak': str(random.randint(4, 8))  # select random UA to send to Google
     }
-    import requests
-    returned_html = requests.post(url, data=data, headers={'User-Agent': Core.get_user_agent()})
-    returned_html = returned_html.text
-    # TODO FIX
-    # returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
-    import pprint as p
-    print('returned html')
-    p.pprint(returned_html, indent=4)
-    returned_html = "This page appears when Google automatically detects requests coming from your computer network"
+    returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
+    returned_html = "This page appears when Google automatically detects requests coming from your computer network" \
+        if returned_html == "" else returned_html[0]

     if await search(returned_html):
+        print('going to second method!')
         # indicates that google is serving workaround a captcha
@@ -109,6 +102,9 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
     return correct_html


+async def second_method(url: str) -> Union[str, bool]:
+    return ""
+
 async def request(url, params):
     headers = {'User-Agent': Core.get_user_agent()}
     session = aiohttp.ClientSession(headers=headers)
@@ -119,7 +115,7 @@ async def request(url, params):

 async def proxy_fetch(session, url, proxy):
     try:
-        async with session.get(url, proxy=proxy, ssl=False) as resp:
+        async with session.get(url, proxy=proxy) as resp:
             return f'success:{proxy}', await resp.text()
     except Exception:
         return f'failed:{proxy}', proxy
@@ -128,84 +124,12 @@ async def proxy_fetch(session, url, proxy):
 async def proxy_test(proxies, url):
     print('doing proxy test with this number of proxies: ', len(proxies))
     headers = {'User-Agent': Core.get_user_agent()}
-    timeout = aiohttp.ClientTimeout(total=40)
+    timeout = aiohttp.ClientTimeout(total=50)
     async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
         texts = await asyncio.gather(*[proxy_fetch(session, url, proxy) for proxy in proxies])
         return texts


-async def get_proxies():
-    print('inside get proxies')
-    # ideas borrowed and modified from twitterscraper
-    proxy_url = 'https://free-proxy-list.net/'
-    response = await AsyncFetcher.fetch_all([proxy_url])
-    response = response[0]
-    soup = BeautifulSoup(response, 'lxml')
-    table = soup.find('table', id='proxylisttable')
-    list_tr = table.find_all('tr')
-    list_td = [elem.find_all('td') for elem in list_tr]
-    list_td = [x for x in list_td if x is not None and len(x) > 0]
-    list_ip = [elem[0].text for elem in list_td]
-    list_ports = [elem[1].text for elem in list_td]
-    list_proxies = [f"http://{':'.join(elem)}" for elem in list(zip(list_ip, list_ports))]
-    return list_proxies
-
-
-async def clean_dct(dct: dict, second_test=False):
-    print('cleaning dct and second test is: ', second_test)
-    good_proxies = set()
-    for proxy, text in dct.items():
-        if 'failed' not in proxy:
-            if second_test:
-                if await search(text) is False:
-                    print(text)
-                    return text
-            else:
-                good_proxies.add(proxy[proxy.find(':') + 1:])
-    return good_proxies if second_test is False else True
-
-
-async def create_init_proxies():
-    print('inside create init proxies')
-    url = "https://suip.biz"
-    first_param = [url, (('act', 'proxy1'),), ]
-    second_param = [url, (('act', 'proxy2'),), ]
-    third_param = [url, (('act', 'proxy3'),), ]
-    async_requests = [
-        request(url=url, params=params)
-        for url, params in [first_param, second_param, third_param]
-    ]
-    results = await asyncio.gather(*async_requests)
-    proxy_set = set()
-    for resp in results:
-        ip_candidates = re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', resp)
-        proxy_set.update({f'http://{ip}' for ip in ip_candidates})
-
-    new_proxies = await get_proxies()
-    proxy_set.update({proxy for proxy in new_proxies})
-    return proxy_set
-
-
-async def second_method(url: str) -> Union[str, bool]:
-    print('inside second method')
-    # First visit example.com to make to filter out bad proxies
-    init_url = "http://example.com"
-    proxy_set = await create_init_proxies()
-    tuples = await proxy_test(proxy_set, init_url)
-    mega_dct = dict((x, y) for x, y in tuples)
-    proxy_set = await clean_dct(mega_dct)
-    # After we clean our proxy set now we use them to visit the url we care about
-    print('got working proxies now onto the juice')
-    tuples = await proxy_test(proxy_set, url)
-    mega_dct = dict((x, y) for x, y in tuples)
-    results = await clean_dct(mega_dct, second_test=True)
-    print('returning the juice')
-    # pass in second_test flag as True to indicate this will
-    # the text we care about or a bool to indicate it was
-    # not successful
-    return results
-
-
 class MissingKey(Exception):

     def __init__(self, identity_flag: bool):
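proxy_fetch() and proxy_test() survive this cleanup, while the proxy-scraping helpers (get_proxies, clean_dct, create_init_proxies and the original second_method) are dropped in favour of the user-supplied proxies.yaml. A minimal sketch (not from the commit) of driving the retained proxy_test(); it assumes the function is importable from the module shown above and uses a placeholder proxy address:

# Sketch only: probe a hypothetical proxy against a throwaway URL.
import asyncio

async def main():
    results = await proxy_test(['http://127.0.0.1:8080'], 'http://example.com')
    for status, _ in results:
        print(status)  # 'success:<proxy>' or 'failed:<proxy>'

asyncio.run(main())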
@@ -1,35 +0,0 @@
-import socket
-import threading
-
-
-class PortScan:
-
-    def __init__(self, host, ports):
-        self.threads = 25
-        self.host = host
-        self.ports = ports
-        self.lock = threading.BoundedSemaphore(value=self.threads)
-
-    def port_scanner(self, host, ports):
-        openports: list = []
-        self.lock.acquire()
-        for port in ports:
-            try:
-                connect = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-                connect.settimeout(2)
-                result = connect.connect_ex((host, int(port)))
-                if result == 0:
-                    openports.append(port)
-                connect.close()
-            except Exception as e:
-                print(e)
-        self.lock.release()
-
-        if(len(self.ports)) == 0:
-            print(f'No ports found on host: {host}')
-
-        return openports
-
-    def process(self):
-        ports = self.port_scanner(self.host, self.ports)
-        return ports
@@ -1,10 +1,10 @@
 # coding=utf-8

+import random
-from typing import Set, Union, Any, Tuple
+from typing import Set, Union, Any, Tuple, List
 import yaml
 import asyncio
 import aiohttp
-import random


 class Core:
@@ -89,6 +89,21 @@ def spyse_key() -> str:
         return keys['apikeys']['spyse']['key']
+        return keys['apikeys']['spyse']['key']
+
+    @staticmethod
+    def proxy_list() -> List:
+        try:
+            with open('/etc/theHarvester/proxies.yaml', 'r') as api_keys:
+                keys = yaml.safe_load(api_keys)
+        except FileNotFoundError:
+            with open('proxies.yaml', 'r') as api_keys:
+                keys = yaml.safe_load(api_keys)
+        http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
+        https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
+        return http_list + https_list
+        http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
+        https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
+        return http_list + https_list

     @staticmethod
     def banner() -> None:
         print('\n\033[93m*******************************************************************')
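A minimal usage sketch (not part of the commit) for the new Core.proxy_list(); it assumes the usual theHarvester import path and a proxies.yaml present in /etc/theHarvester/ or the working directory. Note that the AsyncFetcher change below assigns proxy_list = Core.proxy_list() in the class body, so the file has to exist at import time:

# Sketch only: inspect the proxy URLs built from proxies.yaml.
from theHarvester.lib.core import Core  # assumed import path

proxies = Core.proxy_list()  # e.g. ['http://127.0.0.1:8080'] for a placeholder entry
print(proxies)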
@@ -373,16 +388,30 @@ def get_user_agent() -> str:


 class AsyncFetcher:
+    proxy_list = Core.proxy_list()

-    @staticmethod
-    async def post_fetch(url, headers='', data='', params='', json=False):
+    @classmethod
+    async def post_fetch(cls, url, headers='', data='', params='', json=False, proxy=False):
         if len(headers) == 0:
             headers = {'User-Agent': Core.get_user_agent()}
         timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes, changed to 12 minutes for suip module
         # results are well worth the wait
         try:
-            if params == '':
+            if proxy:
+                proxy = str(random.choice(cls().proxy_list))
+                print('proxy is: ', proxy)
+                if params != "":
+                    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                        async with session.get(url, params=params, proxy=proxy) as response:
+                            await asyncio.sleep(2)
+                            return await response.text() if json is False else await response.json()
+                else:
+                    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                        async with session.get(url, proxy=proxy, ssl=True if 'https' in proxy else False) as response:
+                            await asyncio.sleep(2)
+                            return await response.text() if json is False else await response.json()
+            elif params == '':
                 async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
                     async with session.post(url, data=data) as resp:
                         await asyncio.sleep(3)
@@ -393,20 +422,32 @@ async def post_fetch(url, headers='', data='', params='', json=False):
                         await asyncio.sleep(3)
                         return await resp.text() if json is False else await resp.json()
         except Exception as e:
-            print(e)
+            print('An exception has occurred: ', e)
             return ''

     @staticmethod
-    async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
+    async def fetch(session, url, params='', json=False, proxy="") -> Union[str, dict, list, bool]:
         # This fetch method solely focuses on get requests
         try:
             # Wrap in try except due to 0x89 png/jpg files
+            # This fetch method solely focuses on get requests
             # TODO determine if method for post requests is necessary
+            if proxy != "":
+                if params != "":
+                    async with session.get(url, params=params, proxy=proxy, ssl=True if proxy.startswith('https')
+                                           else False) as response:
+                        await asyncio.sleep(2)
+                        return await response.text() if json is False else await response.json()
+                else:
+                    async with session.get(url, proxy=proxy, ssl=True if proxy.startswith('https') else False) \
+                            as response:
+                        await asyncio.sleep(2)
+                        return await response.text() if json is False else await response.json()
             if params != '':
                 async with session.get(url, params=params) as response:
                     await asyncio.sleep(2)
                     return await response.text() if json is False else await response.json()

             else:
                 async with session.get(url) as response:
                     await asyncio.sleep(2)
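As a rough illustration (not from the commit) of the new proxy path through post_fetch(): with proxy=True the helper picks a random entry built from proxies.yaml. The URL is a placeholder, and in this revision the proxy branch issues a plain GET even though the helper is named post_fetch:

# Sketch only: assumes theHarvester is importable and proxies.yaml exists before import.
import asyncio
from theHarvester.lib.core import AsyncFetcher  # assumed import path

async def main():
    html = await AsyncFetcher.post_fetch('https://example.com', proxy=True)
    print(len(html))

asyncio.run(main())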
@@ -415,7 +456,7 @@ async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
             return ''

     @staticmethod
-    async def takeover_fetch(session, url) -> Union[Tuple[Any, Any], str]:
+    async def takeover_fetch(session, url, proxy="") -> Union[Tuple[Any, Any], str]:
         # This fetch method solely focuses on get requests
         try:
             # Wrap in try except due to 0x89 png/jpg files
@@ -423,29 +464,53 @@ async def takeover_fetch(session, url) -> Union[Tuple[Any, Any], str]:
             # TODO determine if method for post requests is necessary
             url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
             # Clean up urls with proper schemas
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return url, await response.text()
+            if proxy != "":
+                async with session.get(url, proxy=proxy, ssl=True if proxy.startswith('https') else False) as response:
+                    await asyncio.sleep(2)
+                    return url, await response.text()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return url, await response.text()
         except Exception:
             return url, ''

-    @staticmethod
-    async def fetch_all(urls, headers='', params='', json=False, takeover=False) -> list:
+    @classmethod
+    async def fetch_all(cls, urls, headers='', params='', json=False, takeover=False, proxy=False) -> list:
         # By default timeout is 5 minutes, 30 seconds should suffice
         timeout = aiohttp.ClientTimeout(total=30)

         if len(headers) == 0:
             headers = {'User-Agent': Core.get_user_agent()}
         if takeover:
             async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
-                tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
-                return tuples
+                if proxy:
+                    tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return tuples
+                else:
+                    tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
+                    return tuples

         if len(params) == 0:
             async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
-                return texts
+                if proxy:
+                    print('proxy is none and so are params :) ')
+                    print('proxy is: ', cls().proxy_list)
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json, proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return texts
+                else:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
+                    return texts
         else:
             # Indicates the request has certain params
             async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
-                return texts
+                if proxy:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json,
+                                                                      proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return texts
+                else:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
+                    return texts
+
+
+if __name__ == '__main__':
+    x = Core()
+    x.proxy_list()
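Taken together, callers opt in per request: fetch_all(..., proxy=True) routes each URL through a random entry from Core.proxy_list(), and takeover checks get the same treatment via takeover_fetch(). A minimal sketch (not from the commit; the URLs are placeholders and proxies.yaml must exist):

# Sketch only: fetch two pages through random proxies from proxies.yaml.
import asyncio
from theHarvester.lib.core import AsyncFetcher  # assumed import path

async def main():
    pages = await AsyncFetcher.fetch_all(['http://example.com', 'http://example.org'], proxy=True)
    print([len(page) for page in pages])

asyncio.run(main())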