Mirror of https://github.com/laramies/theHarvester.git, synced 2024-09-20 15:26:31 +08:00
Added proxies.yaml: initial commit for the integration of proxies.
This commit is contained in:
parent 62f3bc6f93
commit c08b0f3982
proxies.yaml (new file, +4)
@@ -0,0 +1,4 @@
+http:
+- ip:port
+https:
+- ip:port
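The ip:port entries above are placeholders for real proxy addresses. As a minimal sketch (not part of the commit), this is how a filled-in proxies.yaml gets turned into proxy URLs, mirroring the Core.proxy_list() helper added further down; the 127.0.0.1:8080 address is purely illustrative:

# Sketch only: parse a user-supplied proxies.yaml the way Core.proxy_list() does.
import yaml

with open('proxies.yaml') as fh:
    keys = yaml.safe_load(fh)  # e.g. {'http': ['127.0.0.1:8080'], 'https': None}

http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
print(http_list + https_list)  # ['http://127.0.0.1:8080'] for the example above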
@@ -26,7 +26,7 @@ async def start():
     parser.add_argument('-S', '--start', help='start with result number X, default=0', default=0, type=int)
     parser.add_argument('-g', '--google-dork', help='use Google Dorks for Google search', default=False,
                         action='store_true')
-    parser.add_argument('-p', '--port-scan', help='scan the detected hosts and check for Takeovers (21,22,80,443,8080)',
+    parser.add_argument('-p', '--proxies', help='use proxies for requests, enter proxies in proxies.yaml',
                         default=False, action='store_true')
     parser.add_argument('-s', '--shodan', help='use Shodan to query discovered hosts', default=False,
                         action='store_true')
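With this hunk the -p short flag now belongs to --proxies instead of --port-scan, so opting into proxies from the command line would presumably look something like: theHarvester -d example.com -b google -p. That invocation is shown only as an illustration and is not part of the commit; the proxies themselves still come from proxies.yaml.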
@@ -68,7 +68,6 @@ async def start():
     google_dorking = args.google_dork
     host_ip: list = []
     limit: int = args.limit
-    ports_scanning = args.port_scan
     shodan = args.shodan
     start: int = args.start
     all_urls: list = []
@@ -458,27 +457,11 @@ async def handler(lst):
     # db = stash.stash_manager()
     # db.store_all(word, dnsres, 'host', 'dns_bruteforce')

-    # Port scanning
-    if ports_scanning:
-        print('\n\n[*] Scanning ports (active).\n')
-        for x in full:
-            domain, host = x.split(':')
-            if host != 'empty':
-                print(('[*] Scanning ' + host))
-                ports = [21, 22, 80, 443, 8080]
-                try:
-                    scan = port_scanner.PortScan(host, ports)
-                    openports = scan.process()
-                    if len(openports) > 1:
-                        print(('\t[*] Detected open ports: ' + ','.join(str(e) for e in openports)))
-                        takeover_check = 'True'
-                        if takeover_check == 'True' and len(openports) > 0:
-                            search_take = takeover.TakeOver([domain])
-                            await search_take.process()
-                except Exception as e:
-                    print(e)
     # TakeOver Checking

     if takeover_status:
-        print('Performing takeover check')
+        print('\n[*] Performing subdomain takeover check')
+        print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
         search_take = takeover.TakeOver(all_hosts)
         await search_take.process()
@@ -13,7 +13,6 @@
     'linkedinsearch',
     'netcraft',
     'otxsearch',
-    'port_scanner',
    'securitytrailssearch',
     'shodansearch',
     'spyse',
@@ -2,14 +2,13 @@
 from typing import Union
 import random
 import aiohttp
 import re
 from bs4 import BeautifulSoup
 import asyncio

 googleUA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 ' \
            'Safari/537.36 '


-def splitter(links):
+async def splitter(links):
     """
     Method that tries to remove duplicates
     LinkedinLists pulls a lot of profiles with the same name.
@@ -73,7 +72,6 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
     :param visit_url: Url to scrape
     :return: Correct html that can be parsed by BS4
     """
-    return True
     url = 'https://websniffer.cc/'
     data = {
         'Cookie': '',
@@ -82,15 +80,10 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
         'type': 'GET&http=1.1',
         'uak': str(random.randint(4, 8))  # select random UA to send to Google
     }
-    import requests
-    returned_html = requests.post(url, data=data, headers={'User-Agent': Core.get_user_agent()})
-    returned_html = returned_html.text
-    # TODO FIX
-    # returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
-    import pprint as p
-    print('returned html')
-    p.pprint(returned_html, indent=4)
-    returned_html = "This page appears when Google automatically detects requests coming from your computer network"
+    returned_html = await AsyncFetcher.post_fetch(url, headers={'User-Agent': Core.get_user_agent()}, data=data)
+    returned_html = "This page appears when Google automatically detects requests coming from your computer network" \
+        if returned_html == "" else returned_html[0]

     if await search(returned_html):
+        print('going to second method!')
         # indicates that google is serving workaround a captcha
@@ -109,6 +102,9 @@ async def google_workaround(visit_url: str) -> Union[bool, str]:
     return correct_html


+async def second_method(url: str) -> Union[str, bool]:
+    return ""
+
 async def request(url, params):
     headers = {'User-Agent': Core.get_user_agent()}
     session = aiohttp.ClientSession(headers=headers)
@@ -119,7 +115,7 @@ async def request(url, params):

 async def proxy_fetch(session, url, proxy):
     try:
-        async with session.get(url, proxy=proxy, ssl=False) as resp:
+        async with session.get(url, proxy=proxy) as resp:
             return f'success:{proxy}', await resp.text()
     except Exception:
         return f'failed:{proxy}', proxy
@@ -128,84 +124,12 @@ async def proxy_fetch(session, url, proxy):
 async def proxy_test(proxies, url):
     print('doing proxy test with this number of proxies: ', len(proxies))
     headers = {'User-Agent': Core.get_user_agent()}
-    timeout = aiohttp.ClientTimeout(total=40)
+    timeout = aiohttp.ClientTimeout(total=50)
     async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
         texts = await asyncio.gather(*[proxy_fetch(session, url, proxy) for proxy in proxies])
         return texts


-async def get_proxies():
-    print('inside get proxies')
-    # ideas borrowed and modified from twitterscraper
-    proxy_url = 'https://free-proxy-list.net/'
-    response = await AsyncFetcher.fetch_all([proxy_url])
-    response = response[0]
-    soup = BeautifulSoup(response, 'lxml')
-    table = soup.find('table', id='proxylisttable')
-    list_tr = table.find_all('tr')
-    list_td = [elem.find_all('td') for elem in list_tr]
-    list_td = [x for x in list_td if x is not None and len(x) > 0]
-    list_ip = [elem[0].text for elem in list_td]
-    list_ports = [elem[1].text for elem in list_td]
-    list_proxies = [f"http://{':'.join(elem)}" for elem in list(zip(list_ip, list_ports))]
-    return list_proxies
-
-
-async def clean_dct(dct: dict, second_test=False):
-    print('cleaning dct and second test is: ', second_test)
-    good_proxies = set()
-    for proxy, text in dct.items():
-        if 'failed' not in proxy:
-            if second_test:
-                if await search(text) is False:
-                    print(text)
-                    return text
-            else:
-                good_proxies.add(proxy[proxy.find(':') + 1:])
-    return good_proxies if second_test is False else True
-
-
-async def create_init_proxies():
-    print('inside create init proxies')
-    url = "https://suip.biz"
-    first_param = [url, (('act', 'proxy1'),), ]
-    second_param = [url, (('act', 'proxy2'),), ]
-    third_param = [url, (('act', 'proxy3'),), ]
-    async_requests = [
-        request(url=url, params=params)
-        for url, params in [first_param, second_param, third_param]
-    ]
-    results = await asyncio.gather(*async_requests)
-    proxy_set = set()
-    for resp in results:
-        ip_candidates = re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', resp)
-        proxy_set.update({f'http://{ip}' for ip in ip_candidates})
-
-    new_proxies = await get_proxies()
-    proxy_set.update({proxy for proxy in new_proxies})
-    return proxy_set
-
-
-async def second_method(url: str) -> Union[str, bool]:
-    print('inside second method')
-    # First visit example.com to make to filter out bad proxies
-    init_url = "http://example.com"
-    proxy_set = await create_init_proxies()
-    tuples = await proxy_test(proxy_set, init_url)
-    mega_dct = dict((x, y) for x, y in tuples)
-    proxy_set = await clean_dct(mega_dct)
-    # After we clean our proxy set now we use them to visit the url we care about
-    print('got working proxies now onto the juice')
-    tuples = await proxy_test(proxy_set, url)
-    mega_dct = dict((x, y) for x, y in tuples)
-    results = await clean_dct(mega_dct, second_test=True)
-    print('returning the juice')
-    # pass in second_test flag as True to indicate this will
-    # the text we care about or a bool to indicate it was
-    # not successful
-    return results
-
-
 class MissingKey(Exception):

     def __init__(self, identity_flag: bool):
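proxy_fetch() and proxy_test() survive this cleanup, while the proxy-scraping helpers (get_proxies, clean_dct, create_init_proxies and the original second_method) are dropped in favour of the user-supplied proxies.yaml. A minimal sketch (not from the commit) of driving the retained proxy_test(); it assumes the function is importable from the module shown above and uses a placeholder proxy address:

# Sketch only: probe a hypothetical proxy against a throwaway URL.
import asyncio

async def main():
    results = await proxy_test(['http://127.0.0.1:8080'], 'http://example.com')
    for status, _ in results:
        print(status)  # 'success:<proxy>' or 'failed:<proxy>'

asyncio.run(main())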
@@ -1,35 +0,0 @@
-import socket
-import threading
-
-
-class PortScan:
-
-    def __init__(self, host, ports):
-        self.threads = 25
-        self.host = host
-        self.ports = ports
-        self.lock = threading.BoundedSemaphore(value=self.threads)
-
-    def port_scanner(self, host, ports):
-        openports: list = []
-        self.lock.acquire()
-        for port in ports:
-            try:
-                connect = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-                connect.settimeout(2)
-                result = connect.connect_ex((host, int(port)))
-                if result == 0:
-                    openports.append(port)
-                connect.close()
-            except Exception as e:
-                print(e)
-        self.lock.release()
-
-        if(len(self.ports)) == 0:
-            print(f'No ports found on host: {host}')
-
-        return openports
-
-    def process(self):
-        ports = self.port_scanner(self.host, self.ports)
-        return ports
@@ -1,10 +1,10 @@
 # coding=utf-8

+import random
-from typing import Set, Union, Any, Tuple
+from typing import Set, Union, Any, Tuple, List
 import yaml
 import asyncio
 import aiohttp
-import random


 class Core:
@@ -89,6 +89,21 @@ def spyse_key() -> str:
         return keys['apikeys']['spyse']['key']
+        return keys['apikeys']['spyse']['key']
+
+    @staticmethod
+    def proxy_list() -> List:
+        try:
+            with open('/etc/theHarvester/proxies.yaml', 'r') as api_keys:
+                keys = yaml.safe_load(api_keys)
+        except FileNotFoundError:
+            with open('proxies.yaml', 'r') as api_keys:
+                keys = yaml.safe_load(api_keys)
+        http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
+        https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
+        return http_list + https_list
+        http_list = [f'http://{proxy}' for proxy in keys['http']] if keys['http'] is not None else []
+        https_list = [f'https://{proxy}' for proxy in keys['https']] if keys['https'] is not None else []
+        return http_list + https_list

     @staticmethod
     def banner() -> None:
         print('\n\033[93m*******************************************************************')
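A minimal usage sketch (not part of the commit) for the new Core.proxy_list(); it assumes the usual theHarvester import path and a proxies.yaml present in /etc/theHarvester/ or the working directory. Note that the AsyncFetcher change below assigns proxy_list = Core.proxy_list() in the class body, so the file has to exist at import time:

# Sketch only: inspect the proxy URLs built from proxies.yaml.
from theHarvester.lib.core import Core  # assumed import path

proxies = Core.proxy_list()  # e.g. ['http://127.0.0.1:8080'] for a placeholder entry
print(proxies)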
@@ -373,16 +388,30 @@ def get_user_agent() -> str:


 class AsyncFetcher:
+    proxy_list = Core.proxy_list()

-    @staticmethod
-    async def post_fetch(url, headers='', data='', params='', json=False):
+    @classmethod
+    async def post_fetch(cls, url, headers='', data='', params='', json=False, proxy=False):
         if len(headers) == 0:
             headers = {'User-Agent': Core.get_user_agent()}
         timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes, changed to 12 minutes for suip module
         # results are well worth the wait
         try:
-            if params == '':
+            if proxy:
+                proxy = str(random.choice(cls().proxy_list))
+                print('proxy is: ', proxy)
+                if params != "":
+                    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                        async with session.get(url, params=params, proxy=proxy) as response:
+                            await asyncio.sleep(2)
+                            return await response.text() if json is False else await response.json()
+                else:
+                    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+                        async with session.get(url, proxy=proxy, ssl=True if 'https' in proxy else False) as response:
+                            await asyncio.sleep(2)
+                            return await response.text() if json is False else await response.json()
+            elif params == '':
                 async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
                     async with session.post(url, data=data) as resp:
                         await asyncio.sleep(3)
@@ -393,20 +422,32 @@ async def post_fetch(url, headers='', data='', params='', json=False):
                         await asyncio.sleep(3)
                         return await resp.text() if json is False else await resp.json()
         except Exception as e:
-            print(e)
+            print('An exception has occurred: ', e)
             return ''

     @staticmethod
-    async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
+    async def fetch(session, url, params='', json=False, proxy="") -> Union[str, dict, list, bool]:
         # This fetch method solely focuses on get requests
         try:
             # Wrap in try except due to 0x89 png/jpg files
+            # This fetch method solely focuses on get requests
             # TODO determine if method for post requests is necessary
+            if proxy != "":
+                if params != "":
+                    async with session.get(url, params=params, proxy=proxy, ssl=True if proxy.startswith('https')
+                                           else False) as response:
+                        await asyncio.sleep(2)
+                        return await response.text() if json is False else await response.json()
+                else:
+                    async with session.get(url, proxy=proxy, ssl=True if proxy.startswith('https') else False) \
+                            as response:
+                        await asyncio.sleep(2)
+                        return await response.text() if json is False else await response.json()
             if params != '':
                 async with session.get(url, params=params) as response:
                     await asyncio.sleep(2)
                     return await response.text() if json is False else await response.json()

             else:
                 async with session.get(url) as response:
                     await asyncio.sleep(2)
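As a rough illustration (not from the commit) of the new proxy path through post_fetch(): with proxy=True the helper picks a random entry built from proxies.yaml. The URL is a placeholder, and in this revision the proxy branch issues a plain GET even though the helper is named post_fetch:

# Sketch only: assumes theHarvester is importable and proxies.yaml exists before import.
import asyncio
from theHarvester.lib.core import AsyncFetcher  # assumed import path

async def main():
    html = await AsyncFetcher.post_fetch('https://example.com', proxy=True)
    print(len(html))

asyncio.run(main())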
@@ -415,7 +456,7 @@ async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
             return ''

     @staticmethod
-    async def takeover_fetch(session, url) -> Union[Tuple[Any, Any], str]:
+    async def takeover_fetch(session, url, proxy="") -> Union[Tuple[Any, Any], str]:
         # This fetch method solely focuses on get requests
         try:
             # Wrap in try except due to 0x89 png/jpg files
@@ -423,29 +464,53 @@ async def takeover_fetch(session, url) -> Union[Tuple[Any, Any], str]:
             # TODO determine if method for post requests is necessary
             url = f'http://{url}' if str(url).startswith(('http:', 'https:')) is False else url
             # Clean up urls with proper schemas
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return url, await response.text()
+            if proxy != "":
+                async with session.get(url, proxy=proxy, ssl=True if proxy.startswith('https') else False) as response:
+                    await asyncio.sleep(2)
+                    return url, await response.text()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return url, await response.text()
         except Exception:
             return url, ''

-    @staticmethod
-    async def fetch_all(urls, headers='', params='', json=False, takeover=False) -> list:
+    @classmethod
+    async def fetch_all(cls, urls, headers='', params='', json=False, takeover=False, proxy=False) -> list:
         # By default timeout is 5 minutes, 30 seconds should suffice
         timeout = aiohttp.ClientTimeout(total=30)

         if len(headers) == 0:
             headers = {'User-Agent': Core.get_user_agent()}
         if takeover:
             async with aiohttp.ClientSession(headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as session:
-                tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
-                return tuples
+                if proxy:
+                    tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url, proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return tuples
+                else:
+                    tuples = await asyncio.gather(*[AsyncFetcher.takeover_fetch(session, url) for url in urls])
+                    return tuples

         if len(params) == 0:
             async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
-                return texts
+                if proxy:
+                    print('proxy is none and so are params :) ')
+                    print('proxy is: ', cls().proxy_list)
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json, proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return texts
+                else:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, json=json) for url in urls])
+                    return texts
         else:
             # Indicates the request has certain params
             async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
-                texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
-                return texts
+                if proxy:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json,
+                                                                      proxy=random.choice(cls().proxy_list)) for url in urls])
+                    return texts
+                else:
+                    texts = await asyncio.gather(*[AsyncFetcher.fetch(session, url, params, json) for url in urls])
+                    return texts
+
+
+if __name__ == '__main__':
+    x = Core()
+    x.proxy_list()
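Taken together, callers opt in per request: fetch_all(..., proxy=True) routes each URL through a random entry from Core.proxy_list(), and takeover checks get the same treatment via takeover_fetch(). A minimal sketch (not from the commit; the URLs are placeholders and proxies.yaml must exist):

# Sketch only: fetch two pages through random proxies from proxies.yaml.
import asyncio
from theHarvester.lib.core import AsyncFetcher  # assumed import path

async def main():
    pages = await AsyncFetcher.fetch_all(['http://example.com', 'http://example.org'], proxy=True)
    print([len(page) for page in pages])

asyncio.run(main())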