mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 16:26:34 +08:00
commit
97b88239f6
|
@ -40,6 +40,8 @@ Passive:
|
||||||
|
|
||||||
* duckduckgo: DuckDuckGo search engine - www.duckduckgo.com
|
* duckduckgo: DuckDuckGo search engine - www.duckduckgo.com
|
||||||
|
|
||||||
|
* github-code: GitHub code search engine (requires a GitHub Personal Access Token, see below) - www.github.com
|
||||||
|
|
||||||
* google: Google search engine (Optional Google dorking.) - www.google.com
|
* google: Google search engine (Optional Google dorking.) - www.google.com
|
||||||
|
|
||||||
* google-certificates: Google Certificate Transparency report
|
* google-certificates: Google Certificate Transparency report
|
||||||
|
@ -83,6 +85,7 @@ Modules that require an API key:
|
||||||
Add your keys to api-keys.yaml
|
Add your keys to api-keys.yaml
|
||||||
|
|
||||||
* bingapi
|
* bingapi
|
||||||
|
* github
|
||||||
* hunter
|
* hunter
|
||||||
* intelx
|
* intelx
|
||||||
* securityTrails
|
* securityTrails
|
||||||
|
|
|
@ -13,3 +13,6 @@ apikeys:
|
||||||
|
|
||||||
shodan:
|
shodan:
|
||||||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||||
|
|
||||||
|
github:
|
||||||
|
key:
|
||||||
|
|
0
tests/discovery/__init__.py
Normal file
0
tests/discovery/__init__.py
Normal file
116
tests/discovery/test_githubcode.py
Normal file
116
tests/discovery/test_githubcode.py
Normal file
|
@ -0,0 +1,116 @@
|
||||||
|
from theHarvester.discovery import githubcode
|
||||||
|
from theHarvester.discovery.githubcode import RetryResult, ErrorResult, SuccessResult
|
||||||
|
from theHarvester.discovery.constants import MissingKey
|
||||||
|
from theHarvester.lib.core import Core
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
from requests import Response
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchGithubCode:
    """Unit tests for ``githubcode.SearchGithubCode``.

    The nested ``*Response`` classes hold canned ``requests.Response`` objects
    whose ``json`` method and ``status_code`` are stubbed, so the tests never
    perform a real HTTP request.
    """

    class OkResponse:
        # A 200 response carrying two items with one text fragment each.
        response = Response()
        json = {
            "items": [
                {
                    "text_matches": [
                        {
                            "fragment": "test1"
                        }
                    ]
                },
                {
                    "text_matches": [
                        {
                            "fragment": "test2"
                        }
                    ]
                }
            ]
        }
        response.status_code = 200
        response.json = MagicMock(return_value=json)

    class FailureResponse:
        # A 401 response with an empty JSON body.
        response = Response()
        response.json = MagicMock(return_value={})
        response.status_code = 401

    class RetryResponse:
        # A 403 response with an empty JSON body.
        response = Response()
        response.json = MagicMock(return_value={})
        response.status_code = 403

    class MalformedResponse:
        # A 200 response whose items lack usable "text_matches"/"fragment" data.
        response = Response()
        json = {
            "items": [
                {
                    "fail": True
                },
                {
                    "text_matches": []
                },
                {
                    "text_matches": [
                        {
                            "weird": "result"
                        }
                    ]
                }
            ]
        }
        response.json = MagicMock(return_value=json)
        response.status_code = 200

    @pytest.fixture(autouse=True)
    def stub_github_key(self, monkeypatch):
        """Make ``Core.github_key`` return a dummy token for every test.

        ``monkeypatch`` restores the real attribute after each test, so the
        stub does not leak into other test modules.
        """
        monkeypatch.setattr(Core, "github_key", MagicMock(return_value="lol"))

    @pytest.fixture
    def searcher(self, stub_github_key):
        """Return a ``SearchGithubCode`` instance built with the stubbed key."""
        return githubcode.SearchGithubCode(word="test", limit=500)

    def test_missing_key(self, monkeypatch):
        # Override the autouse stub: a key of None must abort construction.
        monkeypatch.setattr(Core, "github_key", MagicMock(return_value=None))
        with pytest.raises(MissingKey):
            githubcode.SearchGithubCode(word="test", limit=500)

    def test_fragments_from_response(self, searcher):
        test_result = searcher.fragments_from_response(self.OkResponse.response)
        assert test_result == ["test1", "test2"]

    def test_invalid_fragments_from_response(self, searcher):
        test_result = searcher.fragments_from_response(self.MalformedResponse.response)
        assert test_result == []

    def test_handle_response_ok(self, searcher):
        test_result = searcher.handle_response(self.OkResponse.response)
        assert isinstance(test_result, SuccessResult)

    def test_handle_response_retry(self, searcher):
        test_result = searcher.handle_response(self.RetryResponse.response)
        assert isinstance(test_result, RetryResult)

    def test_handle_response_fail(self, searcher):
        test_result = searcher.handle_response(self.FailureResponse.response)
        assert isinstance(test_result, ErrorResult)

    def test_next_page(self, searcher):
        test_result = githubcode.SuccessResult([], next_page=2, last_page=4)
        assert searcher.next_page_or_end(test_result) == 2

    def test_last_page(self, searcher):
        test_result = githubcode.SuccessResult([], None, None)
        assert searcher.next_page_or_end(test_result) is None
|
||||||
|
|
||||||
|
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    pytest.main()
|
||||||
|
|
|
@ -54,7 +54,7 @@ def start():
|
||||||
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
|
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
|
||||||
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
||||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
||||||
dogpile, duckduckgo, google,
|
dogpile, duckduckgo, github-code, google,
|
||||||
google-certificates, hunter, intelx,
|
google-certificates, hunter, intelx,
|
||||||
linkedin, netcraft, securityTrails, threatcrowd,
|
linkedin, netcraft, securityTrails, threatcrowd,
|
||||||
trello, twitter, vhost, virustotal, yahoo, all''')
|
trello, twitter, vhost, virustotal, yahoo, all''')
|
||||||
|
@ -194,6 +194,24 @@ def start():
|
||||||
db.store_all(word, all_hosts, 'email', 'duckduckgo')
|
db.store_all(word, all_hosts, 'email', 'duckduckgo')
|
||||||
db.store_all(word, all_hosts, 'host', 'duckduckgo')
|
db.store_all(word, all_hosts, 'host', 'duckduckgo')
|
||||||
|
|
||||||
|
elif engineitem == 'github-code':
|
||||||
|
print('\033[94m[*] Searching Github (code). \033[0m')
|
||||||
|
try:
|
||||||
|
from theHarvester.discovery import githubcode
|
||||||
|
search = githubcode.SearchGithubCode(word, limit)
|
||||||
|
search.process()
|
||||||
|
emails = filter(search.get_emails())
|
||||||
|
all_emails.extend(emails)
|
||||||
|
hosts = filter(search.get_hostnames())
|
||||||
|
all_hosts.extend(hosts)
|
||||||
|
db = stash.stash_manager()
|
||||||
|
db.store_all(word, all_hosts, 'host', 'github-code')
|
||||||
|
db.store_all(word, all_emails, 'email', 'github-code')
|
||||||
|
except MissingKey as ex:
|
||||||
|
print(ex)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
elif engineitem == 'google':
|
elif engineitem == 'google':
|
||||||
print('\033[94m[*] Searching Google. \033[0m')
|
print('\033[94m[*] Searching Google. \033[0m')
|
||||||
search = googlesearch.search_google(word, limit, start)
|
search = googlesearch.search_google(word, limit, start)
|
||||||
|
|
126
theHarvester/discovery/githubcode.py
Normal file
126
theHarvester/discovery/githubcode.py
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
from theHarvester.discovery.constants import *
|
||||||
|
from theHarvester.lib.core import *
|
||||||
|
from theHarvester.parsers import myparser
|
||||||
|
import requests
|
||||||
|
from requests import Response
|
||||||
|
import time
|
||||||
|
from typing import List, Dict, Any, Optional, NamedTuple
|
||||||
|
import urllib.parse as urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class RetryResult(NamedTuple):
    """Outcome of a request that should be attempted again later."""

    # Delay to wait before the retry; the caller adds it to a sleep duration in seconds.
    time: float
|
||||||
|
|
||||||
|
|
||||||
|
class SuccessResult(NamedTuple):
    """Outcome of a successful search request."""

    # Text fragments extracted from the response body.
    fragments: List[str]
    # Page number taken from the response's "next" link, or None if absent.
    next_page: Optional[int]
    # Page number taken from the response's "last" link, or None if absent.
    last_page: Optional[int]
|
||||||
|
|
||||||
|
|
||||||
|
class ErrorResult(NamedTuple):
    """Outcome of a request that failed and should not be retried."""

    # HTTP status code of the failed response.
    status_code: int
    # Response payload: the decoded JSON when available, otherwise the raw text.
    # Annotated with typing.Any (the original used the builtin function `any`).
    body: Any
|
||||||
|
|
||||||
|
|
||||||
|
class SearchGithubCode:
    """Search GitHub's code-search endpoint for a word and accumulate the matching text.

    Fragments returned by the API are concatenated into ``total_results``,
    which ``get_emails`` and ``get_hostnames`` then hand to ``myparser.Parser``.
    """

    def __init__(self, word, limit):
        """Store the search parameters and load the GitHub API key.

        :param word: term to search for (sent as a quoted phrase).
        :param limit: paging continues while the number of collected fragments
            is less than or equal to this value.
        :raises MissingKey: if ``Core.github_key()`` returns ``None``.
        """
        self.word = word
        self.total_results = ""
        self.server = 'api.github.com'
        self.hostname = 'api.github.com'
        self.limit = limit
        self.counter = 0
        self.page = 1
        self.key = Core.github_key()
        # If you don't have a personal access token, github narrows your search capabilities significantly
        # rate limits you more severely
        # https://developer.github.com/v3/search/#rate-limit
        if self.key is None:
            raise MissingKey(True)

    @staticmethod
    def fragments_from_response(response: Response) -> List[str]:
        """Collect every non-``None`` ``fragment`` found under ``items[*].text_matches[*]``.

        Missing or empty ``items`` / ``text_matches`` entries are treated as
        empty lists, so a malformed payload yields an empty result.
        """
        items: List[Dict[str, Any]] = response.json().get('items') or []
        fragments: List[str] = []
        for item in items:
            for match in item.get("text_matches") or []:
                fragment = match.get("fragment")
                if fragment is not None:
                    fragments.append(fragment)
        return fragments

    @staticmethod
    def page_from_response(page: str, response: Response) -> Optional[int]:
        """Return the ``page`` query value of the response link named *page*.

        :param page: relation name looked up in ``response.links`` (the class
            itself uses ``"next"`` and ``"last"``).
        :returns: the page number as an int, or ``None`` when the link or its
            ``page`` query parameter is absent.
        """
        page_link = response.links.get(page)
        if not page_link:
            return None
        query = urlparse.urlparse(page_link.get("url")).query
        # Use a separate local instead of rebinding the `page` parameter.
        page_values = urlparse.parse_qs(query).get('page')
        if not page_values:
            return None
        return int(page_values[0])

    def handle_response(self, response: Response) -> Optional[Any]:
        """Classify *response* as a ``SuccessResult``, ``RetryResult`` or ``ErrorResult``.

        * ``response.ok`` -> ``SuccessResult`` with the fragments and paging info.
        * status 429 or 403 -> ``RetryResult(60)``.
        * anything else -> ``ErrorResult`` carrying the JSON body, or the raw
          text when the body cannot be decoded as JSON.
        """
        if response.ok:
            results = self.fragments_from_response(response)
            next_page = self.page_from_response("next", response)
            last_page = self.page_from_response("last", response)
            return SuccessResult(results, next_page, last_page)
        if response.status_code in (429, 403):
            return RetryResult(60)
        try:
            return ErrorResult(response.status_code, response.json())
        except ValueError:
            return ErrorResult(response.status_code, response.text)

    def do_search(self, page: Optional[int]) -> Response:
        """Request one page of code-search results for ``self.word``.

        :param page: page number to request; ``None`` omits the ``page``
            parameter from the query string.
        :returns: the ``requests`` response, unprocessed.
        """
        headers = {
            'Host': self.hostname,
            'User-agent': Core.get_user_agent(),
            'Accept': "application/vnd.github.v3.text-match+json",
            'Authorization': f'token {self.key}'
        }
        # Pass the query through `params` so requests URL-encodes the word;
        # entries whose value is None are left out of the query string.
        params = {'q': f'"{self.word}"', 'page': page}
        url = f'https://{self.server}/search/code'
        return requests.get(url=url, params=params, headers=headers, verify=True)

    @staticmethod
    def next_page_or_end(result: SuccessResult) -> Optional[int]:
        """Return ``result.next_page`` if set, otherwise ``result.last_page`` (possibly ``None``)."""
        if result.next_page is not None:
            return result.next_page
        return result.last_page

    def process(self):
        """Fetch result pages until the limit is exceeded or no page remains.

        Successful pages append their fragments to ``total_results`` and
        advance ``counter``; retry results sleep and repeat the same page.

        :raises Exception: on an ``ErrorResult`` or an unrecognised result type.
        """
        while self.counter <= self.limit and self.page is not None:
            api_response = self.do_search(self.page)
            result = self.handle_response(api_response)
            if isinstance(result, SuccessResult):
                print(f'\tSearching {self.counter} results.')
                self.total_results += ''.join(result.fragments)
                self.counter += len(result.fragments)
                self.page = self.next_page_or_end(result)
                time.sleep(getDelay())
            elif isinstance(result, RetryResult):
                sleepy_time = getDelay() + result.time
                print(f'\tRetrying page in {sleepy_time} seconds...')
                time.sleep(sleepy_time)
            elif isinstance(result, ErrorResult):
                raise Exception(f"\tException occurred: status_code: {result.status_code} reason: {result.body}")
            else:
                raise Exception("\tUnknown exception occurred")

    def get_emails(self):
        """Return ``myparser.Parser(...).emails()`` for the accumulated results."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.emails()

    def get_hostnames(self):
        """Return ``myparser.Parser(...).hostnames()`` for the accumulated results."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.hostnames()
|
|
@ -15,6 +15,12 @@ def bing_key():
|
||||||
keys = yaml.safe_load(api_keys)
|
keys = yaml.safe_load(api_keys)
|
||||||
return keys['apikeys']['bing']['key']
|
return keys['apikeys']['bing']['key']
|
||||||
|
|
||||||
|
@staticmethod
def github_key():
    """Return the value stored under apikeys -> github -> key in api-keys.yaml.

    The file is opened by relative path, so it is resolved against the
    current working directory.
    """
    with open('api-keys.yaml', 'r') as key_file:
        config = yaml.safe_load(key_file)
    return config['apikeys']['github']['key']
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hunter_key():
|
def hunter_key():
|
||||||
with open('api-keys.yaml', 'r') as api_keys:
|
with open('api-keys.yaml', 'r') as api_keys:
|
||||||
|
@ -66,6 +72,7 @@ def get_supportedengines():
|
||||||
'dnsdumpster',
|
'dnsdumpster',
|
||||||
'dogpile',
|
'dogpile',
|
||||||
'duckduckgo',
|
'duckduckgo',
|
||||||
|
'github-code',
|
||||||
'google',
|
'google',
|
||||||
'google-certificates',
|
'google-certificates',
|
||||||
'hunter',
|
'hunter',
|
||||||
|
|
Loading…
Reference in a new issue