mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-22 16:26:34 +08:00
commit
97b88239f6
|
@ -40,6 +40,8 @@ Passive:
|
|||
|
||||
* duckduckgo: DuckDuckGo search engine - www.duckduckgo.com
|
||||
|
||||
* github-code: GitHub code search engine (requires a GitHub Personal Access Token, see below) - www.github.com
|
||||
|
||||
* google: Google search engine (Optional Google dorking.) - www.google.com
|
||||
|
||||
* google-certificates: Google Certificate Transparency report
|
||||
|
@ -83,6 +85,7 @@ Modules that require an API key:
|
|||
Add your keys to api-keys.yaml
|
||||
|
||||
* bingapi
|
||||
* github
|
||||
* hunter
|
||||
* intelx
|
||||
* securityTrails
|
||||
|
|
|
@ -12,4 +12,7 @@ apikeys:
|
|||
key:
|
||||
|
||||
shodan:
|
||||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||
|
||||
github:
|
||||
key:
|
||||
|
|
0
tests/discovery/__init__.py
Normal file
0
tests/discovery/__init__.py
Normal file
116
tests/discovery/test_githubcode.py
Normal file
116
tests/discovery/test_githubcode.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
from theHarvester.discovery import githubcode
|
||||
from theHarvester.discovery.githubcode import RetryResult, ErrorResult, SuccessResult
|
||||
from theHarvester.discovery.constants import MissingKey
|
||||
from theHarvester.lib.core import Core
|
||||
from unittest.mock import MagicMock
|
||||
from requests import Response
|
||||
import pytest
|
||||
|
||||
|
||||
class TestSearchGithubCode:
    """Unit tests for ``githubcode.SearchGithubCode``.

    No network access is performed: responses are ``requests.Response``
    objects whose ``json`` method is replaced with a ``MagicMock``, and
    ``Core.github_key`` is patched through pytest's ``monkeypatch`` fixture
    so the patch is undone after every test instead of leaking into others.
    """

    class OkResponse:
        # HTTP 200 whose body carries two text-match fragments.
        response = Response()
        json = {
            "items": [
                {
                    "text_matches": [
                        {
                            "fragment": "test1"
                        }
                    ]
                },
                {
                    "text_matches": [
                        {
                            "fragment": "test2"
                        }
                    ]
                }
            ]
        }
        response.status_code = 200
        response.json = MagicMock(return_value=json)

    class FailureResponse:
        # HTTP 401: handle_response is expected to report an ErrorResult.
        response = Response()
        response.json = MagicMock(return_value={})
        response.status_code = 401

    class RetryResponse:
        # HTTP 403: handle_response is expected to report a RetryResult.
        response = Response()
        response.json = MagicMock(return_value={})
        response.status_code = 403

    class MalformedResponse:
        # HTTP 200 whose items lack usable "text_matches"/"fragment" entries.
        response = Response()
        json = {
            "items": [
                {
                    "fail": True
                },
                {
                    "text_matches": []
                },
                {
                    "text_matches": [
                        {
                            "weird": "result"
                        }
                    ]
                }
            ]
        }
        response.json = MagicMock(return_value=json)
        response.status_code = 200

    @pytest.fixture
    def searcher(self, monkeypatch):
        """Return a SearchGithubCode built with a dummy API key."""
        monkeypatch.setattr(Core, "github_key", MagicMock(return_value="lol"))
        return githubcode.SearchGithubCode(word="test", limit=500)

    def test_missing_key(self, monkeypatch):
        monkeypatch.setattr(Core, "github_key", MagicMock(return_value=None))
        # Only the constructor call belongs inside the raises block.
        with pytest.raises(MissingKey):
            githubcode.SearchGithubCode(word="test", limit=500)

    def test_fragments_from_response(self, searcher):
        test_result = searcher.fragments_from_response(self.OkResponse.response)
        assert test_result == ["test1", "test2"]

    def test_invalid_fragments_from_response(self, searcher):
        test_result = searcher.fragments_from_response(self.MalformedResponse.response)
        assert test_result == []

    def test_handle_response_ok(self, searcher):
        test_result = searcher.handle_response(self.OkResponse.response)
        assert isinstance(test_result, SuccessResult)

    def test_handle_response_retry(self, searcher):
        test_result = searcher.handle_response(self.RetryResponse.response)
        assert isinstance(test_result, RetryResult)

    def test_handle_response_fail(self, searcher):
        test_result = searcher.handle_response(self.FailureResponse.response)
        assert isinstance(test_result, ErrorResult)

    def test_next_page(self, searcher):
        test_result = githubcode.SuccessResult([], next_page=2, last_page=4)
        assert searcher.next_page_or_end(test_result) == 2

    def test_last_page(self, searcher):
        test_result = githubcode.SuccessResult([], None, None)
        assert searcher.next_page_or_end(test_result) is None
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
|
@ -54,7 +54,7 @@ def start():
|
|||
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
|
||||
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
||||
dogpile, duckduckgo, google,
|
||||
dogpile, duckduckgo, github-code, google,
|
||||
google-certificates, hunter, intelx,
|
||||
linkedin, netcraft, securityTrails, threatcrowd,
|
||||
trello, twitter, vhost, virustotal, yahoo, all''')
|
||||
|
@ -194,6 +194,24 @@ def start():
|
|||
db.store_all(word, all_hosts, 'email', 'duckduckgo')
|
||||
db.store_all(word, all_hosts, 'host', 'duckduckgo')
|
||||
|
||||
elif engineitem == 'github-code':
|
||||
print('\033[94m[*] Searching Github (code). \033[0m')
|
||||
try:
|
||||
from theHarvester.discovery import githubcode
|
||||
search = githubcode.SearchGithubCode(word, limit)
|
||||
search.process()
|
||||
emails = filter(search.get_emails())
|
||||
all_emails.extend(emails)
|
||||
hosts = filter(search.get_hostnames())
|
||||
all_hosts.extend(hosts)
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, all_hosts, 'host', 'github-code')
|
||||
db.store_all(word, all_emails, 'email', 'github-code')
|
||||
except MissingKey as ex:
|
||||
print(ex)
|
||||
else:
|
||||
pass
|
||||
|
||||
elif engineitem == 'google':
|
||||
print('\033[94m[*] Searching Google. \033[0m')
|
||||
search = googlesearch.search_google(word, limit, start)
|
||||
|
|
126
theHarvester/discovery/githubcode.py
Normal file
126
theHarvester/discovery/githubcode.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
from theHarvester.discovery.constants import *
|
||||
from theHarvester.lib.core import *
|
||||
from theHarvester.parsers import myparser
|
||||
import requests
|
||||
from requests import Response
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional, NamedTuple
|
||||
import urllib.parse as urlparse
|
||||
|
||||
|
||||
class RetryResult(NamedTuple):
    """Signals that the request should be attempted again after a pause."""

    # Seconds to wait (in addition to the regular delay) before retrying.
    time: float
|
||||
|
||||
|
||||
class SuccessResult(NamedTuple):
    """Outcome of a successful code-search request."""

    # Text-match fragments extracted from the response body.
    fragments: List[str]
    # Page number from the "next" pagination link, or None when absent.
    next_page: Optional[int]
    # Page number from the "last" pagination link, or None when absent.
    last_page: Optional[int]
|
||||
|
||||
|
||||
class ErrorResult(NamedTuple):
    """Outcome of a request that failed and should not be retried."""

    # HTTP status code of the failed response.
    status_code: int
    # Decoded JSON body when available, otherwise the raw response text.
    # Annotated with typing.Any (the builtin function ``any`` is not a type).
    body: Any
|
||||
|
||||
|
||||
class SearchGithubCode:
    """Query GitHub's code-search API for a word and collect matching text.

    The text fragments returned by the API are concatenated into
    ``total_results``; ``get_emails`` and ``get_hostnames`` run the project
    parser over that text.
    """

    def __init__(self, word, limit):
        """Prepare a search for ``word`` that stops once ``limit`` fragments are exceeded.

        Raises:
            MissingKey: if no GitHub key is configured.
        """
        self.word = word
        self.total_results = ""
        self.server = 'api.github.com'
        self.hostname = 'api.github.com'
        self.limit = limit
        self.counter = 0
        self.page = 1
        self.key = Core.github_key()
        # Without a personal access token GitHub narrows search capabilities
        # significantly and rate-limits far more severely, so a key is required.
        # https://developer.github.com/v3/search/#rate-limit
        if self.key is None:
            raise MissingKey(True)

    @staticmethod
    def fragments_from_response(response: "Response") -> List[str]:
        """Return every non-null ``fragment`` found under ``items[*].text_matches``.

        Items without ``text_matches`` and matches without ``fragment`` are
        skipped, so a malformed body yields an empty list.
        """
        items = response.json().get('items') or []
        fragments: List[str] = []
        for item in items:
            for match in item.get("text_matches") or []:
                fragment = match.get("fragment")
                if fragment is not None:
                    fragments.append(fragment)
        return fragments

    @staticmethod
    def page_from_response(page: str, response: "Response") -> Optional[int]:
        """Return the page number of the ``page`` relation in the response links.

        Args:
            page: link relation name to look up, e.g. ``"next"`` or ``"last"``.
            response: object exposing a ``links`` mapping of relation -> link dict.

        Returns:
            The integer ``page`` query parameter of the link URL, or ``None``
            when the relation or the parameter is missing.
        """
        page_link = response.links.get(page)
        if not page_link:
            return None
        query = urlparse.urlparse(page_link.get("url")).query
        # Use a separate local instead of rebinding the ``page`` parameter.
        page_values = urlparse.parse_qs(query).get('page')
        if not page_values or not page_values[0]:
            return None
        return int(page_values[0])

    def handle_response(self, response: "Response") -> Optional[Any]:
        """Classify ``response`` as a SuccessResult, RetryResult or ErrorResult.

        A 2xx response is a success; 403 and 429 ask for a retry after 60
        seconds; anything else is an error carrying the JSON body, or the raw
        text when the body is not valid JSON.
        """
        if response.ok:
            return SuccessResult(
                self.fragments_from_response(response),
                self.page_from_response("next", response),
                self.page_from_response("last", response),
            )
        if response.status_code in (403, 429):
            return RetryResult(60)
        try:
            return ErrorResult(response.status_code, response.json())
        except ValueError:
            return ErrorResult(response.status_code, response.text)

    def do_search(self, page: Optional[int]) -> "Response":
        """Request one page of code-search results for the quoted search word.

        The query is sent through ``params`` so that characters such as
        ``&``, ``#`` or spaces in the word are URL-encoded rather than
        altering the query string.
        """
        params = {'q': f'"{self.word}"'}
        if page is not None:
            params['page'] = page
        headers = {
            'Host': self.hostname,
            'User-agent': Core.get_user_agent(),
            # The text-match media type makes the API include "text_matches".
            'Accept': "application/vnd.github.v3.text-match+json",
            'Authorization': 'token {}'.format(self.key)
        }
        url = f'https://{self.server}/search/code'
        return requests.get(url=url, params=params, headers=headers, verify=True)

    @staticmethod
    def next_page_or_end(result: "SuccessResult") -> Optional[int]:
        """Return the next page to fetch, falling back to ``last_page``; ``None`` ends the search."""
        if result.next_page is not None:
            return result.next_page
        return result.last_page

    def process(self):
        """Fetch result pages until the limit is exceeded or no page remains.

        Raises:
            Exception: when the API answers with a non-retryable error or an
                unrecognised result is produced.
        """
        while self.counter <= self.limit and self.page is not None:
            api_response = self.do_search(self.page)
            result = self.handle_response(api_response)
            if isinstance(result, SuccessResult):
                print(f'\tSearching {self.counter} results.')
                # One join per page instead of repeated string concatenation.
                self.total_results += "".join(result.fragments)
                self.counter += len(result.fragments)
                self.page = self.next_page_or_end(result)
                time.sleep(getDelay())
            elif isinstance(result, RetryResult):
                sleepy_time = getDelay() + result.time
                print(f'\tRetrying page in {sleepy_time} seconds...')
                time.sleep(sleepy_time)
            elif isinstance(result, ErrorResult):
                raise Exception(f"\tException occurred: status_code: {result.status_code} reason: {result.body}")
            else:
                raise Exception("\tUnknown exception occurred")

    def get_emails(self):
        """Return the e-mail addresses the parser finds in the collected text."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.emails()

    def get_hostnames(self):
        """Return the hostnames the parser finds in the collected text."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.hostnames()
|
|
@ -15,6 +15,12 @@ def bing_key():
|
|||
keys = yaml.safe_load(api_keys)
|
||||
return keys['apikeys']['bing']['key']
|
||||
|
||||
@staticmethod
def github_key():
    """Return the GitHub key stored in ``api-keys.yaml``, or ``None`` if unset.

    The file is opened relative to the current working directory. A missing
    ``apikeys``/``github`` section or an empty ``key`` yields ``None``
    instead of raising ``KeyError``, so configuration files that have no
    ``github`` entry are reported as a missing key by the caller.
    """
    with open('api-keys.yaml', 'r') as api_keys:
        # safe_load returns None for an empty document.
        keys = yaml.safe_load(api_keys) or {}
    github_section = (keys.get('apikeys') or {}).get('github') or {}
    return github_section.get('key')
|
||||
|
||||
@staticmethod
|
||||
def hunter_key():
|
||||
with open('api-keys.yaml', 'r') as api_keys:
|
||||
|
@ -66,6 +72,7 @@ def get_supportedengines():
|
|||
'dnsdumpster',
|
||||
'dogpile',
|
||||
'duckduckgo',
|
||||
'github-code',
|
||||
'google',
|
||||
'google-certificates',
|
||||
'hunter',
|
||||
|
|
Loading…
Reference in a new issue