Merge pull request #252 from cruatta/github-code

GitHub code
This commit is contained in:
J.Townsend 2019-07-07 23:13:48 +01:00 committed by GitHub
commit 97b88239f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 275 additions and 2 deletions

View file

@ -40,6 +40,8 @@ Passive:
* duckduckgo: DuckDuckGo search engine - www.duckduckgo.com
* github-code: GitHub code search engine (requires a GitHub Personal Access Token, see below) - www.github.com
* google: Google search engine (Optional Google dorking.) - www.google.com
* google-certificates: Google Certificate Transparency report
@ -83,6 +85,7 @@ Modules that require an API key:
Add your keys to api-keys.yaml
* bingapi
* github
* hunter
* intelx
* securityTrails

View file

@ -13,3 +13,6 @@ apikeys:
shodan:
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
github:
key:

View file

View file

@ -0,0 +1,116 @@
from theHarvester.discovery import githubcode
from theHarvester.discovery.githubcode import RetryResult, ErrorResult, SuccessResult
from theHarvester.discovery.constants import MissingKey
from theHarvester.lib.core import Core
from unittest.mock import MagicMock
from requests import Response
import pytest
class TestSearchGithubCode:
    """Unit tests for ``githubcode.SearchGithubCode`` driven by canned ``Response`` objects."""

    class OkResponse:
        # 200 reply whose two items each carry a single text-match fragment.
        json = {
            "items": [
                {"text_matches": [{"fragment": "test1"}]},
                {"text_matches": [{"fragment": "test2"}]},
            ]
        }
        response = Response()
        response.status_code = 200
        response.json = MagicMock(return_value=json)

    class FailureResponse:
        # 401 reply with an empty JSON body.
        response = Response()
        response.status_code = 401
        response.json = MagicMock(return_value={})

    class RetryResponse:
        # 403 reply with an empty JSON body.
        response = Response()
        response.status_code = 403
        response.json = MagicMock(return_value={})

    class MalformedResponse:
        # 200 reply whose items contain no usable "fragment" entries.
        json = {
            "items": [
                {"fail": True},
                {"text_matches": []},
                {"text_matches": [{"weird": "result"}]},
            ]
        }
        response = Response()
        response.status_code = 200
        response.json = MagicMock(return_value=json)

    @staticmethod
    def _new_search():
        """Stub the API-key lookup with a dummy key and build a searcher."""
        Core.github_key = MagicMock(return_value="lol")
        return githubcode.SearchGithubCode(word="test", limit=500)

    def test_missing_key(self):
        """Constructing the searcher without a key raises ``MissingKey``."""
        Core.github_key = MagicMock(return_value=None)
        with pytest.raises(MissingKey):
            githubcode.SearchGithubCode(word="test", limit=500)

    def test_fragments_from_response(self):
        """Fragments are returned in item order."""
        searcher = self._new_search()
        fragments = searcher.fragments_from_response(self.OkResponse.response)
        assert fragments == ["test1", "test2"]

    def test_invalid_fragments_from_response(self):
        """Items without fragments yield an empty list."""
        searcher = self._new_search()
        fragments = searcher.fragments_from_response(self.MalformedResponse.response)
        assert fragments == []

    def test_handle_response_ok(self):
        """A 200 reply is classified as ``SuccessResult``."""
        searcher = self._new_search()
        outcome = searcher.handle_response(self.OkResponse.response)
        assert isinstance(outcome, SuccessResult)

    def test_handle_response_retry(self):
        """A 403 reply is classified as ``RetryResult``."""
        searcher = self._new_search()
        outcome = searcher.handle_response(self.RetryResponse.response)
        assert isinstance(outcome, RetryResult)

    def test_handle_response_fail(self):
        """A 401 reply is classified as ``ErrorResult``."""
        searcher = self._new_search()
        outcome = searcher.handle_response(self.FailureResponse.response)
        assert isinstance(outcome, ErrorResult)

    def test_next_page(self):
        """The next page number is used when one is present."""
        searcher = self._new_search()
        outcome = githubcode.SuccessResult([], next_page=2, last_page=4)
        assert searcher.next_page_or_end(outcome) == 2

    def test_last_page(self):
        """``None`` is returned when neither next nor last page is known."""
        searcher = self._new_search()
        outcome = githubcode.SuccessResult([], None, None)
        assert searcher.next_page_or_end(outcome) is None
if __name__ == '__main__':
    # pytest.main() returns the run's exit code instead of exiting the process;
    # pass it to SystemExit so failing tests give a non-zero exit status.
    raise SystemExit(pytest.main())

View file

@ -54,7 +54,7 @@ def start():
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
dogpile, duckduckgo, google,
dogpile, duckduckgo, github-code, google,
google-certificates, hunter, intelx,
linkedin, netcraft, securityTrails, threatcrowd,
trello, twitter, vhost, virustotal, yahoo, all''')
@ -194,6 +194,24 @@ def start():
db.store_all(word, all_hosts, 'email', 'duckduckgo')
db.store_all(word, all_hosts, 'host', 'duckduckgo')
elif engineitem == 'github-code':
print('\033[94m[*] Searching Github (code). \033[0m')
try:
from theHarvester.discovery import githubcode
search = githubcode.SearchGithubCode(word, limit)
search.process()
emails = filter(search.get_emails())
all_emails.extend(emails)
hosts = filter(search.get_hostnames())
all_hosts.extend(hosts)
db = stash.stash_manager()
db.store_all(word, all_hosts, 'host', 'github-code')
db.store_all(word, all_emails, 'email', 'github-code')
except MissingKey as ex:
print(ex)
else:
pass
elif engineitem == 'google':
print('\033[94m[*] Searching Google. \033[0m')
search = googlesearch.search_google(word, limit, start)

View file

@ -0,0 +1,126 @@
from theHarvester.discovery.constants import *
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
import requests
from requests import Response
import time
from typing import List, Dict, Any, Optional, NamedTuple
import urllib.parse as urlparse
class RetryResult(NamedTuple):
    """Outcome signalling that the same request should be attempted again."""

    # Extra wait, in seconds, that process() adds to its normal delay before retrying.
    time: float
class SuccessResult(NamedTuple):
    """Outcome of a successful search request."""

    # Text-match fragments extracted from the response body.
    fragments: List[str]
    # Page number taken from the response's "next" link, or None when absent.
    next_page: Optional[int]
    # Page number taken from the response's "last" link, or None when absent.
    last_page: Optional[int]
class ErrorResult(NamedTuple):
    """Outcome of a request that failed and should not be retried."""

    # HTTP status code of the failed response.
    status_code: int
    # Decoded JSON body when available, otherwise the raw response text.
    # Annotated with typing.Any; the builtin function ``any`` is not a type.
    body: Any
class SearchGithubCode:
    """Query the GitHub code-search API for a word and accumulate the matching fragments.

    Call :meth:`process` to run the paged search, then :meth:`get_emails` /
    :meth:`get_hostnames` to parse the accumulated text.
    """

    def __init__(self, word, limit):
        """Store the search parameters and load the GitHub API key.

        Args:
            word: Term to search for (sent as a quoted phrase).
            limit: Fragment count after which no further pages are requested.

        Raises:
            MissingKey: If ``Core.github_key()`` returns ``None``.
        """
        self.word = word
        self.total_results = ""
        self.server = 'api.github.com'
        self.hostname = 'api.github.com'
        self.limit = limit
        self.counter = 0
        self.page = 1
        self.key = Core.github_key()
        # Without a personal access token GitHub narrows search capabilities
        # significantly and rate limits far more severely, so a key is mandatory.
        # https://developer.github.com/v3/search/#rate-limit
        if self.key is None:
            raise MissingKey(True)

    @staticmethod
    def fragments_from_response(response: Response) -> List[str]:
        """Return every non-null ``fragment`` found under ``items[*].text_matches[*]``.

        Items lacking ``text_matches`` and matches lacking ``fragment`` are skipped.
        """
        items: List[Dict[str, Any]] = response.json().get('items') or []
        fragments: List[str] = []
        for item in items:
            for match in item.get("text_matches") or []:
                fragment = match.get("fragment")
                if fragment is not None:
                    fragments.append(fragment)
        return fragments

    @staticmethod
    def page_from_response(page: str, response: Response) -> Optional[int]:
        """Return the ``page`` query value of the response link named *page*.

        Args:
            page: Link relation to look up in ``response.links`` (e.g. ``"next"``).
            response: Response whose parsed links are inspected.

        Returns:
            The page number as an int, or ``None`` when the link or its
            ``page`` parameter is absent.
        """
        page_link = response.links.get(page)
        if not page_link:
            return None
        query = urlparse.urlparse(page_link.get("url")).query
        page_values = urlparse.parse_qs(query).get('page')
        if not page_values or not page_values[0]:
            return None
        return int(page_values[0])

    def handle_response(self, response: Response) -> Optional[Any]:
        """Classify *response* as a ``SuccessResult``, ``RetryResult`` or ``ErrorResult``."""
        if response.ok:
            results = self.fragments_from_response(response)
            next_page = self.page_from_response("next", response)
            last_page = self.page_from_response("last", response)
            return SuccessResult(results, next_page, last_page)
        if response.status_code in (429, 403):
            # Treated as rate limiting: ask the caller to wait and try again.
            return RetryResult(60)
        try:
            return ErrorResult(response.status_code, response.json())
        except ValueError:
            # Body was not valid JSON; fall back to the raw text.
            return ErrorResult(response.status_code, response.text)

    def do_search(self, page: Optional[int]) -> Response:
        """Request one page of code-search results for the quoted search word.

        Args:
            page: Page number to request, or ``None`` to omit the parameter.
        """
        # Percent-encode the quoted term so characters such as '&', '#' or spaces
        # in the search word cannot truncate or alter the query string.
        query = urlparse.quote(f'"{self.word}"')
        url = f'https://{self.server}/search/code?q={query}'
        if page is not None:
            url += f'&page={page}'
        headers = {
            'Host': self.hostname,
            'User-agent': Core.get_user_agent(),
            # Media type that makes the API include text_matches in each item.
            'Accept': "application/vnd.github.v3.text-match+json",
            'Authorization': f'token {self.key}',
        }
        return requests.get(url=url, headers=headers, verify=True)

    @staticmethod
    def next_page_or_end(result: SuccessResult) -> Optional[int]:
        """Return ``result.next_page`` if set, otherwise ``result.last_page`` (may be ``None``)."""
        if result.next_page is not None:
            return result.next_page
        return result.last_page

    def process(self):
        """Fetch pages until the limit is exceeded or no further page is available.

        Fragments are appended to ``self.total_results`` and counted in
        ``self.counter``. Retry results sleep and repeat the same page.

        Raises:
            Exception: On an ``ErrorResult`` or an unrecognised result type.
        """
        while self.counter <= self.limit and self.page is not None:
            api_response = self.do_search(self.page)
            result = self.handle_response(api_response)
            if isinstance(result, SuccessResult):
                print(f'\tSearching {self.counter} results.')
                # One join per page instead of repeated string +=.
                self.total_results += ''.join(result.fragments)
                self.counter += len(result.fragments)
                self.page = self.next_page_or_end(result)
                time.sleep(getDelay())
            elif isinstance(result, RetryResult):
                sleepy_time = getDelay() + result.time
                print(f'\tRetrying page in {sleepy_time} seconds...')
                time.sleep(sleepy_time)
            elif isinstance(result, ErrorResult):
                raise Exception(f"\tException occurred: status_code: {result.status_code} reason: {result.body}")
            else:
                raise Exception("\tUnknown exception occurred")

    def get_emails(self):
        """Parse the accumulated fragments and return the parser's email results."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.emails()

    def get_hostnames(self):
        """Parse the accumulated fragments and return the parser's hostname results."""
        rawres = myparser.Parser(self.total_results, self.word)
        return rawres.hostnames()

View file

@ -15,6 +15,12 @@ def bing_key():
keys = yaml.safe_load(api_keys)
return keys['apikeys']['bing']['key']
@staticmethod
def github_key():
    """Return the GitHub key from ``api-keys.yaml``, or ``None`` when it is not set.

    A file with no ``apikeys``/``github`` section (for example one created
    before the GitHub entry existed) yields ``None`` instead of raising
    ``KeyError``/``TypeError``, so callers that test for ``None`` can report a
    missing key.
    """
    with open('api-keys.yaml', 'r') as api_keys:
        # An empty YAML document loads as None; normalise it to an empty mapping.
        keys = yaml.safe_load(api_keys) or {}
    github_section = (keys.get('apikeys') or {}).get('github') or {}
    return github_section.get('key')
@staticmethod
def hunter_key():
with open('api-keys.yaml', 'r') as api_keys:
@ -66,6 +72,7 @@ def get_supportedengines():
'dnsdumpster',
'dogpile',
'duckduckgo',
'github-code',
'google',
'google-certificates',
'hunter',