mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-24 14:32:57 +08:00
WIP: Adding github-code search
This commit is contained in:
parent
f8cc29b6f2
commit
f2b2c0fbf9
2 changed files with 25 additions and 17 deletions
|
@ -15,4 +15,4 @@ apikeys:
|
|||
key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt
|
||||
|
||||
github:
|
||||
key: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
key:
|
||||
|
|
|
@ -32,9 +32,11 @@ def __init__(self, word, limit):
|
|||
self.hostname = 'api.github.com'
|
||||
self.limit = limit
|
||||
self.counter = 0
|
||||
self.page = None
|
||||
self.page = 1
|
||||
self.key = Core.github_key()
|
||||
# If you don't have a personal access token, github narrows your search capabilities significantly
|
||||
# and rate limits you more severely
|
||||
# https://developer.github.com/v3/search/#rate-limit
|
||||
if self.key is None:
|
||||
raise MissingKey(True)
|
||||
|
||||
|
@ -56,7 +58,8 @@ def next_page_from_response(response: Response) -> Optional[int]:
|
|||
parsed = urlparse.urlparse(next_link.get("url"))
|
||||
params = urlparse.parse_qs(parsed.query)
|
||||
page = params.get('page') or [None]
|
||||
return page[0]
|
||||
next_page = page[0] and int(page[0])
|
||||
return next_page
|
||||
else:
|
||||
return None
|
||||
|
||||
|
@ -67,18 +70,18 @@ def last_page_from_response(response: Response) -> Optional[int]:
|
|||
parsed = urlparse.urlparse(next_link.get("url"))
|
||||
params = urlparse.parse_qs(parsed.query)
|
||||
page = params.get('page') or [None]
|
||||
return page[0]
|
||||
last_page = page[0] and int(page[0])
|
||||
return last_page
|
||||
else:
|
||||
return None
|
||||
|
||||
def handle_response(self, response: Response) -> Optional[Any]:
|
||||
if response.ok:
|
||||
fragments = self.fragments_from_response(response)
|
||||
print(fragments)
|
||||
next_page = self.next_page_from_response(response)
|
||||
last_page = self.last_page_from_response(response)
|
||||
return SuccessResult(fragments, next_page, last_page)
|
||||
elif response.status_code == 429:
|
||||
elif response.status_code == 429 or response.status_code == 403:
|
||||
return RetryResult(60)
|
||||
else:
|
||||
return ErrorResult(response.status_code, response.reason)
|
||||
|
@ -101,26 +104,31 @@ def do_search(self, page: Optional[int]) -> Optional[Any]:
|
|||
except Exception as e:
|
||||
print(f'Error Occurred: {e}')
|
||||
|
||||
@staticmethod
|
||||
def next_page_or_end(page: Optional[int], result: SuccessResult) -> Optional[int]:
|
||||
if page is not None and result.last_page is not None and page < result.last_page:
|
||||
return page + 1
|
||||
else:
|
||||
return None
|
||||
|
||||
def process(self):
|
||||
while len(self.total_results) <= self.limit:
|
||||
print(self.total_results)
|
||||
print(self.limit)
|
||||
while self.counter <= self.limit and self.page is not None:
|
||||
result = self.do_search(self.page)
|
||||
if type(result) == SuccessResult:
|
||||
print(f'\tSearching {self.counter} results.')
|
||||
for f in result.fragments:
|
||||
self.total_results += f
|
||||
self.counter = self.counter + 1
|
||||
if self.page != result.last_page:
|
||||
self.page = result.next_page
|
||||
else:
|
||||
break
|
||||
print(f'\tSearching {self.counter} results.')
|
||||
|
||||
self.page = self.next_page_or_end(self.page, result)
|
||||
time.sleep(getDelay())
|
||||
elif type(result) == RetryResult:
|
||||
print(f'\tRetrying page...')
|
||||
time.sleep(getDelay() + result.time)
|
||||
sleepy_time = getDelay() + result.time
|
||||
print(f'\tRetrying page in {sleepy_time} seconds...')
|
||||
time.sleep(sleepy_time)
|
||||
else:
|
||||
raise Exception("Exception occurred: status_code: {} reason: {}".format(result.status_code,
|
||||
result.text))
|
||||
result.message))
|
||||
|
||||
def get_emails(self):
|
||||
rawres = myparser.Parser(self.total_results, self.word)
|
||||
|
|
Loading…
Reference in a new issue