diff --git a/api-keys.yaml b/api-keys.yaml index 8b41630f..5cbffed6 100644 --- a/api-keys.yaml +++ b/api-keys.yaml @@ -15,4 +15,4 @@ apikeys: key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt github: - key: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + key: diff --git a/theHarvester/discovery/githubcode.py b/theHarvester/discovery/githubcode.py index 8aded983..f8a492a8 100644 --- a/theHarvester/discovery/githubcode.py +++ b/theHarvester/discovery/githubcode.py @@ -32,9 +32,11 @@ def __init__(self, word, limit): self.hostname = 'api.github.com' self.limit = limit self.counter = 0 - self.page = None + self.page = 1 self.key = Core.github_key() # If you don't have a personal access token, github narrows your search capabilities significantly + # and rate limits you more severely + # https://developer.github.com/v3/search/#rate-limit if self.key is None: raise MissingKey(True) @@ -56,7 +58,8 @@ def next_page_from_response(response: Response) -> Optional[int]: parsed = urlparse.urlparse(next_link.get("url")) params = urlparse.parse_qs(parsed.query) page = params.get('page') or [None] - return page[0] + next_page = page[0] and int(page[0]) + return next_page else: return None @@ -67,18 +70,18 @@ def last_page_from_response(response: Response) -> Optional[int]: parsed = urlparse.urlparse(next_link.get("url")) params = urlparse.parse_qs(parsed.query) page = params.get('page') or [None] - return page[0] + last_page = page[0] and int(page[0]) + return last_page else: return None def handle_response(self, response: Response) -> Optional[Any]: if response.ok: fragments = self.fragments_from_response(response) - print(fragments) next_page = self.next_page_from_response(response) last_page = self.last_page_from_response(response) return SuccessResult(fragments, next_page, last_page) - elif response.status_code == 429: + elif response.status_code == 429 or response.status_code == 403: return RetryResult(60) else: return ErrorResult(response.status_code, response.reason) @@ -101,26 +104,31 @@ def do_search(self, page: Optional[int]) -> Optional[Any]: except Exception as e: print(f'Error Occurred: {e}') + @staticmethod + def next_page_or_end(page: Optional[int], result: SuccessResult) -> Optional[int]: + if page is not None and result.last_page is not None and page < result.last_page: + return page + 1 + else: + return None + def process(self): - while len(self.total_results) <= self.limit: - print(self.total_results) - print(self.limit) + while self.counter <= self.limit and self.page is not None: result = self.do_search(self.page) if type(result) == SuccessResult: + print(f'\tSearching {self.counter} results.') for f in result.fragments: self.total_results += f self.counter = self.counter + 1 - if self.page != result.last_page: - self.page = result.next_page - else: - break - print(f'\tSearching {self.counter} results.') + + self.page = self.next_page_or_end(self.page, result) + time.sleep(getDelay()) elif type(result) == RetryResult: - print(f'\tRetrying page...') - time.sleep(getDelay() + result.time) + sleepy_time = getDelay() + result.time + print(f'\tRetrying page in {sleepy_time} seconds...') + time.sleep(sleepy_time) else: raise Exception("Exception occurred: status_code: {} reason: {}".format(result.status_code, - result.text)) + result.message)) def get_emails(self): rawres = myparser.Parser(self.total_results, self.word)