From d6558bcc65972e1f5c7e5d97f13159ba52a83cb0 Mon Sep 17 00:00:00 2001 From: VKen Date: Thu, 19 Sep 2019 15:18:50 +0800 Subject: [PATCH 1/7] fixed code flow to run google_workaround for googlesearch linkedinsearch --- theHarvester/discovery/googlesearch.py | 24 ++++++++++++------------ theHarvester/discovery/linkedinsearch.py | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/theHarvester/discovery/googlesearch.py b/theHarvester/discovery/googlesearch.py index ee9ab214..4eb64843 100644 --- a/theHarvester/discovery/googlesearch.py +++ b/theHarvester/discovery/googlesearch.py @@ -30,13 +30,13 @@ def do_search(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results @@ -51,13 +51,13 @@ def do_search_profiles(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results @@ -145,13 +145,13 @@ def send_dorks(self): # Helper function to minimize code reusability. self.results = req.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(link) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(link) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results except Exception as e: diff --git a/theHarvester/discovery/linkedinsearch.py b/theHarvester/discovery/linkedinsearch.py index 5c59a2af..a0b165f8 100644 --- a/theHarvester/discovery/linkedinsearch.py +++ b/theHarvester/discovery/linkedinsearch.py @@ -24,13 +24,13 @@ def do_search(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return except Exception as e: print(e) time.sleep(getDelay()) From 7fc1201318a75505381de9e65f4b38fd1fbf3e58 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 13:38:56 -0400 Subject: [PATCH 2/7] Updated trello search to fix pep8 errors. --- theHarvester/discovery/trello.py | 6 ++---- theHarvester/parsers/myparser.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/theHarvester/discovery/trello.py b/theHarvester/discovery/trello.py index 2718139e..2b2ff7b1 100644 --- a/theHarvester/discovery/trello.py +++ b/theHarvester/discovery/trello.py @@ -5,6 +5,7 @@ import random import time + class SearchTrello: def __init__(self, word): @@ -36,7 +37,7 @@ def do_search(self): except Exception as e: print(e) self.totalresults += self.results - time.sleep(getDelay()-.5) + time.sleep(getDelay() - .5) except Exception as e: pass @@ -69,6 +70,3 @@ def process(self): def get_results(self) -> tuple: return self.get_emails(), self.hostnames, self.trello_urls - - - diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py index d1bfff1e..ddc03c94 100644 --- a/theHarvester/parsers/myparser.py +++ b/theHarvester/parsers/myparser.py @@ -50,7 +50,7 @@ def hostnames(self): reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', '')) self.temp = reg_hosts.findall(self.results) hostnames.extend(self.unique()) - return hostnames + return list(set(hostnames)) def people_googleplus(self): self.results = re.sub('', '', self.results) @@ -145,7 +145,7 @@ def urls(self): urls = {match.group().strip() for match in found} return urls - def unique(self): + def unique(self) -> list: self.new = [] for x in self.temp: if x not in self.new: From 8e3bc4b3b99591ca184ff39953d80f58ee94ad00 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 13:59:13 -0400 Subject: [PATCH 3/7] Updated trello to conform to pep8 standards. --- theHarvester/discovery/trello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/theHarvester/discovery/trello.py b/theHarvester/discovery/trello.py index 2b2ff7b1..c3d1c5ad 100644 --- a/theHarvester/discovery/trello.py +++ b/theHarvester/discovery/trello.py @@ -39,7 +39,7 @@ def do_search(self): self.totalresults += self.results time.sleep(getDelay() - .5) except Exception as e: - pass + print(f'An exception has occurred in trello: {e}') def get_emails(self): rawres = myparser.Parser(self.totalresults, self.word) From 8348b5cd022c2d357fd44033138de6e7bdee39c5 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 14:12:24 -0400 Subject: [PATCH 4/7] Updated travis config to test matrices. --- .travis.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index d56623c5..d1f7b8e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,14 @@ dist: bionic language: python -python: -- '3.6' -- '3.7' -- '3.8-dev' + +matrix: + include: + - python: '3.6' + env: TEST_SUITE=suite_3_6 + - python: '3.7' + env: TEST_SUITE=suite_3_7 + - python: '3.8-dev' + env: TEST_SUITE=suite_3_8_dev before_install: - pip install -r requirements.txt install: From 3e816459bc1166652c8fc375886147ceb0860bc6 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 14:27:49 -0400 Subject: [PATCH 5/7] Made change to main.py to iterate through search engines in order. --- theHarvester/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index 43bab578..f0dc79b7 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -67,8 +67,8 @@ def start(): word = args.domain # type: str if args.source is not None: - engines = set(map(str.strip, args.source.split(','))) - + engines = sorted(set(map(str.strip, args.source.split(',')))) + # Iterate through search engines in order if set(engines).issubset(Core.get_supportedengines()): print(f'\033[94m[*] Target: {word} \n \033[0m') From fafa451c34eb1ff78f728f791c8ded94139e2c08 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 14:30:14 -0400 Subject: [PATCH 6/7] Updated travis to cache pip. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d1f7b8e1..8ebe51b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ dist: bionic language: python - +cache: pip matrix: include: - python: '3.6' From 15e61523a41aacfe9ab3be6ba315d01825a6e416 Mon Sep 17 00:00:00 2001 From: NotoriousRebel Date: Sun, 22 Sep 2019 14:46:09 -0400 Subject: [PATCH 7/7] Set limit to 200. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8ebe51b8..fd7d818a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: install: - python setup.py test script: -- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,threatcrowd,trello,twitter,virustotal,yahoo +- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,threatcrowd,trello,twitter,virustotal,yahoo -l 200 - pytest - flake8 . --count --show-source --statistics #- mypy *.py