diff --git a/.travis.yml b/.travis.yml index d56623c5..fd7d818a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,20 @@ dist: bionic language: python -python: -- '3.6' -- '3.7' -- '3.8-dev' +cache: pip +matrix: + include: + - python: '3.6' + env: TEST_SUITE=suite_3_6 + - python: '3.7' + env: TEST_SUITE=suite_3_7 + - python: '3.8-dev' + env: TEST_SUITE=suite_3_8_dev before_install: - pip install -r requirements.txt install: - python setup.py test script: -- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,threatcrowd,trello,twitter,virustotal,yahoo +- python theHarvester.py -d metasploit.com -b baidu,bing,censys,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,threatcrowd,trello,twitter,virustotal,yahoo -l 200 - pytest - flake8 . --count --show-source --statistics #- mypy *.py diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index 43bab578..f0dc79b7 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -67,8 +67,8 @@ def start(): word = args.domain # type: str if args.source is not None: - engines = set(map(str.strip, args.source.split(','))) - + engines = sorted(set(map(str.strip, args.source.split(',')))) + # Iterate through search engines in order if set(engines).issubset(Core.get_supportedengines()): print(f'\033[94m[*] Target: {word} \n \033[0m') diff --git a/theHarvester/discovery/googlesearch.py b/theHarvester/discovery/googlesearch.py index ee9ab214..4eb64843 100644 --- a/theHarvester/discovery/googlesearch.py +++ b/theHarvester/discovery/googlesearch.py @@ -30,13 +30,13 @@ def do_search(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results @@ -51,13 +51,13 @@ def do_search_profiles(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results @@ -145,13 +145,13 @@ def send_dorks(self): # Helper function to minimize code reusability. self.results = req.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(link) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(link) except Exception: - pass + # google blocked, no useful result + return time.sleep(getDelay()) self.totalresults += self.results except Exception as e: diff --git a/theHarvester/discovery/linkedinsearch.py b/theHarvester/discovery/linkedinsearch.py index 5c59a2af..a0b165f8 100644 --- a/theHarvester/discovery/linkedinsearch.py +++ b/theHarvester/discovery/linkedinsearch.py @@ -24,13 +24,13 @@ def do_search(self): self.results = r.text if search(self.results): try: - if isinstance(search(self.results), bool): + self.results = google_workaround(urly) + if isinstance(self.results, bool): print('Google is blocking your ip and the workaround, returning') return - else: - self.results = google_workaround(urly) except Exception: - pass + # google blocked, no useful result + return except Exception as e: print(e) time.sleep(getDelay()) diff --git a/theHarvester/discovery/trello.py b/theHarvester/discovery/trello.py index 2718139e..c3d1c5ad 100644 --- a/theHarvester/discovery/trello.py +++ b/theHarvester/discovery/trello.py @@ -5,6 +5,7 @@ import random import time + class SearchTrello: def __init__(self, word): @@ -36,9 +37,9 @@ def do_search(self): except Exception as e: print(e) self.totalresults += self.results - time.sleep(getDelay()-.5) + time.sleep(getDelay() - .5) except Exception as e: - pass + print(f'An exception has occurred in trello: {e}') def get_emails(self): rawres = myparser.Parser(self.totalresults, self.word) @@ -69,6 +70,3 @@ def process(self): def get_results(self) -> tuple: return self.get_emails(), self.hostnames, self.trello_urls - - - diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py index d1bfff1e..ddc03c94 100644 --- a/theHarvester/parsers/myparser.py +++ b/theHarvester/parsers/myparser.py @@ -50,7 +50,7 @@ def hostnames(self): reg_hosts = re.compile(r'[a-zA-Z0-9.-]*\.' + self.word.replace('www.', '')) self.temp = reg_hosts.findall(self.results) hostnames.extend(self.unique()) - return hostnames + return list(set(hostnames)) def people_googleplus(self): self.results = re.sub('', '', self.results) @@ -145,7 +145,7 @@ def urls(self): urls = {match.group().strip() for match in found} return urls - def unique(self): + def unique(self) -> list: self.new = [] for x in self.temp: if x not in self.new: