From 4d825aaf3370acdd0ddddb0edf39802fd8ec24b2 Mon Sep 17 00:00:00 2001 From: Branch Vincent Date: Sun, 8 Oct 2023 11:45:23 -0700 Subject: [PATCH] build: migrate to pep517 backend (#1505) * build: migrate to pep517 backend * test loading config files --- .github/workflows/theHarvester.yml | 36 +++++----- Dockerfile | 9 +-- pyproject.toml | 42 ++++++++++- requirements/dev.txt | 1 - restfulHarvest.py | 42 +---------- setup.py | 42 ----------- tests/lib/test_core.py | 72 +++++++++++++++++++ theHarvester.py | 22 +----- .../data/api-keys.yaml | 0 .../data/proxies.yaml | 0 .../data/wordlists}/dns-big.txt | 0 .../data/wordlists}/dns-names.txt | 0 .../data/wordlists}/dorks.txt | 0 .../data/wordlists}/general/common.txt | 0 .../data/wordlists}/names_small.txt | 0 theHarvester/discovery/dnssearch.py | 17 ++--- theHarvester/lib/core.py | 54 +++++++------- theHarvester/lib/version.py | 4 +- theHarvester/restfulHarvest.py | 46 ++++++++++++ theHarvester/theHarvester.py | 25 +++++++ 20 files changed, 244 insertions(+), 168 deletions(-) delete mode 100755 setup.py create mode 100644 tests/lib/test_core.py rename api-keys.yaml => theHarvester/data/api-keys.yaml (100%) rename proxies.yaml => theHarvester/data/proxies.yaml (100%) rename {wordlists => theHarvester/data/wordlists}/dns-big.txt (100%) rename {wordlists => theHarvester/data/wordlists}/dns-names.txt (100%) rename {wordlists => theHarvester/data/wordlists}/dorks.txt (100%) rename {wordlists => theHarvester/data/wordlists}/general/common.txt (100%) rename {wordlists => theHarvester/data/wordlists}/names_small.txt (100%) create mode 100644 theHarvester/restfulHarvest.py create mode 100644 theHarvester/theHarvester.py diff --git a/.github/workflows/theHarvester.yml b/.github/workflows/theHarvester.yml index 553fc797..a1673c10 100644 --- a/.github/workflows/theHarvester.yml +++ b/.github/workflows/theHarvester.yml @@ -28,13 +28,12 @@ jobs: - name: Install dependencies run: | pip install --upgrade pip - pip install wheel - pip install -r requirements/dev.txt + pip install .[dev] - name: Lint with black run: | black . --diff --check - + - name: Lint with isort run: | isort . --diff --check @@ -52,64 +51,63 @@ jobs: - name: Run theHarvester module Anubis run: | - python theHarvester.py -d apple.com -b anubis + theHarvester -d apple.com -b anubis - name: Run theHarvester module Baidu run: | - python theHarvester.py -d yale.edu -b baidu + theHarvester -d yale.edu -b baidu - name: Run theHarvester module Bing run: | - python theHarvester.py -d yale.edu -b bing + theHarvester -d yale.edu -b bing - name: Run theHarvester module CertSpotter run: | - python theHarvester.py -d yale.edu -b certspotter + theHarvester -d yale.edu -b certspotter - name: Run theHarvester module Crtsh run: | - python theHarvester.py -d hcl.com -b crtsh + theHarvester -d hcl.com -b crtsh - name: Run theHarvester module DnsDumpster run: | - python theHarvester.py -d yale.edu -b dnsdumpster + theHarvester -d yale.edu -b dnsdumpster - name: Run theHarvester module DuckDuckGo run: | - python theHarvester.py -d yale.edu -b duckduckgo + theHarvester -d yale.edu -b duckduckgo - name: Run theHarvester module HackerTarget run: | - python theHarvester.py -d yale.edu -b hackertarget + theHarvester -d yale.edu -b hackertarget - name: Run theHarvester module Intelx run: | - python theHarvester.py -d yale.edu -b intelx + theHarvester -d yale.edu -b intelx - name: Run theHarvester module Otx run: | - python theHarvester.py -d yale.edu -b otx - + theHarvester -d yale.edu -b otx - name: Run theHarvester module RapidDns run: | - python theHarvester.py -d yale.edu -b rapiddns + theHarvester -d yale.edu -b rapiddns - name: Run theHarvester module Threatminer run: | - python theHarvester.py -d yale.edu -b threatminer + theHarvester -d yale.edu -b threatminer - name: Run theHarvester module Urlscan run: | - python theHarvester.py -d yale.edu -b urlscan + theHarvester -d yale.edu -b urlscan - name: Run theHarvester module Yahoo run: | - python theHarvester.py -d yale.edu -b yahoo + theHarvester -d yale.edu -b yahoo - name: Run theHarvester module DNS brute force run: | - python theHarvester.py -d yale.edu -c + theHarvester -d yale.edu -c - name: Static type checking with mypy run: | diff --git a/Dockerfile b/Dockerfile index e939d926..d4956fa0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,11 @@ FROM alpine:3 LABEL maintainer="@jay_townsend1 & @NotoriousRebel1 (alpine @viardant)" -RUN mkdir /app -RUN mkdir /etc/theHarvester/ -COPY api-keys.yaml /etc/theHarvester/ -COPY proxies.yaml /etc/theHarvester/ WORKDIR /app COPY requirements.txt requirements.txt COPY requirements requirements RUN apk update && apk upgrade --available && apk add --no-cache musl-dev git libffi-dev gcc python3-dev py3-pip libxml2-dev libxslt-dev && python3 -m pip install --upgrade pip RUN python3 --version && pip3 install --no-cache-dir -r requirements.txt COPY . /app -RUN chmod +x ./*.py -ENTRYPOINT ["/app/theHarvester.py"] -ENTRYPOINT ["/app/restfulHarvest.py", "-H", "0.0.0.0", "-p", "80"] +RUN pip3 install --no-cache-dir . +ENTRYPOINT ["restfulHarvest", "-H", "0.0.0.0", "-p", "80"] EXPOSE 80 diff --git a/pyproject.toml b/pyproject.toml index 8ae1a2a4..153bc283 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,47 @@ +[project] +name = "theHarvester" +description = "theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test" +readme = "README.md" +authors = [ + { name = "Christian Martorella", email = "cmartorella@edge-security.com" }, + { name = "Jay Townsend", email = "townsend891@hotmail.com" }, + { name = "Matthew Brown", email = "36310667+NotoriousRebel@users.noreply.github.com" }, +] +requires-python = ">=3.9" +urls.Homepage = "https://github.com/laramies/theHarvester" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + "Operating System :: OS Independent", +] +dynamic = ["dependencies", "optional-dependencies", "version"] + +[project.scripts] +theHarvester = "theHarvester.theHarvester:main" +restfulHarvest = "theHarvester.restfulHarvest:main" + +[tool.setuptools.dynamic] +version = { attr = "theHarvester.lib.version.VERSION" } +dependencies = { file = "requirements/base.txt" } +optional-dependencies.dev = { file = "requirements/dev.txt" } + +[tool.setuptools.packages.find] +include = ["theHarvester*"] + +[tool.setuptools.package-data] +"*" = ["*.txt", "*.yaml"] + [tool.pytest.ini_options] minversion = "7.1" addopts = "--no-header --asyncio-mode=auto" testpaths = [ "tests", "tests/discovery/", -] \ No newline at end of file +] + +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" diff --git a/requirements/dev.txt b/requirements/dev.txt index 0ed0feab..13a89a9a 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,4 +1,3 @@ --r base.txt black==23.9.1 flake8==6.1.0 isort==5.12.0 diff --git a/restfulHarvest.py b/restfulHarvest.py index bde18442..de7c4756 100755 --- a/restfulHarvest.py +++ b/restfulHarvest.py @@ -1,43 +1,5 @@ #!/usr/bin/env python3 -import argparse - -import uvicorn - -parser = argparse.ArgumentParser() -parser.add_argument( - "-H", - "--host", - default="127.0.0.1", - help="IP address to listen on default is 127.0.0.1", -) -parser.add_argument( - "-p", - "--port", - default=5000, - help="Port to bind the web server to, default is 5000", - type=int, -) -parser.add_argument( - "-l", - "--log-level", - default="info", - help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set", -) -parser.add_argument( - "-r", - "--reload", - default=False, - help="Enable automatic reload used during development of the api", - action="store_true", -) - -args: argparse.Namespace = parser.parse_args() +from theHarvester.restfulHarvest import main if __name__ == "__main__": - uvicorn.run( - "theHarvester.lib.api.api:app", - host=args.host, - port=args.port, - log_level=args.log_level, - reload=args.reload, - ) + main() diff --git a/setup.py b/setup.py deleted file mode 100755 index aa8904a8..00000000 --- a/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -from setuptools import find_packages, setup - -from theHarvester.lib.version import version - -with open("README.md", "r") as fh: - long_description: str = fh.read() - -setup( - name="theHarvester", - version=version(), - author="Christian Martorella", - author_email="cmartorella@edge-security.com", - description="theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/laramies/theHarvester", - packages=find_packages(exclude=["tests"]), - python_requires=">=3.9", - scripts=["bin/theHarvester", "bin/restfulHarvest"], - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", - "Operating System :: OS Independent", - ], - data_files=[ - ( - "/etc/theHarvester", - [ - "wordlists/general/common.txt", - "wordlists/dns-big.txt", - "wordlists/dns-names.txt", - "wordlists/dorks.txt", - "wordlists/names_small.txt", - "api-keys.yaml", - "proxies.yaml", - ], - ) - ], -) diff --git a/tests/lib/test_core.py b/tests/lib/test_core.py new file mode 100644 index 00000000..dbf0e86d --- /dev/null +++ b/tests/lib/test_core.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any +from unittest import mock + +import pytest +import yaml + +from theHarvester.lib.core import CONFIG_DIRS, DATA_DIR, Core + + +@pytest.fixture(autouse=True) +def mock_environ(monkeypatch, tmp_path: Path): + monkeypatch.setenv("HOME", str(tmp_path)) + + +def mock_read_text(mocked: dict[Path, str | Exception]): + read_text = Path.read_text + + def _read_text(self: Path, *args, **kwargs): + if result := mocked.get(self): + if isinstance(result, Exception): + raise result + return result + return read_text(self, *args, **kwargs) + + return _read_text + + +@pytest.mark.parametrize( + ("name", "contents", "expected"), + [ + ("api-keys", "apikeys: {}", {}), + ("proxies", "http: [localhost:8080]", ["http://localhost:8080"]), + ], +) +@pytest.mark.parametrize("dir", CONFIG_DIRS) +def test_read_config_searches_config_dirs( + name: str, contents: str, expected: Any, dir: Path, capsys +): + file = dir.expanduser() / f"{name}.yaml" + config_files = [d.expanduser() / file.name for d in CONFIG_DIRS] + side_effect = mock_read_text( + {f: contents if f == file else FileNotFoundError() for f in config_files} + ) + + with mock.patch("pathlib.Path.read_text", autospec=True, side_effect=side_effect): + got = Core.api_keys() if name == "api-keys" else Core.proxy_list() + + assert got == expected + assert f"Read {file.name} from {file}" in capsys.readouterr().out + + +@pytest.mark.parametrize("name", ("api-keys", "proxies")) +def test_read_config_copies_default_to_home(name: str, capsys): + file = Path(f"~/.theHarvester/{name}.yaml").expanduser() + config_files = [d.expanduser() / file.name for d in CONFIG_DIRS] + side_effect = mock_read_text({f: FileNotFoundError() for f in config_files}) + + with mock.patch("pathlib.Path.read_text", autospec=True, side_effect=side_effect): + got = Core.api_keys() if name == "api-keys" else Core.proxy_list() + + default = yaml.safe_load((DATA_DIR / file.name).read_text()) + expected = ( + default["apikeys"] + if name == "api-keys" + else [f"http://{h}" for h in default["http"]] + ) + assert got == expected + assert f"Created default {file.name} at {file}" in capsys.readouterr().out + assert file.exists() diff --git a/theHarvester.py b/theHarvester.py index 7f1811e5..c299433d 100755 --- a/theHarvester.py +++ b/theHarvester.py @@ -1,30 +1,12 @@ #!/usr/bin/env python3 # Note: This script runs theHarvester -import asyncio import sys -from theHarvester import __main__ +from theHarvester.theHarvester import main if sys.version_info.major < 3 or sys.version_info.minor < 9: print("\033[93m[!] Make sure you have Python 3.9+ installed, quitting.\n\n \033[0m") sys.exit(1) if __name__ == "__main__": - platform = sys.platform - if platform == "win32": - # Required or things will break if trying to take screenshots - import multiprocessing - - multiprocessing.freeze_support() - asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy - else: - import uvloop - - uvloop.install() - - if "linux" in platform: - import aiomultiprocess - - # As we are not using Windows, we can change the spawn method to fork for greater performance - aiomultiprocess.set_context("fork") - asyncio.run(__main__.entry_point()) + main() diff --git a/api-keys.yaml b/theHarvester/data/api-keys.yaml similarity index 100% rename from api-keys.yaml rename to theHarvester/data/api-keys.yaml diff --git a/proxies.yaml b/theHarvester/data/proxies.yaml similarity index 100% rename from proxies.yaml rename to theHarvester/data/proxies.yaml diff --git a/wordlists/dns-big.txt b/theHarvester/data/wordlists/dns-big.txt similarity index 100% rename from wordlists/dns-big.txt rename to theHarvester/data/wordlists/dns-big.txt diff --git a/wordlists/dns-names.txt b/theHarvester/data/wordlists/dns-names.txt similarity index 100% rename from wordlists/dns-names.txt rename to theHarvester/data/wordlists/dns-names.txt diff --git a/wordlists/dorks.txt b/theHarvester/data/wordlists/dorks.txt similarity index 100% rename from wordlists/dorks.txt rename to theHarvester/data/wordlists/dorks.txt diff --git a/wordlists/general/common.txt b/theHarvester/data/wordlists/general/common.txt similarity index 100% rename from wordlists/general/common.txt rename to theHarvester/data/wordlists/general/common.txt diff --git a/wordlists/names_small.txt b/theHarvester/data/wordlists/names_small.txt similarity index 100% rename from wordlists/names_small.txt rename to theHarvester/data/wordlists/names_small.txt diff --git a/theHarvester/discovery/dnssearch.py b/theHarvester/discovery/dnssearch.py index 6db6f8fb..7dd6977b 100644 --- a/theHarvester/discovery/dnssearch.py +++ b/theHarvester/discovery/dnssearch.py @@ -17,11 +17,14 @@ from aiodns import DNSResolver from theHarvester.lib import hostchecker +from theHarvester.lib.core import DATA_DIR ##################################################################### # DNS FORCE ##################################################################### +DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt" + class DnsForce: def __init__(self, domain, dnsserver, verbose: bool = False) -> None: @@ -31,18 +34,8 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None: # self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver # self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver self.dnsserver = dnsserver - try: - with open("/etc/theHarvester/wordlists/dns-names.txt", "r") as file: - self.list = file.readlines() - except FileNotFoundError: - try: - with open( - "/usr/local/etc/theHarvester/wordlists/dns-names.txt", "r" - ) as file: - self.list = file.readlines() - except FileNotFoundError: - with open("wordlists/dns-names.txt", "r") as file: - self.list = file.readlines() + with DNS_NAMES.open("r") as file: + self.list = file.readlines() self.domain = domain.replace("www.", "") self.list = [f"{word.strip()}.{self.domain}" for word in self.list] diff --git a/theHarvester/lib/core.py b/theHarvester/lib/core.py index ccc2fa12..9c7e41c0 100644 --- a/theHarvester/lib/core.py +++ b/theHarvester/lib/core.py @@ -2,8 +2,10 @@ from __future__ import annotations import asyncio +import contextlib import random import ssl +from pathlib import Path from typing import Any, List, Sized, Tuple, Union import aiohttp @@ -15,20 +17,36 @@ from .version import version +DATA_DIR = Path(__file__).parents[1] / "data" +CONFIG_DIRS = [ + Path("/etc/theHarvester/"), + Path("/usr/local/etc/theHarvester/"), + Path("~/.theHarvester"), +] + class Core: + @staticmethod + def _read_config(filename: str) -> str: + # Return the first we find + for path in CONFIG_DIRS: + with contextlib.suppress(FileNotFoundError): + file = path.expanduser() / filename + config = file.read_text() + print(f"Read {filename} from {file}") + return config + + # Fallback to creating default in user's home dir + default = (DATA_DIR / filename).read_text() + dest = CONFIG_DIRS[-1].expanduser() / filename + dest.parent.mkdir(exist_ok=True) + dest.write_text(default) + print(f"Created default {filename} at {dest}") + return default + @staticmethod def api_keys() -> dict: - try: - with open("/etc/theHarvester/api-keys.yaml", "r") as api_keys: - keys = yaml.safe_load(api_keys) - except FileNotFoundError: - try: - with open("/usr/local/etc/theHarvester/api-keys.yaml", "r") as api_keys: - keys = yaml.safe_load(api_keys) - except FileNotFoundError: - with open("api-keys.yaml", "r") as api_keys: - keys = yaml.safe_load(api_keys) + keys = yaml.safe_load(Core._read_config("api-keys.yaml")) return keys["apikeys"] @staticmethod @@ -117,21 +135,7 @@ def virustotal_key() -> str: @staticmethod def proxy_list() -> List: - try: - with open("/etc/theHarvester/proxies.yaml", "r") as proxy_file: - keys = yaml.safe_load(proxy_file) - except FileNotFoundError: - try: - with open( - "/usr/local/etc/theHarvester/proxies.yaml", "r" - ) as proxy_file: - keys = yaml.safe_load(proxy_file) - except FileNotFoundError: - try: - with open("proxies.yaml", "r") as proxy_file: - keys = yaml.safe_load(proxy_file) - except Exception: - return [] + keys = yaml.safe_load(Core._read_config("proxies.yaml")) http_list = ( [f"http://{proxy}" for proxy in keys["http"]] if keys["http"] is not None diff --git a/theHarvester/lib/version.py b/theHarvester/lib/version.py index 6f5556a5..6b0a7c11 100644 --- a/theHarvester/lib/version.py +++ b/theHarvester/lib/version.py @@ -1,5 +1,7 @@ # coding=utf-8 +VERSION = "4.4.4" + def version() -> str: - return "4.4.4" + return VERSION diff --git a/theHarvester/restfulHarvest.py b/theHarvester/restfulHarvest.py new file mode 100644 index 00000000..13d8d3c7 --- /dev/null +++ b/theHarvester/restfulHarvest.py @@ -0,0 +1,46 @@ +import argparse + +import uvicorn + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-H", + "--host", + default="127.0.0.1", + help="IP address to listen on default is 127.0.0.1", + ) + parser.add_argument( + "-p", + "--port", + default=5000, + help="Port to bind the web server to, default is 5000", + type=int, + ) + parser.add_argument( + "-l", + "--log-level", + default="info", + help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set", + ) + parser.add_argument( + "-r", + "--reload", + default=False, + help="Enable automatic reload used during development of the api", + action="store_true", + ) + + args: argparse.Namespace = parser.parse_args() + uvicorn.run( + "theHarvester.lib.api.api:app", + host=args.host, + port=args.port, + log_level=args.log_level, + reload=args.reload, + ) + + +if __name__ == "__main__": + main() diff --git a/theHarvester/theHarvester.py b/theHarvester/theHarvester.py new file mode 100644 index 00000000..2a2dac1f --- /dev/null +++ b/theHarvester/theHarvester.py @@ -0,0 +1,25 @@ +import asyncio +import sys + +from theHarvester import __main__ + + +def main(): + platform = sys.platform + if platform == "win32": + # Required or things will break if trying to take screenshots + import multiprocessing + + multiprocessing.freeze_support() + asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy + else: + import uvloop + + uvloop.install() + + if "linux" in platform: + import aiomultiprocess + + # As we are not using Windows, we can change the spawn method to fork for greater performance + aiomultiprocess.set_context("fork") + asyncio.run(__main__.entry_point())