build: migrate to pep517 backend (#1505)

* build: migrate to pep517 backend

* test loading config files
This commit is contained in:
Branch Vincent 2023-10-08 11:45:23 -07:00 committed by GitHub
parent 536d5da7f0
commit 4d825aaf33
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 244 additions and 168 deletions

View file

@ -28,8 +28,7 @@ jobs:
- name: Install dependencies
run: |
pip install --upgrade pip
pip install wheel
pip install -r requirements/dev.txt
pip install .[dev]
- name: Lint with black
run: |
@ -52,64 +51,63 @@ jobs:
- name: Run theHarvester module Anubis
run: |
python theHarvester.py -d apple.com -b anubis
theHarvester -d apple.com -b anubis
- name: Run theHarvester module Baidu
run: |
python theHarvester.py -d yale.edu -b baidu
theHarvester -d yale.edu -b baidu
- name: Run theHarvester module Bing
run: |
python theHarvester.py -d yale.edu -b bing
theHarvester -d yale.edu -b bing
- name: Run theHarvester module CertSpotter
run: |
python theHarvester.py -d yale.edu -b certspotter
theHarvester -d yale.edu -b certspotter
- name: Run theHarvester module Crtsh
run: |
python theHarvester.py -d hcl.com -b crtsh
theHarvester -d hcl.com -b crtsh
- name: Run theHarvester module DnsDumpster
run: |
python theHarvester.py -d yale.edu -b dnsdumpster
theHarvester -d yale.edu -b dnsdumpster
- name: Run theHarvester module DuckDuckGo
run: |
python theHarvester.py -d yale.edu -b duckduckgo
theHarvester -d yale.edu -b duckduckgo
- name: Run theHarvester module HackerTarget
run: |
python theHarvester.py -d yale.edu -b hackertarget
theHarvester -d yale.edu -b hackertarget
- name: Run theHarvester module Intelx
run: |
python theHarvester.py -d yale.edu -b intelx
theHarvester -d yale.edu -b intelx
- name: Run theHarvester module Otx
run: |
python theHarvester.py -d yale.edu -b otx
theHarvester -d yale.edu -b otx
- name: Run theHarvester module RapidDns
run: |
python theHarvester.py -d yale.edu -b rapiddns
theHarvester -d yale.edu -b rapiddns
- name: Run theHarvester module Threatminer
run: |
python theHarvester.py -d yale.edu -b threatminer
theHarvester -d yale.edu -b threatminer
- name: Run theHarvester module Urlscan
run: |
python theHarvester.py -d yale.edu -b urlscan
theHarvester -d yale.edu -b urlscan
- name: Run theHarvester module Yahoo
run: |
python theHarvester.py -d yale.edu -b yahoo
theHarvester -d yale.edu -b yahoo
- name: Run theHarvester module DNS brute force
run: |
python theHarvester.py -d yale.edu -c
theHarvester -d yale.edu -c
- name: Static type checking with mypy
run: |

View file

@ -1,16 +1,11 @@
FROM alpine:3
LABEL maintainer="@jay_townsend1 & @NotoriousRebel1 (alpine @viardant)"
RUN mkdir /app
RUN mkdir /etc/theHarvester/
COPY api-keys.yaml /etc/theHarvester/
COPY proxies.yaml /etc/theHarvester/
WORKDIR /app
COPY requirements.txt requirements.txt
COPY requirements requirements
RUN apk update && apk upgrade --available && apk add --no-cache musl-dev git libffi-dev gcc python3-dev py3-pip libxml2-dev libxslt-dev && python3 -m pip install --upgrade pip
RUN python3 --version && pip3 install --no-cache-dir -r requirements.txt
COPY . /app
RUN chmod +x ./*.py
ENTRYPOINT ["/app/theHarvester.py"]
ENTRYPOINT ["/app/restfulHarvest.py", "-H", "0.0.0.0", "-p", "80"]
RUN pip3 install --no-cache-dir .
ENTRYPOINT ["restfulHarvest", "-H", "0.0.0.0", "-p", "80"]
EXPOSE 80

View file

@ -1,3 +1,39 @@
[project]
name = "theHarvester"
description = "theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test"
readme = "README.md"
authors = [
{ name = "Christian Martorella", email = "cmartorella@edge-security.com" },
{ name = "Jay Townsend", email = "townsend891@hotmail.com" },
{ name = "Matthew Brown", email = "36310667+NotoriousRebel@users.noreply.github.com" },
]
requires-python = ">=3.9"
urls.Homepage = "https://github.com/laramies/theHarvester"
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
"Operating System :: OS Independent",
]
dynamic = ["dependencies", "optional-dependencies", "version"]
[project.scripts]
theHarvester = "theHarvester.theHarvester:main"
restfulHarvest = "theHarvester.restfulHarvest:main"
[tool.setuptools.dynamic]
version = { attr = "theHarvester.lib.version.VERSION" }
dependencies = { file = "requirements/base.txt" }
optional-dependencies.dev = { file = "requirements/dev.txt" }
[tool.setuptools.packages.find]
include = ["theHarvester*"]
[tool.setuptools.package-data]
"*" = ["*.txt", "*.yaml"]
[tool.pytest.ini_options]
minversion = "7.1"
addopts = "--no-header --asyncio-mode=auto"
@ -5,3 +41,7 @@ testpaths = [
"tests",
"tests/discovery/",
]
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

View file

@ -1,4 +1,3 @@
-r base.txt
black==23.9.1
flake8==6.1.0
isort==5.12.0

View file

@ -1,43 +1,5 @@
#!/usr/bin/env python3
import argparse
import uvicorn
parser = argparse.ArgumentParser()
parser.add_argument(
"-H",
"--host",
default="127.0.0.1",
help="IP address to listen on default is 127.0.0.1",
)
parser.add_argument(
"-p",
"--port",
default=5000,
help="Port to bind the web server to, default is 5000",
type=int,
)
parser.add_argument(
"-l",
"--log-level",
default="info",
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
)
parser.add_argument(
"-r",
"--reload",
default=False,
help="Enable automatic reload used during development of the api",
action="store_true",
)
args: argparse.Namespace = parser.parse_args()
from theHarvester.restfulHarvest import main
if __name__ == "__main__":
uvicorn.run(
"theHarvester.lib.api.api:app",
host=args.host,
port=args.port,
log_level=args.log_level,
reload=args.reload,
)
main()

View file

@ -1,42 +0,0 @@
from setuptools import find_packages, setup
from theHarvester.lib.version import version
with open("README.md", "r") as fh:
long_description: str = fh.read()
setup(
name="theHarvester",
version=version(),
author="Christian Martorella",
author_email="cmartorella@edge-security.com",
description="theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/laramies/theHarvester",
packages=find_packages(exclude=["tests"]),
python_requires=">=3.9",
scripts=["bin/theHarvester", "bin/restfulHarvest"],
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
"Operating System :: OS Independent",
],
data_files=[
(
"/etc/theHarvester",
[
"wordlists/general/common.txt",
"wordlists/dns-big.txt",
"wordlists/dns-names.txt",
"wordlists/dorks.txt",
"wordlists/names_small.txt",
"api-keys.yaml",
"proxies.yaml",
],
)
],
)

72
tests/lib/test_core.py Normal file
View file

@ -0,0 +1,72 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
from unittest import mock
import pytest
import yaml
from theHarvester.lib.core import CONFIG_DIRS, DATA_DIR, Core
@pytest.fixture(autouse=True)
def mock_environ(monkeypatch, tmp_path: Path):
    """Point ``HOME`` at pytest's per-test temporary directory.

    The fixture is autouse, so every test in this module runs with ``HOME``
    set to ``tmp_path`` for its duration.
    """
    fake_home = str(tmp_path)
    monkeypatch.setenv("HOME", fake_home)
def mock_read_text(mocked: dict[Path, str | Exception]):
    """Build a stand-in for ``Path.read_text`` driven by a lookup table.

    Args:
        mocked: Maps a path to either the text to return for it or an
            exception instance to raise when it is read.

    Returns:
        A function with the ``Path.read_text`` calling convention. Paths
        present in ``mocked`` are answered from the table; every other path
        is delegated to the real ``Path.read_text`` captured at build time.
    """
    # Capture the genuine implementation now, before the caller patches it.
    real_read_text = Path.read_text

    def _read_text(self: Path, *args, **kwargs):
        # Membership test rather than truthiness: an empty string is a
        # legitimate mocked file body and must not fall through to disk.
        if self in mocked:
            result = mocked[self]
            if isinstance(result, Exception):
                raise result
            return result
        return real_read_text(self, *args, **kwargs)

    return _read_text
@pytest.mark.parametrize(
    ("name", "contents", "expected"),
    [
        ("api-keys", "apikeys: {}", {}),
        ("proxies", "http: [localhost:8080]", ["http://localhost:8080"]),
    ],
)
@pytest.mark.parametrize("dir", CONFIG_DIRS)
def test_read_config_searches_config_dirs(
    name: str, contents: str, expected: Any, dir: Path, capsys
):
    """A config file present in exactly one of ``CONFIG_DIRS`` is found and used.

    For each directory in turn, only that directory "contains" the file;
    reading it from any other config directory raises ``FileNotFoundError``.
    """
    target = dir.expanduser() / f"{name}.yaml"

    # One entry per config directory: the target yields the contents, the
    # rest behave as missing files.
    responses = {}
    for config_dir in CONFIG_DIRS:
        candidate = config_dir.expanduser() / target.name
        if candidate == target:
            responses[candidate] = contents
        else:
            responses[candidate] = FileNotFoundError()
    side_effect = mock_read_text(responses)

    with mock.patch("pathlib.Path.read_text", autospec=True, side_effect=side_effect):
        if name == "api-keys":
            got = Core.api_keys()
        else:
            got = Core.proxy_list()

    assert got == expected
    assert f"Read {target.name} from {target}" in capsys.readouterr().out
@pytest.mark.parametrize("name", ("api-keys", "proxies"))
def test_read_config_copies_default_to_home(name: str, capsys):
    """With no config file in any directory, the packaged default is copied to ``~/.theHarvester``."""
    home_copy = Path(f"~/.theHarvester/{name}.yaml").expanduser()

    # Every config directory reports the file as missing.
    responses = {}
    for config_dir in CONFIG_DIRS:
        responses[config_dir.expanduser() / home_copy.name] = FileNotFoundError()
    side_effect = mock_read_text(responses)

    with mock.patch("pathlib.Path.read_text", autospec=True, side_effect=side_effect):
        if name == "api-keys":
            got = Core.api_keys()
        else:
            got = Core.proxy_list()

    # The expected value is derived from the packaged default file itself.
    default = yaml.safe_load((DATA_DIR / home_copy.name).read_text())
    if name == "api-keys":
        expected = default["apikeys"]
    else:
        expected = [f"http://{h}" for h in default["http"]]

    assert got == expected
    assert f"Created default {home_copy.name} at {home_copy}" in capsys.readouterr().out
    assert home_copy.exists()

View file

@ -1,30 +1,12 @@
#!/usr/bin/env python3
# Note: This script runs theHarvester
import asyncio
import sys
from theHarvester import __main__
from theHarvester.theHarvester import main
if sys.version_info.major < 3 or sys.version_info.minor < 9:
print("\033[93m[!] Make sure you have Python 3.9+ installed, quitting.\n\n \033[0m")
sys.exit(1)
if __name__ == "__main__":
platform = sys.platform
if platform == "win32":
# Required or things will break if trying to take screenshots
import multiprocessing
multiprocessing.freeze_support()
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
else:
import uvloop
uvloop.install()
if "linux" in platform:
import aiomultiprocess
# As we are not using Windows, we can change the spawn method to fork for greater performance
aiomultiprocess.set_context("fork")
asyncio.run(__main__.entry_point())
main()

View file

@ -17,11 +17,14 @@
from aiodns import DNSResolver
from theHarvester.lib import hostchecker
from theHarvester.lib.core import DATA_DIR
#####################################################################
# DNS FORCE
#####################################################################
DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"
class DnsForce:
def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
@ -31,18 +34,8 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
# self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
# self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
self.dnsserver = dnsserver
try:
with open("/etc/theHarvester/wordlists/dns-names.txt", "r") as file:
self.list = file.readlines()
except FileNotFoundError:
try:
with open(
"/usr/local/etc/theHarvester/wordlists/dns-names.txt", "r"
) as file:
self.list = file.readlines()
except FileNotFoundError:
with open("wordlists/dns-names.txt", "r") as file:
self.list = file.readlines()
with DNS_NAMES.open("r") as file:
self.list = file.readlines()
self.domain = domain.replace("www.", "")
self.list = [f"{word.strip()}.{self.domain}" for word in self.list]

View file

@ -2,8 +2,10 @@
from __future__ import annotations
import asyncio
import contextlib
import random
import ssl
from pathlib import Path
from typing import Any, List, Sized, Tuple, Union
import aiohttp
@ -15,20 +17,36 @@
from .version import version
DATA_DIR = Path(__file__).parents[1] / "data"
CONFIG_DIRS = [
Path("/etc/theHarvester/"),
Path("/usr/local/etc/theHarvester/"),
Path("~/.theHarvester"),
]
class Core:
@staticmethod
def _read_config(filename: str) -> str:
    """Return the text of the configuration file ``filename``.

    Each directory in ``CONFIG_DIRS`` is tried in order (with ``~``
    expanded) and the first copy that exists wins. When no directory has
    the file, the default shipped under ``DATA_DIR`` is written into the
    last entry of ``CONFIG_DIRS`` and its text is returned.

    Args:
        filename: Bare file name, e.g. ``"api-keys.yaml"``.

    Returns:
        The raw text of the configuration file.

    Raises:
        FileNotFoundError: If the file is in no config directory and there
            is no default for it under ``DATA_DIR``.
        OSError: If the default cannot be written to the fallback location.
    """
    # Return the first we find
    for path in CONFIG_DIRS:
        with contextlib.suppress(FileNotFoundError):
            file = path.expanduser() / filename
            config = file.read_text()
            print(f"Read {filename} from {file}")
            return config

    # Fallback to creating default in user's home dir
    default = (DATA_DIR / filename).read_text()
    dest = CONFIG_DIRS[-1].expanduser() / filename
    # parents=True: the expanded home directory itself may not exist yet,
    # in which case a plain mkdir() would raise FileNotFoundError.
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(default)
    print(f"Created default {filename} at {dest}")
    return default
@staticmethod
def api_keys() -> dict:
try:
with open("/etc/theHarvester/api-keys.yaml", "r") as api_keys:
keys = yaml.safe_load(api_keys)
except FileNotFoundError:
try:
with open("/usr/local/etc/theHarvester/api-keys.yaml", "r") as api_keys:
keys = yaml.safe_load(api_keys)
except FileNotFoundError:
with open("api-keys.yaml", "r") as api_keys:
keys = yaml.safe_load(api_keys)
keys = yaml.safe_load(Core._read_config("api-keys.yaml"))
return keys["apikeys"]
@staticmethod
@ -117,21 +135,7 @@ def virustotal_key() -> str:
@staticmethod
def proxy_list() -> List:
try:
with open("/etc/theHarvester/proxies.yaml", "r") as proxy_file:
keys = yaml.safe_load(proxy_file)
except FileNotFoundError:
try:
with open(
"/usr/local/etc/theHarvester/proxies.yaml", "r"
) as proxy_file:
keys = yaml.safe_load(proxy_file)
except FileNotFoundError:
try:
with open("proxies.yaml", "r") as proxy_file:
keys = yaml.safe_load(proxy_file)
except Exception:
return []
keys = yaml.safe_load(Core._read_config("proxies.yaml"))
http_list = (
[f"http://{proxy}" for proxy in keys["http"]]
if keys["http"] is not None

View file

@ -1,5 +1,7 @@
# coding=utf-8
VERSION = "4.4.4"
def version() -> str:
return "4.4.4"
return VERSION

View file

@ -0,0 +1,46 @@
import argparse
import uvicorn
def main():
    """Parse the command-line options and serve the REST API with uvicorn.

    Options cover host (``-H``), port (``-p``), log level (``-l``) and
    auto-reload (``-r``); the parsed values are passed straight to
    ``uvicorn.run`` for the app at ``theHarvester.lib.api.api:app``.
    """
    # (flags, add_argument keyword arguments) for every supported option.
    option_table = (
        (
            ("-H", "--host"),
            {
                "default": "127.0.0.1",
                "help": "IP address to listen on default is 127.0.0.1",
            },
        ),
        (
            ("-p", "--port"),
            {
                "default": 5000,
                "help": "Port to bind the web server to, default is 5000",
                "type": int,
            },
        ),
        (
            ("-l", "--log-level"),
            {
                "default": "info",
                "help": "Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
            },
        ),
        (
            ("-r", "--reload"),
            {
                "default": False,
                "help": "Enable automatic reload used during development of the api",
                "action": "store_true",
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    options: argparse.Namespace = cli.parse_args()

    uvicorn.run(
        "theHarvester.lib.api.api:app",
        host=options.host,
        port=options.port,
        log_level=options.log_level,
        reload=options.reload,
    )
if __name__ == "__main__":
main()

View file

@ -0,0 +1,25 @@
import asyncio
import sys
from theHarvester import __main__
def main():
    """Apply platform-specific asyncio setup, then run theHarvester's entry point.

    On Windows the selector event loop policy is installed and
    multiprocessing freeze support is enabled. Everywhere else uvloop is
    installed, and on Linux aiomultiprocess is additionally switched to the
    ``fork`` start method.
    """
    running_on = sys.platform

    if running_on != "win32":
        import uvloop

        uvloop.install()

        if "linux" in running_on:
            import aiomultiprocess

            # As we are not using Windows, we can change the spawn method to fork for greater performance
            aiomultiprocess.set_context("fork")
    else:
        # Required or things will break if trying to take screenshots
        import multiprocessing

        multiprocessing.freeze_support()
        asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy

    asyncio.run(__main__.entry_point())