mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-20 07:16:31 +08:00
build: migrate to pep517 backend (#1505)
* build: migrate to pep517 backend * test loading config files
This commit is contained in:
parent
536d5da7f0
commit
4d825aaf33
34
.github/workflows/theHarvester.yml
vendored
34
.github/workflows/theHarvester.yml
vendored
|
@ -28,8 +28,7 @@ jobs:
|
|||
- name: Install dependencies
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install wheel
|
||||
pip install -r requirements/dev.txt
|
||||
pip install .[dev]
|
||||
|
||||
- name: Lint with black
|
||||
run: |
|
||||
|
@ -52,64 +51,63 @@ jobs:
|
|||
|
||||
- name: Run theHarvester module Anubis
|
||||
run: |
|
||||
python theHarvester.py -d apple.com -b anubis
|
||||
theHarvester -d apple.com -b anubis
|
||||
|
||||
- name: Run theHarvester module Baidu
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b baidu
|
||||
theHarvester -d yale.edu -b baidu
|
||||
|
||||
- name: Run theHarvester module Bing
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b bing
|
||||
theHarvester -d yale.edu -b bing
|
||||
|
||||
- name: Run theHarvester module CertSpotter
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b certspotter
|
||||
theHarvester -d yale.edu -b certspotter
|
||||
|
||||
- name: Run theHarvester module Crtsh
|
||||
run: |
|
||||
python theHarvester.py -d hcl.com -b crtsh
|
||||
theHarvester -d hcl.com -b crtsh
|
||||
|
||||
- name: Run theHarvester module DnsDumpster
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b dnsdumpster
|
||||
theHarvester -d yale.edu -b dnsdumpster
|
||||
|
||||
- name: Run theHarvester module DuckDuckGo
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b duckduckgo
|
||||
theHarvester -d yale.edu -b duckduckgo
|
||||
|
||||
- name: Run theHarvester module HackerTarget
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b hackertarget
|
||||
theHarvester -d yale.edu -b hackertarget
|
||||
|
||||
- name: Run theHarvester module Intelx
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b intelx
|
||||
theHarvester -d yale.edu -b intelx
|
||||
|
||||
- name: Run theHarvester module Otx
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b otx
|
||||
|
||||
theHarvester -d yale.edu -b otx
|
||||
|
||||
- name: Run theHarvester module RapidDns
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b rapiddns
|
||||
theHarvester -d yale.edu -b rapiddns
|
||||
|
||||
- name: Run theHarvester module Threatminer
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b threatminer
|
||||
theHarvester -d yale.edu -b threatminer
|
||||
|
||||
- name: Run theHarvester module Urlscan
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b urlscan
|
||||
theHarvester -d yale.edu -b urlscan
|
||||
|
||||
- name: Run theHarvester module Yahoo
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -b yahoo
|
||||
theHarvester -d yale.edu -b yahoo
|
||||
|
||||
- name: Run theHarvester module DNS brute force
|
||||
run: |
|
||||
python theHarvester.py -d yale.edu -c
|
||||
theHarvester -d yale.edu -c
|
||||
|
||||
- name: Static type checking with mypy
|
||||
run: |
|
||||
|
|
|
@ -1,16 +1,11 @@
|
|||
FROM alpine:3
|
||||
LABEL maintainer="@jay_townsend1 & @NotoriousRebel1 (alpine @viardant)"
|
||||
RUN mkdir /app
|
||||
RUN mkdir /etc/theHarvester/
|
||||
COPY api-keys.yaml /etc/theHarvester/
|
||||
COPY proxies.yaml /etc/theHarvester/
|
||||
WORKDIR /app
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
RUN apk update && apk upgrade --available && apk add --no-cache musl-dev git libffi-dev gcc python3-dev py3-pip libxml2-dev libxslt-dev && python3 -m pip install --upgrade pip
|
||||
RUN python3 --version && pip3 install --no-cache-dir -r requirements.txt
|
||||
COPY . /app
|
||||
RUN chmod +x ./*.py
|
||||
ENTRYPOINT ["/app/theHarvester.py"]
|
||||
ENTRYPOINT ["/app/restfulHarvest.py", "-H", "0.0.0.0", "-p", "80"]
|
||||
RUN pip3 install --no-cache-dir .
|
||||
ENTRYPOINT ["restfulHarvest", "-H", "0.0.0.0", "-p", "80"]
|
||||
EXPOSE 80
|
||||
|
|
|
@ -1,3 +1,39 @@
|
|||
[project]
|
||||
name = "theHarvester"
|
||||
description = "theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "Christian Martorella", email = "cmartorella@edge-security.com" },
|
||||
{ name = "Jay Townsend", email = "townsend891@hotmail.com" },
|
||||
{ name = "Matthew Brown", email = "36310667+NotoriousRebel@users.noreply.github.com" },
|
||||
]
|
||||
requires-python = ">=3.9"
|
||||
urls.Homepage = "https://github.com/laramies/theHarvester"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
dynamic = ["dependencies", "optional-dependencies", "version"]
|
||||
|
||||
[project.scripts]
|
||||
theHarvester = "theHarvester.theHarvester:main"
|
||||
restfulHarvest = "theHarvester.restfulHarvest:main"
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = { attr = "theHarvester.lib.version.VERSION" }
|
||||
dependencies = { file = "requirements/base.txt" }
|
||||
optional-dependencies.dev = { file = "requirements/dev.txt" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["theHarvester*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"*" = ["*.txt", "*.yaml"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
minversion = "7.1"
|
||||
addopts = "--no-header --asyncio-mode=auto"
|
||||
|
@ -5,3 +41,7 @@ testpaths = [
|
|||
"tests",
|
||||
"tests/discovery/",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
-r base.txt
|
||||
black==23.9.1
|
||||
flake8==6.1.0
|
||||
isort==5.12.0
|
||||
|
|
|
@ -1,43 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
|
||||
import uvicorn
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"-H",
|
||||
"--host",
|
||||
default="127.0.0.1",
|
||||
help="IP address to listen on default is 127.0.0.1",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--port",
|
||||
default=5000,
|
||||
help="Port to bind the web server to, default is 5000",
|
||||
type=int,
|
||||
)
|
||||
parser.add_argument(
|
||||
"-l",
|
||||
"--log-level",
|
||||
default="info",
|
||||
help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-r",
|
||||
"--reload",
|
||||
default=False,
|
||||
help="Enable automatic reload used during development of the api",
|
||||
action="store_true",
|
||||
)
|
||||
|
||||
args: argparse.Namespace = parser.parse_args()
|
||||
from theHarvester.restfulHarvest import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(
|
||||
"theHarvester.lib.api.api:app",
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
log_level=args.log_level,
|
||||
reload=args.reload,
|
||||
)
|
||||
main()
|
||||
|
|
42
setup.py
42
setup.py
|
@ -1,42 +0,0 @@
|
|||
from setuptools import find_packages, setup
|
||||
|
||||
from theHarvester.lib.version import version
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description: str = fh.read()
|
||||
|
||||
setup(
|
||||
name="theHarvester",
|
||||
version=version(),
|
||||
author="Christian Martorella",
|
||||
author_email="cmartorella@edge-security.com",
|
||||
description="theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/laramies/theHarvester",
|
||||
packages=find_packages(exclude=["tests"]),
|
||||
python_requires=">=3.9",
|
||||
scripts=["bin/theHarvester", "bin/restfulHarvest"],
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
data_files=[
|
||||
(
|
||||
"/etc/theHarvester",
|
||||
[
|
||||
"wordlists/general/common.txt",
|
||||
"wordlists/dns-big.txt",
|
||||
"wordlists/dns-names.txt",
|
||||
"wordlists/dorks.txt",
|
||||
"wordlists/names_small.txt",
|
||||
"api-keys.yaml",
|
||||
"proxies.yaml",
|
||||
],
|
||||
)
|
||||
],
|
||||
)
|
72
tests/lib/test_core.py
Normal file
72
tests/lib/test_core.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from theHarvester.lib.core import CONFIG_DIRS, DATA_DIR, Core
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_environ(monkeypatch, tmp_path: Path):
    """Redirect the user's home directory to a per-test temporary directory.

    ``Path.expanduser()`` resolves ``~`` from ``HOME`` on POSIX systems but
    from ``USERPROFILE`` on Windows (``HOME`` is ignored there since
    Python 3.8), so both variables are overridden. This keeps the tests from
    reading or writing the real home directory on either platform.
    """
    home = str(tmp_path)
    monkeypatch.setenv("HOME", home)
    monkeypatch.setenv("USERPROFILE", home)
|
||||
|
||||
|
||||
def mock_read_text(mocked: dict[Path, str | Exception]):
|
||||
read_text = Path.read_text
|
||||
|
||||
def _read_text(self: Path, *args, **kwargs):
|
||||
if result := mocked.get(self):
|
||||
if isinstance(result, Exception):
|
||||
raise result
|
||||
return result
|
||||
return read_text(self, *args, **kwargs)
|
||||
|
||||
return _read_text
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("name", "contents", "expected"),
    [
        ("api-keys", "apikeys: {}", {}),
        ("proxies", "http: [localhost:8080]", ["http://localhost:8080"]),
    ],
)
@pytest.mark.parametrize("dir", CONFIG_DIRS)
def test_read_config_searches_config_dirs(
    name: str, contents: str, expected: Any, dir: Path, capsys
):
    """A config file present in any single search directory is found and parsed.

    Exactly one candidate location yields ``contents``; reading any of the
    other candidates raises ``FileNotFoundError``.
    """
    basename = f"{name}.yaml"
    target = dir.expanduser() / basename

    # One entry per search directory: the real text for the directory under
    # test, a "missing file" error for all the others.
    fake_files = {}
    for config_dir in CONFIG_DIRS:
        candidate = config_dir.expanduser() / basename
        if candidate == target:
            fake_files[candidate] = contents
        else:
            fake_files[candidate] = FileNotFoundError()

    loader = Core.api_keys if name == "api-keys" else Core.proxy_list
    with mock.patch(
        "pathlib.Path.read_text",
        autospec=True,
        side_effect=mock_read_text(fake_files),
    ):
        result = loader()

    assert result == expected
    assert f"Read {target.name} from {target}" in capsys.readouterr().out
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ("api-keys", "proxies"))
def test_read_config_copies_default_to_home(name: str, capsys):
    """With no config file in any search directory, the packaged default is
    returned and a copy of it is created under ``~/.theHarvester``."""
    home_copy = Path(f"~/.theHarvester/{name}.yaml").expanduser()

    # Every search location reports the file as missing.
    unreadable = {}
    for config_dir in CONFIG_DIRS:
        unreadable[config_dir.expanduser() / home_copy.name] = FileNotFoundError()

    with mock.patch(
        "pathlib.Path.read_text",
        autospec=True,
        side_effect=mock_read_text(unreadable),
    ):
        if name == "api-keys":
            result = Core.api_keys()
        else:
            result = Core.proxy_list()

    # Derive the expectation from the packaged default file itself.
    packaged = yaml.safe_load((DATA_DIR / home_copy.name).read_text())
    if name == "api-keys":
        wanted = packaged["apikeys"]
    else:
        wanted = [f"http://{h}" for h in packaged["http"]]

    assert result == wanted
    assert f"Created default {home_copy.name} at {home_copy}" in capsys.readouterr().out
    assert home_copy.exists()
|
|
@ -1,30 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
# Note: This script runs theHarvester
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from theHarvester import __main__
|
||||
from theHarvester.theHarvester import main
|
||||
|
||||
if sys.version_info.major < 3 or sys.version_info.minor < 9:
|
||||
print("\033[93m[!] Make sure you have Python 3.9+ installed, quitting.\n\n \033[0m")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
platform = sys.platform
|
||||
if platform == "win32":
|
||||
# Required or things will break if trying to take screenshots
|
||||
import multiprocessing
|
||||
|
||||
multiprocessing.freeze_support()
|
||||
asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy
|
||||
else:
|
||||
import uvloop
|
||||
|
||||
uvloop.install()
|
||||
|
||||
if "linux" in platform:
|
||||
import aiomultiprocess
|
||||
|
||||
# As we are not using Windows, we can change the spawn method to fork for greater performance
|
||||
aiomultiprocess.set_context("fork")
|
||||
asyncio.run(__main__.entry_point())
|
||||
main()
|
||||
|
|
|
@ -17,11 +17,14 @@
|
|||
from aiodns import DNSResolver
|
||||
|
||||
from theHarvester.lib import hostchecker
|
||||
from theHarvester.lib.core import DATA_DIR
|
||||
|
||||
#####################################################################
|
||||
# DNS FORCE
|
||||
#####################################################################
|
||||
|
||||
DNS_NAMES = DATA_DIR / "wordlists" / "dns-names.txt"
|
||||
|
||||
|
||||
class DnsForce:
|
||||
def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
|
||||
|
@ -31,18 +34,8 @@ def __init__(self, domain, dnsserver, verbose: bool = False) -> None:
|
|||
# self.dnsserver = [dnsserver] if isinstance(dnsserver, str) else dnsserver
|
||||
# self.dnsserver = list(map(str, dnsserver.split(','))) if isinstance(dnsserver, str) else dnsserver
|
||||
self.dnsserver = dnsserver
|
||||
try:
|
||||
with open("/etc/theHarvester/wordlists/dns-names.txt", "r") as file:
|
||||
self.list = file.readlines()
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
with open(
|
||||
"/usr/local/etc/theHarvester/wordlists/dns-names.txt", "r"
|
||||
) as file:
|
||||
self.list = file.readlines()
|
||||
except FileNotFoundError:
|
||||
with open("wordlists/dns-names.txt", "r") as file:
|
||||
self.list = file.readlines()
|
||||
with DNS_NAMES.open("r") as file:
|
||||
self.list = file.readlines()
|
||||
self.domain = domain.replace("www.", "")
|
||||
self.list = [f"{word.strip()}.{self.domain}" for word in self.list]
|
||||
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import random
|
||||
import ssl
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Sized, Tuple, Union
|
||||
|
||||
import aiohttp
|
||||
|
@ -15,20 +17,36 @@
|
|||
|
||||
from .version import version
|
||||
|
||||
DATA_DIR = Path(__file__).parents[1] / "data"
|
||||
CONFIG_DIRS = [
|
||||
Path("/etc/theHarvester/"),
|
||||
Path("/usr/local/etc/theHarvester/"),
|
||||
Path("~/.theHarvester"),
|
||||
]
|
||||
|
||||
|
||||
class Core:
|
||||
@staticmethod
def _read_config(filename: str) -> str:
    """Return the text of the first *filename* found in ``CONFIG_DIRS``.

    The directories are tried in order. When none of them contains the
    file, the packaged default from ``DATA_DIR`` is copied into the last
    entry of ``CONFIG_DIRS`` and the default's text is returned.

    Args:
        filename: Base name of the config file, e.g. ``"api-keys.yaml"``.

    Returns:
        The raw, unparsed contents of the config file.

    Raises:
        OSError: If a file exists but cannot be read for a reason other
            than being absent, or if the default cannot be read or copied.
    """
    # Return the first we find
    for path in CONFIG_DIRS:
        file = path.expanduser() / filename
        # Only a missing file moves the search on to the next directory.
        with contextlib.suppress(FileNotFoundError):
            config = file.read_text(encoding="utf-8")
            print(f"Read {filename} from {file}")
            return config

    # Fallback to creating default in user's home dir
    default = (DATA_DIR / filename).read_text(encoding="utf-8")
    dest = CONFIG_DIRS[-1].expanduser() / filename
    # parents=True: the home directory itself may not exist yet, which
    # would otherwise make mkdir raise FileNotFoundError.
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(default, encoding="utf-8")
    print(f"Created default {filename} at {dest}")
    return default
|
||||
|
||||
@staticmethod
|
||||
def api_keys() -> dict:
|
||||
try:
|
||||
with open("/etc/theHarvester/api-keys.yaml", "r") as api_keys:
|
||||
keys = yaml.safe_load(api_keys)
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
with open("/usr/local/etc/theHarvester/api-keys.yaml", "r") as api_keys:
|
||||
keys = yaml.safe_load(api_keys)
|
||||
except FileNotFoundError:
|
||||
with open("api-keys.yaml", "r") as api_keys:
|
||||
keys = yaml.safe_load(api_keys)
|
||||
keys = yaml.safe_load(Core._read_config("api-keys.yaml"))
|
||||
return keys["apikeys"]
|
||||
|
||||
@staticmethod
|
||||
|
@ -117,21 +135,7 @@ def virustotal_key() -> str:
|
|||
|
||||
@staticmethod
|
||||
def proxy_list() -> List:
|
||||
try:
|
||||
with open("/etc/theHarvester/proxies.yaml", "r") as proxy_file:
|
||||
keys = yaml.safe_load(proxy_file)
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
with open(
|
||||
"/usr/local/etc/theHarvester/proxies.yaml", "r"
|
||||
) as proxy_file:
|
||||
keys = yaml.safe_load(proxy_file)
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
with open("proxies.yaml", "r") as proxy_file:
|
||||
keys = yaml.safe_load(proxy_file)
|
||||
except Exception:
|
||||
return []
|
||||
keys = yaml.safe_load(Core._read_config("proxies.yaml"))
|
||||
http_list = (
|
||||
[f"http://{proxy}" for proxy in keys["http"]]
|
||||
if keys["http"] is not None
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# coding=utf-8
|
||||
|
||||
VERSION = "4.4.4"
|
||||
|
||||
|
||||
def version() -> str:
|
||||
return "4.4.4"
|
||||
return VERSION
|
||||
|
|
46
theHarvester/restfulHarvest.py
Normal file
46
theHarvester/restfulHarvest.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
import argparse
|
||||
|
||||
import uvicorn
|
||||
|
||||
|
||||
def main(argv=None):
    """Parse command-line options and serve the REST API with uvicorn.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. Leave as ``None`` for normal command-line
            use (argparse then reads ``sys.argv``).

    Raises:
        SystemExit: Raised by argparse when an option is invalid, including
            a ``--log-level`` outside the supported set.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-H",
        "--host",
        default="127.0.0.1",
        help="IP address to listen on default is 127.0.0.1",
    )
    parser.add_argument(
        "-p",
        "--port",
        default=5000,
        help="Port to bind the web server to, default is 5000",
        type=int,
    )
    parser.add_argument(
        "-l",
        "--log-level",
        default="info",
        # Reject unknown levels at parse time with a usage message rather
        # than passing an arbitrary string on to the server.
        choices=("critical", "error", "warning", "info", "debug", "trace"),
        help="Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set",
    )
    parser.add_argument(
        "-r",
        "--reload",
        default=False,
        help="Enable automatic reload used during development of the api",
        action="store_true",
    )

    args: argparse.Namespace = parser.parse_args(argv)
    uvicorn.run(
        "theHarvester.lib.api.api:app",
        host=args.host,
        port=args.port,
        log_level=args.log_level,
        reload=args.reload,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
25
theHarvester/theHarvester.py
Normal file
25
theHarvester/theHarvester.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import asyncio
|
||||
import sys
|
||||
|
||||
from theHarvester import __main__
|
||||
|
||||
|
||||
def main():
    """Prepare the platform-specific async runtime, then run theHarvester.

    On Windows this calls ``multiprocessing.freeze_support()`` and makes
    ``WindowsSelectorEventLoopPolicy`` the default event loop policy class.
    On every other platform it installs uvloop, and on Linux it additionally
    switches aiomultiprocess to the ``fork`` start method. Finally it runs
    ``__main__.entry_point()`` to completion.
    """
    current = sys.platform

    if current != "win32":
        import uvloop

        uvloop.install()

        if "linux" in current:
            import aiomultiprocess

            # Off Windows, "fork" can replace the spawn start method for
            # greater performance.
            aiomultiprocess.set_context("fork")
    else:
        # Needed on Windows, otherwise taking screenshots will break.
        import multiprocessing

        multiprocessing.freeze_support()
        asyncio.DefaultEventLoopPolicy = asyncio.WindowsSelectorEventLoopPolicy

    asyncio.run(__main__.entry_point())
|
Loading…
Reference in a new issue