mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-24 22:42:56 +08:00
Added a parser for example interaction with the API and introduced rate limiting with slowapi.
This commit is contained in:
parent
55ff1f1792
commit
e8f43ba424
4 changed files with 81 additions and 40 deletions
|
@ -3,15 +3,21 @@
|
|||
from argparse import Namespace
|
||||
from typing import List
|
||||
|
||||
from fastapi import FastAPI, Header, Query
|
||||
from fastapi import FastAPI, Header, Query, Request
|
||||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
from slowapi.util import get_remote_address
|
||||
from starlette.responses import StreamingResponse, RedirectResponse
|
||||
from starlette.staticfiles import StaticFiles
|
||||
|
||||
from theHarvester import __main__
|
||||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
app = FastAPI(title="Restful Harvest",
|
||||
description="Rest API for theHarvester powered by FastAPI",
|
||||
version="0.0.1")
|
||||
app.state.limiter = limiter
|
||||
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
||||
|
||||
# This is where we will host files that arise if the user specifies a filename
|
||||
# app.mount("/static", StaticFiles(directory="static/"), name="static")
|
||||
|
@ -21,7 +27,7 @@
|
|||
@app.get("/")
|
||||
async def root(*, user_agent: str = Header(None)):
|
||||
# very basic user agent filtering
|
||||
if "gobuster" in user_agent or "sqlmap" in user_agent:
|
||||
if "gobuster" in user_agent or "sqlmap" in user_agent or "rustbuster" in user_agent:
|
||||
response = RedirectResponse(app.url_path_for("picture"))
|
||||
return response
|
||||
return {'message': 'Thank you for using theHarvester rest API plea'
|
||||
|
@ -36,7 +42,8 @@ async def picture():
|
|||
|
||||
|
||||
@app.get("/query")
|
||||
async def query(dns_server: str = Query(""),
|
||||
@limiter.limit("5/minute")
|
||||
async def query(request: Request, dns_server: str = Query(""),
|
||||
dns_brute=Query(False), dns_lookup: bool = Query(False),
|
||||
dns_tld: bool = Query(False),
|
||||
filename: str = Query(""),
|
||||
|
@ -45,6 +52,8 @@ async def query(dns_server: str = Query(""),
|
|||
source: List[str] = Query(..., description="Data sources to query comma separated with no space"),
|
||||
limit: int = Query(500), start: int = Query(0), domain: str = Query(..., description="Domain to be "
|
||||
"harvested")):
|
||||
# Query function that allows user to query theHarvester rest API
|
||||
# Rate limit of 5 requests per minute
|
||||
try:
|
||||
emails, ips, urls, html_filename, xml_filename = await __main__.start(Namespace(dns_brute=dns_brute,
|
||||
dns_lookup=dns_lookup,
|
||||
|
@ -60,29 +69,8 @@ async def query(dns_server: str = Query(""),
|
|||
start=start,
|
||||
take_over=take_over,
|
||||
virtual_host=virtual_host))
|
||||
|
||||
return {'domain': f'{domain}', 'emails': emails, 'ips': ips, 'urls': urls, 'html_file': f'{html_filename}',
|
||||
'xml_filename': f'{xml_filename}'}
|
||||
except Exception as e:
|
||||
return {'exception': f'{e}'}
|
||||
|
||||
|
||||
@app.get("/test")
|
||||
async def read_item():
|
||||
try:
|
||||
emails, ips, urls = await __main__.start(Namespace(dns_brute=False,
|
||||
dns_lookup=False,
|
||||
dns_server=None,
|
||||
dns_tld=False,
|
||||
domain='yale.edu',
|
||||
filename='',
|
||||
google_dork=False,
|
||||
limit=250,
|
||||
proxies=False,
|
||||
shodan=False,
|
||||
source='bing,intelx',
|
||||
start=0,
|
||||
take_over=False,
|
||||
virtual_host=False))
|
||||
return {'emails': emails, 'ips': ips, 'urls': urls}
|
||||
'xml_file': f'{xml_filename}'}
|
||||
except Exception as e:
|
||||
return {'exception': f'{e}'}
|
||||
|
|
61
theHarvester/lib/web/parser.py
Normal file
61
theHarvester/lib/web/parser.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
"""
|
||||
Example script to query theHarvester rest API, obtain results, and write out to stdout as well as an html & xml file
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import pprint
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
||||
async def fetch_json(session, url):
    """Issue a GET to *url* with *session* and return the JSON-decoded body."""
    async with session.get(url) as resp:
        payload = await resp.json()
    return payload
|
||||
|
||||
|
||||
async def fetch(session, url):
    """Issue a GET to *url* with *session* and return the response body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body
|
||||
|
||||
|
||||
async def main():
    """
    Just a simple example of how to interact with the rest api
    you can easily use requests instead of aiohttp or whatever you best see fit
    :return:
    """
    base = "http://127.0.0.1:5000"
    target = "netflix.com"
    endpoint = f'{base}/query?limit=300&filename=helloworld&source=bing,baidu,duckduckgo,dogpile&domain={target}'

    # First request: run the harvest and collect the JSON result.
    async with aiohttp.ClientSession() as session:
        payload = await fetch_json(session, endpoint)
        emails = payload["emails"]
        ips = payload["ips"]
        urls = payload["urls"]
        html_name = payload["html_file"]
        xml_name = payload["xml_file"]

    # Second pass: download the generated report files from the api host.
    async with aiohttp.ClientSession() as session:
        html_doc = await fetch(session, f"{base}{html_name}")
        xml_doc = await fetch(session, f"{base}{xml_name}")

    # Persist non-empty reports to disk next to the script.
    if html_doc:
        with open('results.html', 'w+') as fp:
            fp.write(html_doc)

    if xml_doc:
        with open('results.xml', 'w+') as fp:
            fp.write(xml_doc)

    for label, found in (('Emails found: ', emails), ('Ips found: ', ips)):
        print(label)
        pprint.pprint(found, indent=4)
        print('\n')
    print('Urls found: ')
    pprint.pprint(urls, indent=4)


if __name__ == '__main__':
    asyncio.run(main())
|
6
theHarvester/lib/web/requirements.txt
Normal file
6
theHarvester/lib/web/requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
aiofiles
|
||||
argparse
|
||||
fastapi
|
||||
slowapi
|
||||
uvicorn
|
||||
uvloop
|
|
@ -1,14 +0,0 @@
|
|||
import uvicorn
import argparse

# CLI options for running the theHarvester rest API under uvicorn.
parser = argparse.ArgumentParser()
parser.add_argument('-H', '--host', default='127.0.0.1', help='IP address to listen on default is 127.0.0.1')
# type=int: without it a user-supplied "-p 8000" arrives as the string '8000',
# which uvicorn cannot bind to (the default 5000 is already an int).
parser.add_argument('-p', '--port', default=5000, type=int, help='Port to bind the web server to, default is 5000')
parser.add_argument('-l', '--log-level', default='info',
                    help='Set logging level, default is info but [critical|error|warning|info|debug|trace] can be set')
# NOTE(review): with action='store_true' and default=True this flag is a no-op —
# reload is always True whether or not -r is passed. Left as-is to preserve
# behavior; presumably default=False was intended — confirm with the author.
parser.add_argument('-r', '--reload', default=True, help='Enable auto-reload.', action='store_true')

args = parser.parse_args()

if __name__ == "__main__":
    # "api:app" points uvicorn at the FastAPI instance defined in api.py.
    uvicorn.run(app="api:app", host=args.host, port=args.port, log_level=args.log_level, reload=args.reload)
|
Loading…
Reference in a new issue