Removed old code, ported exalead to use aiohttp.

This commit is contained in:
NotoriousRebel 2019-12-25 17:54:32 -05:00
parent 9fe27a0379
commit 90ed486184
4 changed files with 31 additions and 42 deletions

View file

@@ -1,7 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-# import grequests
 from theHarvester.lib.core import async_fetcher
@@ -19,7 +18,6 @@ def __init__(self, word, limit, start):
         self.counter = start

     async def do_search(self):
-        print('hello from bing do search')
         headers = {
             'Host': self.hostname,
             'Cookie': 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',

View file

@@ -1,10 +1,7 @@
-from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
-import time
-import grequests
-import requests
+import asyncio


 class SearchExalead:
@@ -19,7 +16,7 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search/web/results/?q=%40{self.word}&elements_per_page=50&start_index=xx'
         headers = {
             'Host': self.hostname,
@@ -27,29 +24,23 @@ def do_search(self):
             'User-agent': Core.get_user_agent()
         }
         urls = [base_url.replace("xx", str(num)) for num in range(self.counter, self.limit, 50) if num <= self.limit]
-        req = []
-        for url in urls:
-            req.append(grequests.get(url, headers=headers, timeout=5))
-        time.sleep(3)
-        responses = grequests.imap(tuple(req), size=3)
+        responses = await async_fetcher.fetch_all(urls, headers=headers)
         for response in responses:
-            # TODO if decoded content contains information about solving captcha print message to user to visit website
-            # TODO to solve it or use a vpn as it appears to be ip based
-            self.total_results += response.content.decode('UTF-8')
+            self.total_results += response

-    def do_search_files(self, files):
+    async def do_search_files(self, files):
         url = f'https://{self.server}/search/web/results/?q=%40{self.word}filetype:{self.files}&elements_per_page' \
               f'=50&start_index={self.counter} '
         headers = {
             'Host': self.hostname,
             'Referer': ('http://' + self.hostname + '/search/web/results/?q=%40' + self.word),
             'User-agent': Core.get_user_agent()
         }
-        h = requests.get(url=url, headers=headers)
-        self.results = h.text
+        responses = await async_fetcher.fetch_all(url, headers=headers)
+        self.results = responses[0]
         self.total_results += self.results

-    def check_next(self):
+    async def check_next(self):
         renext = re.compile('topNextUrl')
         nextres = renext.findall(self.results)
         if nextres != []:
@@ -59,27 +50,27 @@ def check_next(self):
             nexty = '0'
         return nexty

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.total_results, self.word)
         return rawres.fileurls(self.files)

-    def process(self):
+    async def process(self):
         print('Searching results')
-        self.do_search()
+        await self.do_search()

-    def process_files(self, files):
+    async def process_files(self, files):
         while self.counter < self.limit:
-            self.do_search_files(files)
-            time.sleep(getDelay())
+            await self.do_search_files(files)
             more = self.check_next()
+            await asyncio.sleep(2)
             if more == '1':
                 self.counter += 50
             else:
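
With every method on SearchExalead now a coroutine, callers have to drive it from an event loop instead of calling it synchronously. A minimal usage sketch, assuming the constructor signature shown above and a module path of theHarvester.discovery.exaleadsearch (the file name is not shown on this page):

# Illustrative driver only; module path and argument values are assumptions.
import asyncio
from theHarvester.discovery import exaleadsearch

async def main():
    search = exaleadsearch.SearchExalead(word='example.com', limit=100, start=0)
    await search.process()              # runs do_search() over aiohttp
    emails = await search.get_emails()  # the get_* helpers are coroutines now
    hosts = await search.get_hostnames()
    print(emails, hosts)

if __name__ == '__main__':
    asyncio.run(main())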

View file

@@ -1,6 +1,5 @@
 from theHarvester.lib.core import *
 from bs4 import BeautifulSoup
-import requests
 import aiohttp
 import asyncio
@@ -17,7 +16,7 @@ def __init__(self, word: str):
     async def request(self, url, params):
         headers = {'User-Agent': Core.get_user_agent()}
         data = {'url': self.word.replace('www.', ''), 'Submit1': 'Submit'}
-        timeout = aiohttp.ClientTimeout(total=360)
+        timeout = aiohttp.ClientTimeout(total=720)
         # by default timeout is 5 minutes we will change that to 6 minutes
         # Depending on the domain and if it has a lot of subdomains you may want to tweak it
         # The results are well worth the wait :)
@@ -51,9 +50,7 @@ async def do_search(self):
             hosts: list = str(soup.find('pre')).splitlines()
             await self.clean_hosts(hosts)
         except Exception as e:
-            print('An exception has occurred: ', e)
-            import traceback as t
-            t.print_exc()
+            print(f'An exception has occurred: {e}')

     async def get_hostnames(self) -> set:
         return self.totalhosts
@@ -69,4 +66,4 @@ async def clean_hosts(self, soup_hosts):
             if host[0] == '.':
                 self.totalhosts.add(host[1:])
             else:
                 self.totalhosts.add(host)
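
The request coroutine above raises the total aiohttp.ClientTimeout from 360 to 720 seconds; that timeout and the form payload built alongside it are handed to the aiohttp session (the actual call falls outside this hunk). A minimal sketch of the pattern, with an illustrative URL, payload, and function name rather than this module's real ones:

# Sketch: pass a ClientTimeout and POST form data to aiohttp, as the coroutine above does.
import asyncio
import aiohttp

async def post_form(url: str, data: dict) -> str:
    timeout = aiohttp.ClientTimeout(total=720)  # overall budget of 720 s for the request
    headers = {'User-Agent': 'example-agent'}
    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
        async with session.post(url, data=data) as response:
            return await response.text()

if __name__ == '__main__':
    body = asyncio.run(post_form('https://example.com/submit', {'url': 'example.com', 'Submit1': 'Submit'}))
    print(len(body))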

View file

@@ -378,14 +378,17 @@ class async_fetcher:
     async def fetch(session, url, params='', json=False) -> Union[str, dict, list]:
         # This fetch method solely focuses on get requests
        # TODO determine if method for post requests is necessary
-        if len(params) == 0:
-            async with session.get(url, params=params) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
-        else:
-            async with session.get(url) as response:
-                await asyncio.sleep(2)
-                return await response.text() if json is False else await response.json()
+        try:
+            if params != '':
+                async with session.get(url, params=params) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+            else:
+                async with session.get(url) as response:
+                    await asyncio.sleep(2)
+                    return await response.text() if json is False else await response.json()
+        except Exception:
+            return ''

     @staticmethod
     async def fetch_all(urls, headers='', params='') -> list:
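
This hunk ends at the fetch_all signature; its body is unchanged and not shown here. For context, a companion helper that opens one shared ClientSession and gathers the per-URL fetch calls could look like the sketch below (an assumption about its shape, not the code from this commit):

# Sketch only: a fetch_all-style wrapper around a fetch() like the one shown above.
# theHarvester's real implementation is not displayed in this diff.
import asyncio
import aiohttp

async def fetch(session, url, params='', json=False):
    try:
        async with session.get(url, params=params if params != '' else None) as response:
            return await response.text() if json is False else await response.json()
    except Exception:
        return ''

async def fetch_all(urls, headers='', params=''):
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(headers=headers if headers != '' else {}, timeout=timeout) as session:
        # Schedule every URL concurrently and return the bodies in order.
        return await asyncio.gather(*(fetch(session, url, params) for url in urls))

if __name__ == '__main__':
    print(asyncio.run(fetch_all(['https://example.com'])))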