mirror of https://github.com/laramies/theHarvester.git
synced 2025-02-24 14:32:57 +08:00

Merge pull request #29 from NotoriousRebel/dev
Ported rest of modules to use aiohttp

This commit is contained in commit ac791355e2
4 changed files with 91 additions and 77 deletions
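Every hunk below follows the same pattern: a blocking `requests.get(url, headers=headers)` call becomes `await AsyncFetcher.fetch_all([url], headers=headers)`, which takes a list of URLs and returns a list of response bodies (hence the `resp[0]` indexing), and the methods that perform or await network I/O become coroutines. The `AsyncFetcher` helper lives in `theHarvester.lib.core` and its implementation is not part of this diff; below is a minimal sketch of what such an aiohttp-based fetcher could look like — the class shape, signatures, and error handling are assumptions, not the project's actual code:

```python
# Hypothetical sketch of an aiohttp-based fetch helper; the real
# theHarvester.lib.core.AsyncFetcher is outside this diff and may differ.
import asyncio
import aiohttp


class AsyncFetcher:

    @staticmethod
    async def fetch(session: aiohttp.ClientSession, url: str) -> str:
        # Return the response body as text; swallow errors so one bad
        # URL does not abort the whole batch.
        try:
            async with session.get(url) as response:
                return await response.text()
        except Exception:
            return ''

    @classmethod
    async def fetch_all(cls, urls, headers=None) -> list:
        # One shared session per batch; all requests run concurrently.
        async with aiohttp.ClientSession(headers=headers) as session:
            return await asyncio.gather(*(cls.fetch(session, url) for url in urls))
```

Returning plain strings rather than response objects is what lets the callers keep accumulating `self.totalresults += self.results` without decoding bytes themselves.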
@@ -1,8 +1,6 @@
 from theHarvester.discovery.constants import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchGoogle:
@@ -18,85 +16,91 @@ def __init__(self, word, limit, start):
         self.limit = limit
         self.counter = start

-    def do_search(self):
+    async def do_search(self):
         # Do normal scraping.
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=%40\"' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        searched = await search(self.results)
+        if searched:
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
+                print('self.results: ', self.results)
+                p.pprint(self.results, indent=4)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
-            except Exception:
+            except Exception as e:
+                print(e)
+                import traceback as t
+                t.print_exc()
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def do_search_profiles(self):
+    async def do_search_profiles(self):
         urly = 'http://' + self.server + '/search?num=' + self.quantity + '&start=' + str(
             self.counter) + '&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20' + self.word + '\"'
         try:
             headers = {'User-Agent': googleUA}
-            r = requests.get(urly, headers=headers)
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
         except Exception as e:
             print(e)
-        self.results = r.text
-        if search(self.results):
+        self.results = resp[0]
+        if await search(self.results):
             try:
-                self.results = google_workaround(urly)
+                self.results = await google_workaround(urly)
                 if isinstance(self.results, bool):
                     print('Google is blocking your ip and the workaround, returning')
                     return
             except Exception:
                 # google blocked, no useful result
                 return
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.emails()
+        return await rawres.emails()

-    def get_hostnames(self):
+    async def get_hostnames(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.hostnames()
+        return await rawres.hostnames()

-    def get_files(self):
+    async def get_files(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.fileurls(self.files)

-    def get_profiles(self):
+    async def get_profiles(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.profiles()

-    def process(self, google_dorking):
+    async def process(self, google_dorking):
         if google_dorking is False:
             while self.counter <= self.limit and self.counter <= 1000:
-                self.do_search()
+                await self.do_search()
                 print(f'\tSearching {self.counter} results.')
                 self.counter += 100
         else:  # Google dorking is true.
             self.counter = 0  # Reset counter.
             print('\n')
             print('[-] Searching with Google Dorks: ')
-            self.googledork()  # Call Google dorking method if user wanted it!
+            await self.googledork()  # Call Google dorking method if user wanted it!

-    def process_profiles(self):
+    async def process_profiles(self):
         while self.counter < self.limit:
-            self.do_search_profiles()
-            time.sleep(getDelay())
+            await self.do_search_profiles()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')

-    def append_dorks(self):
+    async def append_dorks(self):
         # Wrap in try-except incase filepaths are messed up.
         try:
             with open('wordlists/dorks.txt', mode='r') as fp:
@@ -104,7 +108,7 @@ def append_dorks(self):
         except FileNotFoundError as error:
             print(error)

-    def construct_dorks(self):
+    async def construct_dorks(self):
         # Format is: site:targetwebsite.com + space + inurl:admindork
         colon = '%3A'
         plus = '%2B'
@@ -128,12 +132,12 @@ def construct_dorks(self):
             .replace('&', ampersand).replace('(', left_peren).replace(')', right_peren).replace('|', pipe) + space + self.word
             for dork in self.dorks)

-    def googledork(self):
-        self.append_dorks()  # Call functions to create list.
-        self.construct_dorks()
-        self.send_dorks()
+    async def googledork(self):
+        await self.append_dorks()  # Call functions to create list.
+        await self.construct_dorks()
+        await self.send_dorks()

-    def send_dorks(self):  # Helper function to minimize code reusability.
+    async def send_dorks(self):  # Helper function to minimize code reusability.
         headers = {'User-Agent': googleUA}
         # Get random user agent to try and prevent google from blocking IP.
         for num in range(len(self.links)):

@@ -141,18 +145,18 @@ def send_dorks(self):  # Helper function to minimize code reusability.
                 if num % 10 == 0 and num > 0:
                     print(f'\tSearching through {num} results')
                 link = self.links[num]
-                req = requests.get(link, headers=headers)
-                self.results = req.text
-                if search(self.results):
+                req = await AsyncFetcher.fetch_all([link], headers=headers)
+                self.results = req[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(link)
+                        self.results = await google_workaround(link)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception:
                         # google blocked, no useful result
                         return
-                time.sleep(getDelay())
+                await asyncio.sleep(getDelay())
                 self.totalresults += self.results
             except Exception as e:
                 print(f'\tException Occurred {e}')
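With `process()` and the getters now coroutines, callers can no longer invoke them directly; they have to be scheduled on an event loop, e.g. with `asyncio.run()` on Python 3.7+. A usage sketch — the module path and constructor values are illustrative assumptions; only the class and method names come from the diff above:

```python
# Illustrative driver for the now-async SearchGoogle; module path and
# argument values are assumptions made for the example.
import asyncio

from theHarvester.discovery import googlesearch


async def main():
    engine = googlesearch.SearchGoogle('example.com', limit=500, start=0)
    await engine.process(google_dorking=False)
    print(await engine.get_emails())
    print(await engine.get_hostnames())

asyncio.run(main())
```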
@@ -1,8 +1,7 @@
 from theHarvester.discovery.constants import *
 from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
-import requests
-import time
+import asyncio


 class SearchLinkedin:
@@ -16,15 +15,15 @@ def __init__(self, word, limit):
         self.limit = int(limit)
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         urly = 'http://' + self.server + '/search?num=100&start=' + str(self.counter) + '&hl=en&meta=&q=site%3Alinkedin.com/in%20' + self.word
         try:
             headers = {'User-Agent': Core.get_user_agent()}
-            r = requests.get(urly, headers=headers)
-            self.results = r.text
-            if search(self.results):
+            resp = await AsyncFetcher.fetch_all([urly], headers=headers)
+            self.results = resp[0]
+            if await search(self.results):
                 try:
-                    self.results = google_workaround(urly)
+                    self.results = await google_workaround(urly)
                     if isinstance(self.results, bool):
                         print('Google is blocking your ip and the workaround, returning')
                         return
@@ -33,20 +32,20 @@ def do_search(self):
                 return
         except Exception as e:
             print(e)
-        time.sleep(getDelay())
+        await asyncio.sleep(getDelay())
         self.totalresults += self.results

-    def get_people(self):
+    async def get_people(self):
         rawres = myparser.Parser(self.totalresults, self.word)
-        return rawres.people_linkedin()
+        return await rawres.people_linkedin()

-    def get_links(self):
+    async def get_links(self):
         links = myparser.Parser(self.totalresults, self.word)
-        return splitter(links.links_linkedin())
+        return splitter(await links.links_linkedin())

-    def process(self):
+    async def process(self):
         while self.counter < self.limit:
-            self.do_search()
-            time.sleep(getDelay())
+            await self.do_search()
+            await asyncio.sleep(getDelay())
             self.counter += 100
             print(f'\tSearching {self.counter} results.')
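The `time.sleep(getDelay())` → `await asyncio.sleep(getDelay())` swap in these loops is more than cosmetic: `time.sleep` blocks the entire event loop and stalls every other coroutine, while `await asyncio.sleep` suspends only the current one. A small self-contained demonstration of the difference, independent of theHarvester:

```python
import asyncio
import time


async def ticker():
    # Prints roughly twice per second -- but only while the event loop is free.
    for _ in range(6):
        print('tick', time.strftime('%X'))
        await asyncio.sleep(0.5)


async def polite_delay():
    await asyncio.sleep(2)   # yields control: the ticker keeps ticking


async def rude_delay():
    time.sleep(2)            # blocks the loop: the ticker stalls for 2 seconds


async def main():
    # Swap polite_delay() for rude_delay() to watch the ticker freeze.
    await asyncio.gather(ticker(), polite_delay())

asyncio.run(main())
```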
@@ -2,7 +2,7 @@
 from theHarvester.parsers import myparser
-import requests
 import random
-import time
+import asyncio


 class SearchTrello:
@@ -18,54 +18,54 @@ def __init__(self, word):
         self.hostnames = []
         self.counter = 0

-    def do_search(self):
+    async def do_search(self):
         base_url = f'https://{self.server}/search?num=300&start=xx&hl=en&q=site%3Atrello.com%20{self.word}'
         urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 20) if num <= self.limit]
         # limit is 20 as that is the most results google will show per num
         headers = {'User-Agent': googleUA}
         for url in urls:
             try:
-                resp = requests.get(url, headers=headers)
-                self.results = resp.text
-                if search(self.results):
+                resp = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = resp[0]
+                if await search(self.results):
                     try:
-                        self.results = google_workaround(base_url)
+                        self.results = await google_workaround(base_url)
                         if isinstance(self.results, bool):
                             print('Google is blocking your ip and the workaround, returning')
                             return
                     except Exception as e:
                         print(e)
                 self.totalresults += self.results
-                time.sleep(getDelay() - .5)
+                await asyncio.sleep(getDelay() - .5)
             except Exception as e:
                 print(f'An exception has occurred in trello: {e}')

-    def get_emails(self):
+    async def get_emails(self):
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.emails()

-    def get_urls(self):
+    async def get_urls(self):
         try:
             rawres = myparser.Parser(self.totalresults, 'trello.com')
-            self.trello_urls = set(rawres.urls())
+            self.trello_urls = set(await rawres.urls())
             self.totalresults = ''
             # reset what totalresults as before it was just google results now it is trello results
             headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])}
             # do not change the headers
-            req = (grequests.get(url, headers=headers, timeout=4) for url in self.trello_urls)
-            responses = grequests.imap(req, size=8)
+            print('fetching trello urls')
+            responses = await AsyncFetcher.fetch_all(self.trello_urls, headers=headers)
             for response in responses:
-                self.totalresults += response.content.decode('UTF-8')
+                self.totalresults += response

             rawres = myparser.Parser(self.totalresults, self.word)
-            self.hostnames = rawres.hostnames()
+            self.hostnames = await rawres.hostnames()
         except Exception as e:
             print(f'Error occurred: {e}')

-    def process(self):
-        self.do_search()
-        self.get_urls()
+    async def process(self):
+        await self.do_search()
+        await self.get_urls()
         print(f'\tSearching {self.counter} results.')

-    def get_results(self) -> tuple:
-        return self.get_emails(), self.hostnames, self.trello_urls
+    async def get_results(self) -> tuple:
+        return await self.get_emails(), self.hostnames, self.trello_urls
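One behavioral difference worth noting in this file: the old Trello path throttled itself with `grequests.imap(req, size=8)`, i.e. at most eight requests in flight, while `AsyncFetcher.fetch_all` as used here issues every request at once. If the helper ever needs the same cap, an `asyncio.Semaphore` is the usual asyncio tool. A sketch under the assumption of aiohttp, mirroring the old `size=8` and `timeout=4` settings; the name and signature are invented for illustration, not the project's actual code:

```python
# Hypothetical bounded-concurrency variant of fetch_all; names are assumptions.
import asyncio
import aiohttp


async def fetch_all_bounded(urls, headers=None, limit: int = 8) -> list:
    # At most `limit` requests in flight, like grequests.imap(..., size=8).
    semaphore = asyncio.Semaphore(limit)

    async def fetch(session: aiohttp.ClientSession, url: str) -> str:
        async with semaphore:
            try:
                timeout = aiohttp.ClientTimeout(total=4)  # old grequests timeout
                async with session.get(url, timeout=timeout) as response:
                    return await response.text()
            except Exception:
                return ''

    async with aiohttp.ClientSession(headers=headers) as session:
        return await asyncio.gather(*(fetch(session, url) for url in urls))
```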
@@ -1,3 +1,4 @@
 from theHarvester.discovery.constants import *
+from theHarvester.lib.core import *
 from theHarvester.parsers import myparser
 import re
@@ -19,9 +20,19 @@ async def do_search(self):
         headers = {'User-Agent': Core.get_user_agent()}
         try:
             urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
-            responses = await AsyncFetcher.fetch_all(urls, headers=headers)
-            for response in responses:
-                self.totalresults += response
+            for url in urls:
+                response = await AsyncFetcher.fetch_all([url], headers=headers)
+                self.results = response[0]
+                if await search(self.results):
+                    try:
+                        self.results = await google_workaround(url)
+                        if isinstance(self.results, bool):
+                            print('Google is blocking your ip and the workaround, returning')
+                            return
+                    except Exception:
+                        # google blocked, no useful result
+                        return
+                self.totalresults += self.results
         except Exception as error:
             print(error)
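This last hunk trades the batched `fetch_all(urls, ...)` for a per-URL loop so that every page can be screened by the awaited `search()` block detector (with the `google_workaround` fallback) before its text is kept — slower, but a block on page one no longer poisons the whole batch. The detector itself is defined in `theHarvester.discovery.constants`, outside this diff; conceptually it is a substring test along these lines (an assumed stand-in, not the real code):

```python
# Hypothetical stand-in for theHarvester.discovery.constants.search();
# the real detector is outside this diff and may differ.
async def search(text: str) -> bool:
    # True when the response looks like a Google block or captcha page.
    markers = ('our systems have detected unusual traffic', 'captcha')
    return any(marker in text.lower() for marker in markers)
```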