diff --git a/theHarvester/discovery/threatcrowd.py b/theHarvester/discovery/threatcrowd.py
index 2b396c4e..cbdbcda3 100644
--- a/theHarvester/discovery/threatcrowd.py
+++ b/theHarvester/discovery/threatcrowd.py
@@ -1,3 +1,4 @@
+from typing import Coroutine
from theHarvester.lib.core import *
from theHarvester.parsers import myparser
@@ -19,7 +20,7 @@ async def do_search(self):
print(e)
self.totalresults += self.results
- async def get_hostnames(self) -> set:
+ async def get_hostnames(self) -> Coroutine:
return myparser.Parser(self.results, self.word).hostnames()
async def process(self):
diff --git a/theHarvester/discovery/twittersearch.py b/theHarvester/discovery/twittersearch.py
index e4a04e7b..0c18e790 100644
--- a/theHarvester/discovery/twittersearch.py
+++ b/theHarvester/discovery/twittersearch.py
@@ -14,21 +14,21 @@ def __init__(self, word, limit):
self.limit = int(limit)
self.counter = 0
- def do_search(self):
+ async def do_search(self):
base_url = f'https://{self.server}/search?num=100&start=xx&hl=en&meta=&q=site%3Atwitter.com%20intitle%3A%22on+Twitter%22%20{self.word}'
headers = {'User-Agent': Core.get_user_agent()}
try:
urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit]
- request = (grequests.get(url, headers=headers) for url in urls)
- response = grequests.imap(request, size=5)
+            # fetch every paginated search URL concurrently with the async helper
+            response = await AsyncFetcher.fetch_all(urls, headers=headers)
for entry in response:
- self.totalresults += entry.content.decode('UTF-8')
+ self.totalresults += entry
except Exception as error:
print(error)
- def get_people(self):
+ async def get_people(self):
rawres = myparser.Parser(self.totalresults, self.word)
- to_parse = rawres.people_twitter()
+ to_parse = await rawres.people_twitter()
# fix invalid handles that look like @user other_output
handles = set()
for handle in to_parse:
@@ -37,5 +37,5 @@ def get_people(self):
handles.add(result.group(0))
return handles
- def process(self):
- self.do_search()
+ async def process(self):
+ await self.do_search()
diff --git a/theHarvester/parsers/myparser.py b/theHarvester/parsers/myparser.py
index 16a74865..63feb6ba 100644
--- a/theHarvester/parsers/myparser.py
+++ b/theHarvester/parsers/myparser.py
@@ -8,7 +8,7 @@ def __init__(self, results, word):
self.word = word
self.temp = []
- def genericClean(self):
+ async def genericClean(self):
         self.results = self.results.replace('<em>', '').replace('<b>', '').replace('</b>', '').replace('</em>', '')\
             .replace('%2f', '').replace('%3a', '').replace('<strong>', '').replace('</strong>', '')\
             .replace('<wbr>', '').replace('</wbr>', '')
@@ -16,13 +16,13 @@ def genericClean(self):
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C', '/', '\\'):
self.results = self.results.replace(search, ' ')
- def urlClean(self):
+ async def urlClean(self):
         self.results = self.results.replace('<em>', '').replace('</em>', '').replace('%2f', '').replace('%3a', '')
for search in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
self.results = self.results.replace(search, ' ')
- def emails(self):
- self.genericClean()
+ async def emails(self):
+ await self.genericClean()
# Local part is required, charset is flexible.
# https://tools.ietf.org/html/rfc6531 (removed * and () as they provide FP mostly)
reg_emails = re.compile(r'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' + '@' + '[a-zA-Z0-9.-]*' + self.word.replace('www.', ''))
@@ -33,7 +33,7 @@ def emails(self):
# if email starts with dot shift email string and make sure all emails are lowercase
return true_emails
- def fileurls(self, file):
+ async def fileurls(self, file):
urls = []
         reg_urls = re.compile('<a href="(.*?)"')
         self.temp = reg_urls.findall(self.results)

-    def people_googleplus(self):
+    async def people_googleplus(self):
         self.results = re.sub('</b>', '', self.results)
         self.results = re.sub('<b>', '', self.results)
         reg_people = re.compile(r'>[a-zA-Z0-9._ ]* - Google\+')
@@ -71,7 +71,7 @@ def people_googleplus(self):
resul.append(delete)
return resul
- def hostnames_all(self):
+ async def hostnames_all(self):
         reg_hosts = re.compile('<cite>(.*?)</cite>')
temp = reg_hosts.findall(self.results)
for iteration in temp:
@@ -83,7 +83,7 @@ def hostnames_all(self):
hostnames = self.unique()
return hostnames
- def links_linkedin(self):
+ async def links_linkedin(self):
reg_links = re.compile(r"url=https:\/\/www\.linkedin.com(.*?)&")
self.temp = reg_links.findall(self.results)
resul = []
@@ -92,7 +92,7 @@ def links_linkedin(self):
resul.append("https://www.linkedin.com" + final_url)
return resul
- def people_linkedin(self):
+ async def people_linkedin(self):
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* \| LinkedIn')
self.temp = reg_people.findall(self.results)
resul = []
@@ -106,7 +106,7 @@ def people_linkedin(self):
resul.append(delete)
return resul
- def people_twitter(self):
+ async def people_twitter(self):
reg_people = re.compile(r'(@[a-zA-Z0-9._ -]*)')
self.temp = reg_people.findall(self.results)
users = self.unique()
@@ -121,7 +121,7 @@ def people_twitter(self):
resul.append(delete)
return resul
- def profiles(self):
+ async def profiles(self):
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* - Google Profile')
self.temp = reg_people.findall(self.results)
resul = []
@@ -133,7 +133,7 @@ def profiles(self):
resul.append(delete)
return resul
- def set(self):
+ async def set(self):
reg_sets = re.compile(r'>[a-zA-Z0-9]*')
self.temp = reg_sets.findall(self.results)
sets = []
@@ -143,10 +143,10 @@ def set(self):
sets.append(delete)
return sets
- def urls(self):
+ async def urls(self):
found = re.finditer(r'(http|https)://(www\.)?trello.com/([a-zA-Z0-9\-_\.]+/?)*', self.results)
urls = {match.group().strip() for match in found}
return urls
- def unique(self) -> list:
+ async def unique(self) -> list:
return list(set(self.temp))