mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-21 07:46:32 +08:00
added option to return linkedin links
This commit is contained in:
parent
1f4bc12de4
commit
4806751470
|
@ -53,9 +53,9 @@ def start():
|
|||
parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
|
||||
parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
|
||||
parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
|
||||
dogpile, duckduckgo, github-code, google,
|
||||
dogpile, duckduckgo, github-code, google,
|
||||
hunter, intelx,
|
||||
linkedin, netcraft, securityTrails, threatcrowd,
|
||||
linkedin,linkedin_links, netcraft, securityTrails, threatcrowd,
|
||||
trello, twitter, vhost, virustotal, yahoo, all''')
|
||||
parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str)
|
||||
args = parser.parse_args()
|
||||
|
@ -279,6 +279,22 @@ def start():
|
|||
for user in sorted(list(set(people))):
|
||||
print(user)
|
||||
|
||||
elif engineitem == 'linkedin_links':
|
||||
print('\033[94m[*] Searching Linkedin. \033[0m')
|
||||
search = linkedinsearch.SearchLinkedin(word, limit)
|
||||
search.process()
|
||||
people = search.get_links()
|
||||
db = stash.stash_manager()
|
||||
db.store_all(word, people, 'name', 'linkedin')
|
||||
|
||||
if len(people) == 0:
|
||||
print('\n[*] No links found Linkedin.\n\n')
|
||||
else:
|
||||
print(f'\n[*] Links found: {len(people)}')
|
||||
print('---------------------')
|
||||
for user in sorted(list(set(people))):
|
||||
print(user)
|
||||
|
||||
elif engineitem == 'netcraft':
|
||||
print('\033[94m[*] Searching Netcraft. \033[0m')
|
||||
search = netcraft.SearchNetcraft(word)
|
||||
|
|
|
@ -4,6 +4,28 @@
|
|||
googleUA = 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36'
|
||||
|
||||
|
||||
def splitter(links):
    """
    Method that tries to remove duplicates

    Two links are treated as duplicates when the first two hyphen-separated
    tokens of their last path segment are equal (e.g. ``john-doe-123`` and
    ``john-doe-456`` both reduce to ``johndoe``). When the last segment is two
    characters long or equals ``zh-cn`` (presumably a locale suffix), the
    segment before it is used instead. The first link seen for each key wins
    and input order is preserved.

    :param links: iterable of link strings to remove duplicates from
    :return: unique-ish list
    """
    unique_list = []
    seen_names = set()  # set membership keeps the scan linear
    for url in links:
        # Drop trailing slashes first; otherwise the last segment is empty and
        # every URL ending in "/" would collapse onto the same empty key.
        segments = url.rstrip("/").split("/")
        tail = segments[-1]
        # Step back over a locale-like suffix, but only if a previous segment
        # actually exists (avoids IndexError on slash-less input).
        if (len(tail) == 2 or tail == "zh-cn") and len(segments) > 1:
            tail = segments[-2]
        name = tail.split("-")
        joined_name = name[0] + name[1] if len(name) > 1 else name[0]
        if joined_name not in seen_names:
            unique_list.append(url)
            seen_names.add(joined_name)
    return unique_list
|
||||
|
||||
def filter(lst):
|
||||
"""
|
||||
Method that filters list
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
import requests
|
||||
import time
|
||||
|
||||
|
||||
class SearchLinkedin:
|
||||
|
||||
def __init__(self, word, limit):
|
||||
|
@ -34,9 +33,20 @@ def get_people(self):
|
|||
rawres = myparser.Parser(self.totalresults, self.word)
|
||||
return rawres.people_linkedin()
|
||||
|
||||
def get_links(self):
    """
    Parse the accumulated results for LinkedIn links and de-duplicate them.

    Builds a ``myparser.Parser`` from ``self.totalresults`` and ``self.word``,
    extracts the links with ``links_linkedin()`` and passes them through
    ``splitter``.

    :return: whatever ``splitter`` returns for the extracted links
    """
    parser = myparser.Parser(self.totalresults, self.word)
    found_links = parser.links_linkedin()
    return splitter(found_links)
|
||||
|
||||
|
||||
def process(self):
    """
    Run searches until ``self.counter`` reaches ``self.limit``.

    Each pass calls ``self.do_search()``, sleeps for ``getDelay()`` seconds,
    advances the counter by 100 and prints the progress.
    """
    while self.counter < self.limit:
        self.do_search()
        pause = getDelay()
        time.sleep(pause)
        self.counter = self.counter + 100
        print(f'\tSearching {self.counter} results.')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ def get_supportedengines():
|
|||
'hunter',
|
||||
'intelx',
|
||||
'linkedin',
|
||||
'linkedin_links',
|
||||
'netcraft',
|
||||
'securityTrails',
|
||||
'threatcrowd',
|
||||
|
|
|
@ -92,11 +92,22 @@ def hostnames_all(self):
|
|||
hostnames = self.unique()
|
||||
return hostnames
|
||||
|
||||
def links_linkedin(self):
    """
    Extract LinkedIn links from ``self.results``.

    Finds every ``url=https://www.linkedin.com<path>&`` occurrence, stores the
    captured paths in ``self.temp`` and rebuilds each full URL.

    :return: set of ``https://www.linkedin.com<path>`` strings
    """
    # Raw string with escaped dots: the host must match literally, and there
    # is no invalid "\/" escape sequence for the compiler to warn about.
    reg_links = re.compile(r"url=https://www\.linkedin\.com(.*?)&")
    self.temp = reg_links.findall(self.results)
    # The replace is kept from the original: it drops any literal "url="
    # left inside the captured path.
    return {
        "https://www.linkedin.com" + path.replace("url=", "")
        for path in self.temp
    }
|
||||
|
||||
def people_linkedin(self):
|
||||
reg_people = re.compile(r'">[a-zA-Z0-9._ -]* \| LinkedIn')
|
||||
self.temp = reg_people.findall(self.results)
|
||||
resul = []
|
||||
for x in self.temp:
|
||||
|
||||
|
||||
for x in (self.temp):
|
||||
y = x.replace(' | LinkedIn', '')
|
||||
y = y.replace(' profiles ', '')
|
||||
y = y.replace('LinkedIn', '')
|
||||
|
|
Loading…
Reference in a new issue