diff --git a/theHarvester/__main__.py b/theHarvester/__main__.py index 8d87cf47..ebbcdbb5 100644 --- a/theHarvester/__main__.py +++ b/theHarvester/__main__.py @@ -53,9 +53,9 @@ def start(): parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true') parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str) parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster, - dogpile, duckduckgo, github-code, google, + dogpile, duckduckgo, github-code, google, hunter, intelx, - linkedin, netcraft, securityTrails, threatcrowd, + linkedin,linkedin_links, netcraft, securityTrails, threatcrowd, trello, twitter, vhost, virustotal, yahoo, all''') parser.add_argument('-x', '--exclude', help='exclude options when using all sources', type=str) args = parser.parse_args() @@ -279,6 +279,22 @@ def start(): for user in sorted(list(set(people))): print(user) + elif engineitem == 'linkedin_links': + print('\033[94m[*] Searching Linkedin. \033[0m') + search = linkedinsearch.SearchLinkedin(word, limit) + search.process() + people = search.get_links() + db = stash.stash_manager() + db.store_all(word, people, 'name', 'linkedin') + + if len(people) == 0: + print('\n[*] No links found Linkedin.\n\n') + else: + print(f'\n[*] Links found: {len(people)}') + print('---------------------') + for user in sorted(list(set(people))): + print(user) + elif engineitem == 'netcraft': print('\033[94m[*] Searching Netcraft. 
def splitter(links):
    """
    Method that tries to remove duplicates from a collection of profile URLs.

    Two URLs count as duplicates when the first two dash-separated tokens of
    their final path segment are identical (for example ``john-doe-123`` and
    ``john-doe-456`` both reduce to ``johndoe``). Only the first URL seen for
    each key is kept, so the result depends on the iteration order of
    ``links``.

    :param links: iterable of links to remove duplicates from
    :return: unique-ish list, in the order the links were iterated
    """
    unique_list = []
    # A set keeps the "already seen" check O(1) instead of scanning a list.
    seen_names = set()
    for url in links:
        # Drop trailing slashes first; otherwise the final segment is the
        # empty string and every such URL would collapse onto the same key.
        parts = url.rstrip("/").split("/")
        tail = parts[-1]
        # A two-character or "zh-cn" final segment is treated as a locale
        # suffix, so the name is taken from the segment before it. The length
        # guard avoids an IndexError when there is no preceding segment.
        if (len(tail) == 2 or tail == "zh-cn") and len(parts) > 1:
            tail = parts[-2]
        # Key on at most the first two dash-separated tokens of the segment.
        joined_name = "".join(tail.split("-")[:2])
        if joined_name not in seen_names:
            seen_names.add(joined_name)
            unique_list.append(url)
    return unique_list
def links_linkedin(self):
    """
    Extract LinkedIn URLs from the raw search results.

    Finds every ``url=https://www.linkedin.com<path>&`` occurrence in
    ``self.results`` and rebuilds the absolute URL from the captured path.
    The raw regex matches are also stored on ``self.temp``.

    :return: set of unique ``https://www.linkedin.com...`` URLs
    """
    # Raw string: no invalid "\/" escape sequences, and the dots are escaped
    # so only the literal host "www.linkedin.com" matches.
    reg_links = re.compile(r"url=https://www\.linkedin\.com(.*?)&")
    self.temp = reg_links.findall(self.results)
    # Any "url=" left inside the captured path is removed before the host is
    # prepended, as the original loop did.
    return {
        "https://www.linkedin.com" + path.replace("url=", "")
        for path in self.temp
    }