From 88e8e88874b91da229bbde3ab76ff9816a2ac3d1 Mon Sep 17 00:00:00 2001 From: csparks Date: Sat, 7 Sep 2019 02:58:57 +0000 Subject: [PATCH] added unit test; removed splitter function since regex now handles dupes --- tests/discovery/test_linkedin_links.py | 21 +++++++++++++++++++++ theHarvester/discovery/constants.py | 23 ----------------------- theHarvester/discovery/linkedinsearch.py | 2 +- 3 files changed, 22 insertions(+), 24 deletions(-) create mode 100644 tests/discovery/test_linkedin_links.py diff --git a/tests/discovery/test_linkedin_links.py b/tests/discovery/test_linkedin_links.py new file mode 100644 index 00000000..e03f8860 --- /dev/null +++ b/tests/discovery/test_linkedin_links.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# coding=utf-8 +import sys +sys.path.append("../../") +from theHarvester.parsers import myparser +from theHarvester.discovery import linkedinsearch +import pytest +from theHarvester.lib import stash + + +class TestGetLinks(object): + + def test_get_links(self): + search = linkedinsearch.SearchLinkedin("facebook.com", '100') + search.process() + links = search.get_links() + for link in links: + print(link) + +if __name__ == '__main__': + pytest.main() diff --git a/theHarvester/discovery/constants.py b/theHarvester/discovery/constants.py index 5f4c55dd..7549c4c6 100644 --- a/theHarvester/discovery/constants.py +++ b/theHarvester/discovery/constants.py @@ -4,29 +4,6 @@ googleUA = 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36' -def splitter(links): - """ - Method that tries to remove duplicates - :param links: list of links to remove duplicates from - :return: unique-ish list - """ - unique_list = [] - name_check = [] - for url in links: - tail = url.split("/")[-1] - if len(tail) == 2 or tail == "zh-cn": - tail = url.split("/")[-2] - name = tail.split("-") - if len(name) > 1: - joined_name = name[0] + name[1] - else: - joined_name = name[0] - if joined_name not in name_check: - unique_list.append(url) - name_check.append(joined_name) - return unique_list - - def filter(lst): """ Method that filters list diff --git a/theHarvester/discovery/linkedinsearch.py b/theHarvester/discovery/linkedinsearch.py index ef774df3..22a58699 100644 --- a/theHarvester/discovery/linkedinsearch.py +++ b/theHarvester/discovery/linkedinsearch.py @@ -36,7 +36,7 @@ def get_people(self): def get_links(self): links = myparser.Parser(self.totalresults, self.word) - return splitter(links.links_linkedin()) + return links.links_linkedin() def process(self): while self.counter < self.limit: