Added a unit test; removed the splitter function, since the regex now handles duplicates.

This commit is contained in:
csparks 2019-09-07 02:58:57 +00:00
parent efa7bf8dab
commit 88e8e88874
3 changed files with 22 additions and 24 deletions

View file

@@ -0,0 +1,21 @@
#!/usr/bin/env python3
# coding=utf-8
import sys
sys.path.append("../../")
from theHarvester.parsers import myparser
from theHarvester.discovery import linkedinsearch
import pytest
from theHarvester.lib import stash
class TestGetLinks(object):
    """Smoke test for the LinkedIn discovery module's link harvesting."""

    def test_get_links(self):
        # Run a live LinkedIn search against a well-known domain and dump
        # every harvested link; this exercises the full search pipeline.
        engine = linkedinsearch.SearchLinkedin("facebook.com", '100')
        engine.process()
        for result in engine.get_links():
            print(result)
# Allow running this test module directly (outside a pytest invocation).
if __name__ == '__main__':
    pytest.main()

View file

@@ -4,29 +4,6 @@
googleUA = 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36'
def splitter(links):
    """
    Remove near-duplicate profile links from a list of URLs.

    Two URLs are treated as duplicates when the first two hyphen-separated
    parts of their last meaningful path segment match (e.g. both
    ``.../john-smith-1a2b`` and ``.../john-smith-3c4d`` reduce to
    ``johnsmith``).

    :param links: list of links to remove duplicates from
    :return: unique-ish list, preserving first-seen order
    """
    unique_list = []
    # Use a set for O(1) membership tests; the original list scan made the
    # loop O(n**2) on large result sets.
    seen_names = set()
    for url in links:
        tail = url.split("/")[-1]
        # Two-letter locale codes (or "zh-cn") are a language suffix, not the
        # profile slug -- fall back to the preceding path segment.
        if len(tail) == 2 or tail == "zh-cn":
            tail = url.split("/")[-2]
        name = tail.split("-")
        # Join at most the first two name parts; trailing parts are usually
        # uniquifying hashes that would defeat deduplication.
        if len(name) > 1:
            joined_name = name[0] + name[1]
        else:
            joined_name = name[0]
        if joined_name not in seen_names:
            unique_list.append(url)
            seen_names.add(joined_name)
    return unique_list
def filter(lst):
"""
Method that filters list

View file

@@ -36,7 +36,7 @@ def get_people(self):
def get_links(self):
    # NOTE(review): this span is diff residue -- the first return below is the
    # old code (removed in this commit) and the second is its replacement;
    # only one of them exists in the real file. Presumably dedup now happens
    # inside links_linkedin() via the regex -- TODO confirm against myparser.
    links = myparser.Parser(self.totalresults, self.word)
    return splitter(links.links_linkedin())
    return links.links_linkedin()
def process(self):
while self.counter < self.limit: