Added a unit test; removed the splitter function, since the regex now handles duplicates.

This commit is contained in:
csparks 2019-09-07 02:58:57 +00:00
parent efa7bf8dab
commit 88e8e88874
3 changed files with 22 additions and 24 deletions

View file

@@ -0,0 +1,21 @@
#!/usr/bin/env python3
# coding=utf-8
import sys
sys.path.append("../../")
from theHarvester.parsers import myparser
from theHarvester.discovery import linkedinsearch
import pytest
from theHarvester.lib import stash
class TestGetLinks(object):
    """Smoke test for the LinkedIn discovery module's link harvesting."""

    def test_get_links(self):
        # Run a live LinkedIn search against a well-known domain and dump
        # every harvested link; this exercises the full search pipeline.
        engine = linkedinsearch.SearchLinkedin("facebook.com", '100')
        engine.process()
        for result in engine.get_links():
            print(result)
# Allow running this test module directly (outside a pytest invocation).
if __name__ == '__main__':
    pytest.main()

View file

@@ -4,29 +4,6 @@
googleUA = 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36'
def splitter(links):
    """
    Remove near-duplicate profile links from a list of URLs.

    Two URLs are treated as duplicates when the first two hyphen-separated
    parts of their last meaningful path segment match (e.g. both
    ``.../john-smith-1a2b`` and ``.../john-smith-3c4d`` reduce to
    ``johnsmith``).

    :param links: list of links to remove duplicates from
    :return: unique-ish list, preserving first-seen order
    """
    unique_list = []
    # Use a set for O(1) membership tests; the original list scan made the
    # loop O(n**2) on large result sets.
    seen_names = set()
    for url in links:
        tail = url.split("/")[-1]
        # Two-letter locale codes (or "zh-cn") are a language suffix, not the
        # profile slug -- fall back to the preceding path segment.
        if len(tail) == 2 or tail == "zh-cn":
            tail = url.split("/")[-2]
        name = tail.split("-")
        # Join at most the first two name parts; trailing parts are usually
        # uniquifying hashes that would defeat deduplication.
        if len(name) > 1:
            joined_name = name[0] + name[1]
        else:
            joined_name = name[0]
        if joined_name not in seen_names:
            unique_list.append(url)
            seen_names.add(joined_name)
    return unique_list
def filter(lst):
"""
Method that filters list

View file

@@ -36,7 +36,7 @@ def get_people(self):
def get_links(self):
    # NOTE(review): this span is diff residue -- the first return below is the
    # old code (removed in this commit) and the second is its replacement;
    # only one of them exists in the real file. Presumably dedup now happens
    # inside links_linkedin() via the regex -- TODO confirm against myparser.
    links = myparser.Parser(self.totalresults, self.word)
    return splitter(links.links_linkedin())
    return links.links_linkedin()
def process(self):
while self.counter < self.limit: