mirror of
https://github.com/laramies/theHarvester.git
synced 2025-02-24 06:22:57 +08:00
added unit test; removed splitter function since regex now handles dupes
This commit is contained in:
parent
efa7bf8dab
commit
88e8e88874
3 changed files with 22 additions and 24 deletions
21
tests/discovery/test_linkedin_links.py
Normal file
21
tests/discovery/test_linkedin_links.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
import sys
|
||||
sys.path.append("../../")
|
||||
from theHarvester.parsers import myparser
|
||||
from theHarvester.discovery import linkedinsearch
|
||||
import pytest
|
||||
from theHarvester.lib import stash
|
||||
|
||||
|
||||
class TestGetLinks(object):
    """Smoke test for collecting LinkedIn links through ``SearchLinkedin``."""

    def test_get_links(self):
        """Run a search for ``facebook.com`` and print every link it returns."""
        searcher = linkedinsearch.SearchLinkedin("facebook.com", '100')
        searcher.process()
        # There are no assertions: the test only fails if a call above raises.
        for found in searcher.get_links():
            print(found)
|
||||
|
||||
if __name__ == '__main__':
    # Run only this file (plus any command-line options) and hand pytest's
    # exit code back to the shell so a failing run is visible to the caller.
    sys.exit(pytest.main([__file__, *sys.argv[1:]]))
|
|
@ -4,29 +4,6 @@
|
|||
googleUA = 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36'
|
||||
|
||||
|
||||
def splitter(links):
    """
    Method that tries to remove duplicates

    Two links count as duplicates when the first two dash-separated words of
    their last path segment match (``john-doe-123`` and ``john-doe-456`` both
    reduce to ``johndoe``). A final segment that is two characters long or
    equal to ``zh-cn`` is skipped and the segment before it is used instead.

    :param links: list of links to remove duplicates from
    :return: unique-ish list, keeping the first link seen for each name
    """
    unique_list = []
    # A set gives constant-time membership checks; unique_list keeps the order.
    seen_names = set()
    for url in links:
        # Ignore trailing slashes, otherwise every such link ends in an empty
        # segment and all of them collapse into a single entry.
        segments = url.rstrip("/").split("/")
        tail = segments[-1]
        # Step back over the suffix only when there is a segment to step back
        # to, so a bare two-character string does not raise IndexError.
        if (len(tail) == 2 or tail == "zh-cn") and len(segments) > 1:
            tail = segments[-2]
        # First two dash-separated words (or the only word) form the key.
        joined_name = "".join(tail.split("-")[:2])
        if joined_name not in seen_names:
            unique_list.append(url)
            seen_names.add(joined_name)
    return unique_list
|
||||
|
||||
|
||||
def filter(lst):
|
||||
"""
|
||||
Method that filters list
|
||||
|
|
|
@ -36,7 +36,7 @@ def get_people(self):
|
|||
|
||||
def get_links(self):
    """
    Return the LinkedIn links parsed from the collected results

    :return: the value produced by ``Parser.links_linkedin()``
    """
    # Per the commit note, duplicate handling now lives in the parser's
    # regex, so the parsed links are returned without a splitter() pass.
    parser = myparser.Parser(self.totalresults, self.word)
    return parser.links_linkedin()
|
||||
|
||||
def process(self):
|
||||
while self.counter < self.limit:
|
||||
|
|
Loading…
Reference in a new issue