# Mirror of https://github.com/laramies/theHarvester.git
# (synced 2025-02-25 15:03:01 +08:00; 41 lines, 1.3 KiB, Python)
from bs4 import BeautifulSoup
|
|
import re
|
|
|
|
|
|
class parser:
    """Extract hostnames, IP addresses and a page count from an HTML page.

    The markup is taken from ``results.results`` and parsed once, in
    ``__init__``, with BeautifulSoup's ``html.parser`` backend. Each
    ``search_*`` method then queries that parsed tree, stores what it found
    on the instance and returns it.

    Every ``search_*`` method recomputes its value from scratch, so calling
    one repeatedly yields the same answer instead of accumulating duplicates.
    """

    def __init__(self, results):
        """Parse the HTML held by *results* and reset all collected data.

        Args:
            results: Object whose ``results`` attribute holds the HTML markup
                to parse (presumably the search module that downloaded the
                page -- confirm against the caller).
        """
        self.results = results
        self.ipaddresses = []
        self.soup = BeautifulSoup(results.results, features="html.parser")
        self.hostnames = []
        # Not populated by any method of this class.
        self.urls = []
        self.numberofpages = 0

    def search_hostnames(self):
        """Collect the text of every ``<tt>`` element in the page.

        Returns:
            list: The text of each ``<tt>`` element, in document order. The
            same list is stored in ``self.hostnames``. If the lookup fails,
            the error is printed and an empty list is returned.
        """
        try:
            self.hostnames = [tag.text for tag in self.soup.find_all('tt')]
        except Exception as e:
            # Best-effort parsing: report the problem and hand back an empty
            # result rather than propagating the error to the caller.
            print("Error occurred: " + str(e))
            self.hostnames = []
        return self.hostnames

    def search_ipaddresses(self):
        """Collect the text of every ``SearchResult__title-text`` link.

        Returns:
            list: The whitespace-stripped text of each ``<a>`` element whose
            CSS class is ``SearchResult__title-text``, in document order. The
            same list is stored in ``self.ipaddresses``. If the lookup fails,
            the error is printed and an empty list is returned.
        """
        try:
            # A string passed as the second positional argument is matched
            # against the element's CSS class.
            anchors = self.soup.find_all('a', 'SearchResult__title-text')
            self.ipaddresses = [anchor.text.strip() for anchor in anchors]
        except Exception as e:
            # Best-effort parsing: report the problem and hand back an empty
            # result rather than propagating the error to the caller.
            print("Error occurred: " + str(e))
            self.ipaddresses = []
        return self.ipaddresses

    def search_numberofpages(self):
        """Count the pagination links in the page.

        Every element whose ``href`` contains ``page`` is counted, except the
        one whose text is exactly ``next``.

        Returns:
            int: The number of matching links. The same value is stored in
            ``self.numberofpages``. If the lookup fails, the error is printed
            and ``0`` is returned.
        """
        try:
            page_links = self.soup.find_all(href=re.compile("page"))
            # The "next" link points at a page that is already counted via
            # its own numbered link, so it is left out.
            self.numberofpages = sum(
                1 for link in page_links if link.text != 'next'
            )
        except Exception as e:
            # Best-effort parsing: report the problem and hand back zero
            # rather than propagating the error to the caller.
            print("Error occurred: " + str(e))
            self.numberofpages = 0
        return self.numberofpages