mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-20 07:16:31 +08:00
WIP stuff
This commit is contained in:
parent
ec45f7bd2b
commit
20cf7a677f
|
@ -3,7 +3,6 @@ RUN mkdir /app
|
|||
WORKDIR /app
|
||||
COPY . /app
|
||||
RUN apt-get -qq update
|
||||
RUN apt-get install -yqq python3-pip
|
||||
RUN pip3 install -r requirements.txt
|
||||
RUN apt-get install -yqq theharvester
|
||||
RUN chmod +x *.py
|
||||
ENTRYPOINT ["/app/theHarvester.py"]
|
||||
|
|
6
Pipfile.lock
generated
6
Pipfile.lock
generated
|
@ -301,10 +301,10 @@
|
|||
},
|
||||
"soupsieve": {
|
||||
"hashes": [
|
||||
"sha256:605f89ad5fdbfefe30cdc293303665eff2d188865d4dbe4eb510bba1edfbfce3",
|
||||
"sha256:b91d676b330a0ebd5b21719cb6e9b57c57d433671f65b9c28dd3461d9a1ed0b6"
|
||||
"sha256:bdb0d917b03a1369ce964056fc195cfdff8819c40de04695a80bc813c3cfa1f5",
|
||||
"sha256:e2c1c5dee4a1c36bcb790e0fabd5492d874b8ebd4617622c4f6a731701060dda"
|
||||
],
|
||||
"version": "==1.9.4"
|
||||
"version": "==1.9.5"
|
||||
},
|
||||
"texttable": {
|
||||
"hashes": [
|
||||
|
|
|
@ -12,6 +12,7 @@ def __init__(self, word):
|
|||
self.totalresults = ""
|
||||
self.server = 'www.google.com'
|
||||
self.start = 0
|
||||
self.links = set()
|
||||
|
||||
def do_search(self):
|
||||
filetype = ['doc', 'docx', 'pdf', 'ppt', 'pptx', 'txt', 'xls', 'xlsx']
|
||||
|
@ -27,19 +28,22 @@ def do_search(self):
|
|||
page = requests.get(url, headers=headers)
|
||||
tree = html.fromstring(page.content)
|
||||
self.results = tree.xpath('//*[@class="r"]/a/@href')
|
||||
|
||||
#print('results: ', self.results)
|
||||
for link in self.results:
|
||||
match = re.search(regex, link)
|
||||
if match:
|
||||
self.totalresults += match.group('urls')
|
||||
#print('type: ', type(match.group('urls')))
|
||||
self.links.update(set(list(match.group('urls'))))
|
||||
# print(match.group('urls'))
|
||||
else:
|
||||
self.totalresults += f'{link}'
|
||||
|
||||
# print('not matched')
|
||||
# print(f'{link}')
|
||||
self.links.add(link)
|
||||
if self.results:
|
||||
self.start += 100
|
||||
|
||||
def get_links(self):
|
||||
return self.totalresults
|
||||
def get_links(self) -> set:
|
||||
return self.links
|
||||
|
||||
def process(self):
|
||||
self.do_search()
|
||||
|
|
Loading…
Reference in a new issue