mirror of
https://github.com/laramies/theHarvester.git
synced 2024-09-20 15:26:31 +08:00
#1383: need to figure out the page_from_response method
This commit is contained in:
parent
0b371c5484
commit
b9af76f4c3
|
@ -1,7 +1,7 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import random
|
import random
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
|
from typing import Any, Dict, List, NamedTuple, Tuple, Union
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ class RetryResult(NamedTuple):
|
||||||
|
|
||||||
class SuccessResult(NamedTuple):
|
class SuccessResult(NamedTuple):
|
||||||
fragments: List[str]
|
fragments: List[str]
|
||||||
next_page: Optional[Any]
|
next_page: Union[int, None]
|
||||||
last_page: Optional[Any]
|
last_page: int
|
||||||
|
|
||||||
|
|
||||||
class ErrorResult(NamedTuple):
|
class ErrorResult(NamedTuple):
|
||||||
|
@ -53,7 +53,7 @@ async def fragments_from_response(json_data: dict) -> List[str]:
|
||||||
return [fragment for fragment in fragments if fragment is not None]
|
return [fragment for fragment in fragments if fragment is not None]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def page_from_response(page: str, links) -> Optional[Any]:
|
async def page_from_response(page: str, links) -> Union[int, None]:
|
||||||
page_link = links.get(page)
|
page_link = links.get(page)
|
||||||
if page_link:
|
if page_link:
|
||||||
parsed = urlparse.urlparse(str(page_link.get("url")))
|
parsed = urlparse.urlparse(str(page_link.get("url")))
|
||||||
|
@ -71,6 +71,7 @@ async def handle_response(
|
||||||
if status == 200:
|
if status == 200:
|
||||||
results = await self.fragments_from_response(json_data)
|
results = await self.fragments_from_response(json_data)
|
||||||
next_page = await self.page_from_response("next", links)
|
next_page = await self.page_from_response("next", links)
|
||||||
|
# TODO: figure out which int represents the last page
|
||||||
last_page = await self.page_from_response("last", links)
|
last_page = await self.page_from_response("last", links)
|
||||||
return SuccessResult(results, next_page, last_page)
|
return SuccessResult(results, next_page, last_page)
|
||||||
elif status == 429 or status == 403:
|
elif status == 429 or status == 403:
|
||||||
|
|
Loading…
Reference in a new issue