yt-dlp-bot/app_bot/bot/core/service.py

92 lines
3.3 KiB
Python
Raw Normal View History

2022-02-04 06:21:27 +08:00
import logging
2023-03-29 03:26:16 +08:00
import re
from itertools import product
2023-10-01 05:23:30 +08:00
from urllib.parse import urljoin, urlparse
2022-02-04 06:21:27 +08:00
2023-03-29 03:26:16 +08:00
from pyrogram.types import Message
2023-10-01 05:23:30 +08:00
from yt_shared.constants import REMOVE_QUERY_PARAMS_HOSTS
2023-03-29 03:26:16 +08:00
from yt_shared.enums import TaskSource, TelegramChatType
2023-04-02 05:20:26 +08:00
from yt_shared.rabbit.publisher import RmqPublisher
2023-04-11 02:36:10 +08:00
from yt_shared.schemas.media import InbMediaPayload
2022-06-14 04:46:54 +08:00
from yt_shared.schemas.url import URL
2022-02-04 06:21:27 +08:00
2024-03-14 05:51:30 +08:00
from bot.core.schema import UserSchema
2023-10-01 05:23:30 +08:00
from bot.core.utils import can_remove_url_params
2022-02-04 06:21:27 +08:00
2023-03-29 03:26:16 +08:00
class UrlService:
    """Hands parsed URLs over to the download worker via the RMQ publisher."""

    def __init__(self) -> None:
        self._log = logging.getLogger(self.__class__.__name__)
        self._rmq_publisher = RmqPublisher()

    async def process_urls(self, urls: list[URL]) -> None:
        """Publish every URL in ``urls``, one after another, in list order."""
        for item in urls:
            await self._send_to_worker(item)

    async def _send_to_worker(self, url: URL) -> bool:
        """Wrap ``url`` into an ``InbMediaPayload`` and publish it.

        Returns whatever ``send_for_download`` reports; a falsy result is
        logged as an error before being returned.
        """
        # Every field except ``source`` is copied one-to-one from the URL.
        fields = {
            'url': url.url,
            'original_url': url.original_url,
            'message_id': url.message_id,
            'ack_message_id': url.ack_message_id,
            'from_user_id': url.from_user_id,
            'from_chat_id': url.from_chat_id,
            'from_chat_type': url.from_chat_type,
            'source': TaskSource.BOT,
            'save_to_storage': url.save_to_storage,
            'download_media_type': url.download_media_type,
        }
        task = InbMediaPayload(**fields)

        published = await self._rmq_publisher.send_for_download(task)
        if published:
            return published

        self._log.error('Failed to publish URL %s to message broker', url.url)
        return published
2023-03-29 03:26:16 +08:00
class UrlParser:
    """Filters raw URL strings and converts them into ``URL`` schema objects."""

    def __init__(self) -> None:
        self._log = logging.getLogger(self.__class__.__name__)

    @staticmethod
    def _strip_url_params(url: str) -> str:
        """Return ``url`` without its params, query string and fragment.

        ``urljoin(url, path)`` alone is not sufficient: it returns ``url``
        untouched when the path is empty, and it treats a path starting with
        ``//`` as a network-path reference, which replaces the host.
        """
        parts = urlparse(url)
        path = parts.path
        if path.startswith('/') and not path.startswith('//'):
            # Ordinary absolute path: urljoin is safe here and keeps its
            # dot-segment normalisation.
            return urljoin(url, path)
        # Empty, relative or '//'-prefixed path: rebuild from the components.
        return parts._replace(params='', query='', fragment='').geturl()

    @staticmethod
    def _preprocess_urls(urls: list[str]) -> dict[str, str]:
        """Map each original URL to the URL that should be downloaded.

        A URL is stripped of its query parameters only when
        ``can_remove_url_params`` allows it for ``REMOVE_QUERY_PARAMS_HOSTS``;
        otherwise it maps to itself.
        """
        return {
            url: (
                UrlParser._strip_url_params(url)
                if can_remove_url_params(url, REMOVE_QUERY_PARAMS_HOSTS)
                else url
            )
            for url in urls
        }

    def parse_urls(
        self, urls: list[str], context: dict[str, Message | UserSchema]
    ) -> list[URL]:
        """Build one ``URL`` object per distinct input URL.

        ``context`` must provide the keys ``'message'``, ``'user'`` and
        ``'ack_message'``. ``from_user_id`` is ``None`` when the message has
        no ``from_user``.
        """
        message: Message = context['message']
        user: UserSchema = context['user']
        ack_message: Message = context['ack_message']
        from_user_id = message.from_user.id if message.from_user else None
        return [
            URL(
                url=url,
                original_url=orig_url,
                from_chat_id=message.chat.id,
                from_chat_type=TelegramChatType(message.chat.type.value),
                from_user_id=from_user_id,
                message_id=message.id,
                ack_message_id=ack_message.id,
                save_to_storage=user.save_to_storage,
                download_media_type=user.download_media_type,
            )
            for orig_url, url in self._preprocess_urls(urls).items()
        ]

    def filter_urls(self, urls: list[str], regexes: list[str]) -> list[str]:
        """Return the URLs that match at least one of ``regexes``.

        Matching uses ``re.match`` (anchored at the start of the URL). The
        result has no duplicates and keeps the order of first appearance in
        ``urls``.
        """
        self._log.debug('Matching urls: %s against regexes %s', urls, regexes)
        # dict.fromkeys de-duplicates while preserving insertion order.
        valid_urls = [
            url
            for url in dict.fromkeys(urls)
            if any(re.match(regex, url) for regex in regexes)
        ]
        self._log.debug('Matched urls: %s', valid_urls)
        return valid_urls