mirror of
https://github.com/tropicoo/yt-dlp-bot.git
synced 2024-09-20 06:46:08 +08:00
Refactor
This commit is contained in:
parent
39b59d9efc
commit
b5f2031cbc
|
@ -39,13 +39,13 @@ class TelegramCallback:
|
|||
self._log.debug('No urls to download, skipping message')
|
||||
return
|
||||
|
||||
acknowledge_message = await self._send_acknowledge_message(
|
||||
ack_message = await self._send_acknowledge_message(
|
||||
message=message, url_count=len(urls)
|
||||
)
|
||||
context = {
|
||||
'message': message,
|
||||
'user': user,
|
||||
'acknowledge_message': acknowledge_message,
|
||||
'ack_message': ack_message,
|
||||
}
|
||||
url_objects = self._url_parser.parse_urls(urls=urls, context=context)
|
||||
await self._url_service.process_urls(url_objects)
|
||||
|
|
|
@ -9,7 +9,8 @@ from yt_shared.rabbit.publisher import RmqPublisher
|
|||
from yt_shared.schemas.error import ErrorDownloadGeneralPayload
|
||||
from yt_shared.schemas.media import BaseMedia
|
||||
from yt_shared.schemas.success import SuccessDownloadPayload
|
||||
from yt_shared.utils.file import remove_dir
|
||||
from yt_shared.utils.common import format_bytes
|
||||
from yt_shared.utils.file import file_size, remove_dir
|
||||
from yt_shared.utils.tasks.tasks import create_task
|
||||
|
||||
from bot.core.handlers.abstract import AbstractDownloadHandler
|
||||
|
@ -43,7 +44,7 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
|
|||
async def _delete_acknowledge_message(self) -> None:
|
||||
await self._bot.delete_messages(
|
||||
chat_id=self._body.from_chat_id,
|
||||
message_ids=[self._body.context.acknowledge_message_id],
|
||||
message_ids=[self._body.context.ack_message_id],
|
||||
)
|
||||
|
||||
async def _publish_error_message(self, err: Exception) -> None:
|
||||
|
@ -83,7 +84,10 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
|
|||
'Cleaning up task "%s": removing download content directory "%s" with files %s',
|
||||
self._body.task_id,
|
||||
root_path,
|
||||
os.listdir(root_path),
|
||||
{
|
||||
fn: format_bytes(file_size(os.path.join(root_path, fn)))
|
||||
for fn in os.listdir(root_path)
|
||||
},
|
||||
)
|
||||
remove_dir(root_path)
|
||||
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
import logging
|
||||
import re
|
||||
from itertools import product
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from pyrogram.types import Message
|
||||
from yt_shared.constants import REMOVE_QUERY_PARAMS_HOSTS
|
||||
from yt_shared.enums import TaskSource, TelegramChatType
|
||||
from yt_shared.rabbit.publisher import RmqPublisher
|
||||
from yt_shared.schemas.media import InbMediaPayload
|
||||
from yt_shared.schemas.url import URL
|
||||
|
||||
from bot.core.config.schema import UserSchema
|
||||
from bot.core.utils import can_remove_url_params
|
||||
|
||||
|
||||
class UrlService:
|
||||
|
@ -23,8 +26,9 @@ class UrlService:
|
|||
async def _send_to_worker(self, url: URL) -> bool:
|
||||
payload = InbMediaPayload(
|
||||
url=url.url,
|
||||
original_url=url.original_url,
|
||||
message_id=url.message_id,
|
||||
acknowledge_message_id=url.acknowledge_message_id,
|
||||
ack_message_id=url.ack_message_id,
|
||||
from_user_id=url.from_user_id,
|
||||
from_chat_id=url.from_chat_id,
|
||||
from_chat_type=url.from_chat_type,
|
||||
|
@ -43,24 +47,34 @@ class UrlParser:
|
|||
self._log = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
@staticmethod
|
||||
def _preprocess_urls(urls: list[str]) -> dict[str, str]:
|
||||
preprocessed_urls = {}
|
||||
for url in urls:
|
||||
if can_remove_url_params(url, REMOVE_QUERY_PARAMS_HOSTS):
|
||||
preprocessed_urls[url] = urljoin(url, urlparse(url).path)
|
||||
else:
|
||||
preprocessed_urls[url] = url
|
||||
return preprocessed_urls
|
||||
|
||||
def parse_urls(
|
||||
urls: list[str], context: dict[str, Message | UserSchema]
|
||||
self, urls: list[str], context: dict[str, Message | UserSchema]
|
||||
) -> list[URL]:
|
||||
message: Message = context['message']
|
||||
user: UserSchema = context['user']
|
||||
acknowledge_message: Message = context['acknowledge_message']
|
||||
ack_message: Message = context['ack_message']
|
||||
return [
|
||||
URL(
|
||||
url=url,
|
||||
original_url=orig_url,
|
||||
from_chat_id=message.chat.id,
|
||||
from_chat_type=TelegramChatType(message.chat.type.value),
|
||||
from_user_id=message.from_user.id,
|
||||
message_id=message.id,
|
||||
acknowledge_message_id=acknowledge_message.id,
|
||||
ack_message_id=ack_message.id,
|
||||
save_to_storage=user.save_to_storage,
|
||||
download_media_type=user.download_media_type,
|
||||
)
|
||||
for url in urls
|
||||
for orig_url, url in self._preprocess_urls(urls).items()
|
||||
]
|
||||
|
||||
def filter_urls(self, urls: list[str], regexes: list[str]) -> list[str]:
|
||||
|
|
|
@ -269,13 +269,13 @@ class VideoUploadTask(AbstractUploadTask):
|
|||
caption_conf = self._get_caption_conf()
|
||||
|
||||
if caption_conf.include_title:
|
||||
caption_items.append(f'📝 {self._media_object.title}')
|
||||
caption_items.append(self._media_object.title)
|
||||
if caption_conf.include_filename:
|
||||
caption_items.append(f'ℹ️ {self._filename}')
|
||||
caption_items.append(self._filename)
|
||||
if caption_conf.include_link:
|
||||
caption_items.append(f'👀 {self._ctx.context.url}')
|
||||
caption_items.append(self._ctx.context.url)
|
||||
if caption_conf.include_size:
|
||||
caption_items.append(f'💾 {self._media_object.file_size_human()}')
|
||||
caption_items.append(self._media_object.file_size_human())
|
||||
return caption_items
|
||||
|
||||
def _generate_send_media_coroutine(self, chat_id: int) -> Coroutine:
|
||||
|
|
|
@ -3,7 +3,8 @@ import asyncio
|
|||
import random
|
||||
import string
|
||||
from datetime import datetime
|
||||
from typing import Generator
|
||||
from typing import Generator, Iterable
|
||||
from urllib.parse import urlparse
|
||||
from uuid import uuid4
|
||||
|
||||
from pyrogram.enums import ChatType
|
||||
|
@ -87,3 +88,7 @@ def split_telegram_message(
|
|||
break
|
||||
else:
|
||||
yield text
|
||||
|
||||
|
||||
def can_remove_url_params(url: str, matching_hosts: Iterable[str]) -> bool:
|
||||
return urlparse(url).netloc in set(matching_hosts)
|
||||
|
|
|
@ -2,37 +2,26 @@ import glob
|
|||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from copy import deepcopy
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import yt_dlp
|
||||
from yt_shared.enums import DownMediaType
|
||||
from yt_shared.schemas.media import Audio, DownMedia, Video
|
||||
from yt_shared.utils.common import random_string
|
||||
from yt_shared.utils.common import format_bytes, random_string
|
||||
from yt_shared.utils.file import file_size
|
||||
|
||||
from worker.core.config import settings
|
||||
from worker.core.exceptions import MediaDownloaderError
|
||||
from worker.utils import cli_to_api
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||
|
||||
|
||||
try:
|
||||
from ytdl_opts.user import (
|
||||
AUDIO_FORMAT_YTDL_OPTS,
|
||||
AUDIO_YTDL_OPTS,
|
||||
DEFAULT_YTDL_OPTS,
|
||||
FINAL_AUDIO_FORMAT,
|
||||
FINAL_THUMBNAIL_FORMAT,
|
||||
VIDEO_YTDL_OPTS,
|
||||
)
|
||||
from ytdl_opts.user import FINAL_THUMBNAIL_FORMAT
|
||||
except ImportError:
|
||||
from ytdl_opts.default import (
|
||||
AUDIO_FORMAT_YTDL_OPTS,
|
||||
AUDIO_YTDL_OPTS,
|
||||
DEFAULT_YTDL_OPTS,
|
||||
FINAL_AUDIO_FORMAT,
|
||||
FINAL_THUMBNAIL_FORMAT,
|
||||
VIDEO_YTDL_OPTS,
|
||||
)
|
||||
from ytdl_opts.default import FINAL_AUDIO_FORMAT, FINAL_THUMBNAIL_FORMAT
|
||||
|
||||
|
||||
class MediaDownloader:
|
||||
|
@ -51,49 +40,34 @@ class MediaDownloader:
|
|||
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOADED_DIR
|
||||
)
|
||||
|
||||
def download(self, url: str, media_type: DownMediaType) -> DownMedia:
|
||||
def download(
|
||||
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
|
||||
) -> DownMedia:
|
||||
try:
|
||||
return self._download(url=url, media_type=media_type)
|
||||
return self._download(host_conf=host_conf, media_type=media_type)
|
||||
except Exception:
|
||||
self._log.error('Failed to download %s', url)
|
||||
self._log.error('Failed to download %s', host_conf.url)
|
||||
raise
|
||||
|
||||
def _configure_ytdl_opts(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> dict:
|
||||
ytdl_opts = deepcopy(DEFAULT_YTDL_OPTS)
|
||||
match media_type: # noqa: E999
|
||||
case DownMediaType.AUDIO:
|
||||
ytdl_opts.extend(AUDIO_YTDL_OPTS)
|
||||
ytdl_opts.extend(AUDIO_FORMAT_YTDL_OPTS)
|
||||
case DownMediaType.VIDEO:
|
||||
ytdl_opts.extend(VIDEO_YTDL_OPTS)
|
||||
case DownMediaType.AUDIO_VIDEO:
|
||||
ytdl_opts.extend(AUDIO_YTDL_OPTS)
|
||||
ytdl_opts.extend(VIDEO_YTDL_OPTS)
|
||||
ytdl_opts.append(self._KEEP_VIDEO_OPTION)
|
||||
|
||||
ytdl_opts = cli_to_api(ytdl_opts)
|
||||
ytdl_opts['outtmpl']['default'] = os.path.join(
|
||||
curr_tmp_dir,
|
||||
ytdl_opts['outtmpl']['default'],
|
||||
)
|
||||
return ytdl_opts
|
||||
|
||||
def _download(self, url: str, media_type: DownMediaType) -> DownMedia:
|
||||
def _download(
|
||||
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
|
||||
) -> DownMedia:
|
||||
url = host_conf.url
|
||||
self._log.info('Downloading %s, media_type %s', url, media_type)
|
||||
tmp_down_path = os.path.join(
|
||||
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOAD_DIR
|
||||
)
|
||||
with TemporaryDirectory(prefix='tmp_media_dir-', dir=tmp_down_path) as tmp_dir:
|
||||
curr_tmp_dir = os.path.join(tmp_down_path, tmp_dir)
|
||||
ytdl_opts = self._configure_ytdl_opts(
|
||||
|
||||
ytdl_opts_model = host_conf.build_config(
|
||||
media_type=media_type, curr_tmp_dir=curr_tmp_dir
|
||||
)
|
||||
with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
|
||||
|
||||
with yt_dlp.YoutubeDL(ytdl_opts_model.ytdl_opts) as ytdl:
|
||||
self._log.info('Downloading %s', url)
|
||||
self._log.info('Downloading to %s', curr_tmp_dir)
|
||||
self._log.info('Downloading with options %s', ytdl_opts)
|
||||
self._log.info('Downloading with options %s', ytdl_opts_model.ytdl_opts)
|
||||
|
||||
meta: dict | None = ytdl.extract_info(url, download=True)
|
||||
current_files = os.listdir(curr_tmp_dir)
|
||||
|
@ -223,7 +197,12 @@ class MediaDownloader:
|
|||
"""Try to find downloaded audio or thumbnail file."""
|
||||
verbose_name = self._EXT_TO_NAME[extension]
|
||||
for file_name in glob.glob(f"*.{extension}", root_dir=root_path):
|
||||
self._log.info('Found downloaded %s: "%s"', verbose_name, file_name)
|
||||
self._log.info(
|
||||
'Found downloaded %s: "%s" [%s]',
|
||||
verbose_name,
|
||||
file_name,
|
||||
format_bytes(file_size(os.path.join(root_path, file_name))),
|
||||
)
|
||||
return file_name
|
||||
self._log.info('Downloaded %s not found in "%s"', verbose_name, root_path)
|
||||
return None
|
||||
|
|
|
@ -2,6 +2,8 @@ import asyncio
|
|||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from yt_shared.enums import DownMediaType, TaskStatus
|
||||
|
@ -22,7 +24,10 @@ from worker.core.exceptions import DownloadVideoServiceError
|
|||
from worker.core.tasks.encode import EncodeToH264Task
|
||||
from worker.core.tasks.ffprobe_context import GetFfprobeContextTask
|
||||
from worker.core.tasks.thumbnail import MakeThumbnailTask
|
||||
from worker.utils import is_instagram
|
||||
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||
|
||||
|
||||
class MediaService:
|
||||
|
@ -45,24 +50,45 @@ class MediaService:
|
|||
async def _process(
|
||||
self, media_payload: InbMediaPayload, task: Task, db: AsyncSession
|
||||
) -> DownMedia:
|
||||
host_conf = self._get_host_conf(url=task.url)
|
||||
await self._repository.save_as_processing(db, task)
|
||||
media = await self._start_download(task, media_payload, db)
|
||||
media = await self._start_download(
|
||||
task=task, media_payload=media_payload, host_conf=host_conf, db=db
|
||||
)
|
||||
try:
|
||||
await self._post_process_media(media, task, media_payload, db)
|
||||
await self._post_process_media(
|
||||
media=media,
|
||||
task=task,
|
||||
media_payload=media_payload,
|
||||
host_conf=host_conf,
|
||||
db=db,
|
||||
)
|
||||
except Exception:
|
||||
self._log.exception('Failed to post-process media %s', media)
|
||||
self._err_file_cleanup(media)
|
||||
raise
|
||||
return media
|
||||
|
||||
def _get_host_conf(self, url: str) -> 'AbstractHostConfig':
|
||||
host_to_cls_map = HostConfRegistry.get_host_to_cls_map()
|
||||
self._log.info('Registry: %s', HostConfRegistry.get_registry())
|
||||
self._log.info('Host to CLS map: %s', host_to_cls_map)
|
||||
host_cls = host_to_cls_map.get(urlsplit(url).netloc, host_to_cls_map[None])
|
||||
return host_cls(url=url)
|
||||
|
||||
async def _start_download(
|
||||
self, task: Task, media_payload: InbMediaPayload, db: AsyncSession
|
||||
self,
|
||||
task: Task,
|
||||
media_payload: InbMediaPayload,
|
||||
host_conf: 'AbstractHostConfig',
|
||||
db: AsyncSession,
|
||||
) -> DownMedia:
|
||||
try:
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
lambda: self._downloader.download(
|
||||
task.url, media_type=media_payload.download_media_type
|
||||
host_conf=host_conf,
|
||||
media_type=media_payload.download_media_type,
|
||||
),
|
||||
)
|
||||
except Exception as err:
|
||||
|
@ -75,16 +101,25 @@ class MediaService:
|
|||
media: DownMedia,
|
||||
task: Task,
|
||||
media_payload: InbMediaPayload,
|
||||
host_conf: 'AbstractHostConfig',
|
||||
db: AsyncSession,
|
||||
) -> None:
|
||||
def post_process_audio():
|
||||
return self._post_process_audio(
|
||||
media=media, media_payload=media_payload, task=task, db=db
|
||||
media=media,
|
||||
media_payload=media_payload,
|
||||
task=task,
|
||||
host_conf=host_conf,
|
||||
db=db,
|
||||
)
|
||||
|
||||
def post_process_video():
|
||||
return self._post_process_video(
|
||||
media=media, media_payload=media_payload, task=task, db=db
|
||||
media=media,
|
||||
media_payload=media_payload,
|
||||
task=task,
|
||||
host_conf=host_conf,
|
||||
db=db,
|
||||
)
|
||||
|
||||
match media.media_type: # noqa: E999
|
||||
|
@ -102,6 +137,7 @@ class MediaService:
|
|||
media: DownMedia,
|
||||
media_payload: InbMediaPayload,
|
||||
task: Task,
|
||||
host_conf: 'AbstractHostConfig',
|
||||
db: AsyncSession,
|
||||
) -> None:
|
||||
"""Post-process downloaded media files, e.g. make thumbnail and copy to storage."""
|
||||
|
@ -130,12 +166,12 @@ class MediaService:
|
|||
if media_payload.save_to_storage:
|
||||
coro_tasks.append(self._create_copy_file_task(video))
|
||||
|
||||
# Instagram returns VP9+AAC in MP4 container for logged users and needs
|
||||
# to be encoded to H264 since Telegram doesn't play VP9 on iOS
|
||||
if settings.INSTAGRAM_ENCODE_TO_H264 and is_instagram(media_payload.url):
|
||||
if host_conf.ENCODE_VIDEO:
|
||||
coro_tasks.append(
|
||||
create_task(
|
||||
EncodeToH264Task(media=media).run(),
|
||||
EncodeToH264Task(
|
||||
media=media, cmd_tpl=host_conf.FFMPEG_VIDEO_OPTS
|
||||
).run(),
|
||||
task_name=EncodeToH264Task.__class__.__name__,
|
||||
logger=self._log,
|
||||
exception_message='Task "%s" raised an exception',
|
||||
|
@ -153,6 +189,7 @@ class MediaService:
|
|||
media: DownMedia,
|
||||
media_payload: InbMediaPayload,
|
||||
task: Task,
|
||||
host_conf: 'AbstractHostConfig',
|
||||
db: AsyncSession,
|
||||
) -> None:
|
||||
coro_tasks = [self._repository.save_file(db, task, media.audio, media.meta)]
|
||||
|
|
|
@ -6,14 +6,14 @@ from worker.core.tasks.abstract import AbstractFfBinaryTask
|
|||
|
||||
|
||||
class EncodeToH264Task(AbstractFfBinaryTask):
|
||||
_CMD = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 25 -movflags +faststart -c:a copy "{output}"'
|
||||
_EXT = 'mp4'
|
||||
_CMD_TIMEOUT = 120
|
||||
|
||||
def __init__(self, media: DownMedia):
|
||||
def __init__(self, media: DownMedia, cmd_tpl: str) -> None:
|
||||
super().__init__(file_path=media.video.filepath)
|
||||
self._media = media
|
||||
self._video = media.video
|
||||
self._CMD = cmd_tpl # lol
|
||||
|
||||
async def run(self) -> None:
|
||||
await self._encode_video()
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import yt_dlp
|
||||
|
||||
_COOKIES_FILEPATH = '/app/cookies/cookies.txt'
|
||||
_INSTAGRAM_HOST = 'instagram.com'
|
||||
|
||||
|
||||
def cli_to_api(opts: list) -> dict:
|
||||
|
@ -30,9 +28,5 @@ def get_cookies_opts_if_not_empty() -> list[str]:
|
|||
return [] if is_file_empty(_COOKIES_FILEPATH) else ['--cookies', _COOKIES_FILEPATH]
|
||||
|
||||
|
||||
def is_instagram(url: str) -> bool:
|
||||
return _INSTAGRAM_HOST in urlparse(url).netloc
|
||||
|
||||
|
||||
def get_media_format_options() -> str:
|
||||
pass
|
||||
|
|
|
@ -22,8 +22,6 @@ DEFAULT_YTDL_OPTS = [
|
|||
'--ignore-errors',
|
||||
'--verbose',
|
||||
*get_cookies_opts_if_not_empty(),
|
||||
'--format-sort',
|
||||
'vcodec:h264',
|
||||
]
|
||||
|
||||
AUDIO_YTDL_OPTS = [
|
||||
|
|
11
app_worker/ytdl_opts/per_host/__init__.py
Normal file
11
app_worker/ytdl_opts/per_host/__init__.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
from ytdl_opts.per_host._default import DefaultHost
|
||||
from ytdl_opts.per_host.instagram import InstagramHost
|
||||
from ytdl_opts.per_host.tiktok import TikTokHost
|
||||
from ytdl_opts.per_host.twitter import TwitterHost
|
||||
|
||||
__all__ = [
|
||||
'DefaultHost',
|
||||
'InstagramHost',
|
||||
'TikTokHost',
|
||||
'TwitterHost',
|
||||
]
|
116
app_worker/ytdl_opts/per_host/_base.py
Normal file
116
app_worker/ytdl_opts/per_host/_base.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
import abc
|
||||
import logging
|
||||
import os
|
||||
from copy import deepcopy
|
||||
|
||||
import pydantic
|
||||
from pydantic import StrictBool, StrictStr
|
||||
from yt_shared.enums import DownMediaType
|
||||
|
||||
from worker.utils import cli_to_api
|
||||
|
||||
try:
|
||||
from ytdl_opts.user import (
|
||||
AUDIO_FORMAT_YTDL_OPTS,
|
||||
AUDIO_YTDL_OPTS,
|
||||
DEFAULT_YTDL_OPTS,
|
||||
FINAL_AUDIO_FORMAT,
|
||||
FINAL_THUMBNAIL_FORMAT,
|
||||
VIDEO_YTDL_OPTS,
|
||||
)
|
||||
except ImportError:
|
||||
from ytdl_opts.default import (
|
||||
AUDIO_FORMAT_YTDL_OPTS,
|
||||
AUDIO_YTDL_OPTS,
|
||||
DEFAULT_YTDL_OPTS,
|
||||
FINAL_AUDIO_FORMAT,
|
||||
FINAL_THUMBNAIL_FORMAT,
|
||||
VIDEO_YTDL_OPTS,
|
||||
)
|
||||
|
||||
|
||||
class BaseHostConfModel(pydantic.BaseModel):
|
||||
# TODO: Add validators.
|
||||
|
||||
hostnames: tuple[str, ...]
|
||||
|
||||
encode_audio: StrictBool
|
||||
encode_video: StrictBool
|
||||
|
||||
ffmpeg_audio_opts: StrictStr | None
|
||||
ffmpeg_video_opts: StrictStr | None
|
||||
|
||||
ytdl_opts: dict
|
||||
|
||||
|
||||
class AbstractHostConfig:
|
||||
"""Abstract yt-dlp host config."""
|
||||
|
||||
ALLOW_NULL_HOSTNAMES: bool | None = None
|
||||
HOSTNAMES: tuple[str, ...] | None = None
|
||||
|
||||
CUSTOM_VIDEO_YTDL_OPTS: list[str] | None = None
|
||||
|
||||
ENCODE_AUDIO: bool | None = None
|
||||
ENCODE_VIDEO: bool | None = None
|
||||
|
||||
KEEP_VIDEO_OPTION = '--keep-video'
|
||||
|
||||
DEFAULT_YTDL_OPTS = DEFAULT_YTDL_OPTS
|
||||
|
||||
AUDIO_YTDL_OPTS = AUDIO_YTDL_OPTS
|
||||
AUDIO_FORMAT_YTDL_OPTS = AUDIO_FORMAT_YTDL_OPTS
|
||||
|
||||
FINAL_AUDIO_FORMAT = FINAL_AUDIO_FORMAT
|
||||
FINAL_THUMBNAIL_FORMAT = FINAL_THUMBNAIL_FORMAT
|
||||
|
||||
DEFAULT_VIDEO_YTDL_OPTS = VIDEO_YTDL_OPTS
|
||||
DEFAULT_VIDEO_FORMAT_SORT_OPT = ['--format-sort', 'res,vcodec:h265,h264']
|
||||
|
||||
FFMPEG_AUDIO_OPTS: str | None = None
|
||||
FFMPEG_VIDEO_OPTS: str | None = None
|
||||
|
||||
def __init__(self, url: str) -> None:
|
||||
self._log = logging.getLogger(self.__class__.__name__)
|
||||
self._validate_hostname()
|
||||
self.url = url
|
||||
self._log.info('Instantiating "%s" for url "%s"', self.__class__.__name__, url)
|
||||
|
||||
def _validate_hostname(self) -> None:
|
||||
if not self.ALLOW_NULL_HOSTNAMES and not self.HOSTNAMES:
|
||||
raise ValueError('Hostname(s) must be set before instantiation.')
|
||||
|
||||
@abc.abstractmethod
|
||||
def build_config(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> BaseHostConfModel:
|
||||
pass
|
||||
|
||||
def _build_ytdl_opts(self, media_type: DownMediaType, curr_tmp_dir: str) -> dict:
|
||||
def _add_video_opts(ytdl_opts_: list[str]) -> None:
|
||||
ytdl_opts_.extend(self.DEFAULT_VIDEO_YTDL_OPTS)
|
||||
ytdl_opts_.extend(self._build_custom_ytdl_video_opts())
|
||||
|
||||
ytdl_opts = deepcopy(self.DEFAULT_YTDL_OPTS)
|
||||
|
||||
match media_type: # noqa: E999
|
||||
case DownMediaType.AUDIO:
|
||||
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
|
||||
ytdl_opts.extend(self.AUDIO_FORMAT_YTDL_OPTS)
|
||||
case DownMediaType.VIDEO:
|
||||
_add_video_opts(ytdl_opts)
|
||||
case DownMediaType.AUDIO_VIDEO:
|
||||
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
|
||||
_add_video_opts(ytdl_opts)
|
||||
ytdl_opts.append(self.KEEP_VIDEO_OPTION)
|
||||
|
||||
ytdl_opts = cli_to_api(ytdl_opts)
|
||||
ytdl_opts['outtmpl']['default'] = os.path.join(
|
||||
curr_tmp_dir,
|
||||
ytdl_opts['outtmpl']['default'],
|
||||
)
|
||||
return ytdl_opts
|
||||
|
||||
@abc.abstractmethod
|
||||
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||
pass
|
30
app_worker/ytdl_opts/per_host/_default.py
Normal file
30
app_worker/ytdl_opts/per_host/_default.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from yt_shared.enums import DownMediaType
|
||||
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||
|
||||
|
||||
class DefaultHostModel(BaseHostConfModel):
|
||||
hostnames: None
|
||||
|
||||
|
||||
class DefaultHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||
ALLOW_NULL_HOSTNAMES = True
|
||||
HOSTNAMES = None
|
||||
ENCODE_AUDIO = False
|
||||
ENCODE_VIDEO = False
|
||||
|
||||
def build_config(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> DefaultHostModel:
|
||||
return DefaultHostModel(
|
||||
hostnames=self.HOSTNAMES,
|
||||
encode_audio=self.ENCODE_AUDIO,
|
||||
encode_video=self.ENCODE_VIDEO,
|
||||
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||
)
|
||||
|
||||
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
39
app_worker/ytdl_opts/per_host/_registry.py
Normal file
39
app_worker/ytdl_opts/per_host/_registry.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
from typing import TYPE_CHECKING, Type
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||
|
||||
|
||||
class HostConfRegistry(type):
|
||||
REGISTRY: dict[str, type['AbstractHostConfig']] = {}
|
||||
HOST_TO_CLS_MAP = {}
|
||||
|
||||
def __new__(
|
||||
mcs: Type['HostConfRegistry'],
|
||||
name: str,
|
||||
bases: tuple[type['AbstractHostConfig']],
|
||||
attrs: dict,
|
||||
) -> type['AbstractHostConfig']:
|
||||
host_cls: type['AbstractHostConfig'] = type.__new__(mcs, name, bases, attrs)
|
||||
mcs.REGISTRY[host_cls.__name__] = host_cls
|
||||
|
||||
mcs._build_host_to_cls_map(host_cls=host_cls, hostnames=attrs['HOSTNAMES'])
|
||||
return host_cls
|
||||
|
||||
@classmethod
|
||||
def get_registry(mcs) -> dict[str, type['AbstractHostConfig']]:
|
||||
return mcs.REGISTRY.copy()
|
||||
|
||||
@classmethod
|
||||
def get_host_to_cls_map(mcs) -> dict[str | None, type['AbstractHostConfig']]:
|
||||
return mcs.HOST_TO_CLS_MAP.copy()
|
||||
|
||||
@classmethod
|
||||
def _build_host_to_cls_map(
|
||||
mcs, host_cls: type['AbstractHostConfig'], hostnames: tuple[str, ...] | None
|
||||
) -> None:
|
||||
if hostnames is None:
|
||||
mcs.HOST_TO_CLS_MAP[None] = host_cls
|
||||
else:
|
||||
for host in hostnames:
|
||||
mcs.HOST_TO_CLS_MAP[host] = host_cls
|
37
app_worker/ytdl_opts/per_host/instagram.py
Normal file
37
app_worker/ytdl_opts/per_host/instagram.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
from yt_shared.constants import INSTAGRAM_HOSTS
|
||||
from yt_shared.enums import DownMediaType
|
||||
|
||||
from worker.core.config import settings
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||
|
||||
|
||||
class InstagramHostModel(BaseHostConfModel):
|
||||
pass
|
||||
|
||||
|
||||
class InstagramHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||
ALLOW_NULL_HOSTNAMES = False
|
||||
HOSTNAMES = INSTAGRAM_HOSTS
|
||||
ENCODE_AUDIO = False
|
||||
ENCODE_VIDEO = settings.INSTAGRAM_ENCODE_TO_H264
|
||||
|
||||
FFMPEG_AUDIO_OPTS = None
|
||||
# Instagram returns VP9+AAC in MP4 container for logged users and needs to be
|
||||
# encoded to H264 since Telegram doesn't play VP9 on iOS.
|
||||
FFMPEG_VIDEO_OPTS = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 22 -movflags +faststart -c:a copy "{output}"'
|
||||
|
||||
def build_config(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> InstagramHostModel:
|
||||
return InstagramHostModel(
|
||||
hostnames=self.HOSTNAMES,
|
||||
encode_audio=self.ENCODE_AUDIO,
|
||||
encode_video=self.ENCODE_VIDEO,
|
||||
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||
)
|
||||
|
||||
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
31
app_worker/ytdl_opts/per_host/tiktok.py
Normal file
31
app_worker/ytdl_opts/per_host/tiktok.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
from yt_shared.constants import TIKTOK_HOSTS
|
||||
from yt_shared.enums import DownMediaType
|
||||
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||
|
||||
|
||||
class TikTokHostModel(BaseHostConfModel):
|
||||
pass
|
||||
|
||||
|
||||
class TikTokHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||
ALLOW_NULL_HOSTNAMES = False
|
||||
HOSTNAMES = TIKTOK_HOSTS
|
||||
ENCODE_AUDIO = False
|
||||
ENCODE_VIDEO = False
|
||||
|
||||
def build_config(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> TikTokHostModel:
|
||||
return TikTokHostModel(
|
||||
hostnames=self.HOSTNAMES,
|
||||
encode_audio=self.ENCODE_AUDIO,
|
||||
encode_video=self.ENCODE_VIDEO,
|
||||
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||
)
|
||||
|
||||
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
31
app_worker/ytdl_opts/per_host/twitter.py
Normal file
31
app_worker/ytdl_opts/per_host/twitter.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
from yt_shared.constants import TWITTER_HOSTS
|
||||
from yt_shared.enums import DownMediaType
|
||||
|
||||
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||
|
||||
|
||||
class TwitterHostModel(BaseHostConfModel):
|
||||
pass
|
||||
|
||||
|
||||
class TwitterHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||
ALLOW_NULL_HOSTNAMES = False
|
||||
HOSTNAMES = TWITTER_HOSTS
|
||||
ENCODE_AUDIO = False
|
||||
ENCODE_VIDEO = False
|
||||
|
||||
def build_config(
|
||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||
) -> TwitterHostModel:
|
||||
return TwitterHostModel(
|
||||
hostnames=self.HOSTNAMES,
|
||||
encode_audio=self.ENCODE_AUDIO,
|
||||
encode_video=self.ENCODE_VIDEO,
|
||||
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||
)
|
||||
|
||||
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||
return ['--format-sort', 'res,proto:https,vcodec:h265,h264']
|
18
yt_shared/yt_shared/constants.py
Normal file
18
yt_shared/yt_shared/constants.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
from asyncio import Lock
|
||||
|
||||
ASYNC_LOCK = Lock()
|
||||
|
||||
|
||||
INSTAGRAM_HOSTS = ('instagram.com', 'www.instagram.com')
|
||||
TIKTOK_HOSTS = ('tiktok.com', 'vm.tiktok.com', 'www.tiktok.com', 'www.vm.tiktok.com')
|
||||
TWITTER_HOSTS = (
|
||||
'twitter.com',
|
||||
'www.twitter.com',
|
||||
'x.com',
|
||||
'www.x.com',
|
||||
't.co',
|
||||
'www.t.co',
|
||||
)
|
||||
|
||||
|
||||
REMOVE_QUERY_PARAMS_HOSTS = TWITTER_HOSTS + INSTAGRAM_HOSTS
|
|
@ -4,11 +4,11 @@ from uuid import UUID
|
|||
from sqlalchemy import insert, select
|
||||
from sqlalchemy.exc import NoResultFound
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from yt_shared.constants import ASYNC_LOCK
|
||||
from yt_shared.enums import TaskStatus
|
||||
from yt_shared.models import Cache, File, Task
|
||||
from yt_shared.schemas.cache import CacheSchema
|
||||
from yt_shared.schemas.media import BaseMedia, InbMediaPayload, Video
|
||||
from yt_shared.utils.common import ASYNC_LOCK
|
||||
|
||||
|
||||
class TaskRepository:
|
||||
|
|
|
@ -14,6 +14,7 @@ from pydantic import (
|
|||
from yt_shared.enums import DownMediaType, MediaFileType, TaskSource, TelegramChatType
|
||||
from yt_shared.schemas.base import RealBaseModel
|
||||
from yt_shared.utils.common import format_bytes
|
||||
from yt_shared.utils.file import file_size
|
||||
|
||||
|
||||
class InbMediaPayload(RealBaseModel):
|
||||
|
@ -24,8 +25,9 @@ class InbMediaPayload(RealBaseModel):
|
|||
from_chat_type: TelegramChatType | None
|
||||
from_user_id: StrictInt | None
|
||||
message_id: StrictInt | None
|
||||
acknowledge_message_id: StrictInt | None
|
||||
ack_message_id: StrictInt | None
|
||||
url: StrictStr
|
||||
original_url: StrictStr
|
||||
source: TaskSource
|
||||
save_to_storage: StrictBool
|
||||
download_media_type: DownMediaType
|
||||
|
@ -49,9 +51,15 @@ class BaseMedia(RealBaseModel):
|
|||
is_converted: StrictBool = False
|
||||
converted_filepath: StrictStr | None = None
|
||||
converted_filename: StrictStr | None = None
|
||||
converted_file_size: StrictInt | None = None
|
||||
|
||||
def file_size_human(self) -> str:
|
||||
return format_bytes(num=self.file_size)
|
||||
return format_bytes(num=self.current_file_size())
|
||||
|
||||
def current_file_size(self) -> int:
|
||||
if self.converted_file_size is not None:
|
||||
return self.converted_file_size
|
||||
return self.file_size
|
||||
|
||||
def mark_as_saved_to_storage(self, storage_path: str) -> None:
|
||||
self.storage_path = storage_path
|
||||
|
@ -60,6 +68,7 @@ class BaseMedia(RealBaseModel):
|
|||
def mark_as_converted(self, filepath: str) -> None:
|
||||
self.converted_filepath = filepath
|
||||
self.converted_filename = filepath.rsplit("/", 1)[-1]
|
||||
self.converted_file_size = file_size(filepath)
|
||||
self.is_converted = True
|
||||
|
||||
|
||||
|
|
|
@ -6,10 +6,11 @@ from yt_shared.schemas.base import RealBaseModel
|
|||
|
||||
class URL(RealBaseModel):
|
||||
url: StrictStr
|
||||
original_url: StrictStr
|
||||
from_chat_id: StrictInt
|
||||
from_chat_type: TelegramChatType
|
||||
from_user_id: StrictInt
|
||||
message_id: StrictInt
|
||||
acknowledge_message_id: StrictInt
|
||||
ack_message_id: StrictInt
|
||||
save_to_storage: StrictBool
|
||||
download_media_type: DownMediaType
|
||||
|
|
|
@ -6,8 +6,6 @@ from functools import partial, wraps
|
|||
from string import ascii_lowercase
|
||||
from typing import Any, Callable
|
||||
|
||||
ASYNC_LOCK = asyncio.Lock()
|
||||
|
||||
_UNIT_SIZE_NAMES = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
|
||||
_BASE = 1024.0
|
||||
|
||||
|
|
Loading…
Reference in a new issue