This commit is contained in:
Taras Terletskyi 2023-10-01 00:23:30 +03:00
parent 39b59d9efc
commit b5f2031cbc
22 changed files with 443 additions and 91 deletions

View file

@ -39,13 +39,13 @@ class TelegramCallback:
self._log.debug('No urls to download, skipping message')
return
acknowledge_message = await self._send_acknowledge_message(
ack_message = await self._send_acknowledge_message(
message=message, url_count=len(urls)
)
context = {
'message': message,
'user': user,
'acknowledge_message': acknowledge_message,
'ack_message': ack_message,
}
url_objects = self._url_parser.parse_urls(urls=urls, context=context)
await self._url_service.process_urls(url_objects)

View file

@ -9,7 +9,8 @@ from yt_shared.rabbit.publisher import RmqPublisher
from yt_shared.schemas.error import ErrorDownloadGeneralPayload
from yt_shared.schemas.media import BaseMedia
from yt_shared.schemas.success import SuccessDownloadPayload
from yt_shared.utils.file import remove_dir
from yt_shared.utils.common import format_bytes
from yt_shared.utils.file import file_size, remove_dir
from yt_shared.utils.tasks.tasks import create_task
from bot.core.handlers.abstract import AbstractDownloadHandler
@ -43,7 +44,7 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
async def _delete_acknowledge_message(self) -> None:
await self._bot.delete_messages(
chat_id=self._body.from_chat_id,
message_ids=[self._body.context.acknowledge_message_id],
message_ids=[self._body.context.ack_message_id],
)
async def _publish_error_message(self, err: Exception) -> None:
@ -83,7 +84,10 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
'Cleaning up task "%s": removing download content directory "%s" with files %s',
self._body.task_id,
root_path,
os.listdir(root_path),
{
fn: format_bytes(file_size(os.path.join(root_path, fn)))
for fn in os.listdir(root_path)
},
)
remove_dir(root_path)

View file

@ -1,14 +1,17 @@
import logging
import re
from itertools import product
from urllib.parse import urljoin, urlparse
from pyrogram.types import Message
from yt_shared.constants import REMOVE_QUERY_PARAMS_HOSTS
from yt_shared.enums import TaskSource, TelegramChatType
from yt_shared.rabbit.publisher import RmqPublisher
from yt_shared.schemas.media import InbMediaPayload
from yt_shared.schemas.url import URL
from bot.core.config.schema import UserSchema
from bot.core.utils import can_remove_url_params
class UrlService:
@ -23,8 +26,9 @@ class UrlService:
async def _send_to_worker(self, url: URL) -> bool:
payload = InbMediaPayload(
url=url.url,
original_url=url.original_url,
message_id=url.message_id,
acknowledge_message_id=url.acknowledge_message_id,
ack_message_id=url.ack_message_id,
from_user_id=url.from_user_id,
from_chat_id=url.from_chat_id,
from_chat_type=url.from_chat_type,
@ -43,24 +47,34 @@ class UrlParser:
self._log = logging.getLogger(self.__class__.__name__)
@staticmethod
def _preprocess_urls(urls: list[str]) -> dict[str, str]:
preprocessed_urls = {}
for url in urls:
if can_remove_url_params(url, REMOVE_QUERY_PARAMS_HOSTS):
preprocessed_urls[url] = urljoin(url, urlparse(url).path)
else:
preprocessed_urls[url] = url
return preprocessed_urls
def parse_urls(
urls: list[str], context: dict[str, Message | UserSchema]
self, urls: list[str], context: dict[str, Message | UserSchema]
) -> list[URL]:
message: Message = context['message']
user: UserSchema = context['user']
acknowledge_message: Message = context['acknowledge_message']
ack_message: Message = context['ack_message']
return [
URL(
url=url,
original_url=orig_url,
from_chat_id=message.chat.id,
from_chat_type=TelegramChatType(message.chat.type.value),
from_user_id=message.from_user.id,
message_id=message.id,
acknowledge_message_id=acknowledge_message.id,
ack_message_id=ack_message.id,
save_to_storage=user.save_to_storage,
download_media_type=user.download_media_type,
)
for url in urls
for orig_url, url in self._preprocess_urls(urls).items()
]
def filter_urls(self, urls: list[str], regexes: list[str]) -> list[str]:

View file

@ -269,13 +269,13 @@ class VideoUploadTask(AbstractUploadTask):
caption_conf = self._get_caption_conf()
if caption_conf.include_title:
caption_items.append(f'📝 {self._media_object.title}')
caption_items.append(self._media_object.title)
if caption_conf.include_filename:
caption_items.append(f' {self._filename}')
caption_items.append(self._filename)
if caption_conf.include_link:
caption_items.append(f'👀 {self._ctx.context.url}')
caption_items.append(self._ctx.context.url)
if caption_conf.include_size:
caption_items.append(f'💾 {self._media_object.file_size_human()}')
caption_items.append(self._media_object.file_size_human())
return caption_items
def _generate_send_media_coroutine(self, chat_id: int) -> Coroutine:

View file

@ -3,7 +3,8 @@ import asyncio
import random
import string
from datetime import datetime
from typing import Generator
from typing import Generator, Iterable
from urllib.parse import urlparse
from uuid import uuid4
from pyrogram.enums import ChatType
@ -87,3 +88,7 @@ def split_telegram_message(
break
else:
yield text
def can_remove_url_params(url: str, matching_hosts: Iterable[str]) -> bool:
return urlparse(url).netloc in set(matching_hosts)

View file

@ -2,37 +2,26 @@ import glob
import logging
import os
import shutil
from copy import deepcopy
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING
import yt_dlp
from yt_shared.enums import DownMediaType
from yt_shared.schemas.media import Audio, DownMedia, Video
from yt_shared.utils.common import random_string
from yt_shared.utils.common import format_bytes, random_string
from yt_shared.utils.file import file_size
from worker.core.config import settings
from worker.core.exceptions import MediaDownloaderError
from worker.utils import cli_to_api
if TYPE_CHECKING:
from ytdl_opts.per_host._base import AbstractHostConfig
try:
from ytdl_opts.user import (
AUDIO_FORMAT_YTDL_OPTS,
AUDIO_YTDL_OPTS,
DEFAULT_YTDL_OPTS,
FINAL_AUDIO_FORMAT,
FINAL_THUMBNAIL_FORMAT,
VIDEO_YTDL_OPTS,
)
from ytdl_opts.user import FINAL_THUMBNAIL_FORMAT
except ImportError:
from ytdl_opts.default import (
AUDIO_FORMAT_YTDL_OPTS,
AUDIO_YTDL_OPTS,
DEFAULT_YTDL_OPTS,
FINAL_AUDIO_FORMAT,
FINAL_THUMBNAIL_FORMAT,
VIDEO_YTDL_OPTS,
)
from ytdl_opts.default import FINAL_AUDIO_FORMAT, FINAL_THUMBNAIL_FORMAT
class MediaDownloader:
@ -51,49 +40,34 @@ class MediaDownloader:
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOADED_DIR
)
def download(self, url: str, media_type: DownMediaType) -> DownMedia:
def download(
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
) -> DownMedia:
try:
return self._download(url=url, media_type=media_type)
return self._download(host_conf=host_conf, media_type=media_type)
except Exception:
self._log.error('Failed to download %s', url)
self._log.error('Failed to download %s', host_conf.url)
raise
def _configure_ytdl_opts(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> dict:
ytdl_opts = deepcopy(DEFAULT_YTDL_OPTS)
match media_type: # noqa: E999
case DownMediaType.AUDIO:
ytdl_opts.extend(AUDIO_YTDL_OPTS)
ytdl_opts.extend(AUDIO_FORMAT_YTDL_OPTS)
case DownMediaType.VIDEO:
ytdl_opts.extend(VIDEO_YTDL_OPTS)
case DownMediaType.AUDIO_VIDEO:
ytdl_opts.extend(AUDIO_YTDL_OPTS)
ytdl_opts.extend(VIDEO_YTDL_OPTS)
ytdl_opts.append(self._KEEP_VIDEO_OPTION)
ytdl_opts = cli_to_api(ytdl_opts)
ytdl_opts['outtmpl']['default'] = os.path.join(
curr_tmp_dir,
ytdl_opts['outtmpl']['default'],
)
return ytdl_opts
def _download(self, url: str, media_type: DownMediaType) -> DownMedia:
def _download(
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
) -> DownMedia:
url = host_conf.url
self._log.info('Downloading %s, media_type %s', url, media_type)
tmp_down_path = os.path.join(
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOAD_DIR
)
with TemporaryDirectory(prefix='tmp_media_dir-', dir=tmp_down_path) as tmp_dir:
curr_tmp_dir = os.path.join(tmp_down_path, tmp_dir)
ytdl_opts = self._configure_ytdl_opts(
ytdl_opts_model = host_conf.build_config(
media_type=media_type, curr_tmp_dir=curr_tmp_dir
)
with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
with yt_dlp.YoutubeDL(ytdl_opts_model.ytdl_opts) as ytdl:
self._log.info('Downloading %s', url)
self._log.info('Downloading to %s', curr_tmp_dir)
self._log.info('Downloading with options %s', ytdl_opts)
self._log.info('Downloading with options %s', ytdl_opts_model.ytdl_opts)
meta: dict | None = ytdl.extract_info(url, download=True)
current_files = os.listdir(curr_tmp_dir)
@ -223,7 +197,12 @@ class MediaDownloader:
"""Try to find downloaded audio or thumbnail file."""
verbose_name = self._EXT_TO_NAME[extension]
for file_name in glob.glob(f"*.{extension}", root_dir=root_path):
self._log.info('Found downloaded %s: "%s"', verbose_name, file_name)
self._log.info(
'Found downloaded %s: "%s" [%s]',
verbose_name,
file_name,
format_bytes(file_size(os.path.join(root_path, file_name))),
)
return file_name
self._log.info('Downloaded %s not found in "%s"', verbose_name, root_path)
return None

View file

@ -2,6 +2,8 @@ import asyncio
import logging
import os
import shutil
from typing import TYPE_CHECKING
from urllib.parse import urlsplit
from sqlalchemy.ext.asyncio import AsyncSession
from yt_shared.enums import DownMediaType, TaskStatus
@ -22,7 +24,10 @@ from worker.core.exceptions import DownloadVideoServiceError
from worker.core.tasks.encode import EncodeToH264Task
from worker.core.tasks.ffprobe_context import GetFfprobeContextTask
from worker.core.tasks.thumbnail import MakeThumbnailTask
from worker.utils import is_instagram
from ytdl_opts.per_host._registry import HostConfRegistry
if TYPE_CHECKING:
from ytdl_opts.per_host._base import AbstractHostConfig
class MediaService:
@ -45,24 +50,45 @@ class MediaService:
async def _process(
self, media_payload: InbMediaPayload, task: Task, db: AsyncSession
) -> DownMedia:
host_conf = self._get_host_conf(url=task.url)
await self._repository.save_as_processing(db, task)
media = await self._start_download(task, media_payload, db)
media = await self._start_download(
task=task, media_payload=media_payload, host_conf=host_conf, db=db
)
try:
await self._post_process_media(media, task, media_payload, db)
await self._post_process_media(
media=media,
task=task,
media_payload=media_payload,
host_conf=host_conf,
db=db,
)
except Exception:
self._log.exception('Failed to post-process media %s', media)
self._err_file_cleanup(media)
raise
return media
def _get_host_conf(self, url: str) -> 'AbstractHostConfig':
host_to_cls_map = HostConfRegistry.get_host_to_cls_map()
self._log.info('Registry: %s', HostConfRegistry.get_registry())
self._log.info('Host to CLS map: %s', host_to_cls_map)
host_cls = host_to_cls_map.get(urlsplit(url).netloc, host_to_cls_map[None])
return host_cls(url=url)
async def _start_download(
self, task: Task, media_payload: InbMediaPayload, db: AsyncSession
self,
task: Task,
media_payload: InbMediaPayload,
host_conf: 'AbstractHostConfig',
db: AsyncSession,
) -> DownMedia:
try:
return await asyncio.get_running_loop().run_in_executor(
None,
lambda: self._downloader.download(
task.url, media_type=media_payload.download_media_type
host_conf=host_conf,
media_type=media_payload.download_media_type,
),
)
except Exception as err:
@ -75,16 +101,25 @@ class MediaService:
media: DownMedia,
task: Task,
media_payload: InbMediaPayload,
host_conf: 'AbstractHostConfig',
db: AsyncSession,
) -> None:
def post_process_audio():
return self._post_process_audio(
media=media, media_payload=media_payload, task=task, db=db
media=media,
media_payload=media_payload,
task=task,
host_conf=host_conf,
db=db,
)
def post_process_video():
return self._post_process_video(
media=media, media_payload=media_payload, task=task, db=db
media=media,
media_payload=media_payload,
task=task,
host_conf=host_conf,
db=db,
)
match media.media_type: # noqa: E999
@ -102,6 +137,7 @@ class MediaService:
media: DownMedia,
media_payload: InbMediaPayload,
task: Task,
host_conf: 'AbstractHostConfig',
db: AsyncSession,
) -> None:
"""Post-process downloaded media files, e.g. make thumbnail and copy to storage."""
@ -130,12 +166,12 @@ class MediaService:
if media_payload.save_to_storage:
coro_tasks.append(self._create_copy_file_task(video))
# Instagram returns VP9+AAC in MP4 container for logged users and needs
# to be encoded to H264 since Telegram doesn't play VP9 on iOS
if settings.INSTAGRAM_ENCODE_TO_H264 and is_instagram(media_payload.url):
if host_conf.ENCODE_VIDEO:
coro_tasks.append(
create_task(
EncodeToH264Task(media=media).run(),
EncodeToH264Task(
media=media, cmd_tpl=host_conf.FFMPEG_VIDEO_OPTS
).run(),
task_name=EncodeToH264Task.__class__.__name__,
logger=self._log,
exception_message='Task "%s" raised an exception',
@ -153,6 +189,7 @@ class MediaService:
media: DownMedia,
media_payload: InbMediaPayload,
task: Task,
host_conf: 'AbstractHostConfig',
db: AsyncSession,
) -> None:
coro_tasks = [self._repository.save_file(db, task, media.audio, media.meta)]

View file

@ -6,14 +6,14 @@ from worker.core.tasks.abstract import AbstractFfBinaryTask
class EncodeToH264Task(AbstractFfBinaryTask):
_CMD = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 25 -movflags +faststart -c:a copy "{output}"'
_EXT = 'mp4'
_CMD_TIMEOUT = 120
def __init__(self, media: DownMedia):
def __init__(self, media: DownMedia, cmd_tpl: str) -> None:
super().__init__(file_path=media.video.filepath)
self._media = media
self._video = media.video
self._CMD = cmd_tpl # lol
async def run(self) -> None:
await self._encode_video()

View file

@ -1,10 +1,8 @@
import os
from urllib.parse import urlparse
import yt_dlp
_COOKIES_FILEPATH = '/app/cookies/cookies.txt'
_INSTAGRAM_HOST = 'instagram.com'
def cli_to_api(opts: list) -> dict:
@ -30,9 +28,5 @@ def get_cookies_opts_if_not_empty() -> list[str]:
return [] if is_file_empty(_COOKIES_FILEPATH) else ['--cookies', _COOKIES_FILEPATH]
def is_instagram(url: str) -> bool:
return _INSTAGRAM_HOST in urlparse(url).netloc
def get_media_format_options() -> str:
pass

View file

@ -22,8 +22,6 @@ DEFAULT_YTDL_OPTS = [
'--ignore-errors',
'--verbose',
*get_cookies_opts_if_not_empty(),
'--format-sort',
'vcodec:h264',
]
AUDIO_YTDL_OPTS = [

View file

@ -0,0 +1,11 @@
from ytdl_opts.per_host._default import DefaultHost
from ytdl_opts.per_host.instagram import InstagramHost
from ytdl_opts.per_host.tiktok import TikTokHost
from ytdl_opts.per_host.twitter import TwitterHost
__all__ = [
'DefaultHost',
'InstagramHost',
'TikTokHost',
'TwitterHost',
]

View file

@ -0,0 +1,116 @@
import abc
import logging
import os
from copy import deepcopy
import pydantic
from pydantic import StrictBool, StrictStr
from yt_shared.enums import DownMediaType
from worker.utils import cli_to_api
try:
from ytdl_opts.user import (
AUDIO_FORMAT_YTDL_OPTS,
AUDIO_YTDL_OPTS,
DEFAULT_YTDL_OPTS,
FINAL_AUDIO_FORMAT,
FINAL_THUMBNAIL_FORMAT,
VIDEO_YTDL_OPTS,
)
except ImportError:
from ytdl_opts.default import (
AUDIO_FORMAT_YTDL_OPTS,
AUDIO_YTDL_OPTS,
DEFAULT_YTDL_OPTS,
FINAL_AUDIO_FORMAT,
FINAL_THUMBNAIL_FORMAT,
VIDEO_YTDL_OPTS,
)
class BaseHostConfModel(pydantic.BaseModel):
# TODO: Add validators.
hostnames: tuple[str, ...]
encode_audio: StrictBool
encode_video: StrictBool
ffmpeg_audio_opts: StrictStr | None
ffmpeg_video_opts: StrictStr | None
ytdl_opts: dict
class AbstractHostConfig:
"""Abstract yt-dlp host config."""
ALLOW_NULL_HOSTNAMES: bool | None = None
HOSTNAMES: tuple[str, ...] | None = None
CUSTOM_VIDEO_YTDL_OPTS: list[str] | None = None
ENCODE_AUDIO: bool | None = None
ENCODE_VIDEO: bool | None = None
KEEP_VIDEO_OPTION = '--keep-video'
DEFAULT_YTDL_OPTS = DEFAULT_YTDL_OPTS
AUDIO_YTDL_OPTS = AUDIO_YTDL_OPTS
AUDIO_FORMAT_YTDL_OPTS = AUDIO_FORMAT_YTDL_OPTS
FINAL_AUDIO_FORMAT = FINAL_AUDIO_FORMAT
FINAL_THUMBNAIL_FORMAT = FINAL_THUMBNAIL_FORMAT
DEFAULT_VIDEO_YTDL_OPTS = VIDEO_YTDL_OPTS
DEFAULT_VIDEO_FORMAT_SORT_OPT = ['--format-sort', 'res,vcodec:h265,h264']
FFMPEG_AUDIO_OPTS: str | None = None
FFMPEG_VIDEO_OPTS: str | None = None
def __init__(self, url: str) -> None:
self._log = logging.getLogger(self.__class__.__name__)
self._validate_hostname()
self.url = url
self._log.info('Instantiating "%s" for url "%s"', self.__class__.__name__, url)
def _validate_hostname(self) -> None:
if not self.ALLOW_NULL_HOSTNAMES and not self.HOSTNAMES:
raise ValueError('Hostname(s) must be set before instantiation.')
@abc.abstractmethod
def build_config(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> BaseHostConfModel:
pass
def _build_ytdl_opts(self, media_type: DownMediaType, curr_tmp_dir: str) -> dict:
def _add_video_opts(ytdl_opts_: list[str]) -> None:
ytdl_opts_.extend(self.DEFAULT_VIDEO_YTDL_OPTS)
ytdl_opts_.extend(self._build_custom_ytdl_video_opts())
ytdl_opts = deepcopy(self.DEFAULT_YTDL_OPTS)
match media_type: # noqa: E999
case DownMediaType.AUDIO:
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
ytdl_opts.extend(self.AUDIO_FORMAT_YTDL_OPTS)
case DownMediaType.VIDEO:
_add_video_opts(ytdl_opts)
case DownMediaType.AUDIO_VIDEO:
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
_add_video_opts(ytdl_opts)
ytdl_opts.append(self.KEEP_VIDEO_OPTION)
ytdl_opts = cli_to_api(ytdl_opts)
ytdl_opts['outtmpl']['default'] = os.path.join(
curr_tmp_dir,
ytdl_opts['outtmpl']['default'],
)
return ytdl_opts
@abc.abstractmethod
def _build_custom_ytdl_video_opts(self) -> list[str]:
pass

View file

@ -0,0 +1,30 @@
from yt_shared.enums import DownMediaType
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
from ytdl_opts.per_host._registry import HostConfRegistry
class DefaultHostModel(BaseHostConfModel):
hostnames: None
class DefaultHost(AbstractHostConfig, metaclass=HostConfRegistry):
ALLOW_NULL_HOSTNAMES = True
HOSTNAMES = None
ENCODE_AUDIO = False
ENCODE_VIDEO = False
def build_config(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> DefaultHostModel:
return DefaultHostModel(
hostnames=self.HOSTNAMES,
encode_audio=self.ENCODE_AUDIO,
encode_video=self.ENCODE_VIDEO,
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
)
def _build_custom_ytdl_video_opts(self) -> list[str]:
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT

View file

@ -0,0 +1,39 @@
from typing import TYPE_CHECKING, Type
if TYPE_CHECKING:
from ytdl_opts.per_host._base import AbstractHostConfig
class HostConfRegistry(type):
REGISTRY: dict[str, type['AbstractHostConfig']] = {}
HOST_TO_CLS_MAP = {}
def __new__(
mcs: Type['HostConfRegistry'],
name: str,
bases: tuple[type['AbstractHostConfig']],
attrs: dict,
) -> type['AbstractHostConfig']:
host_cls: type['AbstractHostConfig'] = type.__new__(mcs, name, bases, attrs)
mcs.REGISTRY[host_cls.__name__] = host_cls
mcs._build_host_to_cls_map(host_cls=host_cls, hostnames=attrs['HOSTNAMES'])
return host_cls
@classmethod
def get_registry(mcs) -> dict[str, type['AbstractHostConfig']]:
return mcs.REGISTRY.copy()
@classmethod
def get_host_to_cls_map(mcs) -> dict[str | None, type['AbstractHostConfig']]:
return mcs.HOST_TO_CLS_MAP.copy()
@classmethod
def _build_host_to_cls_map(
mcs, host_cls: type['AbstractHostConfig'], hostnames: tuple[str, ...] | None
) -> None:
if hostnames is None:
mcs.HOST_TO_CLS_MAP[None] = host_cls
else:
for host in hostnames:
mcs.HOST_TO_CLS_MAP[host] = host_cls

View file

@ -0,0 +1,37 @@
from yt_shared.constants import INSTAGRAM_HOSTS
from yt_shared.enums import DownMediaType
from worker.core.config import settings
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
from ytdl_opts.per_host._registry import HostConfRegistry
class InstagramHostModel(BaseHostConfModel):
pass
class InstagramHost(AbstractHostConfig, metaclass=HostConfRegistry):
ALLOW_NULL_HOSTNAMES = False
HOSTNAMES = INSTAGRAM_HOSTS
ENCODE_AUDIO = False
ENCODE_VIDEO = settings.INSTAGRAM_ENCODE_TO_H264
FFMPEG_AUDIO_OPTS = None
# Instagram returns VP9+AAC in MP4 container for logged users and needs to be
# encoded to H264 since Telegram doesn't play VP9 on iOS.
FFMPEG_VIDEO_OPTS = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 22 -movflags +faststart -c:a copy "{output}"'
def build_config(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> InstagramHostModel:
return InstagramHostModel(
hostnames=self.HOSTNAMES,
encode_audio=self.ENCODE_AUDIO,
encode_video=self.ENCODE_VIDEO,
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
)
def _build_custom_ytdl_video_opts(self) -> list[str]:
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT

View file

@ -0,0 +1,31 @@
from yt_shared.constants import TIKTOK_HOSTS
from yt_shared.enums import DownMediaType
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
from ytdl_opts.per_host._registry import HostConfRegistry
class TikTokHostModel(BaseHostConfModel):
pass
class TikTokHost(AbstractHostConfig, metaclass=HostConfRegistry):
ALLOW_NULL_HOSTNAMES = False
HOSTNAMES = TIKTOK_HOSTS
ENCODE_AUDIO = False
ENCODE_VIDEO = False
def build_config(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> TikTokHostModel:
return TikTokHostModel(
hostnames=self.HOSTNAMES,
encode_audio=self.ENCODE_AUDIO,
encode_video=self.ENCODE_VIDEO,
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
)
def _build_custom_ytdl_video_opts(self) -> list[str]:
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT

View file

@ -0,0 +1,31 @@
from yt_shared.constants import TWITTER_HOSTS
from yt_shared.enums import DownMediaType
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
from ytdl_opts.per_host._registry import HostConfRegistry
class TwitterHostModel(BaseHostConfModel):
pass
class TwitterHost(AbstractHostConfig, metaclass=HostConfRegistry):
ALLOW_NULL_HOSTNAMES = False
HOSTNAMES = TWITTER_HOSTS
ENCODE_AUDIO = False
ENCODE_VIDEO = False
def build_config(
self, media_type: DownMediaType, curr_tmp_dir: str
) -> TwitterHostModel:
return TwitterHostModel(
hostnames=self.HOSTNAMES,
encode_audio=self.ENCODE_AUDIO,
encode_video=self.ENCODE_VIDEO,
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
)
def _build_custom_ytdl_video_opts(self) -> list[str]:
return ['--format-sort', 'res,proto:https,vcodec:h265,h264']

View file

@ -0,0 +1,18 @@
from asyncio import Lock
ASYNC_LOCK = Lock()
INSTAGRAM_HOSTS = ('instagram.com', 'www.instagram.com')
TIKTOK_HOSTS = ('tiktok.com', 'vm.tiktok.com', 'www.tiktok.com', 'www.vm.tiktok.com')
TWITTER_HOSTS = (
'twitter.com',
'www.twitter.com',
'x.com',
'www.x.com',
't.co',
'www.t.co',
)
REMOVE_QUERY_PARAMS_HOSTS = TWITTER_HOSTS + INSTAGRAM_HOSTS

View file

@ -4,11 +4,11 @@ from uuid import UUID
from sqlalchemy import insert, select
from sqlalchemy.exc import NoResultFound
from sqlalchemy.ext.asyncio import AsyncSession
from yt_shared.constants import ASYNC_LOCK
from yt_shared.enums import TaskStatus
from yt_shared.models import Cache, File, Task
from yt_shared.schemas.cache import CacheSchema
from yt_shared.schemas.media import BaseMedia, InbMediaPayload, Video
from yt_shared.utils.common import ASYNC_LOCK
class TaskRepository:

View file

@ -14,6 +14,7 @@ from pydantic import (
from yt_shared.enums import DownMediaType, MediaFileType, TaskSource, TelegramChatType
from yt_shared.schemas.base import RealBaseModel
from yt_shared.utils.common import format_bytes
from yt_shared.utils.file import file_size
class InbMediaPayload(RealBaseModel):
@ -24,8 +25,9 @@ class InbMediaPayload(RealBaseModel):
from_chat_type: TelegramChatType | None
from_user_id: StrictInt | None
message_id: StrictInt | None
acknowledge_message_id: StrictInt | None
ack_message_id: StrictInt | None
url: StrictStr
original_url: StrictStr
source: TaskSource
save_to_storage: StrictBool
download_media_type: DownMediaType
@ -49,9 +51,15 @@ class BaseMedia(RealBaseModel):
is_converted: StrictBool = False
converted_filepath: StrictStr | None = None
converted_filename: StrictStr | None = None
converted_file_size: StrictInt | None = None
def file_size_human(self) -> str:
return format_bytes(num=self.file_size)
return format_bytes(num=self.current_file_size())
def current_file_size(self) -> int:
if self.converted_file_size is not None:
return self.converted_file_size
return self.file_size
def mark_as_saved_to_storage(self, storage_path: str) -> None:
self.storage_path = storage_path
@ -60,6 +68,7 @@ class BaseMedia(RealBaseModel):
def mark_as_converted(self, filepath: str) -> None:
self.converted_filepath = filepath
self.converted_filename = filepath.rsplit("/", 1)[-1]
self.converted_file_size = file_size(filepath)
self.is_converted = True

View file

@ -6,10 +6,11 @@ from yt_shared.schemas.base import RealBaseModel
class URL(RealBaseModel):
url: StrictStr
original_url: StrictStr
from_chat_id: StrictInt
from_chat_type: TelegramChatType
from_user_id: StrictInt
message_id: StrictInt
acknowledge_message_id: StrictInt
ack_message_id: StrictInt
save_to_storage: StrictBool
download_media_type: DownMediaType

View file

@ -6,8 +6,6 @@ from functools import partial, wraps
from string import ascii_lowercase
from typing import Any, Callable
ASYNC_LOCK = asyncio.Lock()
_UNIT_SIZE_NAMES = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
_BASE = 1024.0