mirror of
https://github.com/tropicoo/yt-dlp-bot.git
synced 2024-09-20 06:46:08 +08:00
Refactor
This commit is contained in:
parent
39b59d9efc
commit
b5f2031cbc
|
@ -39,13 +39,13 @@ class TelegramCallback:
|
||||||
self._log.debug('No urls to download, skipping message')
|
self._log.debug('No urls to download, skipping message')
|
||||||
return
|
return
|
||||||
|
|
||||||
acknowledge_message = await self._send_acknowledge_message(
|
ack_message = await self._send_acknowledge_message(
|
||||||
message=message, url_count=len(urls)
|
message=message, url_count=len(urls)
|
||||||
)
|
)
|
||||||
context = {
|
context = {
|
||||||
'message': message,
|
'message': message,
|
||||||
'user': user,
|
'user': user,
|
||||||
'acknowledge_message': acknowledge_message,
|
'ack_message': ack_message,
|
||||||
}
|
}
|
||||||
url_objects = self._url_parser.parse_urls(urls=urls, context=context)
|
url_objects = self._url_parser.parse_urls(urls=urls, context=context)
|
||||||
await self._url_service.process_urls(url_objects)
|
await self._url_service.process_urls(url_objects)
|
||||||
|
|
|
@ -9,7 +9,8 @@ from yt_shared.rabbit.publisher import RmqPublisher
|
||||||
from yt_shared.schemas.error import ErrorDownloadGeneralPayload
|
from yt_shared.schemas.error import ErrorDownloadGeneralPayload
|
||||||
from yt_shared.schemas.media import BaseMedia
|
from yt_shared.schemas.media import BaseMedia
|
||||||
from yt_shared.schemas.success import SuccessDownloadPayload
|
from yt_shared.schemas.success import SuccessDownloadPayload
|
||||||
from yt_shared.utils.file import remove_dir
|
from yt_shared.utils.common import format_bytes
|
||||||
|
from yt_shared.utils.file import file_size, remove_dir
|
||||||
from yt_shared.utils.tasks.tasks import create_task
|
from yt_shared.utils.tasks.tasks import create_task
|
||||||
|
|
||||||
from bot.core.handlers.abstract import AbstractDownloadHandler
|
from bot.core.handlers.abstract import AbstractDownloadHandler
|
||||||
|
@ -43,7 +44,7 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
|
||||||
async def _delete_acknowledge_message(self) -> None:
|
async def _delete_acknowledge_message(self) -> None:
|
||||||
await self._bot.delete_messages(
|
await self._bot.delete_messages(
|
||||||
chat_id=self._body.from_chat_id,
|
chat_id=self._body.from_chat_id,
|
||||||
message_ids=[self._body.context.acknowledge_message_id],
|
message_ids=[self._body.context.ack_message_id],
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _publish_error_message(self, err: Exception) -> None:
|
async def _publish_error_message(self, err: Exception) -> None:
|
||||||
|
@ -83,7 +84,10 @@ class SuccessDownloadHandler(AbstractDownloadHandler):
|
||||||
'Cleaning up task "%s": removing download content directory "%s" with files %s',
|
'Cleaning up task "%s": removing download content directory "%s" with files %s',
|
||||||
self._body.task_id,
|
self._body.task_id,
|
||||||
root_path,
|
root_path,
|
||||||
os.listdir(root_path),
|
{
|
||||||
|
fn: format_bytes(file_size(os.path.join(root_path, fn)))
|
||||||
|
for fn in os.listdir(root_path)
|
||||||
|
},
|
||||||
)
|
)
|
||||||
remove_dir(root_path)
|
remove_dir(root_path)
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,17 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from itertools import product
|
from itertools import product
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from pyrogram.types import Message
|
from pyrogram.types import Message
|
||||||
|
from yt_shared.constants import REMOVE_QUERY_PARAMS_HOSTS
|
||||||
from yt_shared.enums import TaskSource, TelegramChatType
|
from yt_shared.enums import TaskSource, TelegramChatType
|
||||||
from yt_shared.rabbit.publisher import RmqPublisher
|
from yt_shared.rabbit.publisher import RmqPublisher
|
||||||
from yt_shared.schemas.media import InbMediaPayload
|
from yt_shared.schemas.media import InbMediaPayload
|
||||||
from yt_shared.schemas.url import URL
|
from yt_shared.schemas.url import URL
|
||||||
|
|
||||||
from bot.core.config.schema import UserSchema
|
from bot.core.config.schema import UserSchema
|
||||||
|
from bot.core.utils import can_remove_url_params
|
||||||
|
|
||||||
|
|
||||||
class UrlService:
|
class UrlService:
|
||||||
|
@ -23,8 +26,9 @@ class UrlService:
|
||||||
async def _send_to_worker(self, url: URL) -> bool:
|
async def _send_to_worker(self, url: URL) -> bool:
|
||||||
payload = InbMediaPayload(
|
payload = InbMediaPayload(
|
||||||
url=url.url,
|
url=url.url,
|
||||||
|
original_url=url.original_url,
|
||||||
message_id=url.message_id,
|
message_id=url.message_id,
|
||||||
acknowledge_message_id=url.acknowledge_message_id,
|
ack_message_id=url.ack_message_id,
|
||||||
from_user_id=url.from_user_id,
|
from_user_id=url.from_user_id,
|
||||||
from_chat_id=url.from_chat_id,
|
from_chat_id=url.from_chat_id,
|
||||||
from_chat_type=url.from_chat_type,
|
from_chat_type=url.from_chat_type,
|
||||||
|
@ -43,24 +47,34 @@ class UrlParser:
|
||||||
self._log = logging.getLogger(self.__class__.__name__)
|
self._log = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
def _preprocess_urls(urls: list[str]) -> dict[str, str]:
|
||||||
|
preprocessed_urls = {}
|
||||||
|
for url in urls:
|
||||||
|
if can_remove_url_params(url, REMOVE_QUERY_PARAMS_HOSTS):
|
||||||
|
preprocessed_urls[url] = urljoin(url, urlparse(url).path)
|
||||||
|
else:
|
||||||
|
preprocessed_urls[url] = url
|
||||||
|
return preprocessed_urls
|
||||||
|
|
||||||
def parse_urls(
|
def parse_urls(
|
||||||
urls: list[str], context: dict[str, Message | UserSchema]
|
self, urls: list[str], context: dict[str, Message | UserSchema]
|
||||||
) -> list[URL]:
|
) -> list[URL]:
|
||||||
message: Message = context['message']
|
message: Message = context['message']
|
||||||
user: UserSchema = context['user']
|
user: UserSchema = context['user']
|
||||||
acknowledge_message: Message = context['acknowledge_message']
|
ack_message: Message = context['ack_message']
|
||||||
return [
|
return [
|
||||||
URL(
|
URL(
|
||||||
url=url,
|
url=url,
|
||||||
|
original_url=orig_url,
|
||||||
from_chat_id=message.chat.id,
|
from_chat_id=message.chat.id,
|
||||||
from_chat_type=TelegramChatType(message.chat.type.value),
|
from_chat_type=TelegramChatType(message.chat.type.value),
|
||||||
from_user_id=message.from_user.id,
|
from_user_id=message.from_user.id,
|
||||||
message_id=message.id,
|
message_id=message.id,
|
||||||
acknowledge_message_id=acknowledge_message.id,
|
ack_message_id=ack_message.id,
|
||||||
save_to_storage=user.save_to_storage,
|
save_to_storage=user.save_to_storage,
|
||||||
download_media_type=user.download_media_type,
|
download_media_type=user.download_media_type,
|
||||||
)
|
)
|
||||||
for url in urls
|
for orig_url, url in self._preprocess_urls(urls).items()
|
||||||
]
|
]
|
||||||
|
|
||||||
def filter_urls(self, urls: list[str], regexes: list[str]) -> list[str]:
|
def filter_urls(self, urls: list[str], regexes: list[str]) -> list[str]:
|
||||||
|
|
|
@ -269,13 +269,13 @@ class VideoUploadTask(AbstractUploadTask):
|
||||||
caption_conf = self._get_caption_conf()
|
caption_conf = self._get_caption_conf()
|
||||||
|
|
||||||
if caption_conf.include_title:
|
if caption_conf.include_title:
|
||||||
caption_items.append(f'📝 {self._media_object.title}')
|
caption_items.append(self._media_object.title)
|
||||||
if caption_conf.include_filename:
|
if caption_conf.include_filename:
|
||||||
caption_items.append(f'ℹ️ {self._filename}')
|
caption_items.append(self._filename)
|
||||||
if caption_conf.include_link:
|
if caption_conf.include_link:
|
||||||
caption_items.append(f'👀 {self._ctx.context.url}')
|
caption_items.append(self._ctx.context.url)
|
||||||
if caption_conf.include_size:
|
if caption_conf.include_size:
|
||||||
caption_items.append(f'💾 {self._media_object.file_size_human()}')
|
caption_items.append(self._media_object.file_size_human())
|
||||||
return caption_items
|
return caption_items
|
||||||
|
|
||||||
def _generate_send_media_coroutine(self, chat_id: int) -> Coroutine:
|
def _generate_send_media_coroutine(self, chat_id: int) -> Coroutine:
|
||||||
|
|
|
@ -3,7 +3,8 @@ import asyncio
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Generator
|
from typing import Generator, Iterable
|
||||||
|
from urllib.parse import urlparse
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from pyrogram.enums import ChatType
|
from pyrogram.enums import ChatType
|
||||||
|
@ -87,3 +88,7 @@ def split_telegram_message(
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
yield text
|
yield text
|
||||||
|
|
||||||
|
|
||||||
|
def can_remove_url_params(url: str, matching_hosts: Iterable[str]) -> bool:
|
||||||
|
return urlparse(url).netloc in set(matching_hosts)
|
||||||
|
|
|
@ -2,37 +2,26 @@ import glob
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from copy import deepcopy
|
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
from yt_shared.enums import DownMediaType
|
from yt_shared.enums import DownMediaType
|
||||||
from yt_shared.schemas.media import Audio, DownMedia, Video
|
from yt_shared.schemas.media import Audio, DownMedia, Video
|
||||||
from yt_shared.utils.common import random_string
|
from yt_shared.utils.common import format_bytes, random_string
|
||||||
from yt_shared.utils.file import file_size
|
from yt_shared.utils.file import file_size
|
||||||
|
|
||||||
from worker.core.config import settings
|
from worker.core.config import settings
|
||||||
from worker.core.exceptions import MediaDownloaderError
|
from worker.core.exceptions import MediaDownloaderError
|
||||||
from worker.utils import cli_to_api
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ytdl_opts.user import (
|
from ytdl_opts.user import FINAL_THUMBNAIL_FORMAT
|
||||||
AUDIO_FORMAT_YTDL_OPTS,
|
|
||||||
AUDIO_YTDL_OPTS,
|
|
||||||
DEFAULT_YTDL_OPTS,
|
|
||||||
FINAL_AUDIO_FORMAT,
|
|
||||||
FINAL_THUMBNAIL_FORMAT,
|
|
||||||
VIDEO_YTDL_OPTS,
|
|
||||||
)
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from ytdl_opts.default import (
|
from ytdl_opts.default import FINAL_AUDIO_FORMAT, FINAL_THUMBNAIL_FORMAT
|
||||||
AUDIO_FORMAT_YTDL_OPTS,
|
|
||||||
AUDIO_YTDL_OPTS,
|
|
||||||
DEFAULT_YTDL_OPTS,
|
|
||||||
FINAL_AUDIO_FORMAT,
|
|
||||||
FINAL_THUMBNAIL_FORMAT,
|
|
||||||
VIDEO_YTDL_OPTS,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MediaDownloader:
|
class MediaDownloader:
|
||||||
|
@ -51,49 +40,34 @@ class MediaDownloader:
|
||||||
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOADED_DIR
|
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOADED_DIR
|
||||||
)
|
)
|
||||||
|
|
||||||
def download(self, url: str, media_type: DownMediaType) -> DownMedia:
|
def download(
|
||||||
|
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
|
||||||
|
) -> DownMedia:
|
||||||
try:
|
try:
|
||||||
return self._download(url=url, media_type=media_type)
|
return self._download(host_conf=host_conf, media_type=media_type)
|
||||||
except Exception:
|
except Exception:
|
||||||
self._log.error('Failed to download %s', url)
|
self._log.error('Failed to download %s', host_conf.url)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _configure_ytdl_opts(
|
def _download(
|
||||||
self, media_type: DownMediaType, curr_tmp_dir: str
|
self, host_conf: 'AbstractHostConfig', media_type: DownMediaType
|
||||||
) -> dict:
|
) -> DownMedia:
|
||||||
ytdl_opts = deepcopy(DEFAULT_YTDL_OPTS)
|
url = host_conf.url
|
||||||
match media_type: # noqa: E999
|
|
||||||
case DownMediaType.AUDIO:
|
|
||||||
ytdl_opts.extend(AUDIO_YTDL_OPTS)
|
|
||||||
ytdl_opts.extend(AUDIO_FORMAT_YTDL_OPTS)
|
|
||||||
case DownMediaType.VIDEO:
|
|
||||||
ytdl_opts.extend(VIDEO_YTDL_OPTS)
|
|
||||||
case DownMediaType.AUDIO_VIDEO:
|
|
||||||
ytdl_opts.extend(AUDIO_YTDL_OPTS)
|
|
||||||
ytdl_opts.extend(VIDEO_YTDL_OPTS)
|
|
||||||
ytdl_opts.append(self._KEEP_VIDEO_OPTION)
|
|
||||||
|
|
||||||
ytdl_opts = cli_to_api(ytdl_opts)
|
|
||||||
ytdl_opts['outtmpl']['default'] = os.path.join(
|
|
||||||
curr_tmp_dir,
|
|
||||||
ytdl_opts['outtmpl']['default'],
|
|
||||||
)
|
|
||||||
return ytdl_opts
|
|
||||||
|
|
||||||
def _download(self, url: str, media_type: DownMediaType) -> DownMedia:
|
|
||||||
self._log.info('Downloading %s, media_type %s', url, media_type)
|
self._log.info('Downloading %s, media_type %s', url, media_type)
|
||||||
tmp_down_path = os.path.join(
|
tmp_down_path = os.path.join(
|
||||||
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOAD_DIR
|
settings.TMP_DOWNLOAD_ROOT_PATH, settings.TMP_DOWNLOAD_DIR
|
||||||
)
|
)
|
||||||
with TemporaryDirectory(prefix='tmp_media_dir-', dir=tmp_down_path) as tmp_dir:
|
with TemporaryDirectory(prefix='tmp_media_dir-', dir=tmp_down_path) as tmp_dir:
|
||||||
curr_tmp_dir = os.path.join(tmp_down_path, tmp_dir)
|
curr_tmp_dir = os.path.join(tmp_down_path, tmp_dir)
|
||||||
ytdl_opts = self._configure_ytdl_opts(
|
|
||||||
|
ytdl_opts_model = host_conf.build_config(
|
||||||
media_type=media_type, curr_tmp_dir=curr_tmp_dir
|
media_type=media_type, curr_tmp_dir=curr_tmp_dir
|
||||||
)
|
)
|
||||||
with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
|
|
||||||
|
with yt_dlp.YoutubeDL(ytdl_opts_model.ytdl_opts) as ytdl:
|
||||||
self._log.info('Downloading %s', url)
|
self._log.info('Downloading %s', url)
|
||||||
self._log.info('Downloading to %s', curr_tmp_dir)
|
self._log.info('Downloading to %s', curr_tmp_dir)
|
||||||
self._log.info('Downloading with options %s', ytdl_opts)
|
self._log.info('Downloading with options %s', ytdl_opts_model.ytdl_opts)
|
||||||
|
|
||||||
meta: dict | None = ytdl.extract_info(url, download=True)
|
meta: dict | None = ytdl.extract_info(url, download=True)
|
||||||
current_files = os.listdir(curr_tmp_dir)
|
current_files = os.listdir(curr_tmp_dir)
|
||||||
|
@ -223,7 +197,12 @@ class MediaDownloader:
|
||||||
"""Try to find downloaded audio or thumbnail file."""
|
"""Try to find downloaded audio or thumbnail file."""
|
||||||
verbose_name = self._EXT_TO_NAME[extension]
|
verbose_name = self._EXT_TO_NAME[extension]
|
||||||
for file_name in glob.glob(f"*.{extension}", root_dir=root_path):
|
for file_name in glob.glob(f"*.{extension}", root_dir=root_path):
|
||||||
self._log.info('Found downloaded %s: "%s"', verbose_name, file_name)
|
self._log.info(
|
||||||
|
'Found downloaded %s: "%s" [%s]',
|
||||||
|
verbose_name,
|
||||||
|
file_name,
|
||||||
|
format_bytes(file_size(os.path.join(root_path, file_name))),
|
||||||
|
)
|
||||||
return file_name
|
return file_name
|
||||||
self._log.info('Downloaded %s not found in "%s"', verbose_name, root_path)
|
self._log.info('Downloaded %s not found in "%s"', verbose_name, root_path)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -2,6 +2,8 @@ import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from urllib.parse import urlsplit
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from yt_shared.enums import DownMediaType, TaskStatus
|
from yt_shared.enums import DownMediaType, TaskStatus
|
||||||
|
@ -22,7 +24,10 @@ from worker.core.exceptions import DownloadVideoServiceError
|
||||||
from worker.core.tasks.encode import EncodeToH264Task
|
from worker.core.tasks.encode import EncodeToH264Task
|
||||||
from worker.core.tasks.ffprobe_context import GetFfprobeContextTask
|
from worker.core.tasks.ffprobe_context import GetFfprobeContextTask
|
||||||
from worker.core.tasks.thumbnail import MakeThumbnailTask
|
from worker.core.tasks.thumbnail import MakeThumbnailTask
|
||||||
from worker.utils import is_instagram
|
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||||
|
|
||||||
|
|
||||||
class MediaService:
|
class MediaService:
|
||||||
|
@ -45,24 +50,45 @@ class MediaService:
|
||||||
async def _process(
|
async def _process(
|
||||||
self, media_payload: InbMediaPayload, task: Task, db: AsyncSession
|
self, media_payload: InbMediaPayload, task: Task, db: AsyncSession
|
||||||
) -> DownMedia:
|
) -> DownMedia:
|
||||||
|
host_conf = self._get_host_conf(url=task.url)
|
||||||
await self._repository.save_as_processing(db, task)
|
await self._repository.save_as_processing(db, task)
|
||||||
media = await self._start_download(task, media_payload, db)
|
media = await self._start_download(
|
||||||
|
task=task, media_payload=media_payload, host_conf=host_conf, db=db
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
await self._post_process_media(media, task, media_payload, db)
|
await self._post_process_media(
|
||||||
|
media=media,
|
||||||
|
task=task,
|
||||||
|
media_payload=media_payload,
|
||||||
|
host_conf=host_conf,
|
||||||
|
db=db,
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
self._log.exception('Failed to post-process media %s', media)
|
self._log.exception('Failed to post-process media %s', media)
|
||||||
self._err_file_cleanup(media)
|
self._err_file_cleanup(media)
|
||||||
raise
|
raise
|
||||||
return media
|
return media
|
||||||
|
|
||||||
|
def _get_host_conf(self, url: str) -> 'AbstractHostConfig':
|
||||||
|
host_to_cls_map = HostConfRegistry.get_host_to_cls_map()
|
||||||
|
self._log.info('Registry: %s', HostConfRegistry.get_registry())
|
||||||
|
self._log.info('Host to CLS map: %s', host_to_cls_map)
|
||||||
|
host_cls = host_to_cls_map.get(urlsplit(url).netloc, host_to_cls_map[None])
|
||||||
|
return host_cls(url=url)
|
||||||
|
|
||||||
async def _start_download(
|
async def _start_download(
|
||||||
self, task: Task, media_payload: InbMediaPayload, db: AsyncSession
|
self,
|
||||||
|
task: Task,
|
||||||
|
media_payload: InbMediaPayload,
|
||||||
|
host_conf: 'AbstractHostConfig',
|
||||||
|
db: AsyncSession,
|
||||||
) -> DownMedia:
|
) -> DownMedia:
|
||||||
try:
|
try:
|
||||||
return await asyncio.get_running_loop().run_in_executor(
|
return await asyncio.get_running_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: self._downloader.download(
|
lambda: self._downloader.download(
|
||||||
task.url, media_type=media_payload.download_media_type
|
host_conf=host_conf,
|
||||||
|
media_type=media_payload.download_media_type,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
@ -75,16 +101,25 @@ class MediaService:
|
||||||
media: DownMedia,
|
media: DownMedia,
|
||||||
task: Task,
|
task: Task,
|
||||||
media_payload: InbMediaPayload,
|
media_payload: InbMediaPayload,
|
||||||
|
host_conf: 'AbstractHostConfig',
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
) -> None:
|
) -> None:
|
||||||
def post_process_audio():
|
def post_process_audio():
|
||||||
return self._post_process_audio(
|
return self._post_process_audio(
|
||||||
media=media, media_payload=media_payload, task=task, db=db
|
media=media,
|
||||||
|
media_payload=media_payload,
|
||||||
|
task=task,
|
||||||
|
host_conf=host_conf,
|
||||||
|
db=db,
|
||||||
)
|
)
|
||||||
|
|
||||||
def post_process_video():
|
def post_process_video():
|
||||||
return self._post_process_video(
|
return self._post_process_video(
|
||||||
media=media, media_payload=media_payload, task=task, db=db
|
media=media,
|
||||||
|
media_payload=media_payload,
|
||||||
|
task=task,
|
||||||
|
host_conf=host_conf,
|
||||||
|
db=db,
|
||||||
)
|
)
|
||||||
|
|
||||||
match media.media_type: # noqa: E999
|
match media.media_type: # noqa: E999
|
||||||
|
@ -102,6 +137,7 @@ class MediaService:
|
||||||
media: DownMedia,
|
media: DownMedia,
|
||||||
media_payload: InbMediaPayload,
|
media_payload: InbMediaPayload,
|
||||||
task: Task,
|
task: Task,
|
||||||
|
host_conf: 'AbstractHostConfig',
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Post-process downloaded media files, e.g. make thumbnail and copy to storage."""
|
"""Post-process downloaded media files, e.g. make thumbnail and copy to storage."""
|
||||||
|
@ -130,12 +166,12 @@ class MediaService:
|
||||||
if media_payload.save_to_storage:
|
if media_payload.save_to_storage:
|
||||||
coro_tasks.append(self._create_copy_file_task(video))
|
coro_tasks.append(self._create_copy_file_task(video))
|
||||||
|
|
||||||
# Instagram returns VP9+AAC in MP4 container for logged users and needs
|
if host_conf.ENCODE_VIDEO:
|
||||||
# to be encoded to H264 since Telegram doesn't play VP9 on iOS
|
|
||||||
if settings.INSTAGRAM_ENCODE_TO_H264 and is_instagram(media_payload.url):
|
|
||||||
coro_tasks.append(
|
coro_tasks.append(
|
||||||
create_task(
|
create_task(
|
||||||
EncodeToH264Task(media=media).run(),
|
EncodeToH264Task(
|
||||||
|
media=media, cmd_tpl=host_conf.FFMPEG_VIDEO_OPTS
|
||||||
|
).run(),
|
||||||
task_name=EncodeToH264Task.__class__.__name__,
|
task_name=EncodeToH264Task.__class__.__name__,
|
||||||
logger=self._log,
|
logger=self._log,
|
||||||
exception_message='Task "%s" raised an exception',
|
exception_message='Task "%s" raised an exception',
|
||||||
|
@ -153,6 +189,7 @@ class MediaService:
|
||||||
media: DownMedia,
|
media: DownMedia,
|
||||||
media_payload: InbMediaPayload,
|
media_payload: InbMediaPayload,
|
||||||
task: Task,
|
task: Task,
|
||||||
|
host_conf: 'AbstractHostConfig',
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
) -> None:
|
) -> None:
|
||||||
coro_tasks = [self._repository.save_file(db, task, media.audio, media.meta)]
|
coro_tasks = [self._repository.save_file(db, task, media.audio, media.meta)]
|
||||||
|
|
|
@ -6,14 +6,14 @@ from worker.core.tasks.abstract import AbstractFfBinaryTask
|
||||||
|
|
||||||
|
|
||||||
class EncodeToH264Task(AbstractFfBinaryTask):
|
class EncodeToH264Task(AbstractFfBinaryTask):
|
||||||
_CMD = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 25 -movflags +faststart -c:a copy "{output}"'
|
|
||||||
_EXT = 'mp4'
|
_EXT = 'mp4'
|
||||||
_CMD_TIMEOUT = 120
|
_CMD_TIMEOUT = 120
|
||||||
|
|
||||||
def __init__(self, media: DownMedia):
|
def __init__(self, media: DownMedia, cmd_tpl: str) -> None:
|
||||||
super().__init__(file_path=media.video.filepath)
|
super().__init__(file_path=media.video.filepath)
|
||||||
self._media = media
|
self._media = media
|
||||||
self._video = media.video
|
self._video = media.video
|
||||||
|
self._CMD = cmd_tpl # lol
|
||||||
|
|
||||||
async def run(self) -> None:
|
async def run(self) -> None:
|
||||||
await self._encode_video()
|
await self._encode_video()
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
import os
|
import os
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
|
|
||||||
_COOKIES_FILEPATH = '/app/cookies/cookies.txt'
|
_COOKIES_FILEPATH = '/app/cookies/cookies.txt'
|
||||||
_INSTAGRAM_HOST = 'instagram.com'
|
|
||||||
|
|
||||||
|
|
||||||
def cli_to_api(opts: list) -> dict:
|
def cli_to_api(opts: list) -> dict:
|
||||||
|
@ -30,9 +28,5 @@ def get_cookies_opts_if_not_empty() -> list[str]:
|
||||||
return [] if is_file_empty(_COOKIES_FILEPATH) else ['--cookies', _COOKIES_FILEPATH]
|
return [] if is_file_empty(_COOKIES_FILEPATH) else ['--cookies', _COOKIES_FILEPATH]
|
||||||
|
|
||||||
|
|
||||||
def is_instagram(url: str) -> bool:
|
|
||||||
return _INSTAGRAM_HOST in urlparse(url).netloc
|
|
||||||
|
|
||||||
|
|
||||||
def get_media_format_options() -> str:
|
def get_media_format_options() -> str:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -22,8 +22,6 @@ DEFAULT_YTDL_OPTS = [
|
||||||
'--ignore-errors',
|
'--ignore-errors',
|
||||||
'--verbose',
|
'--verbose',
|
||||||
*get_cookies_opts_if_not_empty(),
|
*get_cookies_opts_if_not_empty(),
|
||||||
'--format-sort',
|
|
||||||
'vcodec:h264',
|
|
||||||
]
|
]
|
||||||
|
|
||||||
AUDIO_YTDL_OPTS = [
|
AUDIO_YTDL_OPTS = [
|
||||||
|
|
11
app_worker/ytdl_opts/per_host/__init__.py
Normal file
11
app_worker/ytdl_opts/per_host/__init__.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
from ytdl_opts.per_host._default import DefaultHost
|
||||||
|
from ytdl_opts.per_host.instagram import InstagramHost
|
||||||
|
from ytdl_opts.per_host.tiktok import TikTokHost
|
||||||
|
from ytdl_opts.per_host.twitter import TwitterHost
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'DefaultHost',
|
||||||
|
'InstagramHost',
|
||||||
|
'TikTokHost',
|
||||||
|
'TwitterHost',
|
||||||
|
]
|
116
app_worker/ytdl_opts/per_host/_base.py
Normal file
116
app_worker/ytdl_opts/per_host/_base.py
Normal file
|
@ -0,0 +1,116 @@
|
||||||
|
import abc
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
import pydantic
|
||||||
|
from pydantic import StrictBool, StrictStr
|
||||||
|
from yt_shared.enums import DownMediaType
|
||||||
|
|
||||||
|
from worker.utils import cli_to_api
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ytdl_opts.user import (
|
||||||
|
AUDIO_FORMAT_YTDL_OPTS,
|
||||||
|
AUDIO_YTDL_OPTS,
|
||||||
|
DEFAULT_YTDL_OPTS,
|
||||||
|
FINAL_AUDIO_FORMAT,
|
||||||
|
FINAL_THUMBNAIL_FORMAT,
|
||||||
|
VIDEO_YTDL_OPTS,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
from ytdl_opts.default import (
|
||||||
|
AUDIO_FORMAT_YTDL_OPTS,
|
||||||
|
AUDIO_YTDL_OPTS,
|
||||||
|
DEFAULT_YTDL_OPTS,
|
||||||
|
FINAL_AUDIO_FORMAT,
|
||||||
|
FINAL_THUMBNAIL_FORMAT,
|
||||||
|
VIDEO_YTDL_OPTS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseHostConfModel(pydantic.BaseModel):
|
||||||
|
# TODO: Add validators.
|
||||||
|
|
||||||
|
hostnames: tuple[str, ...]
|
||||||
|
|
||||||
|
encode_audio: StrictBool
|
||||||
|
encode_video: StrictBool
|
||||||
|
|
||||||
|
ffmpeg_audio_opts: StrictStr | None
|
||||||
|
ffmpeg_video_opts: StrictStr | None
|
||||||
|
|
||||||
|
ytdl_opts: dict
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractHostConfig:
|
||||||
|
"""Abstract yt-dlp host config."""
|
||||||
|
|
||||||
|
ALLOW_NULL_HOSTNAMES: bool | None = None
|
||||||
|
HOSTNAMES: tuple[str, ...] | None = None
|
||||||
|
|
||||||
|
CUSTOM_VIDEO_YTDL_OPTS: list[str] | None = None
|
||||||
|
|
||||||
|
ENCODE_AUDIO: bool | None = None
|
||||||
|
ENCODE_VIDEO: bool | None = None
|
||||||
|
|
||||||
|
KEEP_VIDEO_OPTION = '--keep-video'
|
||||||
|
|
||||||
|
DEFAULT_YTDL_OPTS = DEFAULT_YTDL_OPTS
|
||||||
|
|
||||||
|
AUDIO_YTDL_OPTS = AUDIO_YTDL_OPTS
|
||||||
|
AUDIO_FORMAT_YTDL_OPTS = AUDIO_FORMAT_YTDL_OPTS
|
||||||
|
|
||||||
|
FINAL_AUDIO_FORMAT = FINAL_AUDIO_FORMAT
|
||||||
|
FINAL_THUMBNAIL_FORMAT = FINAL_THUMBNAIL_FORMAT
|
||||||
|
|
||||||
|
DEFAULT_VIDEO_YTDL_OPTS = VIDEO_YTDL_OPTS
|
||||||
|
DEFAULT_VIDEO_FORMAT_SORT_OPT = ['--format-sort', 'res,vcodec:h265,h264']
|
||||||
|
|
||||||
|
FFMPEG_AUDIO_OPTS: str | None = None
|
||||||
|
FFMPEG_VIDEO_OPTS: str | None = None
|
||||||
|
|
||||||
|
def __init__(self, url: str) -> None:
|
||||||
|
self._log = logging.getLogger(self.__class__.__name__)
|
||||||
|
self._validate_hostname()
|
||||||
|
self.url = url
|
||||||
|
self._log.info('Instantiating "%s" for url "%s"', self.__class__.__name__, url)
|
||||||
|
|
||||||
|
def _validate_hostname(self) -> None:
|
||||||
|
if not self.ALLOW_NULL_HOSTNAMES and not self.HOSTNAMES:
|
||||||
|
raise ValueError('Hostname(s) must be set before instantiation.')
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def build_config(
|
||||||
|
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||||
|
) -> BaseHostConfModel:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _build_ytdl_opts(self, media_type: DownMediaType, curr_tmp_dir: str) -> dict:
|
||||||
|
def _add_video_opts(ytdl_opts_: list[str]) -> None:
|
||||||
|
ytdl_opts_.extend(self.DEFAULT_VIDEO_YTDL_OPTS)
|
||||||
|
ytdl_opts_.extend(self._build_custom_ytdl_video_opts())
|
||||||
|
|
||||||
|
ytdl_opts = deepcopy(self.DEFAULT_YTDL_OPTS)
|
||||||
|
|
||||||
|
match media_type: # noqa: E999
|
||||||
|
case DownMediaType.AUDIO:
|
||||||
|
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
|
||||||
|
ytdl_opts.extend(self.AUDIO_FORMAT_YTDL_OPTS)
|
||||||
|
case DownMediaType.VIDEO:
|
||||||
|
_add_video_opts(ytdl_opts)
|
||||||
|
case DownMediaType.AUDIO_VIDEO:
|
||||||
|
ytdl_opts.extend(self.AUDIO_YTDL_OPTS)
|
||||||
|
_add_video_opts(ytdl_opts)
|
||||||
|
ytdl_opts.append(self.KEEP_VIDEO_OPTION)
|
||||||
|
|
||||||
|
ytdl_opts = cli_to_api(ytdl_opts)
|
||||||
|
ytdl_opts['outtmpl']['default'] = os.path.join(
|
||||||
|
curr_tmp_dir,
|
||||||
|
ytdl_opts['outtmpl']['default'],
|
||||||
|
)
|
||||||
|
return ytdl_opts
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||||
|
pass
|
30
app_worker/ytdl_opts/per_host/_default.py
Normal file
30
app_worker/ytdl_opts/per_host/_default.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from yt_shared.enums import DownMediaType
|
||||||
|
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||||
|
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultHostModel(BaseHostConfModel):
|
||||||
|
hostnames: None
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||||
|
ALLOW_NULL_HOSTNAMES = True
|
||||||
|
HOSTNAMES = None
|
||||||
|
ENCODE_AUDIO = False
|
||||||
|
ENCODE_VIDEO = False
|
||||||
|
|
||||||
|
def build_config(
|
||||||
|
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||||
|
) -> DefaultHostModel:
|
||||||
|
return DefaultHostModel(
|
||||||
|
hostnames=self.HOSTNAMES,
|
||||||
|
encode_audio=self.ENCODE_AUDIO,
|
||||||
|
encode_video=self.ENCODE_VIDEO,
|
||||||
|
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||||
|
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||||
|
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||||
|
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
39
app_worker/ytdl_opts/per_host/_registry.py
Normal file
39
app_worker/ytdl_opts/per_host/_registry.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
from typing import TYPE_CHECKING, Type
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig
|
||||||
|
|
||||||
|
|
||||||
|
class HostConfRegistry(type):
|
||||||
|
REGISTRY: dict[str, type['AbstractHostConfig']] = {}
|
||||||
|
HOST_TO_CLS_MAP = {}
|
||||||
|
|
||||||
|
def __new__(
|
||||||
|
mcs: Type['HostConfRegistry'],
|
||||||
|
name: str,
|
||||||
|
bases: tuple[type['AbstractHostConfig']],
|
||||||
|
attrs: dict,
|
||||||
|
) -> type['AbstractHostConfig']:
|
||||||
|
host_cls: type['AbstractHostConfig'] = type.__new__(mcs, name, bases, attrs)
|
||||||
|
mcs.REGISTRY[host_cls.__name__] = host_cls
|
||||||
|
|
||||||
|
mcs._build_host_to_cls_map(host_cls=host_cls, hostnames=attrs['HOSTNAMES'])
|
||||||
|
return host_cls
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_registry(mcs) -> dict[str, type['AbstractHostConfig']]:
|
||||||
|
return mcs.REGISTRY.copy()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_host_to_cls_map(mcs) -> dict[str | None, type['AbstractHostConfig']]:
|
||||||
|
return mcs.HOST_TO_CLS_MAP.copy()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _build_host_to_cls_map(
|
||||||
|
mcs, host_cls: type['AbstractHostConfig'], hostnames: tuple[str, ...] | None
|
||||||
|
) -> None:
|
||||||
|
if hostnames is None:
|
||||||
|
mcs.HOST_TO_CLS_MAP[None] = host_cls
|
||||||
|
else:
|
||||||
|
for host in hostnames:
|
||||||
|
mcs.HOST_TO_CLS_MAP[host] = host_cls
|
37
app_worker/ytdl_opts/per_host/instagram.py
Normal file
37
app_worker/ytdl_opts/per_host/instagram.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
from yt_shared.constants import INSTAGRAM_HOSTS
|
||||||
|
from yt_shared.enums import DownMediaType
|
||||||
|
|
||||||
|
from worker.core.config import settings
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||||
|
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||||
|
|
||||||
|
|
||||||
|
class InstagramHostModel(BaseHostConfModel):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class InstagramHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||||
|
ALLOW_NULL_HOSTNAMES = False
|
||||||
|
HOSTNAMES = INSTAGRAM_HOSTS
|
||||||
|
ENCODE_AUDIO = False
|
||||||
|
ENCODE_VIDEO = settings.INSTAGRAM_ENCODE_TO_H264
|
||||||
|
|
||||||
|
FFMPEG_AUDIO_OPTS = None
|
||||||
|
# Instagram returns VP9+AAC in MP4 container for logged users and needs to be
|
||||||
|
# encoded to H264 since Telegram doesn't play VP9 on iOS.
|
||||||
|
FFMPEG_VIDEO_OPTS = 'ffmpeg -y -loglevel error -i "{filepath}" -c:v libx264 -pix_fmt yuv420p -preset veryfast -crf 22 -movflags +faststart -c:a copy "{output}"'
|
||||||
|
|
||||||
|
def build_config(
|
||||||
|
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||||
|
) -> InstagramHostModel:
|
||||||
|
return InstagramHostModel(
|
||||||
|
hostnames=self.HOSTNAMES,
|
||||||
|
encode_audio=self.ENCODE_AUDIO,
|
||||||
|
encode_video=self.ENCODE_VIDEO,
|
||||||
|
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||||
|
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||||
|
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||||
|
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
31
app_worker/ytdl_opts/per_host/tiktok.py
Normal file
31
app_worker/ytdl_opts/per_host/tiktok.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from yt_shared.constants import TIKTOK_HOSTS
|
||||||
|
from yt_shared.enums import DownMediaType
|
||||||
|
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||||
|
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokHostModel(BaseHostConfModel):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||||
|
ALLOW_NULL_HOSTNAMES = False
|
||||||
|
HOSTNAMES = TIKTOK_HOSTS
|
||||||
|
ENCODE_AUDIO = False
|
||||||
|
ENCODE_VIDEO = False
|
||||||
|
|
||||||
|
def build_config(
|
||||||
|
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||||
|
) -> TikTokHostModel:
|
||||||
|
return TikTokHostModel(
|
||||||
|
hostnames=self.HOSTNAMES,
|
||||||
|
encode_audio=self.ENCODE_AUDIO,
|
||||||
|
encode_video=self.ENCODE_VIDEO,
|
||||||
|
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||||
|
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||||
|
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||||
|
return self.DEFAULT_VIDEO_FORMAT_SORT_OPT
|
31
app_worker/ytdl_opts/per_host/twitter.py
Normal file
31
app_worker/ytdl_opts/per_host/twitter.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from yt_shared.constants import TWITTER_HOSTS
|
||||||
|
from yt_shared.enums import DownMediaType
|
||||||
|
|
||||||
|
from ytdl_opts.per_host._base import AbstractHostConfig, BaseHostConfModel
|
||||||
|
from ytdl_opts.per_host._registry import HostConfRegistry
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterHostModel(BaseHostConfModel):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterHost(AbstractHostConfig, metaclass=HostConfRegistry):
|
||||||
|
ALLOW_NULL_HOSTNAMES = False
|
||||||
|
HOSTNAMES = TWITTER_HOSTS
|
||||||
|
ENCODE_AUDIO = False
|
||||||
|
ENCODE_VIDEO = False
|
||||||
|
|
||||||
|
def build_config(
|
||||||
|
self, media_type: DownMediaType, curr_tmp_dir: str
|
||||||
|
) -> TwitterHostModel:
|
||||||
|
return TwitterHostModel(
|
||||||
|
hostnames=self.HOSTNAMES,
|
||||||
|
encode_audio=self.ENCODE_AUDIO,
|
||||||
|
encode_video=self.ENCODE_VIDEO,
|
||||||
|
ffmpeg_audio_opts=self.FFMPEG_AUDIO_OPTS,
|
||||||
|
ffmpeg_video_opts=self.FFMPEG_VIDEO_OPTS,
|
||||||
|
ytdl_opts=self._build_ytdl_opts(media_type, curr_tmp_dir),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_custom_ytdl_video_opts(self) -> list[str]:
|
||||||
|
return ['--format-sort', 'res,proto:https,vcodec:h265,h264']
|
18
yt_shared/yt_shared/constants.py
Normal file
18
yt_shared/yt_shared/constants.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from asyncio import Lock
|
||||||
|
|
||||||
|
ASYNC_LOCK = Lock()
|
||||||
|
|
||||||
|
|
||||||
|
INSTAGRAM_HOSTS = ('instagram.com', 'www.instagram.com')
|
||||||
|
TIKTOK_HOSTS = ('tiktok.com', 'vm.tiktok.com', 'www.tiktok.com', 'www.vm.tiktok.com')
|
||||||
|
TWITTER_HOSTS = (
|
||||||
|
'twitter.com',
|
||||||
|
'www.twitter.com',
|
||||||
|
'x.com',
|
||||||
|
'www.x.com',
|
||||||
|
't.co',
|
||||||
|
'www.t.co',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
REMOVE_QUERY_PARAMS_HOSTS = TWITTER_HOSTS + INSTAGRAM_HOSTS
|
|
@ -4,11 +4,11 @@ from uuid import UUID
|
||||||
from sqlalchemy import insert, select
|
from sqlalchemy import insert, select
|
||||||
from sqlalchemy.exc import NoResultFound
|
from sqlalchemy.exc import NoResultFound
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from yt_shared.constants import ASYNC_LOCK
|
||||||
from yt_shared.enums import TaskStatus
|
from yt_shared.enums import TaskStatus
|
||||||
from yt_shared.models import Cache, File, Task
|
from yt_shared.models import Cache, File, Task
|
||||||
from yt_shared.schemas.cache import CacheSchema
|
from yt_shared.schemas.cache import CacheSchema
|
||||||
from yt_shared.schemas.media import BaseMedia, InbMediaPayload, Video
|
from yt_shared.schemas.media import BaseMedia, InbMediaPayload, Video
|
||||||
from yt_shared.utils.common import ASYNC_LOCK
|
|
||||||
|
|
||||||
|
|
||||||
class TaskRepository:
|
class TaskRepository:
|
||||||
|
|
|
@ -14,6 +14,7 @@ from pydantic import (
|
||||||
from yt_shared.enums import DownMediaType, MediaFileType, TaskSource, TelegramChatType
|
from yt_shared.enums import DownMediaType, MediaFileType, TaskSource, TelegramChatType
|
||||||
from yt_shared.schemas.base import RealBaseModel
|
from yt_shared.schemas.base import RealBaseModel
|
||||||
from yt_shared.utils.common import format_bytes
|
from yt_shared.utils.common import format_bytes
|
||||||
|
from yt_shared.utils.file import file_size
|
||||||
|
|
||||||
|
|
||||||
class InbMediaPayload(RealBaseModel):
|
class InbMediaPayload(RealBaseModel):
|
||||||
|
@ -24,8 +25,9 @@ class InbMediaPayload(RealBaseModel):
|
||||||
from_chat_type: TelegramChatType | None
|
from_chat_type: TelegramChatType | None
|
||||||
from_user_id: StrictInt | None
|
from_user_id: StrictInt | None
|
||||||
message_id: StrictInt | None
|
message_id: StrictInt | None
|
||||||
acknowledge_message_id: StrictInt | None
|
ack_message_id: StrictInt | None
|
||||||
url: StrictStr
|
url: StrictStr
|
||||||
|
original_url: StrictStr
|
||||||
source: TaskSource
|
source: TaskSource
|
||||||
save_to_storage: StrictBool
|
save_to_storage: StrictBool
|
||||||
download_media_type: DownMediaType
|
download_media_type: DownMediaType
|
||||||
|
@ -49,9 +51,15 @@ class BaseMedia(RealBaseModel):
|
||||||
is_converted: StrictBool = False
|
is_converted: StrictBool = False
|
||||||
converted_filepath: StrictStr | None = None
|
converted_filepath: StrictStr | None = None
|
||||||
converted_filename: StrictStr | None = None
|
converted_filename: StrictStr | None = None
|
||||||
|
converted_file_size: StrictInt | None = None
|
||||||
|
|
||||||
def file_size_human(self) -> str:
|
def file_size_human(self) -> str:
|
||||||
return format_bytes(num=self.file_size)
|
return format_bytes(num=self.current_file_size())
|
||||||
|
|
||||||
|
def current_file_size(self) -> int:
|
||||||
|
if self.converted_file_size is not None:
|
||||||
|
return self.converted_file_size
|
||||||
|
return self.file_size
|
||||||
|
|
||||||
def mark_as_saved_to_storage(self, storage_path: str) -> None:
|
def mark_as_saved_to_storage(self, storage_path: str) -> None:
|
||||||
self.storage_path = storage_path
|
self.storage_path = storage_path
|
||||||
|
@ -60,6 +68,7 @@ class BaseMedia(RealBaseModel):
|
||||||
def mark_as_converted(self, filepath: str) -> None:
|
def mark_as_converted(self, filepath: str) -> None:
|
||||||
self.converted_filepath = filepath
|
self.converted_filepath = filepath
|
||||||
self.converted_filename = filepath.rsplit("/", 1)[-1]
|
self.converted_filename = filepath.rsplit("/", 1)[-1]
|
||||||
|
self.converted_file_size = file_size(filepath)
|
||||||
self.is_converted = True
|
self.is_converted = True
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,10 +6,11 @@ from yt_shared.schemas.base import RealBaseModel
|
||||||
|
|
||||||
class URL(RealBaseModel):
|
class URL(RealBaseModel):
|
||||||
url: StrictStr
|
url: StrictStr
|
||||||
|
original_url: StrictStr
|
||||||
from_chat_id: StrictInt
|
from_chat_id: StrictInt
|
||||||
from_chat_type: TelegramChatType
|
from_chat_type: TelegramChatType
|
||||||
from_user_id: StrictInt
|
from_user_id: StrictInt
|
||||||
message_id: StrictInt
|
message_id: StrictInt
|
||||||
acknowledge_message_id: StrictInt
|
ack_message_id: StrictInt
|
||||||
save_to_storage: StrictBool
|
save_to_storage: StrictBool
|
||||||
download_media_type: DownMediaType
|
download_media_type: DownMediaType
|
||||||
|
|
|
@ -6,8 +6,6 @@ from functools import partial, wraps
|
||||||
from string import ascii_lowercase
|
from string import ascii_lowercase
|
||||||
from typing import Any, Callable
|
from typing import Any, Callable
|
||||||
|
|
||||||
ASYNC_LOCK = asyncio.Lock()
|
|
||||||
|
|
||||||
_UNIT_SIZE_NAMES = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
|
_UNIT_SIZE_NAMES = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
|
||||||
_BASE = 1024.0
|
_BASE = 1024.0
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue