Enhanced download functionality for yt-dlp unsupported links (#363)

* Improved download ability for things that yt-dlp doesn’t support out of the box

* yt-dlp bump to new release 2024.03.10

* Update README.md

* Update README.md

* Update README.md

* sp_ytdl_download: remove aria2 and clean up
This commit is contained in:
Sanuja Seneviratne 2024-04-08 21:22:44 +05:30 committed by GitHub
parent aab8abd74c
commit 0547fc69a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 132 additions and 32 deletions

View file

@ -2,9 +2,9 @@
[![docker image](https://github.com/tgbot-collection/ytdlbot/actions/workflows/builder.yaml/badge.svg)](https://github.com/tgbot-collection/ytdlbot/actions/workflows/builder.yaml)
YouTube Download Bot🚀🎬⬇
**YouTube Download Bot🚀🎬⬇**
This Telegram bot allows you to download videos from YouTube and other supported websites, including Instagram!
This Telegram bot allows you to download videos from YouTube and [other supported websites](#supported-websites).
# Usage
@ -12,18 +12,14 @@ This Telegram bot allows you to download videos from YouTube and other supported
Join Telegram Channel https://t.me/+OGRC8tp9-U9mZDZl for updates.
Send link directly to the bot. Any
Websites [supported by yt-dlp](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) will work too.
Just send a link directly to the bot.
# Limitations of my bot
# Supported websites
Due to limitations on servers and bandwidth, there are some restrictions on this free service.
* Each user is limited to 10 free downloads per 24-hour period
* Maximum of three subscriptions allowed for YouTube channels.
* Files bigger than 2 GiB will require at least 1 download token.
If you need more downloads, you can buy download tokens.
* YouTube 😅
* Any websites [supported by yt-dlp](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)
* Instagram (Videos, Photos, Reels, IGTV & carousel)
* Pixeldrain
# Features
@ -42,8 +38,21 @@ If you need more downloads, you can buy download tokens.
13. 4 GiB file size support with Telegram Premium
14. History and inline mode support
> If you download files larger than 2 GiB, you agreed that this file will be uploaded by me. I know who you are and what
> you download.
> [!NOTE]
> **For users of [my official bot](https://t.me/benny_ytdlbot)**\
> Files larger than 2 GiB will be automatically uploaded by me (via my Premium account). By using our service for such downloads, you consent to this process. \
> That means I know who you are and what you download. \
> Rest assured that we handle your personal information with the utmost care.
>
> ## Limitations
> Due to limitations on servers and bandwidth, there are some restrictions on this free service.
> * Each user is limited to 10 free downloads per 24-hour period
> * Maximum of three subscriptions allowed for YouTube channels.
> * Files bigger than 2 GiB will require at least 1 download token.
>
> If you need more downloads, you can buy download tokens.
>
> **Thank you for using the [official bot](https://t.me/benny_ytdlbot).**
# Screenshots

View file

@ -1,6 +1,6 @@
pyrogram==2.0.106
tgcrypto==1.2.5
git+https://github.com/yt-dlp/yt-dlp@413d3675804599bc8fe419c19e36490fd8f0b30f
yt-dlp==2024.03.10
APScheduler==3.10.4
beautifultable==1.1.0
ffmpeg-python==0.2.0

View file

@ -27,6 +27,8 @@ import yt_dlp as ytdl
from pyrogram import types
from tqdm import tqdm
from sp_downloader import sp_dl
from config import (
AUDIO_FORMAT,
ENABLE_ARIA2,
@ -220,7 +222,7 @@ def ytdl_download(url: str, tempdir: str, bm, **kwargs) -> list:
None,
]
adjust_formats(chat_id, url, formats, hijack)
if download_instagram(url, tempdir):
if sp_dl(url, tempdir):
return list(pathlib.Path(tempdir).glob("*"))
address = ["::", "0.0.0.0"] if IPv6 else [None]
@ -303,19 +305,3 @@ def split_large_video(video_paths: list):
if split and original_video:
return [i for i in pathlib.Path(original_video).parent.glob("*")]
def download_instagram(url: str, tempdir: str):
    """Fetch Instagram media for *url* into *tempdir* via the local resolver.

    Returns True when at least one file was saved; False when the URL is
    not an Instagram link. Implicitly returns None when the resolver
    reports no data (falsy, so callers treating it as a boolean still work).
    """
    if not url.startswith("https://www.instagram.com"):
        return False
    # NOTE(review): hard-coded internal resolver address — presumably a
    # deployment-specific sidecar service; confirm before reuse elsewhere.
    resp = requests.get(f"http://192.168.6.1:15000/?url={url}").json()
    if url_results := resp.get("data"):
        for link in url_results:
            content = requests.get(link, stream=True).content
            # Sniff the extension from the bytes; Instagram links carry none.
            ext = filetype.guess_extension(content)
            # id(link) is only unique among live objects — opaque but collision-free here.
            save_path = pathlib.Path(tempdir, f"{id(link)}.{ext}")
            with open(save_path, "wb") as f:
                f.write(content)
        return True

105
ytdlbot/sp_downloader.py Normal file
View file

@ -0,0 +1,105 @@
#!/usr/local/bin/python3
# coding: utf-8
# ytdlbot - sp_downloader.py
# 3/16/24 16:32
#
__author__ = "Benny <benny.think@gmail.com>, SanujaNS <sanujas@sanuja.biz>"
import pathlib
import logging
import traceback
import re
import requests
from tqdm import tqdm
import json
from bs4 import BeautifulSoup
from urllib.parse import parse_qs, urlparse
import filetype
import yt_dlp as ytdl
from config import (
IPv6,
)
user_agent = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.128 Safari/537.36"
)
def sp_dl(url: str, tempdir: str) -> bool:
    """Specific link downloader.

    Dispatch *url* to a site-specific handler based on its hostname.

    Returns True when a handler downloaded media into *tempdir*, and
    False when the URL is not handled here — the caller then falls back
    to the generic yt-dlp path.
    """
    domain = urlparse(url).hostname
    # BUG FIX: hostname is None for strings without a netloc; the old
    # `x in domain` membership test raised TypeError on such input.
    if not domain:
        return False
    if "www.instagram.com" in domain:
        return instagram(url, tempdir)
    if "pixeldrain.com" in domain:
        return pixeldrain(url, tempdir)
    if "www.xasiat.com" in domain:
        # BUG FIX: this branch was unreachable — the old whitelist did not
        # include www.xasiat.com, so the dispatch could never be reached.
        return xasiat(url, tempdir)
    # NOTE(review): "mediafire.com" was whitelisted but had no handler and
    # fell through to an implicit None; return False explicitly so those
    # links use the normal yt-dlp path.
    return False
def sp_ytdl_download(url: str, tempdir: str) -> list:
    """Download *url* with yt-dlp into *tempdir*, trying each bind address.

    When IPv6 is enabled, tries an IPv6 then an IPv4 source address before
    giving up. Returns the list of files found in *tempdir* after a
    successful attempt.

    Raises:
        Exception: with the last traceback text when every attempt fails.
    """
    output = pathlib.Path(tempdir, "%(title).70s.%(ext)s").as_posix()
    ydl_opts = {
        "outtmpl": output,
        "restrictfilenames": False,
        "quiet": True,
        "format": None,  # let yt-dlp select its default best format
    }
    address = ["::", "0.0.0.0"] if IPv6 else [None]
    error = None
    video_paths = None
    for addr in address:
        ydl_opts["source_address"] = addr
        try:
            logging.info("Downloading %s", url)
            with ytdl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            video_paths = list(pathlib.Path(tempdir).glob("*"))
            break
        except Exception:
            error = traceback.format_exc()
            # BUG FIX: the format string had two %s placeholders but only
            # one argument, which made logging swallow the message entirely.
            logging.error("Download failed for %s:\n%s", url, error)
    if not video_paths:
        raise Exception(error)
    return video_paths
def instagram(url: str, tempdir: str) -> bool:
    """Fetch Instagram media for *url* into *tempdir* via the local resolver.

    The resolver service replies with JSON of the form
    ``{"data": [<direct media links>]}``; each link is downloaded and saved.

    Returns True when at least one file was saved, False otherwise.
    """
    # NOTE(review): hard-coded internal resolver address — presumably a
    # deployment-specific sidecar service; confirm before reuse elsewhere.
    # Timeouts added so a stalled resolver/CDN cannot hang the worker forever.
    resp = requests.get(f"http://192.168.6.1:15000/?url={url}", timeout=30).json()
    url_results = resp.get("data")
    if not url_results:
        # BUG FIX: the failure path previously fell through to an implicit
        # None return; make it an explicit False.
        return False
    for index, link in enumerate(url_results):
        content = requests.get(link, stream=True, timeout=30).content
        # Sniff the extension from the bytes; the links carry none.
        ext = filetype.guess_extension(content)
        # Index-based names are deterministic, unlike the old id(link).
        save_path = pathlib.Path(tempdir, f"{index}.{ext}")
        save_path.write_bytes(content)
    return True
def pixeldrain(url: str, tempdir: str) -> bool:
    """Download a pixeldrain file via its direct-download API endpoint.

    Rewrites ``https://pixeldrain.com/u/<id>`` to the API download URL and
    delegates the transfer to sp_ytdl_download.

    Returns True on a handled (matching) URL, False for URLs that don't
    match so the caller can fall back to the generic path.
    """
    match = re.match(r'https://pixeldrain.com/u/(\w+)', url)
    if not match:
        # BUG FIX: previously returned the (truthy) url string, which made
        # the caller treat an unhandled link as a successful download and
        # hand back an empty file list.
        return False
    api_url = 'https://pixeldrain.com/api/file/{}?download'.format(match.group(1))
    sp_ytdl_download(api_url, tempdir)
    return True
def xasiat(url: str, tempdir: str):
    # Placeholder: xasiat support is not implemented yet; always report
    # "not handled" so the caller falls back to the generic downloader.
    return False