mirror of
https://github.com/Dineshkarthik/telegram_media_downloader.git
synced 2024-11-10 08:52:53 +08:00
upd: using asyncio for parallel downloading
This commit is contained in:
parent
39e7dbc02a
commit
1bcf4296a7
1 changed files with 74 additions and 39 deletions
|
@ -6,18 +6,29 @@ from datetime import datetime as dt
|
|||
|
||||
import yaml
|
||||
import pyrogram
|
||||
import asyncio
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
f = open(os.path.join(THIS_DIR, "config.yaml"))
|
||||
config = yaml.safe_load(f)
|
||||
f.close()
|
||||
|
||||
def update_config(config: dict):
|
||||
"""Update exisitng configuration file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
config: dictionary
|
||||
Configuraiton to be written into config file.
|
||||
"""
|
||||
with open("config.yaml", "w") as yaml_file:
|
||||
yaml.dump(config, yaml_file, default_flow_style=False)
|
||||
logger.info("Updated last read message_id to config file")
|
||||
|
||||
|
||||
def _get_media_meta(
|
||||
async def _get_media_meta(
|
||||
media_obj: pyrogram.client.types.messages_and_media, _type: str
|
||||
) -> Tuple[str, str]:
|
||||
"""Extract file name and file id.
|
||||
|
@ -51,7 +62,43 @@ def _get_media_meta(
|
|||
return file_ref, file_name
|
||||
|
||||
|
||||
def download_media(
|
||||
async def download_media(client, message, media_types):
|
||||
"""Download media from Telegram.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
client: pyrogram.client.client.Client
|
||||
Client to interact with Telegram APIs.
|
||||
message: pyrogram.Message
|
||||
Message object retrived from telegram.
|
||||
media_types: list
|
||||
List of strings of media types to be downloaded.
|
||||
Ex : `["audio", "photo"]`
|
||||
Supported formats:
|
||||
* audio
|
||||
* document
|
||||
* photo
|
||||
* video
|
||||
* voice
|
||||
|
||||
Returns
|
||||
-------
|
||||
integer
|
||||
message_id
|
||||
"""
|
||||
if message.media:
|
||||
for _type in media_types:
|
||||
_media = getattr(message, _type, None)
|
||||
if _media:
|
||||
file_ref, file_name = await _get_media_meta(_media, _type)
|
||||
download_path = await client.download_media(
|
||||
message, file_ref=file_ref, file_name=file_name
|
||||
)
|
||||
logger.info("Media downloaded - %s", download_path)
|
||||
return message.message_id
|
||||
|
||||
|
||||
async def process_messages(
|
||||
client: pyrogram.client.client.Client,
|
||||
chat_id: str,
|
||||
last_read_message_id: int,
|
||||
|
@ -82,55 +129,43 @@ def download_media(
|
|||
integer
|
||||
last_message_id
|
||||
"""
|
||||
messages = client.iter_history(
|
||||
chat_id, offset_id=last_read_message_id, reverse=True
|
||||
message_ids = await asyncio.gather(
|
||||
*[
|
||||
download_media(client, message, media_types)
|
||||
async for message in client.iter_history(
|
||||
chat_id, offset_id=last_read_message_id, reverse=True
|
||||
)
|
||||
]
|
||||
)
|
||||
last_message_id: int = 0
|
||||
for message in messages:
|
||||
if message.media:
|
||||
for _type in media_types:
|
||||
_media = getattr(message, _type, None)
|
||||
if _media:
|
||||
file_ref, file_name = _get_media_meta(_media, _type)
|
||||
download_path = client.download_media(
|
||||
message, file_ref=file_ref, file_name=file_name
|
||||
)
|
||||
logger.info("Media downloaded - %s", download_path)
|
||||
last_message_id = message.message_id
|
||||
|
||||
last_message_id = max(message_ids, default=last_read_message_id)
|
||||
return last_message_id
|
||||
|
||||
|
||||
def update_config(config: dict):
|
||||
"""Update exisitng configuration file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
config: dictionary
|
||||
Configuraiton to be written into config file.
|
||||
"""
|
||||
with open("config.yaml", "w") as yaml_file:
|
||||
yaml.dump(config, yaml_file, default_flow_style=False)
|
||||
logger.info("Updated last read message_id to config file")
|
||||
|
||||
|
||||
def begin_import():
|
||||
async def begin_import(config):
|
||||
"""Skeleton fucntion that creates client and import, write config"""
|
||||
client = pyrogram.Client(
|
||||
"media_downloader",
|
||||
api_id=config["api_id"],
|
||||
api_hash=config["api_hash"],
|
||||
)
|
||||
client.start()
|
||||
last_id = download_media(
|
||||
await client.start()
|
||||
last_read_message_id = await process_messages(
|
||||
client,
|
||||
config["chat_id"],
|
||||
config["last_read_message_id"],
|
||||
config["media_types"],
|
||||
)
|
||||
client.stop()
|
||||
config["last_read_message_id"] = last_id + 1
|
||||
update_config(config)
|
||||
await client.stop()
|
||||
config["last_read_message_id"] = last_read_message_id + 1
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
begin_import()
|
||||
f = open(os.path.join(THIS_DIR, "config.yaml"))
|
||||
config = yaml.safe_load(f)
|
||||
f.close()
|
||||
updated_config = asyncio.get_event_loop().run_until_complete(
|
||||
begin_import(config)
|
||||
)
|
||||
update_config(updated_config)
|
||||
|
|
Loading…
Reference in a new issue