2019-11-06 20:07:25 +08:00
|
|
|
"""Downloads media from telegram."""
|
2019-07-24 23:42:25 +08:00
|
|
|
import os
|
|
|
|
import logging
|
2019-08-05 23:33:47 +08:00
|
|
|
from typing import List, Tuple
|
|
|
|
from datetime import datetime as dt
|
2019-07-24 23:42:25 +08:00
|
|
|
|
2020-05-25 20:28:10 +08:00
|
|
|
import asyncio
|
2019-07-24 23:42:25 +08:00
|
|
|
import yaml
|
2019-11-06 20:07:25 +08:00
|
|
|
import pyrogram
|
2020-05-25 03:19:34 +08:00
|
|
|
|
2019-07-24 23:42:25 +08:00
|
|
|
|
|
|
|
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after this module.
logger = logging.getLogger(__name__)

# Absolute path of the directory that contains this module.
THIS_DIR = os.path.split(os.path.abspath(__file__))[0]
|
|
|
|
|
2020-05-25 03:19:34 +08:00
|
|
|
|
|
|
|
def update_config(config: dict):
    """Update existing configuration file.

    Parameters
    ----------
    config: dictionary
        Configuration to be written into config file.
    """
    # Write next to this module -- the same location the script entry point
    # loads the configuration from -- so the update is found on the next run
    # regardless of the current working directory.
    config_path = os.path.join(THIS_DIR, "config.yaml")
    with open(config_path, "w") as yaml_file:
        yaml.dump(config, yaml_file, default_flow_style=False)
    logger.info("Updated last read message_id to config file")
|
2019-07-24 23:42:25 +08:00
|
|
|
|
|
|
|
|
2020-05-25 03:19:34 +08:00
|
|
|
async def _get_media_meta(
    media_obj: pyrogram.client.types.messages_and_media, _type: str
) -> Tuple[str, str]:
    """Extract file reference and target file name of a media object.

    Parameters
    ----------
    media_obj: pyrogram.client.types.messages_and_media
        Media object to be extracted.
    _type: string
        Type of media object.

    Returns
    -------
    tuple
        file_ref, file_name
    """
    file_ref: str = media_obj.file_ref
    if _type == "voice":
        # Voice notes are named after their UTC timestamp, with the mime
        # subtype (text after the last "/") used as the extension.
        file_format: str = media_obj.mime_type.split("/")[-1]
        file_name: str = os.path.join(
            THIS_DIR,
            _type,
            "voice_{}.{}".format(
                dt.utcfromtimestamp(media_obj.date).isoformat(), file_format
            ),
        )
    elif _type == "photo":
        # The empty last component makes this a directory path ending in a
        # separator; presumably the client then picks the file name itself.
        file_name = os.path.join(THIS_DIR, _type, "")
    else:
        # Some media objects have no file name (attribute missing or None);
        # os.path.join would raise TypeError on None, so fall back to the
        # bare directory path exactly like the photo branch does.
        file_name = os.path.join(
            THIS_DIR, _type, getattr(media_obj, "file_name", None) or ""
        )
    return file_ref, file_name
|
2019-08-05 23:33:47 +08:00
|
|
|
|
|
|
|
|
2020-05-25 03:19:34 +08:00
|
|
|
async def download_media(client, message, media_types):
    """Download the requested media types attached to a single message.

    Parameters
    ----------
    client: pyrogram.client.client.Client
        Client to interact with Telegram APIs.
    message: pyrogram.Message
        Message object retrieved from telegram.
    media_types: list
        List of strings of media types to be downloaded.
        Ex : `["audio", "photo"]`
        Supported formats:
            * audio
            * document
            * photo
            * video
            * voice

    Returns
    -------
    integer
        message_id
    """
    # Messages without media have nothing to download; only report the id.
    if not message.media:
        return message.message_id

    for _type in media_types:
        attachment = getattr(message, _type, None)
        if not attachment:
            # The message carries no media of this type.
            continue
        file_ref, file_name = await _get_media_meta(attachment, _type)
        saved_to = await client.download_media(
            message, file_ref=file_ref, file_name=file_name
        )
        logger.info("Media downloaded - %s", saved_to)
    return message.message_id
|
|
|
|
|
|
|
|
|
|
|
|
async def process_messages(
    client: pyrogram.client.client.Client,
    chat_id: str,
    last_read_message_id: int,
    media_types: List[str],
) -> int:
    """Download media from every message in a chat's history.

    Parameters
    ----------
    client: pyrogram.client.client.Client
        Client to interact with Telegram APIs.
    chat_id: string
        Id of the chat to download media from.
    last_read_message_id: integer
        Id of last message read from the conversational thread.
    media_types: list
        List of strings of media types to be downloaded.
        Ex : `["audio", "photo"]`
        Supported formats:
            * audio
            * document
            * photo
            * video
            * voice

    Returns
    -------
    integer
        last_message_id
    """
    history = client.iter_history(
        chat_id, offset_id=last_read_message_id, reverse=True
    )
    # Create one download coroutine per message first, then await them all
    # together.
    pending = []
    async for message in history:
        pending.append(download_media(client, message, media_types))
    message_ids = await asyncio.gather(*pending)

    # With no messages the gathered list is empty, so the incoming id is
    # returned unchanged.
    return max(message_ids, default=last_read_message_id)
|
2019-07-24 23:42:25 +08:00
|
|
|
|
|
|
|
|
2020-05-25 03:19:34 +08:00
|
|
|
async def begin_import(config):
    """Skeleton function that creates client and import, write config.

    Parameters
    ----------
    config: dict
        Configuration; the keys ``api_id``, ``api_hash``, ``chat_id``,
        ``last_read_message_id`` and ``media_types`` are read.

    Returns
    -------
    dict
        The same ``config`` object, with ``last_read_message_id`` set to
        one past the id returned by ``process_messages``.
    """
    client = pyrogram.Client(
        "media_downloader",
        api_id=config["api_id"],
        api_hash=config["api_hash"],
    )
    await client.start()
    try:
        last_read_message_id = await process_messages(
            client,
            config["chat_id"],
            config["last_read_message_id"],
            config["media_types"],
        )
    finally:
        # Always stop the started client, even when processing fails, so a
        # download error does not leave it running.
        await client.stop()
    # NOTE(review): the id is advanced by one even when no message was
    # processed -- confirm this cannot skip a message on a later run.
    config["last_read_message_id"] = last_read_message_id + 1
    return config
|
2019-07-24 23:42:25 +08:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # The context manager closes the file even if parsing the YAML fails.
    with open(os.path.join(THIS_DIR, "config.yaml")) as f:
        config = yaml.safe_load(f)
    # Run the import to completion, then persist the returned configuration.
    updated_config = asyncio.get_event_loop().run_until_complete(
        begin_import(config)
    )
    update_config(updated_config)
|