upd: using asyncio for parallel downloading

This commit is contained in:
Dineshkarthik 2020-05-24 21:19:34 +02:00
parent 39e7dbc02a
commit 1bcf4296a7

View file

@ -6,18 +6,29 @@ from datetime import datetime as dt
import yaml
import pyrogram
import asyncio
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
f = open(os.path.join(THIS_DIR, "config.yaml"))
config = yaml.safe_load(f)
f.close()
def update_config(config: dict):
"""Update exisitng configuration file.
Parameters
----------
config: dictionary
Configuraiton to be written into config file.
"""
with open("config.yaml", "w") as yaml_file:
yaml.dump(config, yaml_file, default_flow_style=False)
logger.info("Updated last read message_id to config file")
def _get_media_meta(
async def _get_media_meta(
media_obj: pyrogram.client.types.messages_and_media, _type: str
) -> Tuple[str, str]:
"""Extract file name and file id.
@ -51,7 +62,43 @@ def _get_media_meta(
return file_ref, file_name
def download_media(
async def download_media(client, message, media_types):
"""Download media from Telegram.
Parameters
----------
client: pyrogram.client.client.Client
Client to interact with Telegram APIs.
message: pyrogram.Message
Message object retrived from telegram.
media_types: list
List of strings of media types to be downloaded.
Ex : `["audio", "photo"]`
Supported formats:
* audio
* document
* photo
* video
* voice
Returns
-------
integer
message_id
"""
if message.media:
for _type in media_types:
_media = getattr(message, _type, None)
if _media:
file_ref, file_name = await _get_media_meta(_media, _type)
download_path = await client.download_media(
message, file_ref=file_ref, file_name=file_name
)
logger.info("Media downloaded - %s", download_path)
return message.message_id
async def process_messages(
client: pyrogram.client.client.Client,
chat_id: str,
last_read_message_id: int,
@ -82,55 +129,43 @@ def download_media(
integer
last_message_id
"""
messages = client.iter_history(
chat_id, offset_id=last_read_message_id, reverse=True
message_ids = await asyncio.gather(
*[
download_media(client, message, media_types)
async for message in client.iter_history(
chat_id, offset_id=last_read_message_id, reverse=True
)
]
)
last_message_id: int = 0
for message in messages:
if message.media:
for _type in media_types:
_media = getattr(message, _type, None)
if _media:
file_ref, file_name = _get_media_meta(_media, _type)
download_path = client.download_media(
message, file_ref=file_ref, file_name=file_name
)
logger.info("Media downloaded - %s", download_path)
last_message_id = message.message_id
last_message_id = max(message_ids, default=last_read_message_id)
return last_message_id
def update_config(config: dict):
"""Update exisitng configuration file.
Parameters
----------
config: dictionary
Configuraiton to be written into config file.
"""
with open("config.yaml", "w") as yaml_file:
yaml.dump(config, yaml_file, default_flow_style=False)
logger.info("Updated last read message_id to config file")
def begin_import():
async def begin_import(config):
"""Skeleton fucntion that creates client and import, write config"""
client = pyrogram.Client(
"media_downloader",
api_id=config["api_id"],
api_hash=config["api_hash"],
)
client.start()
last_id = download_media(
await client.start()
last_read_message_id = await process_messages(
client,
config["chat_id"],
config["last_read_message_id"],
config["media_types"],
)
client.stop()
config["last_read_message_id"] = last_id + 1
update_config(config)
await client.stop()
config["last_read_message_id"] = last_read_message_id + 1
return config
if __name__ == "__main__":
begin_import()
f = open(os.path.join(THIS_DIR, "config.yaml"))
config = yaml.safe_load(f)
f.close()
updated_config = asyncio.get_event_loop().run_until_complete(
begin_import(config)
)
update_config(updated_config)