telegram_media_downloader/utils/file_management.py

49 lines
1.4 KiB
Python
Raw Normal View History

2020-07-22 03:31:39 +08:00
"""Utility functions to handle downloaded files."""
2020-07-22 01:11:42 +08:00
import glob
import os
import pathlib
from hashlib import md5
def get_next_name(file_path: str) -> str:
    """
    Return the next available "-copyN" name for a file to download.

    Candidates have the form ``<parent>/<stem>-copy<N><suffix>`` where ``N``
    starts at 1 and is incremented until no regular file with that name
    exists on disk.

    Args:
        file_path: Path whose name is already taken.

    Returns:
        The first candidate path that is not an existing file.
    """
    posix_path = pathlib.Path(file_path)
    parent: str = str(posix_path.parent)
    # ``stem`` only strips the final suffix, so it must be paired with
    # ``suffix`` (not ``suffixes``); joining all suffixes would turn
    # "report.v2.pdf" into "report.v2-copy1.v2.pdf".
    stem: str = posix_path.stem
    suffix: str = posix_path.suffix

    counter: int = 1
    while True:
        candidate: str = os.path.join(
            parent, f"{stem}-copy{counter}{suffix}"
        )
        if not os.path.isfile(candidate):
            return candidate
        counter += 1
def manage_duplicate_file(file_path: str) -> str:
    """
    Check if a file is a duplicate of an earlier download.

    Files in the same directory whose names start with the part of the stem
    that precedes the last "-copy" marker are compared with ``file_path`` by
    md5 hash. If one has identical content, ``file_path`` is removed.

    Args:
        file_path: Path of the freshly downloaded file.

    Returns:
        The path of the already existing identical file when a duplicate is
        found (``file_path`` is deleted in that case), otherwise
        ``file_path`` unchanged.
    """
    posix_path = pathlib.Path(file_path)
    # Without a "-copy" marker the prefix is empty, so every file in the
    # directory is a candidate.
    file_base_name: str = "".join(posix_path.stem.split("-copy")[0:-1])
    # Escape glob metacharacters so names such as "photo[1]" match literally.
    name_pattern: str = os.path.join(
        glob.escape(str(posix_path.parent)),
        glob.escape(file_base_name) + "*",
    )

    # glob may spell the path differently from the caller (e.g. "./a.jpg"
    # versus "a.jpg"), so exclude the file itself by normalized path.
    own_path: str = os.path.normcase(os.path.abspath(file_path))
    old_files: list = [
        path
        for path in glob.glob(name_pattern)
        if os.path.isfile(path)
        and os.path.normcase(os.path.abspath(path)) != own_path
    ]

    current_file_size: int = os.path.getsize(file_path)
    current_file_md5 = None  # computed lazily, only if a size matches
    for old_file_path in old_files:
        # Files of different size cannot be identical; skip hashing them.
        if os.path.getsize(old_file_path) != current_file_size:
            continue
        if current_file_md5 is None:
            current_file_md5 = _md5_of_file(file_path)
        if _md5_of_file(old_file_path) == current_file_md5:
            os.remove(file_path)
            return old_file_path
    return file_path


def _md5_of_file(path: str, chunk_size: int = 65536) -> str:
    """Return the hex md5 digest of the file at ``path``, read in chunks."""
    digest = md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()