2020-07-22 03:31:39 +08:00
|
|
|
"""Utility functions to handle downloaded files."""
|
2020-07-22 01:11:42 +08:00
|
|
|
import glob
|
|
|
|
import os
|
|
|
|
import pathlib
|
|
|
|
from hashlib import md5
|
|
|
|
|
|
|
|
|
|
|
|
def get_next_name(file_path: str) -> str:
    """
    Get next available name to download file.

    The candidate name is ``<stem>-copy<N><suffix>`` inside the same
    directory as ``file_path``, where ``N`` starts at 1 and is increased
    until no regular file with that name exists.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which next available name to
        be generated.

    Returns
    -------
    str
        Absolute path of the next available name for the file.
    """
    posix_path = pathlib.Path(file_path)
    parent: str = str(posix_path.parent)
    # `stem` already holds every suffix except the last one, so only the
    # final suffix is appended; joining all `suffixes` here would repeat
    # the inner ones (e.g. "a.tar.gz" -> "a.tar-copy1.tar.gz").
    stem: str = posix_path.stem
    suffix: str = posix_path.suffix

    counter: int = 1
    while True:
        candidate: str = os.path.join(parent, f"{stem}-copy{counter}{suffix}")
        if not os.path.isfile(candidate):
            return candidate
        counter += 1
|
|
|
|
|
|
|
|
|
|
|
|
def _file_md5(path: str, chunk_size: int = 1024 * 1024) -> str:
    """Return the hex md5 digest of the file at ``path``, read in chunks."""
    digest = md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def manage_duplicate_file(file_path: str) -> str:
    """
    Check if a file is duplicate.

    Compare the md5 of files with copy name pattern
    and remove if the md5 hash is same.

    The base name is the part of the file stem before its last ``-copy``
    marker; every other regular file in the same directory whose name
    starts with that base name is compared against ``file_path``. When the
    stem has no ``-copy`` marker the base name is empty, so every
    (non-hidden) file in the directory is compared.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which duplicates needs to
        be managed.

    Returns
    -------
    str
        Absolute path of the duplicate managed file: the path of the
        already existing file with identical content (``file_path`` is
        deleted in that case), otherwise ``file_path`` itself.
    """
    posix_path = pathlib.Path(file_path)
    # Strip only the trailing "-copyN" marker so a base name that itself
    # contains "-copy" is kept intact.
    file_base_name: str = posix_path.stem.rpartition("-copy")[0]
    # Escape the literal part so "[", "*" and "?" in the path are not
    # interpreted as glob wildcards; only the trailing "*" is a wildcard.
    name_pattern: str = (
        glob.escape(f"{posix_path.parent}/{file_base_name}") + "*"
    )

    # Compare normalised absolute paths so the file is never matched
    # against itself, even when glob spells the path differently
    # (e.g. "./name" for a bare relative "name").
    own_path: str = os.path.normcase(os.path.abspath(file_path))
    old_files: list = [
        candidate
        for candidate in glob.glob(name_pattern)
        if os.path.isfile(candidate)  # directories cannot be hashed
        and os.path.normcase(os.path.abspath(candidate)) != own_path
    ]

    current_file_md5: str = _file_md5(file_path)
    for old_file_path in old_files:
        if _file_md5(old_file_path) == current_file_md5:
            os.remove(file_path)
            return old_file_path
    return file_path
|