From 1ed99aeb3e1f3ee7f984c2f2e6e5919d3b76d52c Mon Sep 17 00:00:00 2001 From: bobokun Date: Fri, 19 May 2023 21:52:06 -0400 Subject: [PATCH] Changes HardLink Detection logic (fixes #291) --- README.md | 2 +- config/config.yml.sample | 2 +- modules/core/remove_orphaned.py | 16 +---- modules/core/tag_nohardlinks.py | 3 +- modules/util.py | 104 +++++++++++++++++++++----------- 5 files changed, 74 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index bf54f82..356ceae 100755 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This is a program used to manage your qBittorrent instance such as: * Automatically add [cross-seed](https://github.com/mmgoodnow/cross-seed) torrents in paused state. **\*Note: cross-seed now allows for torrent injections directly to qBit, making this feature obsolete.\*** * Recheck paused torrents sorted by lowest size and resume if completed * Remove orphaned files from your root directory that are not referenced by qBittorrent -* Tag any torrents that have no hard links and allows optional cleanup to delete these torrents and contents based on maximum ratio and/or time seeded +* Tag any torrents that have no hard links outside the root folder and allows optional cleanup to delete these torrents and contents based on maximum ratio and/or time seeded * RecycleBin function to move files into a RecycleBin folder instead of deleting the data directly when deleting a torrent * Built-in scheduler to run the script every x minutes. 
(Can use `--run` command to run without the scheduler) * Webhook notifications with [Notifiarr](https://notifiarr.com/) and [Apprise API](https://github.com/caronc/apprise-api) integration diff --git a/config/config.yml.sample b/config/config.yml.sample index 6895056..4eca563 100755 --- a/config/config.yml.sample +++ b/config/config.yml.sample @@ -136,7 +136,7 @@ tracker: tag: other nohardlinks: - # Tag Movies/Series that are not hard linked + # Tag Movies/Series that are not hard linked outside the root directory # Mandatory to fill out directory parameter above to use this function (root_dir/remote_dir) # This variable should be set to your category name of your completed movies/completed series in qbit. Acceptable variable can be any category you would like to tag if there are no hardlinks found movies-completed: diff --git a/modules/core/remove_orphaned.py b/modules/core/remove_orphaned.py index 0d6f4a4..030ee3b 100644 --- a/modules/core/remove_orphaned.py +++ b/modules/core/remove_orphaned.py @@ -36,21 +36,7 @@ class RemoveOrphaned: orphaned_files = [] excluded_orphan_files = [] - if self.remote_dir != self.root_dir: - local_orphaned_dir = self.orphaned_dir.replace(self.remote_dir, self.root_dir) - root_files = [ - os.path.join(path.replace(self.remote_dir, self.root_dir), name) - for path, subdirs, files in os.walk(self.remote_dir) - for name in files - if local_orphaned_dir not in path - ] - else: - root_files = [ - os.path.join(path, name) - for path, subdirs, files in os.walk(self.root_dir) - for name in files - if self.orphaned_dir not in path - ] + root_files = util.get_root_files(self.remote_dir, self.root_dir, self.orphaned_dir) # Get an updated list of torrents logger.print_line("Locating orphan files", self.config.loglevel) diff --git a/modules/core/tag_nohardlinks.py b/modules/core/tag_nohardlinks.py index dbd3c8d..97d4495 100644 --- a/modules/core/tag_nohardlinks.py +++ b/modules/core/tag_nohardlinks.py @@ -186,6 +186,7 @@ class TagNoHardLinks: 
"""Tag torrents with no hardlinks""" logger.separator("Tagging Torrents with No Hardlinks", space=False, border=False) nohardlinks = self.nohardlinks + check_hardlinks = util.CheckHardLinks(self.root_dir, self.remote_dir) for category in nohardlinks: torrent_list = self.qbt.get_torrents({"category": category, "status_filter": "completed"}) if len(torrent_list) == 0: @@ -199,7 +200,7 @@ class TagNoHardLinks: continue for torrent in torrent_list: tracker = self.qbt.get_tags(torrent.trackers) - has_nohardlinks = util.nohardlink( + has_nohardlinks = check_hardlinks.nohardlink( torrent["content_path"].replace(self.root_dir, self.remote_dir), self.config.notify ) if any(tag in torrent.tags for tag in nohardlinks[category]["exclude_tags"]): diff --git a/modules/util.py b/modules/util.py index ad7d734..61c89aa 100755 --- a/modules/util.py +++ b/modules/util.py @@ -325,44 +325,78 @@ def remove_empty_directories(pathlib_root_dir, pattern): pass # if this is being run in parallel, pathlib_root_dir could already be deleted -def nohardlink(file, notify): +class CheckHardLinks: """ - Check if there are any hard links - Will check if there are any hard links if it passes a file or folder - If a folder is passed, it will take the largest file in that folder and only check for hardlinks - of the remaining files where the file is greater size a percentage of the largest file - This fixes the bug in #192 + Class to check for hardlinks """ - check_for_hl = True - if os.path.isfile(file): - logger.trace(f"Checking file: {file}") - if os.stat(file).st_nlink > 1: - check_for_hl = False - else: - sorted_files = sorted(Path(file).rglob("*"), key=lambda x: os.stat(x).st_size, reverse=True) - logger.trace(f"Folder: {file}") - logger.trace(f"Files Sorted by size: {sorted_files}") - threshold = 0.5 - if not sorted_files: - msg = ( - f"Nohardlink Error: Unable to open the folder {file}. " - "Please make sure folder exists and qbit_manage has access to this directory." 
- ) - notify(msg, "nohardlink") - logger.warning(msg) + + def __init__(self, root_dir, remote_dir): + self.root_dir = root_dir + self.remote_dir = remote_dir + self.root_files = set(get_root_files(self.root_dir, self.remote_dir)) + self.get_inode_count() + + def get_inode_count(self): + self.inode_count = {} + for file in self.root_files: + inode_no = os.stat(file.replace(self.root_dir, self.remote_dir)).st_ino + if inode_no in self.inode_count: + self.inode_count[inode_no] += 1 + else: + self.inode_count[inode_no] = 1 + + def nohardlink(self, file, notify): + """ + Check if there are any hard links + Will check if there are any hard links if it passes a file or folder + If a folder is passed, it will take the largest file in that folder and only check for hardlinks + of the remaining files where the file is greater size a percentage of the largest file + This fixes the bug in #192 + """ + check_for_hl = True + if os.path.isfile(file): + logger.trace(f"Checking file: {file}") + # https://github.com/StuffAnThings/qbit_manage/issues/291 for more details + if os.stat(file).st_nlink - self.inode_count.get(os.stat(file).st_ino, 1) > 0: + check_for_hl = False else: - largest_file_size = os.stat(sorted_files[0]).st_size - logger.trace(f"Largest file: {sorted_files[0]}") - logger.trace(f"Largest file size: {largest_file_size}") - for files in sorted_files: - file_size = os.stat(files).st_size - file_no_hardlinks = os.stat(files).st_nlink - logger.trace(f"Checking file: {file}") - logger.trace(f"Checking file size: {file_size}") - logger.trace(f"Checking no of hard links: {file_no_hardlinks}") - if file_no_hardlinks > 1 and file_size >= (largest_file_size * threshold): - check_for_hl = False - return check_for_hl + sorted_files = sorted(Path(file).rglob("*"), key=lambda x: os.stat(x).st_size, reverse=True) + logger.trace(f"Folder: {file}") + logger.trace(f"Files Sorted by size: {sorted_files}") + threshold = 0.5 + if not sorted_files: + msg = ( + f"Nohardlink Error: Unable 
to open the folder {file}. " + "Please make sure folder exists and qbit_manage has access to this directory." + ) + notify(msg, "nohardlink") + logger.warning(msg) + else: + largest_file_size = os.stat(sorted_files[0]).st_size + logger.trace(f"Largest file: {sorted_files[0]}") + logger.trace(f"Largest file size: {largest_file_size}") + for files in sorted_files: + file_size = os.stat(files).st_size + file_no_hardlinks = os.stat(files).st_nlink + logger.trace(f"Checking file: {file}") + logger.trace(f"Checking file size: {file_size}") + logger.trace(f"Checking no of hard links: {file_no_hardlinks}") + if file_no_hardlinks - self.inode_count.get(os.stat(file).st_ino, 1) > 0 and file_size >= ( + largest_file_size * threshold + ): + check_for_hl = False + return check_for_hl + + +def get_root_files(root_dir, remote_dir, exclude_dir=None): + local_exclude_dir = exclude_dir.replace(remote_dir, root_dir) if exclude_dir and remote_dir != root_dir else exclude_dir + root_files = [ + os.path.join(path.replace(remote_dir, root_dir) if remote_dir != root_dir else path, name) + for path, subdirs, files in os.walk(remote_dir if remote_dir != root_dir else root_dir) + for name in files + if not local_exclude_dir or local_exclude_dir not in path + ] + return root_files def load_json(file):