Changes HardLink Detection logic (fixes #291)

This commit is contained in:
bobokun 2023-05-19 21:52:06 -04:00
parent e606a7d579
commit 1ed99aeb3e
No known key found for this signature in database
GPG key ID: B73932169607D927
5 changed files with 74 additions and 53 deletions

View file

@ -16,7 +16,7 @@ This is a program used to manage your qBittorrent instance such as:
* Automatically add [cross-seed](https://github.com/mmgoodnow/cross-seed) torrents in paused state. **\*Note: cross-seed now allows for torrent injections directly to qBit, making this feature obsolete.\*** * Automatically add [cross-seed](https://github.com/mmgoodnow/cross-seed) torrents in paused state. **\*Note: cross-seed now allows for torrent injections directly to qBit, making this feature obsolete.\***
* Recheck paused torrents sorted by lowest size and resume if completed * Recheck paused torrents sorted by lowest size and resume if completed
* Remove orphaned files from your root directory that are not referenced by qBittorrent * Remove orphaned files from your root directory that are not referenced by qBittorrent
* Tag any torrents that have no hard links and allows optional cleanup to delete these torrents and contents based on maximum ratio and/or time seeded * Tag any torrents that have no hard links outside the root folder and allows optional cleanup to delete these torrents and contents based on maximum ratio and/or time seeded
* RecycleBin function to move files into a RecycleBin folder instead of deleting the data directly when deleting a torrent * RecycleBin function to move files into a RecycleBin folder instead of deleting the data directly when deleting a torrent
* Built-in scheduler to run the script every x minutes. (Can use `--run` command to run without the scheduler) * Built-in scheduler to run the script every x minutes. (Can use `--run` command to run without the scheduler)
* Webhook notifications with [Notifiarr](https://notifiarr.com/) and [Apprise API](https://github.com/caronc/apprise-api) integration * Webhook notifications with [Notifiarr](https://notifiarr.com/) and [Apprise API](https://github.com/caronc/apprise-api) integration

View file

@ -136,7 +136,7 @@ tracker:
tag: other tag: other
nohardlinks: nohardlinks:
# Tag Movies/Series that are not hard linked # Tag Movies/Series that are not hard linked outside the root directory
# Mandatory to fill out directory parameter above to use this function (root_dir/remote_dir) # Mandatory to fill out directory parameter above to use this function (root_dir/remote_dir)
# This variable should be set to your category name of your completed movies/completed series in qbit. Acceptable variable can be any category you would like to tag if there are no hardlinks found # This variable should be set to your category name of your completed movies/completed series in qbit. Acceptable variable can be any category you would like to tag if there are no hardlinks found
movies-completed: movies-completed:

View file

@ -36,21 +36,7 @@ class RemoveOrphaned:
orphaned_files = [] orphaned_files = []
excluded_orphan_files = [] excluded_orphan_files = []
if self.remote_dir != self.root_dir: root_files = util.get_root_files(self.remote_dir, self.root_dir, self.orphaned_dir)
local_orphaned_dir = self.orphaned_dir.replace(self.remote_dir, self.root_dir)
root_files = [
os.path.join(path.replace(self.remote_dir, self.root_dir), name)
for path, subdirs, files in os.walk(self.remote_dir)
for name in files
if local_orphaned_dir not in path
]
else:
root_files = [
os.path.join(path, name)
for path, subdirs, files in os.walk(self.root_dir)
for name in files
if self.orphaned_dir not in path
]
# Get an updated list of torrents # Get an updated list of torrents
logger.print_line("Locating orphan files", self.config.loglevel) logger.print_line("Locating orphan files", self.config.loglevel)

View file

@ -186,6 +186,7 @@ class TagNoHardLinks:
"""Tag torrents with no hardlinks""" """Tag torrents with no hardlinks"""
logger.separator("Tagging Torrents with No Hardlinks", space=False, border=False) logger.separator("Tagging Torrents with No Hardlinks", space=False, border=False)
nohardlinks = self.nohardlinks nohardlinks = self.nohardlinks
check_hardlinks = util.CheckHardLinks(self.root_dir, self.remote_dir)
for category in nohardlinks: for category in nohardlinks:
torrent_list = self.qbt.get_torrents({"category": category, "status_filter": "completed"}) torrent_list = self.qbt.get_torrents({"category": category, "status_filter": "completed"})
if len(torrent_list) == 0: if len(torrent_list) == 0:
@ -199,7 +200,7 @@ class TagNoHardLinks:
continue continue
for torrent in torrent_list: for torrent in torrent_list:
tracker = self.qbt.get_tags(torrent.trackers) tracker = self.qbt.get_tags(torrent.trackers)
has_nohardlinks = util.nohardlink( has_nohardlinks = check_hardlinks.nohardlink(
torrent["content_path"].replace(self.root_dir, self.remote_dir), self.config.notify torrent["content_path"].replace(self.root_dir, self.remote_dir), self.config.notify
) )
if any(tag in torrent.tags for tag in nohardlinks[category]["exclude_tags"]): if any(tag in torrent.tags for tag in nohardlinks[category]["exclude_tags"]):

View file

@ -325,44 +325,78 @@ def remove_empty_directories(pathlib_root_dir, pattern):
pass # if this is being run in parallel, pathlib_root_dir could already be deleted pass # if this is being run in parallel, pathlib_root_dir could already be deleted
def nohardlink(file, notify): class CheckHardLinks:
""" """
Check if there are any hard links Class to check for hardlinks
Will check if there are any hard links if it passes a file or folder
If a folder is passed, it will take the largest file in that folder and only check for hardlinks
of the remaining files where the file is greater size a percentage of the largest file
This fixes the bug in #192
""" """
check_for_hl = True
if os.path.isfile(file): def __init__(self, root_dir, remote_dir):
logger.trace(f"Checking file: {file}") self.root_dir = root_dir
if os.stat(file).st_nlink > 1: self.remote_dir = remote_dir
check_for_hl = False self.root_files = set(get_root_files(self.root_dir, self.remote_dir))
else: self.get_inode_count()
sorted_files = sorted(Path(file).rglob("*"), key=lambda x: os.stat(x).st_size, reverse=True)
logger.trace(f"Folder: {file}") def get_inode_count(self):
logger.trace(f"Files Sorted by size: {sorted_files}") self.inode_count = {}
threshold = 0.5 for file in self.root_files:
if not sorted_files: inode_no = os.stat(file.replace(self.root_dir, self.remote_dir)).st_ino
msg = ( if inode_no in self.inode_count:
f"Nohardlink Error: Unable to open the folder {file}. " self.inode_count[inode_no] += 1
"Please make sure folder exists and qbit_manage has access to this directory." else:
) self.inode_count[inode_no] = 1
notify(msg, "nohardlink")
logger.warning(msg) def nohardlink(self, file, notify):
"""
Check if there are any hard links
Will check if there are any hard links if it passes a file or folder
If a folder is passed, it will take the largest file in that folder and only check for hardlinks
of the remaining files where the file is greater size a percentage of the largest file
This fixes the bug in #192
"""
check_for_hl = True
if os.path.isfile(file):
logger.trace(f"Checking file: {file}")
# https://github.com/StuffAnThings/qbit_manage/issues/291 for more details
if os.stat(file).st_nlink - self.inode_count.get(os.stat(file).st_ino, 1) > 0:
check_for_hl = False
else: else:
largest_file_size = os.stat(sorted_files[0]).st_size sorted_files = sorted(Path(file).rglob("*"), key=lambda x: os.stat(x).st_size, reverse=True)
logger.trace(f"Largest file: {sorted_files[0]}") logger.trace(f"Folder: {file}")
logger.trace(f"Largest file size: {largest_file_size}") logger.trace(f"Files Sorted by size: {sorted_files}")
for files in sorted_files: threshold = 0.5
file_size = os.stat(files).st_size if not sorted_files:
file_no_hardlinks = os.stat(files).st_nlink msg = (
logger.trace(f"Checking file: {file}") f"Nohardlink Error: Unable to open the folder {file}. "
logger.trace(f"Checking file size: {file_size}") "Please make sure folder exists and qbit_manage has access to this directory."
logger.trace(f"Checking no of hard links: {file_no_hardlinks}") )
if file_no_hardlinks > 1 and file_size >= (largest_file_size * threshold): notify(msg, "nohardlink")
check_for_hl = False logger.warning(msg)
return check_for_hl else:
largest_file_size = os.stat(sorted_files[0]).st_size
logger.trace(f"Largest file: {sorted_files[0]}")
logger.trace(f"Largest file size: {largest_file_size}")
for files in sorted_files:
file_size = os.stat(files).st_size
file_no_hardlinks = os.stat(files).st_nlink
logger.trace(f"Checking file: {file}")
logger.trace(f"Checking file size: {file_size}")
logger.trace(f"Checking no of hard links: {file_no_hardlinks}")
if file_no_hardlinks - self.inode_count.get(os.stat(file).st_ino, 1) > 0 and file_size >= (
largest_file_size * threshold
):
check_for_hl = False
return check_for_hl
def get_root_files(root_dir, remote_dir, exclude_dir=None):
    """
    Collect the path of every file found underneath a directory tree.

    The tree that is walked is ``remote_dir``. When ``remote_dir`` differs from
    ``root_dir`` each returned path has ``remote_dir`` substituted by ``root_dir``
    (plain ``str.replace``), otherwise the walked paths are returned unchanged.

    ``exclude_dir`` is optional. When it is truthy, any directory whose walked path
    contains the exclusion string (substring match) contributes no files. If the two
    directories differ, ``remote_dir`` is first substituted by ``root_dir`` inside
    ``exclude_dir`` before it is compared.
    NOTE(review): the comparison is made against the walked (un-substituted) path,
    confirm this is the intended form of the path to match against.

    Returns a list of file paths in ``os.walk`` order.
    """
    dirs_differ = remote_dir != root_dir

    # Translate the exclusion string only when a translation is actually in play
    if exclude_dir and dirs_differ:
        skip_marker = exclude_dir.replace(remote_dir, root_dir)
    else:
        skip_marker = exclude_dir

    walk_root = remote_dir if dirs_differ else root_dir

    collected = []
    for current_dir, _subdirs, filenames in os.walk(walk_root):
        for filename in filenames:
            if skip_marker and skip_marker in current_dir:
                continue
            mapped_dir = current_dir.replace(remote_dir, root_dir) if dirs_differ else current_dir
            collected.append(os.path.join(mapped_dir, filename))
    return collected
def load_json(file): def load_json(file):