# Checks a file or a folder for hard links.
# The folder handling addresses issue #192: small hard-linked files in a folder
# (false negatives in the noHL logic) must not decide the result on their own.
def nohardlink(file):
    """Return True when *file* has no relevant hard links, False otherwise.

    If *file* is a regular file, it counts as hard-linked when its link
    count (``st_nlink``) is greater than 1.

    If *file* is a folder, every regular file below it is examined
    recursively. Only files whose size is at least ``threshold`` (50%) of the
    largest file in the folder are considered; smaller files are ignored even
    if they are hard-linked. A folder that contains no regular files (or a
    path that does not exist) is reported as having no hard links.

    Args:
        file: Path (str or path-like) of the file or folder to check.

    Returns:
        bool: True if no hard links were found, False if at least one
        considered file has more than one link.
    """
    if os.path.isfile(file):
        logger.trace(f"Checking file: {file}")
        return os.stat(file).st_nlink <= 1

    # Fraction of the largest file's size a file must reach to be considered.
    threshold = 0.5

    # Collect regular files only, stat-ing each one exactly once. Directories
    # must be skipped: they typically report st_nlink >= 2 on POSIX filesystems
    # and would otherwise be mistaken for hard-linked files. is_file() is also
    # False for broken symlinks, so those no longer make stat() raise.
    entries = [(path, path.stat()) for path in Path(file).rglob("*") if path.is_file()]
    if not entries:
        logger.trace(f"No files found in: {file}")
        return True

    # Largest first, so the scan can stop at the first file below the cutoff.
    entries.sort(key=lambda entry: entry[1].st_size, reverse=True)
    largest_path, largest_stat = entries[0]
    largest_file_size = largest_stat.st_size
    logger.trace(f"Largest file: {largest_path}")
    logger.trace(f"Largest file size: {largest_file_size}")

    min_size = largest_file_size * threshold
    for path, info in entries:
        if info.st_size < min_size:
            # Every remaining file is smaller still; none of them qualifies.
            break
        logger.trace(f"Checking file: {path}")
        logger.trace(f"Checking file size: {info.st_size}")
        logger.trace(f"Checking no of hard links: {info.st_nlink}")
        if info.st_nlink > 1:
            return False
    return True