Skip to content

Commit 79bbf0a

Browse files
committed
feat: fix file filters in folder hashing
1 parent 7a880ee commit 79bbf0a

File tree

2 files changed

+5
-40
lines changed

2 files changed

+5
-40
lines changed

src/scanoss/file_filters.py

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -525,39 +525,3 @@ def _should_skip_file(self, file_rel_path: str) -> bool: # noqa: PLR0911
525525
self.print_debug(f'Skipping file: {file_rel_path} (matches custom pattern)')
526526
return True
527527
return False
528-
529-
def _should_skip_file_for_hfh(self, file_path: Path) -> bool:
530-
"""
531-
Check if a file should be skipped during folder hashing scan.
532-
533-
Args:
534-
file_path (Path): The path to the file to check.
535-
536-
Returns:
537-
bool: True if the file should be skipped, False otherwise.
538-
"""
539-
try:
540-
if (
541-
any(part.startswith('.') for part in file_path.parts) # Hidden files/folders
542-
or file_path.is_symlink() # Symlinks
543-
or file_path.stat().st_size == 0 # Empty files
544-
):
545-
self.print_debug(f'Skipping file: {file_path} (hidden/symlink/empty)')
546-
return True
547-
548-
# Files ending with null
549-
if file_path.suffix.lower() == '.txt':
550-
try:
551-
with open(file_path, 'rb') as f:
552-
if f.read().endswith(b'\x00'):
553-
self.print_debug(f'Skipping file: {file_path} (text file ending with null)')
554-
return True
555-
except (OSError, IOError):
556-
self.print_debug(f'Skipping file: {file_path} (cannot read file content)')
557-
return True
558-
559-
return False
560-
561-
except Exception as e:
562-
self.print_debug(f'Error checking file {file_path}: {str(e)}')
563-
return True

src/scanoss/scanners/folder_hasher.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,18 +138,19 @@ def _build_root_node(self, path: str) -> DirectoryNode:
138138
root = Path(path).resolve()
139139
root_node = DirectoryNode(str(root))
140140

141-
all_files = [
142-
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) < MINIMUM_FILE_NAME_LENGTH
141+
filtered_files = [
142+
f
143+
for f in self.file_filters.get_filtered_files_from_folder(str(root))
144+
if len(f.encode('utf-8')) < MINIMUM_FILE_NAME_LENGTH
143145
]
144-
filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
145146

146147
# Sort the files by name to ensure the hash is the same for the same folder
147148
filtered_files.sort()
148149

149150
bar = Bar('Hashing files...', max=len(filtered_files))
150151
for file_path in filtered_files:
151152
try:
152-
file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
153+
file_path_obj = Path(file_path)
153154
full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
154155

155156
self.base.print_debug(f'\nHashing file {str(full_file_path)}')

0 commit comments

Comments
 (0)