Skip to content
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Upcoming changes...

## [1.36.0] - 2025-10-08
### Added
- Add `--recursive-threshold` and `--min-accepted-score` arguments to `folder-scan` command
- Add `--depth` argument to `folder-scan` and `folder-hash` commands

## [1.35.0] - 2025-10-07
### Modified
- Use gRPC instead of REST for API calls
Expand Down Expand Up @@ -677,3 +682,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[1.32.0]: https://github.com/scanoss/scanoss.py/compare/v1.31.5...v1.32.0
[1.33.0]: https://github.com/scanoss/scanoss.py/compare/v1.32.0...v1.33.0
[1.34.0]: https://github.com/scanoss/scanoss.py/compare/v1.33.0...v1.34.0
[1.35.0]: https://github.com/scanoss/scanoss.py/compare/v1.34.0...v1.35.0
[1.36.0]: https://github.com/scanoss/scanoss.py/compare/v1.35.0...v1.36.0
2 changes: 1 addition & 1 deletion src/scanoss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
THE SOFTWARE.
"""

__version__ = '1.35.0'
__version__ = '1.36.0'
1 change: 1 addition & 0 deletions src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings

import grpc
import warnings
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Regenerate this file to remove duplicate import.

The warnings module is imported twice (line 3 and line 6). Since this is a generated file marked "DO NOT EDIT", regenerate it using the gRPC Python protocol compiler to eliminate the duplication.

🤖 Prompt for AI Agents
In src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py around line 6, the
generated file contains a duplicate import of the warnings module; regenerate
the file using the gRPC/protobuf Python code generator (e.g., protoc with the
grpc_python_plugin or grpcio-tools’ python -m grpc_tools.protoc) from the .proto
sources to produce a clean file without duplicate imports, then replace the
checked-in file with the newly generated output (do not hand-edit the generated
file).


GRPC_GENERATED_VERSION = '1.73.1'
GRPC_VERSION = grpc.__version__
Expand Down
34 changes: 34 additions & 0 deletions src/scanoss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@
from .components import Components
from .constants import (
DEFAULT_API_TIMEOUT,
DEFAULT_HFH_DEPTH,
DEFAULT_HFH_MIN_ACCEPTED_SCORE,
DEFAULT_HFH_RANK_THRESHOLD,
DEFAULT_HFH_RECURSIVE_THRESHOLD,
DEFAULT_POST_SIZE,
DEFAULT_RETRY,
DEFAULT_TIMEOUT,
Expand Down Expand Up @@ -869,6 +872,27 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
)
p_folder_scan.add_argument(
'--depth',
type=int,
default=DEFAULT_HFH_DEPTH,
help=f'Defines how deep to scan the root directory (optional - default {DEFAULT_HFH_DEPTH})',
)
p_folder_scan.add_argument(
'--recursive-threshold',
type=float,
default=DEFAULT_HFH_RECURSIVE_THRESHOLD,
help=f'Minimum score threshold to consider a match (optional - default: {DEFAULT_HFH_RECURSIVE_THRESHOLD})',
)
p_folder_scan.add_argument(
'--min-accepted-score',
type=float,
default=DEFAULT_HFH_MIN_ACCEPTED_SCORE,
help=(
'Only show results with a score at or above this threshold '
f'(optional - default: {DEFAULT_HFH_MIN_ACCEPTED_SCORE})'
),
)
p_folder_scan.set_defaults(func=folder_hashing_scan)

# Sub-command: folder-hash
Expand All @@ -887,6 +911,12 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
default='json',
help='Result output format (optional - default: json)',
)
p_folder_hash.add_argument(
'--depth',
type=int,
default=DEFAULT_HFH_DEPTH,
help=f'Defines how deep to hash the root directory (optional - default {DEFAULT_HFH_DEPTH})',
)
p_folder_hash.set_defaults(func=folder_hash)

# Output options
Expand Down Expand Up @@ -2456,6 +2486,9 @@ def folder_hashing_scan(parser, args):
client=client,
scanoss_settings=scanoss_settings,
rank_threshold=args.rank_threshold,
depth=args.depth,
recursive_threshold=args.recursive_threshold,
min_accepted_score=args.min_accepted_score,
)

if scanner.scan():
Expand Down Expand Up @@ -2489,6 +2522,7 @@ def folder_hash(parser, args):
scan_dir=args.scan_dir,
config=folder_hasher_config,
scanoss_settings=scanoss_settings,
depth=args.depth,
)

folder_hasher.hash_directory(args.scan_dir)
Expand Down
5 changes: 4 additions & 1 deletion src/scanoss/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@

DEFAULT_API_TIMEOUT = 600

DEFAULT_HFH_RANK_THRESHOLD = 5
DEFAULT_HFH_RANK_THRESHOLD = 5
DEFAULT_HFH_DEPTH = 1
DEFAULT_HFH_RECURSIVE_THRESHOLD = 0.8
DEFAULT_HFH_MIN_ACCEPTED_SCORE = 0.15
159 changes: 1 addition & 158 deletions src/scanoss/file_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,162 +269,6 @@
'sqlite3',
}

# TODO: For hfh add the .gitignore patterns
DEFAULT_SKIPPED_EXT_HFH = {
'.1',
'.2',
'.3',
'.4',
'.5',
'.6',
'.7',
'.8',
'.9',
'.ac',
'.adoc',
'.am',
'.asciidoc',
'.bmp',
'.build',
'.cfg',
'.chm',
'.class',
'.cmake',
'.cnf',
'.conf',
'.config',
'.contributors',
'.copying',
'.crt',
'.csproj',
'.css',
'.csv',
'.dat',
'.data',
'.dtd',
'.dts',
'.iws',
'.c9',
'.c9revisions',
'.dtsi',
'.dump',
'.eot',
'.eps',
'.geojson',
'.gif',
'.glif',
'.gmo',
'.guess',
'.hex',
'.htm',
'.html',
'.ico',
'.iml',
'.in',
'.inc',
'.info',
'.ini',
'.ipynb',
'.jpeg',
'.jpg',
'.json',
'.jsonld',
'.lock',
'.log',
'.m4',
'.map',
'.md5',
'.meta',
'.mk',
'.mxml',
'.o',
'.otf',
'.out',
'.pbtxt',
'.pdf',
'.pem',
'.phtml',
'.plist',
'.png',
'.prefs',
'.properties',
'.pyc',
'.qdoc',
'.result',
'.rgb',
'.rst',
'.scss',
'.sha',
'.sha1',
'.sha2',
'.sha256',
'.sln',
'.spec',
'.sub',
'.svg',
'.svn-base',
'.tab',
'.template',
'.test',
'.tex',
'.tiff',
'.ttf',
'.txt',
'.utf-8',
'.vim',
'.wav',
'.woff',
'.woff2',
'.xht',
'.xhtml',
'.xml',
'.xpm',
'.xsd',
'.xul',
'.yaml',
'.yml',
'.wfp',
'.editorconfig',
'.dotcover',
'.pid',
'.lcov',
'.egg',
'.manifest',
'.cache',
'.coverage',
'.cover',
'.gem',
'.lst',
'.pickle',
'.pdb',
'.gml',
'.pot',
'.plt',
'.whml',
'.pom',
'.smtml',
'.min.js',
'.mf',
'.base64',
'.s',
'.diff',
'.patch',
'.rules',
# File endings
'-doc',
'config',
'news',
'readme',
'swiftdoc',
'texidoc',
'todo',
'version',
'ignore',
'manifest',
'sqlite',
'sqlite3',
}


class FileFilters(ScanossBase):
"""
Expand Down Expand Up @@ -707,9 +551,8 @@ def _should_skip_file(self, file_rel_path: str) -> bool: # noqa: PLR0911
bool: True if file should be skipped, False otherwise
"""
file_name = os.path.basename(file_rel_path)

DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT

if not self.hidden_files_folders and file_name.startswith('.'):
self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
Expand Down
Loading