diff --git a/cmd/osv-scanner/internal/helper/extractors_parser.go b/cmd/osv-scanner/internal/helper/extractors_parser.go new file mode 100644 index 00000000000..fabaa6c597c --- /dev/null +++ b/cmd/osv-scanner/internal/helper/extractors_parser.go @@ -0,0 +1,44 @@ +package helper + +import ( + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scanner/v2/internal/builders" + "github.com/google/osv-scanner/v2/internal/scalibrextract" +) + +var presets = map[string][]string{ + "sbom": scalibrextract.ExtractorsSBOMs, + "lockfile": scalibrextract.ExtractorsLockfiles, + "directory": scalibrextract.ExtractorsDirectories, + "artifact": scalibrextract.ExtractorsArtifacts, +} + +func ResolveEnabledExtractors(enabledExtractors []string, disabledExtractors []string) []filesystem.Extractor { + extractors := make(map[string]bool) + + for i, exts := range [][]string{enabledExtractors, disabledExtractors} { + enabled := i == 0 + + for _, extractorOrPreset := range exts { + if names, ok := presets[extractorOrPreset]; ok { + for _, name := range names { + extractors[name] = enabled + } + + continue + } + + extractors[extractorOrPreset] = enabled + } + } + + asSlice := make([]string, 0, len(extractors)) + + for name, value := range extractors { + if name != "" && value { + asSlice = append(asSlice, name) + } + } + + return builders.BuildExtractors(asSlice) +} diff --git a/cmd/osv-scanner/internal/helper/extractors_parser_test.go b/cmd/osv-scanner/internal/helper/extractors_parser_test.go new file mode 100644 index 00000000000..15bcf7ffb5d --- /dev/null +++ b/cmd/osv-scanner/internal/helper/extractors_parser_test.go @@ -0,0 +1,215 @@ +package helper + +import ( + "slices" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive" 
+ "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/javascript/nodemodules" +) + +func TestResolveEnabledExtractors(t *testing.T) { + t.Parallel() + + type args struct { + enabledExtractors []string + disabledExtractors []string + } + tests := []struct { + name string + args args + want []string + }{ + { + name: "nothing_enabled_or_disabled", + args: args{ + enabledExtractors: nil, + disabledExtractors: nil, + }, + want: []string{}, + }, + { + name: "empty_strings_are_ignored", + args: args{ + enabledExtractors: []string{""}, + disabledExtractors: []string{""}, + }, + want: []string{}, + }, + // + { + name: "one_extractor_enabled_and_nothing_disabled", + args: args{ + enabledExtractors: []string{composerlock.Name}, + disabledExtractors: nil, + }, + want: []string{composerlock.Name}, + }, + { + name: "one_extractor_enabled_and_different_extractor_disabled", + args: args{ + enabledExtractors: []string{composerlock.Name}, + disabledExtractors: []string{packageslockjson.Name}, + }, + want: []string{composerlock.Name}, + }, + { + name: "one_extractor_enabled_and_same_extractor_disabled", + args: args{ + enabledExtractors: []string{composerlock.Name}, + disabledExtractors: []string{composerlock.Name}, + }, + want: []string{}, + }, + // + { + name: "one_preset_enabled_and_nothing_disabled", + args: args{ + enabledExtractors: []string{"artifact"}, + disabledExtractors: nil, + }, + want: []string{ + wheelegg.Name, + archive.Name, + gobinary.Name, + nodemodules.Name, + 
cargoauditable.Name, + apk.Name, + dpkg.Name, + }, + }, + { + name: "one_preset_enabled_and_different_preset_disabled", + args: args{ + enabledExtractors: []string{"artifact"}, + disabledExtractors: []string{"sbom"}, + }, + want: []string{ + wheelegg.Name, + archive.Name, + gobinary.Name, + nodemodules.Name, + cargoauditable.Name, + apk.Name, + dpkg.Name, + }, + }, + { + name: "one_preset_enabled_and_same_preset_disabled", + args: args{ + enabledExtractors: []string{"artifact"}, + disabledExtractors: []string{"artifact"}, + }, + want: []string{}, + }, + { + name: "one_preset_enabled_and_some_extractors_disabled", + args: args{ + enabledExtractors: []string{"artifact"}, + disabledExtractors: []string{wheelegg.Name, archive.Name, cargoauditable.Name}, + }, + want: []string{ + gobinary.Name, + nodemodules.Name, + apk.Name, + dpkg.Name, + }, + }, + // + { + name: "multiple_presets_enabled_and_nothing_disabled", + args: args{ + enabledExtractors: []string{"artifact", "sbom"}, + disabledExtractors: []string{}, + }, + want: []string{ + spdx.Name, + cdx.Name, + wheelegg.Name, + archive.Name, + gobinary.Name, + nodemodules.Name, + cargoauditable.Name, + apk.Name, + dpkg.Name, + }, + }, + // + { + name: "multiple_extractors_enabled_and_one_disabled_preset", + args: args{ + enabledExtractors: []string{ + spdx.Name, + archive.Name, + gobinary.Name, + }, + disabledExtractors: []string{"sbom"}, + }, + want: []string{ + archive.Name, + gobinary.Name, + }, + }, + { + name: "multiple_extractors_enabled_and_disabled", + args: args{ + enabledExtractors: []string{ + spdx.Name, + archive.Name, + gobinary.Name, + cargoauditable.Name, + }, + disabledExtractors: []string{ + cdx.Name, + wheelegg.Name, + gobinary.Name, + apk.Name, + }, + }, + want: []string{ + spdx.Name, + archive.Name, + cargoauditable.Name, + }, + }, + // + { + name: "extractor_that_does_not_exist", + args: args{ + enabledExtractors: []string{"???"}, + disabledExtractors: nil, + }, + want: []string{}, + }, + } + for _, tt 
:= range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := ResolveEnabledExtractors(tt.args.enabledExtractors, tt.args.disabledExtractors) + + slices.Sort(tt.want) + + gotNames := make([]string, 0, len(got)) + for _, extractor := range got { + gotNames = append(gotNames, extractor.Name()) + } + + slices.Sort(gotNames) + + if diff := cmp.Diff(tt.want, gotNames); diff != "" { + t.Errorf("replaceJSONInput() diff (-want +got): %s", diff) + } + }) + } +} diff --git a/cmd/osv-scanner/internal/helper/helper.go b/cmd/osv-scanner/internal/helper/helper.go index ca5793be0a0..8aa3bfcf3ed 100644 --- a/cmd/osv-scanner/internal/helper/helper.go +++ b/cmd/osv-scanner/internal/helper/helper.go @@ -63,7 +63,7 @@ func (g *allowedLicencesFlag) String() string { return strings.Join(g.allowlist, ",") } -func GetScanGlobalFlags() []cli.Flag { +func GetScanGlobalFlags(defaultExtractors []string) []cli.Flag { return []cli.Flag{ &cli.StringFlag{ Name: "config", @@ -172,6 +172,15 @@ func GetScanGlobalFlags() []cli.Flag { Usage: "report on licenses based on an allowlist", Value: &allowedLicencesFlag{}, }, + &cli.StringSliceFlag{ + Name: "experimental-extractors", + Usage: "list of specific extractors and ExtractorPresets of extractors to use", + Value: defaultExtractors, + }, + &cli.StringSliceFlag{ + Name: "experimental-disable-extractors", + Usage: "list of specific extractors and ExtractorPresets of extractors to not use", + }, } } @@ -251,5 +260,9 @@ func GetExperimentalScannerActions(cmd *cli.Command, scanLicensesAllowlist []str ShowAllPackages: cmd.Bool("all-packages"), ScanLicensesSummary: cmd.IsSet("licenses"), ScanLicensesAllowlist: scanLicensesAllowlist, + Extractors: ResolveEnabledExtractors( + cmd.StringSlice("experimental-extractors"), + cmd.StringSlice("experimental-disable-extractors"), + ), } } diff --git a/cmd/osv-scanner/scan/image/__snapshots__/command_test.snap b/cmd/osv-scanner/scan/image/__snapshots__/command_test.snap index 
b44cb589f4f..294841939e0 100755 --- a/cmd/osv-scanner/scan/image/__snapshots__/command_test.snap +++ b/cmd/osv-scanner/scan/image/__snapshots__/command_test.snap @@ -75,6 +75,45 @@ No package sources found, --help for usage information. --- +[TestCommand_Docker/real_alpine_image_without_apk_extractor_enabled - 1] +Checking if docker image ("alpine:3.18.9") exists locally... +Saving docker image ("alpine:3.18.9") to temporary file... +Scanning image "alpine:3.18.9" + +--- + +[TestCommand_Docker/real_alpine_image_without_apk_extractor_enabled - 2] +No package sources found, --help for usage information. + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out - 2] +at least one extractor must be enabled + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out#01 - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out#01 - 2] +at least one extractor must be enabled + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_with_presets - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_with_presets - 2] +at least one extractor must be enabled + +--- + [TestCommand_OCIImage/Alpine_3.10_image_tar_with_3.18_version_file - 1] Scanning local image tarball "../../../../internal/image/fixtures/test-alpine.tar" diff --git a/cmd/osv-scanner/scan/image/command.go b/cmd/osv-scanner/scan/image/command.go index a2eb2c6537b..fd76b340e7c 100644 --- a/cmd/osv-scanner/scan/image/command.go +++ b/cmd/osv-scanner/scan/image/command.go @@ -26,7 +26,7 @@ func Command(stdout, stderr io.Writer) *cli.Command { Name: "archive", Usage: "input a local archive image (e.g. 
a tar file)", }, - }, helper.GetScanGlobalFlags()...), + }, helper.GetScanGlobalFlags([]string{"artifact"})...), ArgsUsage: "[image imageNameWithTag]", Action: func(ctx context.Context, cmd *cli.Command) error { return action(ctx, cmd, stdout, stderr) @@ -77,6 +77,10 @@ func action(_ context.Context, cmd *cli.Command, stdout, stderr io.Writer) error ExperimentalScannerActions: helper.GetExperimentalScannerActions(cmd, scanLicensesAllowlist), } + if len(scannerAction.Extractors) == 0 { + return errors.New("at least one extractor must be enabled") + } + var vulnResult models.VulnerabilityResults //nolint:contextcheck // passing the context in would be a breaking change vulnResult, err = osvscanner.DoContainerScan(scannerAction) diff --git a/cmd/osv-scanner/scan/image/command_test.go b/cmd/osv-scanner/scan/image/command_test.go index 38fe790a905..beafbeebb2a 100644 --- a/cmd/osv-scanner/scan/image/command_test.go +++ b/cmd/osv-scanner/scan/image/command_test.go @@ -11,6 +11,50 @@ import ( "github.com/google/osv-scanner/v2/internal/testutility" ) +func TestCommand_ExplicitExtractors(t *testing.T) { + t.Parallel() + + tests := []testcmd.Case{ + { + Name: "extractors_cancelled_out", + Args: []string{ + "", "image", + "--experimental-extractors=sbom/spdx", + "--experimental-extractors=sbom/cdx", + "--experimental-disable-extractors=sbom", + "alpine:non-existent-tag", + }, + Exit: 127, + }, + { + Name: "extractors_cancelled_out_with_presets", + Args: []string{ + "", "image", + "--experimental-extractors=sbom", + "--experimental-disable-extractors=sbom", + "alpine:non-existent-tag", + }, + Exit: 127, + }, + { + Name: "extractors_cancelled_out", + Args: []string{ + "", "image", + "--experimental-extractors=sbom/spdx,sbom/cdx", + "--experimental-disable-extractors=sbom", + "alpine:non-existent-tag", + }, + Exit: 127, + }, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + testcmd.RunAndMatchSnapshots(t, tt) + }) + } +} + func 
TestCommand_Docker(t *testing.T) { t.Parallel() @@ -42,6 +86,14 @@ func TestCommand_Docker(t *testing.T) { Args: []string{"", "image", "alpine:3.18.9"}, Exit: 1, }, + { + // this will result in an error about not being able to find any package sources + // since we've requested the os/apk extractor disabled, and there's nothing else + // in the image that we support extracting + Name: "real_alpine_image_without_apk_extractor_enabled", + Args: []string{"", "image", "--experimental-disable-extractors=os/apk", "alpine:3.18.9"}, + Exit: 128, + }, } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { diff --git a/cmd/osv-scanner/scan/source/__snapshots__/command_test.snap b/cmd/osv-scanner/scan/source/__snapshots__/command_test.snap index 8751b3c3dfa..bfcef1c7ab7 100755 --- a/cmd/osv-scanner/scan/source/__snapshots__/command_test.snap +++ b/cmd/osv-scanner/scan/source/__snapshots__/command_test.snap @@ -1205,6 +1205,153 @@ Filtered 1 vulnerability from output --- +[TestCommand_ExplicitExtractors/empty_extractors_flag_does_nothing - 1] + +--- + +[TestCommand_ExplicitExtractors/empty_extractors_flag_does_nothing - 2] +Incorrect Usage: flag needs an argument: --experimental-extractors= + +flag needs an argument: --experimental-extractors= + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_specified_individually - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_specified_individually - 2] +at least one extractor must be enabled + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_specified_together - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_specified_together - 2] +at least one extractor must be enabled + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_with_presets - 1] + +--- + +[TestCommand_ExplicitExtractors/extractors_cancelled_out_with_presets - 2] +at least one extractor must be enabled + +--- + 
+[TestCommand_ExplicitExtractors/scanning_directory_with_a_couple_of_specific_extractors_enabled_individually - 1] +Scanning dir ../../fixtures/locks-many +Scanned /osv-scanner/fixtures/locks-many/composer.lock file and found 1 package +Scanned /osv-scanner/fixtures/locks-many/package-lock.json file and found 1 package +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +GHSA-whgm-jr23-g3j9 and 1 alias have been filtered out because: Test manifest file +Filtered 1 vulnerability from output +No issues found + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_a_couple_of_specific_extractors_enabled_individually - 2] + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_a_couple_of_specific_extractors_enabled_specified_together - 1] +Scanning dir ../../fixtures/locks-many +Scanned /osv-scanner/fixtures/locks-many/composer.lock file and found 1 package +Scanned /osv-scanner/fixtures/locks-many/package-lock.json file and found 1 package +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +GHSA-whgm-jr23-g3j9 and 1 alias have been filtered out because: Test manifest file +Filtered 1 vulnerability from output +No issues found + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_a_couple_of_specific_extractors_enabled_specified_together - 2] + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_an_extractor_that_does_not_exist - 1] +Scanning dir ../../fixtures/locks-many +Scanned /osv-scanner/fixtures/locks-many/package-lock.json file and found 1 package +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +GHSA-whgm-jr23-g3j9 and 1 alias have been filtered out because: Test manifest file +Filtered 1 vulnerability from output + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_an_extractor_that_does_not_exist - 2] +Unknown extractor custom/extractor + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_one_specific_extractor_disabled - 1] 
+Scanning dir ../../fixtures/locks-many +Scanned /osv-scanner/fixtures/locks-many/Gemfile.lock file and found 1 package +Scanned /osv-scanner/fixtures/locks-many/alpine.cdx.xml file and found 15 packages +Scanned /osv-scanner/fixtures/locks-many/composer.lock file and found 1 package +Scanned /osv-scanner/fixtures/locks-many/yarn.lock file and found 1 package +Filtered 1 local/unscannable package/s from the scan. +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +CVE-2025-26519 has been filtered out because: Test manifest file (alpine.cdx.xml) +Filtered 1 vulnerability from output +No issues found + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_one_specific_extractor_disabled - 2] + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_one_specific_extractor_enabled - 1] +Scanning dir ../../fixtures/locks-many +Scanned /osv-scanner/fixtures/locks-many/package-lock.json file and found 1 package +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +GHSA-whgm-jr23-g3j9 and 1 alias have been filtered out because: Test manifest file +Filtered 1 vulnerability from output +No issues found + +--- + +[TestCommand_ExplicitExtractors/scanning_directory_with_one_specific_extractor_enabled - 2] + +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_one_different_extractor_enabled - 1] +Scanning dir ../../fixtures/locks-many/composer.lock + +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_one_different_extractor_enabled - 2] +No package sources found, --help for usage information. 
+ +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_one_specific_extractor_enabled - 1] +Scanning dir ../../fixtures/locks-many/package-lock.json +Scanned /osv-scanner/fixtures/locks-many/package-lock.json file and found 1 package +Loaded filter from: /osv-scanner/fixtures/locks-many/osv-scanner.toml +GHSA-whgm-jr23-g3j9 and 1 alias have been filtered out because: Test manifest file +Filtered 1 vulnerability from output +No issues found + +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_one_specific_extractor_enabled - 2] + +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_parse_as_but_specific_extractor_disabled - 1] + +--- + +[TestCommand_ExplicitExtractors/scanning_file_with_parse_as_but_specific_extractor_disabled - 2] +could not determine extractor, requested package-lock.json + +--- + [TestCommand_GithubActions/scanning_osv-scanner_custom_format - 1] Scanned /osv-scanner/scan/source/fixtures/locks-insecure/osv-scanner-flutter-deps.json file as a osv-scanner and found 3 packages +--------------------------------+------+-----------+----------------------------+-----------------------------+-------------------------------------------------------+ diff --git a/cmd/osv-scanner/scan/source/command.go b/cmd/osv-scanner/scan/source/command.go index 5ae2d60b298..f9fc8a8360b 100644 --- a/cmd/osv-scanner/scan/source/command.go +++ b/cmd/osv-scanner/scan/source/command.go @@ -73,7 +73,7 @@ func Command(stdout, stderr io.Writer) *cli.Command { Name: "maven-registry", Usage: "URL of the default registry to fetch Maven metadata", }, - }, helper.GetScanGlobalFlags()...), + }, helper.GetScanGlobalFlags([]string{"lockfile", "sbom", "directory"})...), ArgsUsage: "[directory1 directory2...]", Action: func(ctx context.Context, cmd *cli.Command) error { return action(ctx, cmd, stdout, stderr) @@ -129,6 +129,10 @@ func action(_ context.Context, cmd *cli.Command, stdout, stderr io.Writer) error ExperimentalScannerActions: experimentalScannerActions, } + 
if len(experimentalScannerActions.Extractors) == 0 { + return errors.New("at least one extractor must be enabled") + } + var vulnResult models.VulnerabilityResults //nolint:contextcheck // passing the context in would be a breaking change vulnResult, err = osvscanner.DoScan(scannerAction) diff --git a/cmd/osv-scanner/scan/source/command_test.go b/cmd/osv-scanner/scan/source/command_test.go index 4beb7fdf12b..83875c15703 100644 --- a/cmd/osv-scanner/scan/source/command_test.go +++ b/cmd/osv-scanner/scan/source/command_test.go @@ -245,6 +245,142 @@ func TestCommand(t *testing.T) { } } +func TestCommand_ExplicitExtractors(t *testing.T) { + t.Parallel() + + tests := []testcmd.Case{ + { + Name: "empty_extractors_flag_does_nothing", + Args: []string{"", "source", "--experimental-extractors="}, + Exit: 127, + }, + { + Name: "extractors_cancelled_out_specified_individually", + Args: []string{ + "", "source", + "--experimental-extractors=sbom/spdx", + "--experimental-extractors=sbom/cdx", + "--experimental-disable-extractors=sbom", + }, + Exit: 127, + }, + { + Name: "extractors_cancelled_out_specified_together", + Args: []string{ + "", "source", + "--experimental-extractors=sbom/spdx,sbom/cdx", + "--experimental-disable-extractors=sbom", + }, + Exit: 127, + }, + { + Name: "extractors_cancelled_out_with_presets", + Args: []string{ + "", "source", + "--experimental-extractors=sbom", + "--experimental-disable-extractors=sbom", + }, + Exit: 127, + }, + { + // this will scan just the package-lock.json file as we've not enabled + // extractors for any of the other lockfiles + Name: "scanning_directory_with_one_specific_extractor_enabled", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson", + "../../fixtures/locks-many", + }, + Exit: 0, + }, + { + Name: "scanning_directory_with_an_extractor_that_does_not_exist", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson", + 
"--experimental-extractors=custom/extractor", + "--experimental-disable-extractors=custom/anotherextractor", + "../../fixtures/locks-many", + }, + Exit: 127, + }, + { + // this will scan just the package-lock.json and composer.lock files as + // we've not enabled extractors for any of the other lockfiles + Name: "scanning_directory_with_a_couple_of_specific_extractors_enabled_individually", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson", + "--experimental-extractors=php/composerlock", + "../../fixtures/locks-many", + }, + Exit: 0, + }, + { + // this will scan just the package-lock.json and composer.lock files as + // we've not enabled extractors for any of the other lockfiles + Name: "scanning_directory_with_a_couple_of_specific_extractors_enabled_specified_together", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson,php/composerlock", + "../../fixtures/locks-many", + }, + Exit: 0, + }, + { + // this should result in all files within the directory being scanned + // except for the package-lock.json + Name: "scanning_directory_with_one_specific_extractor_disabled", + Args: []string{ + "", "source", + "--experimental-disable-extractors=javascript/packagelockjson", + "../../fixtures/locks-many", + }, + Exit: 0, + }, + { + // this will scan just the package lock, since we're requested that file + // specifically and have enabled just that extractor + Name: "scanning_file_with_one_specific_extractor_enabled", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson", + "../../fixtures/locks-many/package-lock.json", + }, + Exit: 0, + }, + { + // this will result in an error about not being able to find any package sources + // since we've requested a composer.lock be scanned without the extractor enabled + Name: "scanning_file_with_one_different_extractor_enabled", + Args: []string{ + "", "source", + "--experimental-extractors=javascript/packagelockjson", 
+ "../../fixtures/locks-many/composer.lock", + }, + Exit: 128, + }, + { + // this will result in an error about not being able to determine the extractor + // since we've requested the file to be parsed with a specific extractor + // that we've also disabled + Name: "scanning_file_with_parse_as_but_specific_extractor_disabled", + Args: []string{ + "", "source", + "--experimental-disable-extractors=javascript/packagelockjson", + "-L", "package-lock.json:../../fixtures/locks-many/composer.lock", + }, + Exit: 127, + }, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + testcmd.RunAndMatchSnapshots(t, tt) + }) + } +} + func TestCommand_CallAnalysis(t *testing.T) { t.Parallel() diff --git a/internal/builders/extractors.go b/internal/builders/extractors.go new file mode 100644 index 00000000000..f2038b099df --- /dev/null +++ b/internal/builders/extractors.go @@ -0,0 +1,171 @@ +package builders + +import ( + "log/slog" + + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/cpp/conanlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/depsjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packagesconfig" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/erlang/mixlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gomod" + "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/cabal" + "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/stacklock" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" + 
"github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/bunlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/yarnlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pdmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pipfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/uvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg" + "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx" + "github.com/google/osv-scanner/v2/internal/scalibrextract/filesystem/vendored" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/java/pomxmlenhanceable" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/javascript/nodemodules" + "github.com/google/osv-scanner/v2/internal/scalibrextract/vcs/gitrepo" +) + +func build(name string) filesystem.Extractor { + switch name { + // Alpine 
+ case apk.Name: + return apk.NewDefault() + + // C + case conanlock.Name: + return conanlock.New() + + // Debian + case dpkg.Name: + return dpkg.NewDefault() + + // Erlang + case mixlock.Name: + return mixlock.New() + + // Flutter + case pubspec.Name: + return pubspec.New() + + // Go + case gomod.Name: + return gomod.New() + case gobinary.Name: + return gobinary.NewDefault() + + // Haskell + case cabal.Name: + return cabal.NewDefault() + case stacklock.Name: + return stacklock.NewDefault() + + // Java + case gradlelockfile.Name: + return gradlelockfile.New() + case gradleverificationmetadataxml.Name: + return gradleverificationmetadataxml.New() + case pomxmlenhanceable.Name: + return pomxmlenhanceable.New() + case archive.Name: + return archive.NewDefault() + + // Javascript + case packagelockjson.Name: + return packagelockjson.NewDefault() + case pnpmlock.Name: + return pnpmlock.New() + case yarnlock.Name: + return yarnlock.New() + case bunlock.Name: + return bunlock.New() + case nodemodules.Name: + return nodemodules.Extractor{} + + // NuGet + case depsjson.Name: + return depsjson.NewDefault() + case packagesconfig.Name: + return packagesconfig.NewDefault() + case packageslockjson.Name: + return packageslockjson.NewDefault() + + // PHP + case composerlock.Name: + return composerlock.New() + + // Python + case pipfilelock.Name: + return pipfilelock.New() + case pdmlock.Name: + return pdmlock.New() + case poetrylock.Name: + return poetrylock.New() + case requirements.Name: + return requirements.NewDefault() + case uvlock.Name: + return uvlock.New() + case wheelegg.Name: + return wheelegg.NewDefault() + + // R + case renvlock.Name: + return renvlock.New() + + // Ruby + case gemfilelock.Name: + return gemfilelock.New() + + // Rust + case cargolock.Name: + return cargolock.New() + case cargoauditable.Name: + return cargoauditable.NewDefault() + + // SBOM + case spdx.Name: + return spdx.New() + case cdx.Name: + return cdx.New() + + // Directories + case vendored.Name: 
+ return &vendored.Extractor{} + case gitrepo.Name: + return &gitrepo.Extractor{} + } + + return nil +} + +func BuildExtractors(names []string) []filesystem.Extractor { + extractors := make([]filesystem.Extractor, 0, len(names)) + + for _, name := range names { + extractor := build(name) + + if extractor == nil { + slog.Error("Unknown extractor " + name) + } else { + extractors = append(extractors, build(name)) + } + } + + return extractors +} diff --git a/internal/scalibrextract/filesystem/vendored/vendored.go b/internal/scalibrextract/filesystem/vendored/vendored.go index b2dcdf4f78a..2372df3c773 100644 --- a/internal/scalibrextract/filesystem/vendored/vendored.go +++ b/internal/scalibrextract/filesystem/vendored/vendored.go @@ -60,26 +60,39 @@ const ( maxDetermineVersionFiles = 10000 ) +type Config struct { + // ScanGitDir determines whether a vendored library with a git directory is scanned or not, + // this is used to avoid duplicate results, once from git scanning, once from vendoredDir scanning + ScanGitDir bool + OSVClient *osvdev.OSVClient + Disabled bool +} + type Extractor struct { // ScanGitDir determines whether a vendored library with a git directory is scanned or not, // this is used to avoid duplicate results, once from git scanning, once from vendoredDir scanning ScanGitDir bool OSVClient *osvdev.OSVClient + Disabled bool } // Name of the extractor. -func (e Extractor) Name() string { return Name } +func (e *Extractor) Name() string { return Name } // Version of the extractor. -func (e Extractor) Version() int { return 0 } +func (e *Extractor) Version() int { return 0 } // Requirements of the extractor. 
-func (e Extractor) Requirements() *plugin.Capabilities { +func (e *Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } // FileRequired returns true for likely directories to contain vendored c/c++ code -func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { +func (e *Extractor) FileRequired(fapi filesystem.FileAPI) bool { + if e.Disabled { + return false + } + // Check if parent directory is one of the vendoredLibName // Clean first before Dir call to avoid trailing slashes causing problems parentDir := filepath.Base(filepath.Dir(filepath.Clean(fapi.Path()))) @@ -99,7 +112,12 @@ func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { // Extract determines the most likely package version from the directory and returns them as // commit hash inventory entries -func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { +func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { + // todo: maybe we should return an error instead? need to double check we're always using FileRequired correctly first + if e.Disabled { + return inventory.Inventory{}, nil + } + var packages []*extractor.Package results, err := e.queryDetermineVersions(ctx, input.Path, input.FS, e.ScanGitDir) @@ -124,16 +142,16 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (in } // ToPURL converts an inventory created by this extractor into a PURL. 
-func (e Extractor) ToPURL(_ *extractor.Package) *purl.PackageURL { +func (e *Extractor) ToPURL(_ *extractor.Package) *purl.PackageURL { return nil } // Ecosystem returns an empty string as all inventories are commit hashes -func (e Extractor) Ecosystem(_ *extractor.Package) string { +func (e *Extractor) Ecosystem(_ *extractor.Package) string { return "" } -func (e Extractor) queryDetermineVersions(ctx context.Context, repoDir string, fsys scalibrfs.FS, scanGitDir bool) (*osvdev.DetermineVersionResponse, error) { +func (e *Extractor) queryDetermineVersions(ctx context.Context, repoDir string, fsys scalibrfs.FS, scanGitDir bool) (*osvdev.DetermineVersionResponse, error) { var hashes []osvdev.DetermineVersionHash err := fs.WalkDir(fsys, repoDir, func(p string, d fs.DirEntry, _ error) error { @@ -195,4 +213,24 @@ func (e Extractor) queryDetermineVersions(ctx context.Context, repoDir string, f return result, nil } -var _ filesystem.Extractor = Extractor{} +var _ filesystem.Extractor = &Extractor{} + +type configurable interface { + Configure(config Config) +} + +func (e *Extractor) Configure(config Config) { + e.ScanGitDir = config.ScanGitDir + e.OSVClient = config.OSVClient + e.Disabled = config.Disabled +} + +var _ configurable = &Extractor{} + +func Configure(extractor extractor.Extractor, config Config) { + us, ok := extractor.(configurable) + + if ok { + us.Configure(config) + } +} diff --git a/internal/scalibrextract/language/java/pomxmlenhanceable/pomxmlenhanceable.go b/internal/scalibrextract/language/java/pomxmlenhanceable/pomxmlenhanceable.go new file mode 100644 index 00000000000..78a2b0e441e --- /dev/null +++ b/internal/scalibrextract/language/java/pomxmlenhanceable/pomxmlenhanceable.go @@ -0,0 +1,81 @@ +package pomxmlenhanceable + +import ( + "context" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml" + 
"github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxmlnet" + "github.com/google/osv-scalibr/inventory" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" +) + +const ( + // Name is the unique name of this extractor. + Name = "java/pomxmlenhanceable" +) + +// Extractor extracts Maven packages from pom.xml files. +type Extractor struct { + actual filesystem.Extractor +} + +// New returns a new instance of the extractor. +func New() filesystem.Extractor { return &Extractor{actual: pomxml.New()} } + +// Name of the extractor +func (e *Extractor) Name() string { return Name } + +// Version of the extractor +func (e *Extractor) Version() int { return 0 } + +// Requirements of the extractor +func (e *Extractor) Requirements() *plugin.Capabilities { + return e.actual.Requirements() +} + +// FileRequired returns true if the specified file matches Maven POM lockfile patterns. +func (e *Extractor) FileRequired(api filesystem.FileAPI) bool { + return e.actual.FileRequired(api) +} + +// Extract extracts packages from pom.xml files passed through the scan input. +func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { + return e.actual.Extract(ctx, input) +} + +// ToPURL converts a package created by this extractor into a PURL. +func (e *Extractor) ToPURL(p *extractor.Package) *purl.PackageURL { + return e.actual.ToPURL(p) +} + +// Ecosystem returns the OSV ecosystem ('Maven') of the software extracted by this extractor. 
+func (e *Extractor) Ecosystem(p *extractor.Package) string { + return e.actual.Ecosystem(p) +} + +var _ filesystem.Extractor = &Extractor{} + +type enhanceable interface { + Enhance(config pomxmlnet.Config) +} + +// Enhance uses the given config to improve the abilities of this extractor, +// at the cost of additional requirements such as networking and direct fs access +func (e *Extractor) Enhance(config pomxmlnet.Config) { + e.actual = pomxmlnet.New(config) +} + +var _ enhanceable = &Extractor{} + +// EnhanceIfPossible calls Extractor.Enhance with the given config if the +// provided extractor is an Extractor +func EnhanceIfPossible(extractor filesystem.Extractor, config pomxmlnet.Config) { + us, ok := extractor.(enhanceable) + + if ok { + us.Enhance(config) + } +} diff --git a/internal/scalibrextract/presets.go b/internal/scalibrextract/presets.go new file mode 100644 index 00000000000..b58e4c639dc --- /dev/null +++ b/internal/scalibrextract/presets.go @@ -0,0 +1,123 @@ +package scalibrextract + +import ( + "github.com/google/osv-scalibr/extractor/filesystem/language/cpp/conanlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/depsjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packagesconfig" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/erlang/mixlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gomod" + "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/cabal" + "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/stacklock" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" + 
"github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/bunlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/yarnlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pdmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pipfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/uvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg" + "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx" + "github.com/google/osv-scanner/v2/internal/scalibrextract/filesystem/vendored" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/java/pomxmlenhanceable" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/javascript/nodemodules" + "github.com/google/osv-scanner/v2/internal/scalibrextract/vcs/gitrepo" +) + +var ExtractorsSBOMs = []string{ + spdx.Name, + cdx.Name, +} + +var 
ExtractorsLockfiles = []string{ + // C + conanlock.Name, + + // Erlang + mixlock.Name, + + // Flutter + pubspec.Name, + + // Go + gomod.Name, + + // Java + gradlelockfile.Name, + gradleverificationmetadataxml.Name, + pomxmlenhanceable.Name, + + // Javascript + packagelockjson.Name, + pnpmlock.Name, + yarnlock.Name, + bunlock.Name, + + // PHP + composerlock.Name, + + // Python + pipfilelock.Name, + pdmlock.Name, + poetrylock.Name, + requirements.Name, + uvlock.Name, + + // R + renvlock.Name, + + // Ruby + gemfilelock.Name, + + // Rust + cargolock.Name, + + // NuGet + depsjson.Name, + packagesconfig.Name, + packageslockjson.Name, + + // Haskell + cabal.Name, + stacklock.Name, +} + +var ExtractorsDirectories = []string{ + gitrepo.Name, + vendored.Name, +} + +var ExtractorsArtifacts = []string{ + // --- Project artifacts --- + // Python + wheelegg.Name, + // Java + archive.Name, + // Go + gobinary.Name, + // Javascript + nodemodules.Name, + // Rust + cargoauditable.Name, + + // --- OS packages --- + // Alpine + apk.Name, + // Debian + dpkg.Name, +} diff --git a/internal/scalibrextract/vcs/gitrepo/extractor.go b/internal/scalibrextract/vcs/gitrepo/extractor.go index fb2afc23dbe..010c0e3b702 100644 --- a/internal/scalibrextract/vcs/gitrepo/extractor.go +++ b/internal/scalibrextract/vcs/gitrepo/extractor.go @@ -18,10 +18,16 @@ const ( Name = "vcs/gitrepo" ) +type Config struct { + IncludeRootGit bool + Disabled bool +} + // Extractor extracts git repository hashes including submodule hashes. // This extractor will not return an error, and will just return no results if we fail to extract type Extractor struct { IncludeRootGit bool + Disabled bool } func getCommitSHA(repo *git.Repository) (string, error) { @@ -63,18 +69,22 @@ func createCommitQueryInventory(commit string, path string) *extractor.Package { } // Name of the extractor. -func (e Extractor) Name() string { return Name } +func (e *Extractor) Name() string { return Name } // Version of the extractor. 
-func (e Extractor) Version() int { return 0 } +func (e *Extractor) Version() int { return 0 } // Requirements of the extractor. -func (e Extractor) Requirements() *plugin.Capabilities { +func (e *Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } // FileRequired returns true for git repositories .git dirs -func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { +func (e *Extractor) FileRequired(fapi filesystem.FileAPI) bool { + if e.Disabled { + return false + } + if filepath.Base(fapi.Path()) != ".git" { return false } @@ -89,7 +99,12 @@ func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { } // Extract extracts git commits from HEAD and from submodules -func (e Extractor) Extract(_ context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { +func (e *Extractor) Extract(_ context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { + // todo: maybe we should return an error instead? need to double check we're always using FileRequired correctly first + if e.Disabled { + return inventory.Inventory{}, nil + } + // The input path is the .git directory, but git.PlainOpen expects the actual directory containing the .git dir. // So call filepath.Dir to get the parent path // Assume this is fully on a real filesystem @@ -126,13 +141,32 @@ func (e Extractor) Extract(_ context.Context, input *filesystem.ScanInput) (inve } // ToPURL converts an inventory created by this extractor into a PURL. 
-func (e Extractor) ToPURL(_ *extractor.Package) *purl.PackageURL { +func (e *Extractor) ToPURL(_ *extractor.Package) *purl.PackageURL { return nil } // Ecosystem returns an empty string as all inventories are commit hashes -func (e Extractor) Ecosystem(_ *extractor.Package) string { +func (e *Extractor) Ecosystem(_ *extractor.Package) string { return "" } -var _ filesystem.Extractor = Extractor{} +var _ filesystem.Extractor = &Extractor{} + +type configurable interface { + Configure(config Config) +} + +func (e *Extractor) Configure(config Config) { + e.IncludeRootGit = config.IncludeRootGit + e.Disabled = config.Disabled +} + +var _ configurable = &Extractor{} + +func Configure(extractor extractor.Extractor, config Config) { + us, ok := extractor.(configurable) + + if ok { + us.Configure(config) + } +} diff --git a/pkg/osvscanner/internal/scanners/extractorbuilder.go b/pkg/osvscanner/internal/scanners/extractorbuilder.go deleted file mode 100644 index 99361c5ce20..00000000000 --- a/pkg/osvscanner/internal/scanners/extractorbuilder.go +++ /dev/null @@ -1,193 +0,0 @@ -package scanners - -import ( - "deps.dev/util/resolve" - "github.com/google/osv-scalibr/clients/datasource" - "github.com/google/osv-scalibr/extractor/filesystem" - "github.com/google/osv-scalibr/extractor/filesystem/language/cpp/conanlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" - "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/depsjson" - "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packagesconfig" - "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" - "github.com/google/osv-scalibr/extractor/filesystem/language/erlang/mixlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" - "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gomod" - "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/cabal" - 
"github.com/google/osv-scalibr/extractor/filesystem/language/haskell/stacklock" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxmlnet" - "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/bunlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" - "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/yarnlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/pdmlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/pipfilelock" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/uvlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg" - "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" - "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" - "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" - "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" - "github.com/google/osv-scalibr/extractor/filesystem/os/apk" - "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" - "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx" - "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx" - 
"github.com/google/osv-scanner/v2/internal/scalibrextract/filesystem/vendored" - "github.com/google/osv-scanner/v2/internal/scalibrextract/language/javascript/nodemodules" - "github.com/google/osv-scanner/v2/internal/scalibrextract/vcs/gitrepo" - "github.com/ossf/osv-schema/bindings/go/osvschema" - "osv.dev/bindings/go/osvdev" -) - -var sbomExtractors = []filesystem.Extractor{ - spdx.Extractor{}, - cdx.Extractor{}, -} - -var lockfileExtractors = []filesystem.Extractor{ - // C - conanlock.Extractor{}, - - // Erlang - mixlock.Extractor{}, - - // Flutter - pubspec.Extractor{}, - - // Go - gomod.Extractor{}, - - // Java - gradlelockfile.Extractor{}, - gradleverificationmetadataxml.Extractor{}, - - // Javascript - packagelockjson.Extractor{}, - pnpmlock.Extractor{}, - yarnlock.Extractor{}, - bunlock.Extractor{}, - - // PHP - composerlock.Extractor{}, - - // Python - pipfilelock.Extractor{}, - pdmlock.Extractor{}, - poetrylock.Extractor{}, - requirements.Extractor{}, - uvlock.Extractor{}, - - // R - renvlock.Extractor{}, - - // Ruby - gemfilelock.Extractor{}, - - // Rust - cargolock.Extractor{}, - - // NuGet - depsjson.Extractor{}, - packagesconfig.Extractor{}, - packageslockjson.Extractor{}, - - // Haskell - cabal.Extractor{}, - stacklock.Extractor{}, - // TODO: map the extracted packages to SwiftURL in OSV.dev - // The extracted package names do not match the package names of SwiftURL in OSV.dev, - // so we need to find a workaround to map the names. - // packageresolved.Extractor{}, -} - -// BuildLockfileExtractors returns all relevant extractors for lockfile scanning given the required clients -// All clients can be nil, and if nil the extractors requiring those clients will not be returned. 
-func BuildLockfileExtractors(dependencyClients map[osvschema.Ecosystem]resolve.Client, mavenAPIClient *datasource.MavenRegistryAPIClient) []filesystem.Extractor { - extractorsToUse := lockfileExtractors - - if dependencyClients[osvschema.EcosystemMaven] != nil && mavenAPIClient != nil { - extractorsToUse = append(extractorsToUse, pomxmlnet.New(pomxmlnet.Config{ - DependencyClient: dependencyClients[osvschema.EcosystemMaven], - MavenRegistryAPIClient: mavenAPIClient, - })) - } else { - extractorsToUse = append(extractorsToUse, pomxml.Extractor{}) - } - - return extractorsToUse -} - -// BuildSBOMExtractors returns extractors relevant to SBOM extraction -func BuildSBOMExtractors() []filesystem.Extractor { - return sbomExtractors -} - -// BuildWalkerExtractors returns all relevant extractors for directory scanning given the required clients -// All clients can be nil, and if nil the extractors requiring those clients will not be returned. -func BuildWalkerExtractors( - includeRootGit bool, - osvdevClient *osvdev.OSVClient, - dependencyClients map[osvschema.Ecosystem]resolve.Client, - mavenAPIClient *datasource.MavenRegistryAPIClient) []filesystem.Extractor { - relevantExtractors := []filesystem.Extractor{} - - if includeRootGit { - relevantExtractors = append(relevantExtractors, gitrepo.Extractor{ - IncludeRootGit: includeRootGit, - }) - } - relevantExtractors = append(relevantExtractors, lockfileExtractors...) - relevantExtractors = append(relevantExtractors, sbomExtractors...) 
- - if osvdevClient != nil { - relevantExtractors = append(relevantExtractors, vendored.Extractor{ - // Only attempt to vendor check git directories if we are not skipping scanning root git directories - ScanGitDir: !includeRootGit, - OSVClient: osvdevClient, - }) - } - - if dependencyClients[osvschema.EcosystemMaven] != nil && mavenAPIClient != nil { - relevantExtractors = append(relevantExtractors, pomxmlnet.New(pomxmlnet.Config{ - DependencyClient: dependencyClients[osvschema.EcosystemMaven], - MavenRegistryAPIClient: mavenAPIClient, - })) - } else { - relevantExtractors = append(relevantExtractors, pomxml.Extractor{}) - } - - return relevantExtractors -} - -// BuildArtifactExtractors returns all relevant extractors for artifact scanning given the required clients -// All clients can be nil, and if nil the extractors requiring those clients will not be returned. -func BuildArtifactExtractors() []filesystem.Extractor { - extractorsToUse := []filesystem.Extractor{ - // --- Project artifacts --- - // Python - wheelegg.New(wheelegg.DefaultConfig()), - // Java - archive.New(archive.DefaultConfig()), - // Go - gobinary.New(gobinary.DefaultConfig()), - // Javascript - nodemodules.Extractor{}, - // Rust - cargoauditable.NewDefault(), - - // --- OS packages --- - // Alpine - apk.New(apk.DefaultConfig()), - // Debian - // TODO: Add tests for debian containers - dpkg.New(dpkg.DefaultConfig()), - } - - return extractorsToUse -} diff --git a/pkg/osvscanner/internal/scanners/lockfile.go b/pkg/osvscanner/internal/scanners/lockfile.go index e721774ec6c..8ac9f30ac67 100644 --- a/pkg/osvscanner/internal/scanners/lockfile.go +++ b/pkg/osvscanner/internal/scanners/lockfile.go @@ -21,8 +21,6 @@ import ( "github.com/google/osv-scalibr/extractor/filesystem/language/haskell/stacklock" "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" - 
"github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml" - "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxmlnet" "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/bunlock" "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" @@ -40,6 +38,7 @@ import ( "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" "github.com/google/osv-scanner/v2/internal/output" "github.com/google/osv-scanner/v2/internal/scalibrextract" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/java/pomxmlenhanceable" "github.com/google/osv-scanner/v2/internal/scalibrextract/language/osv/osvscannerjson" ) @@ -48,7 +47,7 @@ var lockfileExtractorMapping = map[string][]string{ "pnpm-lock.yaml": {pnpmlock.Name}, "yarn.lock": {yarnlock.Name}, "package-lock.json": {packagelockjson.Name}, - "pom.xml": {pomxmlnet.Name, pomxml.Name}, + "pom.xml": {pomxmlenhanceable.Name}, "buildscript-gradle.lockfile": {gradlelockfile.Name}, "gradle.lockfile": {gradlelockfile.Name}, "verification-metadata.xml": {gradleverificationmetadataxml.Name}, diff --git a/pkg/osvscanner/internal/scanners/lockfile_test.go b/pkg/osvscanner/internal/scanners/lockfile_test.go deleted file mode 100644 index c88c88fad74..00000000000 --- a/pkg/osvscanner/internal/scanners/lockfile_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package scanners - -import ( - "slices" - "testing" -) - -func TestLockfileScalibrMappingExists(t *testing.T) { - t.Parallel() - - // Every lockfileExtractor should have a mapping, - // this might not be true the other way around as some extractors are dynamically set, - // and not present in lockfileExtractors - for _, target := range lockfileExtractors { - found := false - for _, names := range lockfileExtractorMapping { - if slices.Contains(names, target.Name()) { - found = true - break - } - } - - if !found { - 
t.Errorf("Extractor %v not found.", target.Name()) - } - } -} diff --git a/pkg/osvscanner/osvscanner.go b/pkg/osvscanner/osvscanner.go index b85d9eac56c..de74df198a2 100644 --- a/pkg/osvscanner/osvscanner.go +++ b/pkg/osvscanner/osvscanner.go @@ -19,6 +19,7 @@ import ( "github.com/google/osv-scalibr/clients/datasource" "github.com/google/osv-scalibr/clients/resolution" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" "github.com/google/osv-scanner/v2/internal/clients/clientimpl/baseimagematcher" "github.com/google/osv-scanner/v2/internal/clients/clientimpl/licensematcher" "github.com/google/osv-scanner/v2/internal/clients/clientimpl/localmatcher" @@ -29,10 +30,10 @@ import ( "github.com/google/osv-scanner/v2/internal/imodels" "github.com/google/osv-scanner/v2/internal/imodels/results" "github.com/google/osv-scanner/v2/internal/output" + "github.com/google/osv-scanner/v2/internal/scalibrextract" "github.com/google/osv-scanner/v2/internal/version" "github.com/google/osv-scanner/v2/pkg/models" "github.com/google/osv-scanner/v2/pkg/osvscanner/internal/imagehelpers" - "github.com/google/osv-scanner/v2/pkg/osvscanner/internal/scanners" "github.com/ossf/osv-schema/bindings/go/osvschema" "osv.dev/bindings/go/osvdev" ) @@ -62,6 +63,8 @@ type ExperimentalScannerActions struct { LocalDBPath string TransitiveScanningActions + + Extractors []filesystem.Extractor } type TransitiveScanningActions struct { @@ -313,7 +316,11 @@ func DoContainerScan(actions ScannerActions) (models.VulnerabilityResults, error // --- Do Scalibr Scan --- scanner := scalibr.New() scalibrSR, err := scanner.ScanContainer(context.Background(), img, &scalibr.ScanConfig{ - FilesystemExtractors: scanners.BuildArtifactExtractors(), + FilesystemExtractors: getExtractors( + scalibrextract.ExtractorsArtifacts, + accessors, + actions, + ), }) if err != nil { return models.VulnerabilityResults{}, fmt.Errorf("failed to scan container image: %w", err) diff --git 
a/pkg/osvscanner/scan.go b/pkg/osvscanner/scan.go index d49b865e03f..9323b813f4b 100644 --- a/pkg/osvscanner/scan.go +++ b/pkg/osvscanner/scan.go @@ -5,21 +5,66 @@ import ( "fmt" "log/slog" "path/filepath" + "slices" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxmlnet" + "github.com/google/osv-scanner/v2/internal/builders" "github.com/google/osv-scanner/v2/internal/imodels" "github.com/google/osv-scanner/v2/internal/scalibrextract" "github.com/google/osv-scanner/v2/internal/scalibrextract/ecosystemmock" + "github.com/google/osv-scanner/v2/internal/scalibrextract/filesystem/vendored" + "github.com/google/osv-scanner/v2/internal/scalibrextract/language/java/pomxmlenhanceable" + "github.com/google/osv-scanner/v2/internal/scalibrextract/vcs/gitrepo" "github.com/google/osv-scanner/v2/pkg/osvscanner/internal/scanners" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) +func configureExtractors(extractors []filesystem.Extractor, accessors ExternalAccessors, actions ScannerActions) { + for _, tor := range extractors { + if accessors.DependencyClients[osvschema.EcosystemMaven] != nil && accessors.MavenRegistryAPIClient != nil { + pomxmlenhanceable.EnhanceIfPossible(tor, pomxmlnet.Config{ + DependencyClient: accessors.DependencyClients[osvschema.EcosystemMaven], + MavenRegistryAPIClient: accessors.MavenRegistryAPIClient, + }) + } + + // todo: the "disabled" aspect should probably be worked into the extractor being present in the first place + // since "IncludeRootGit" is always true + gitrepo.Configure(tor, gitrepo.Config{ + IncludeRootGit: actions.IncludeGitRoot, + Disabled: !actions.IncludeGitRoot, + }) + + vendored.Configure(tor, vendored.Config{ + // Only attempt to vendor check git directories if we are not skipping scanning root git directories + ScanGitDir: !actions.IncludeGitRoot, + OSVClient: accessors.OSVDevClient, + Disabled: 
accessors.OSVDevClient == nil, + }) + } +} + +func getExtractors(defaultExtractorNames []string, accessors ExternalAccessors, actions ScannerActions) []filesystem.Extractor { + extractors := actions.Extractors + + if len(extractors) == 0 { + extractors = builders.BuildExtractors(defaultExtractorNames) + } + + configureExtractors(extractors, accessors, actions) + + return extractors +} + // scan essentially converts ScannerActions into PackageScanResult by performing the extractions func scan(accessors ExternalAccessors, actions ScannerActions) ([]imodels.PackageScanResult, error) { //nolint:prealloc // We don't know how many inventories we will retrieve var scannedInventories []*extractor.Package // --- Lockfiles --- - lockfileExtractors := scanners.BuildLockfileExtractors(accessors.DependencyClients, accessors.MavenRegistryAPIClient) + lockfileExtractors := getExtractors(scalibrextract.ExtractorsLockfiles, accessors, actions) for _, lockfileElem := range actions.LockfilePaths { invs, err := scanners.ScanSingleFileWithMapping(lockfileElem, lockfileExtractors) if err != nil { @@ -30,7 +75,8 @@ func scan(accessors ExternalAccessors, actions ScannerActions) ([]imodels.Packag } // --- SBOMs --- - sbomExtractors := scanners.BuildSBOMExtractors() + // none of the SBOM extractors need configuring + sbomExtractors := builders.BuildExtractors(scalibrextract.ExtractorsSBOMs) for _, sbomPath := range actions.SBOMPaths { path, err := filepath.Abs(sbomPath) if err != nil { @@ -53,12 +99,16 @@ func scan(accessors ExternalAccessors, actions ScannerActions) ([]imodels.Packag } // --- Directories --- - dirExtractors := scanners.BuildWalkerExtractors( - actions.IncludeGitRoot, - accessors.OSVDevClient, - accessors.DependencyClients, - accessors.MavenRegistryAPIClient, + dirExtractors := getExtractors( + slices.Concat( + scalibrextract.ExtractorsLockfiles, + scalibrextract.ExtractorsSBOMs, + scalibrextract.ExtractorsDirectories, + ), + accessors, + actions, ) + for _, dir := range 
actions.DirectoryPaths { slog.Info("Scanning dir " + dir) pkgs, err := scanners.ScanDir(dir, actions.Recursive, !actions.NoIgnore, dirExtractors)