-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SyftCLIScanner: support SBOM generation with syft CLI
Packit currently supports SBOM generation with syft tooling by utilizing syft's go library. This has caused packit maintainers a significant maintenance burden. This commit adds a mechanism for buildpack authors to utilize the syft CLI instead to generate SBOM. The intention here is that with widespread adoption of this, we can phase out the codebase that uses the syft go library and thereby relieve the maintainers of this pain. Until recently, syft did not allow consumers to specify the exact schema version of an SBOM mediatype they want generated (the tooling currently supports passing a version for CycloneDX and SPDX - github.com/anchore/syft/issues/846#issuecomment-1908676454). So packit was forced to vendor-in (copy) large chunks of upstream syft go code into packit in order to pin SBOM mediatype versions to versions that most consumers wanted to use. Every time a new version of Syft came out, maintainers had to painfully update the vendored-in code to work with upstream syft components (e.g. github.com//pull/491). Furthermore, it is advantageous to use the syft CLI instead of the syft go library for multiple reasons. With the CLI, we can delegate the entire SBOM generation mechanism easily to syft. The CLI tool is well documented and widely used in the community, and it seems like the syft project is developed with a CLI-first approach. The caveat here is that buildpack authors who use this method should include the Paketo Syft buildpack in their buildplan to have access to the CLI during the build phase. 
Example usage: \# detect \# unless BP_DISABLE_BOM is true requirements = append(requirements, packit.BuildPlanRequirement{ Name: "syft", Metadata: map[string]interface{}{ "build": true, }, }) \# build syftCLIScanner := sbom.NewSyftCLIScanner( pexec.NewExecutable("syft"), scribe.NewEmitter(os.Stdout), ) \# To scan a layer after installing a dependency _ = syftCLIScanner.GenerateSBOM(myLayer.Path, context.Layers.Path, myLayer.Name, context.BuildpackInfo.SBOMFormats..., ) \# OR to scan the workspace dir after running a process _ = syftCLIScanner.GenerateSBOM(context.WorkingDir, context.Layers.Path, myLayer.Name, context.BuildpackInfo.SBOMFormats..., ) - I have not implemented the prettification of SBOMs that the codepath using the syft go lib implements. This seems to add bloat to the app image and is not supported via the CLI. Consumers of SBOMs can easily prettify the SBOM JSONs. - In the codepath that uses the syft go lib, license information is manually injected from buildpack.toml data into the SBOM. This is not available with the SyftCLIScanner. I couldn't find any reasoning for why this was done in the first place. - I have intentionally not reused some code in methods that are mixed up with the syft go library, with the intention of easily phasing out that codebase in the near future.
- Loading branch information
Showing
6 changed files
with
687 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package fakes | ||
|
||
import ( | ||
"sync" | ||
|
||
"github.com/paketo-buildpacks/packit/v2/pexec" | ||
) | ||
|
||
type Executable struct { | ||
ExecuteCall struct { | ||
mutex sync.Mutex | ||
CallCount int | ||
Receives struct { | ||
Execution pexec.Execution | ||
} | ||
Returns struct { | ||
Err error | ||
} | ||
Stub func(pexec.Execution) error | ||
} | ||
} | ||
|
||
func (f *Executable) Execute(param1 pexec.Execution) error { | ||
f.ExecuteCall.mutex.Lock() | ||
defer f.ExecuteCall.mutex.Unlock() | ||
f.ExecuteCall.CallCount++ | ||
f.ExecuteCall.Receives.Execution = param1 | ||
if f.ExecuteCall.Stub != nil { | ||
return f.ExecuteCall.Stub(param1) | ||
} | ||
return f.ExecuteCall.Returns.Err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package sbom_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/paketo-buildpacks/packit/v2/sbom" | ||
"github.com/sclevine/spec" | ||
|
||
. "github.com/onsi/gomega" | ||
) | ||
|
||
func testFormats(t *testing.T, context spec.G, it spec.S) { | ||
var Expect = NewWithT(t).Expect | ||
var f sbom.Format | ||
|
||
context("Formats", func() { | ||
context("no version param", func() { | ||
it("gets the right mediatype extension and version", func() { | ||
f = sbom.CycloneDXFormat | ||
Expect(f.Extension()).To(Equal("cdx.json")) | ||
Expect(f.VersionParam()).To(Equal("")) | ||
}) | ||
}) | ||
|
||
context("with version param", func() { | ||
it("gets the right mediatype extension and version", func() { | ||
f = sbom.SPDXFormat + ";version=9.8.7" | ||
Expect(f.Extension()).To(Equal("spdx.json")) | ||
Expect(f.VersionParam()).To(Equal("9.8.7")) | ||
}) | ||
context("Syft mediatype with version returns empty", func() { | ||
it("returns empty", func() { | ||
f = sbom.SyftFormat + ";version=9.8.7" | ||
Expect(f.Extension()).To(Equal("")) | ||
}) | ||
}) | ||
}) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
package sbom | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"net/url" | ||
"os" | ||
"path/filepath" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
"github.com/google/uuid" | ||
"github.com/paketo-buildpacks/packit/v2/pexec" | ||
"github.com/paketo-buildpacks/packit/v2/scribe" | ||
) | ||
|
||
//go:generate faux --interface Executable --output fakes/executable.go | ||
type Executable interface { | ||
Execute(pexec.Execution) (err error) | ||
} | ||
|
||
// SyftCLIScanner implements scanning a dir using the `syft` CLI | ||
// to generate SBOM, process it, and write it to a location that complies with | ||
// the buildpacks spec. Supports CycloneDX, SPDX and Syft mediatypes, with an | ||
// optional version param for CycloneDX and Syft. | ||
// | ||
// Example Usage: | ||
// | ||
// syftCLIScanner := sbom.NewSyftCLIScanner( | ||
// pexec.NewExecutable("syft"), | ||
// scribe.NewEmitter(os.Stdout), | ||
// ) | ||
type SyftCLIScanner struct { | ||
syftCLI Executable | ||
logger scribe.Emitter | ||
} | ||
|
||
func NewSyftCLIScanner(syftCLI Executable, logger scribe.Emitter) SyftCLIScanner { | ||
return SyftCLIScanner{ | ||
syftCLI: syftCLI, | ||
logger: logger, | ||
} | ||
} | ||
|
||
// Generate takes a path to a directory to scan and a list of SBOM mediatypes | ||
// (with an optional version for CycloneDX and SPDX), and invokes the syft CLI | ||
// scan command. The CLI is instructed to write the SBOM to | ||
// <layers>/<layer>.sbom.<ext> as defined by the buildpack spec. Additionally, | ||
// CycloneDX & SPDX outputs are modified to make the output reproducible | ||
// (Paketo RFCs 38 & 49). | ||
func (s SyftCLIScanner) GenerateSBOM(scanDir, layersPath, layerName string, mediaTypes ...string) error { | ||
sbomWritePaths := make(map[string]string) | ||
args := []string{"scan", "--quiet"} | ||
|
||
s.logger.Debug.Process("Generating SBOM") | ||
s.logger.Debug.Subprocess("Generating syft CLI args from provided mediatypes %s", mediaTypes) | ||
for _, mediatype := range mediaTypes { | ||
syftOutputFormat, err := s.specMediatypeToSyftOutputFormat(mediatype) | ||
if err != nil { | ||
return fmt.Errorf("failed to convert mediatype %s to syft output format: %w", mediatype, err) | ||
} | ||
|
||
extension := Format(mediatype).Extension() | ||
if extension == "" { | ||
return fmt.Errorf("invalid mediatype %s provided", mediatype) | ||
} | ||
|
||
// Layer SBOM write location during build is <layers>/<layer>.sbom.<ext> (CNB spec) | ||
sbomWritePaths[mediatype] = filepath.Join(layersPath, fmt.Sprintf("%s.sbom.%s", layerName, extension)) | ||
args = append(args, "--output", fmt.Sprintf("%s=%s", syftOutputFormat, sbomWritePaths[mediatype])) | ||
} | ||
|
||
args = append(args, fmt.Sprintf("dir:%s", scanDir)) | ||
|
||
s.logger.Debug.Subprocess("Executing syft CLI with args %v", args) | ||
if err := s.syftCLI.Execute(pexec.Execution{ | ||
Args: args, | ||
Dir: scanDir, | ||
Stdout: s.logger.ActionWriter, | ||
Stderr: s.logger.ActionWriter, | ||
}); err != nil { | ||
return fmt.Errorf("failed to execute syft cli with args '%s': %w.\nYou might be missing a buildpack that provides the syft CLI", args, err) | ||
} | ||
|
||
// Make SBOM outputs reproducible | ||
for _, mediatype := range mediaTypes { | ||
if strings.HasPrefix(mediatype, CycloneDXFormat) { | ||
s.logger.Debug.Subprocess("Processing syft CLI CycloneDX SBOM output to make it reproducible") | ||
err := s.makeCycloneDXReproducible(sbomWritePaths[mediatype]) | ||
if err != nil { | ||
return fmt.Errorf("failed to make CycloneDX SBOM reproducible: %w", err) | ||
} | ||
} else if strings.HasPrefix(mediatype, SPDXFormat) { | ||
s.logger.Debug.Subprocess("Processing syft CLI SPDX SBOM output to make it reproducible") | ||
err := s.makeSPDXReproducible(sbomWritePaths[mediatype]) | ||
if err != nil { | ||
return fmt.Errorf("failed to make SPDX SBOM reproducible: %w", err) | ||
} | ||
} | ||
} | ||
|
||
s.logger.Debug.Break() | ||
return nil | ||
} | ||
|
||
// This method takes an SBOM mediatype name as defined by the buildpack spec, | ||
// (with an optional version param for CycloneDX and SPDX, e.g. | ||
// "application/vnd.cyclonedx+json;version=1.4") and returns the output format | ||
// understood by syft tooling (e.g. "cyclonedx-json@1.4"). | ||
// Refer github.com/anchore/syft/blob/v1.11.1/cmd/syft/internal/options/writer.go#L86 | ||
func (s SyftCLIScanner) specMediatypeToSyftOutputFormat(mediatype string) (string, error) { | ||
optionalVersionParam, err := Format(mediatype).VersionParam() | ||
if err != nil { | ||
return "", err | ||
} | ||
if optionalVersionParam != "" { | ||
optionalVersionParam = "@" + optionalVersionParam | ||
} | ||
|
||
switch { | ||
case strings.HasPrefix(mediatype, CycloneDXFormat): | ||
return "cyclonedx-json" + optionalVersionParam, nil | ||
case strings.HasPrefix(mediatype, SPDXFormat): | ||
return "spdx-json" + optionalVersionParam, nil | ||
case strings.HasPrefix(mediatype, SyftFormat): | ||
// The syft tool does not support providing a version for the syft mediatype. | ||
if optionalVersionParam != "" { | ||
return "", fmt.Errorf("The syft mediatype does not allow providing a ;version=<ver> param. Got: %s", mediatype) | ||
} | ||
return "syft-json", nil | ||
default: | ||
return "", fmt.Errorf("mediatype %s matched none of the known mediatypes. Valid values are %s, with an optional version param for CycloneDX and SPDX", mediatype, []string{CycloneDXFormat, SPDXFormat, SyftFormat}) | ||
} | ||
} | ||
|
||
// Makes CycloneDX SBOM more reproducible. | ||
// Remove fields serialNumber and metadata.timestamp. | ||
// See https://github.com/paketo-buildpacks/rfcs/blob/main/text/0038-cdx-syft-sbom.md#amendment-sbom-reproducibility | ||
func (s SyftCLIScanner) makeCycloneDXReproducible(path string) error { | ||
in, err := os.Open(path) | ||
if err != nil { | ||
return fmt.Errorf("unable to read CycloneDX JSON file %s:%w", path, err) | ||
} | ||
defer in.Close() | ||
|
||
input := map[string]interface{}{} | ||
if err := json.NewDecoder(in).Decode(&input); err != nil { | ||
return fmt.Errorf("unable to decode CycloneDX JSON %s: %w", path, err) | ||
} | ||
|
||
delete(input, "serialNumber") | ||
|
||
if md, exists := input["metadata"]; exists { | ||
if metadata, ok := md.(map[string]interface{}); ok { | ||
delete(metadata, "timestamp") | ||
} | ||
} | ||
|
||
out, err := os.Create(path) | ||
if err != nil { | ||
return fmt.Errorf("unable to open CycloneDX JSON for writing %s: %w", path, err) | ||
} | ||
defer out.Close() | ||
|
||
if err := json.NewEncoder(out).Encode(input); err != nil { | ||
return fmt.Errorf("unable to encode CycloneDX: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// Makes SPDX SBOM more reproducible. | ||
// Ensure documentNamespace and creationInfo.created have reproducible values. | ||
// The method respects $SOURCE_DATE_EPOCH for created timestamp if set. | ||
// See github.com/paketo-buildpacks/rfcs/blob/main/text/0049-reproducible-spdx.md | ||
func (s SyftCLIScanner) makeSPDXReproducible(path string) error { | ||
in, err := os.Open(path) | ||
if err != nil { | ||
return fmt.Errorf("unable to read SPDX JSON file %s:%w", path, err) | ||
} | ||
defer in.Close() | ||
|
||
input := map[string]interface{}{} | ||
if err := json.NewDecoder(in).Decode(&input); err != nil { | ||
return fmt.Errorf("unable to decode SPDX JSON %s: %w", path, err) | ||
} | ||
|
||
// Makes the creationInfo reproducible so a hash can be taken for the | ||
// documentNamespace | ||
if creationInfo, ok := input["creationInfo"].(map[string]interface{}); ok { | ||
creationInfo["created"] = time.Time{} // This is the zero-valued time | ||
|
||
sourceDateEpoch := os.Getenv("SOURCE_DATE_EPOCH") | ||
if sourceDateEpoch != "" { | ||
sde, err := strconv.ParseInt(sourceDateEpoch, 10, 64) | ||
if err != nil { | ||
return fmt.Errorf("failed to parse SOURCE_DATE_EPOCH: %w", err) | ||
} | ||
creationInfo["created"] = time.Unix(sde, 0).UTC() | ||
} | ||
input["creationInfo"] = creationInfo | ||
} | ||
|
||
if namespace, ok := input["documentNamespace"].(string); ok { | ||
delete(input, "documentNamespace") | ||
|
||
data, err := json.Marshal(input) | ||
if err != nil { | ||
return fmt.Errorf("failed to checksum SPDX document: %w", err) | ||
} | ||
|
||
uri, err := url.Parse(namespace) | ||
if err != nil { | ||
return fmt.Errorf("failed to parse SPDX documentNamespace url: %w", err) | ||
} | ||
|
||
uri.Host = "paketo.io" | ||
uri.Path = strings.Replace(uri.Path, "syft", "packit", 1) | ||
oldBase := filepath.Base(uri.Path) | ||
source, _, _ := strings.Cut(oldBase, "-") | ||
newBase := fmt.Sprintf("%s-%s", source, uuid.NewSHA1(uuid.NameSpaceURL, data)) | ||
uri.Path = strings.Replace(uri.Path, oldBase, newBase, 1) | ||
|
||
input["documentNamespace"] = uri.String() | ||
} | ||
|
||
out, err := os.Create(path) | ||
if err != nil { | ||
return fmt.Errorf("unable to open SPDX JSON for writing %s: %w", path, err) | ||
} | ||
defer out.Close() | ||
|
||
if err := json.NewEncoder(out).Encode(input); err != nil { | ||
return fmt.Errorf("unable to encode SPDX: %w", err) | ||
} | ||
return nil | ||
} |
Oops, something went wrong.