diff --git a/drivers/driver.go b/drivers/driver.go
index dc9b12c3ca..adca4232dc 100644
--- a/drivers/driver.go
+++ b/drivers/driver.go
@@ -211,8 +211,8 @@ const (
 	// DifferOutputFormatDir means the output is a directory and it will
 	// keep the original layout.
 	DifferOutputFormatDir = iota
-	// DifferOutputFormatFlat will store the files by their checksum, in the form
-	// checksum[0:2]/checksum[2:]
+	// DifferOutputFormatFlat will store the files by their checksum, per
+	// pkg/chunked/internal/composefs.RegularFilePathForValidatedDigest.
 	DifferOutputFormatFlat
 )
diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 47742b05d8..ac0c2dacd4 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -710,7 +710,7 @@ func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([
 	switch format {
 	case graphdriver.DifferOutputFormatDir:
 	case graphdriver.DifferOutputFormatFlat:
-		entries, err = makeEntriesFlat(entries)
+		entries, err = makeEntriesFlat(entries, nil)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/chunked/dump/dump.go b/pkg/chunked/dump/dump.go
index d98cee09de..04c9c3d2e1 100644
--- a/pkg/chunked/dump/dump.go
+++ b/pkg/chunked/dump/dump.go
@@ -9,10 +9,11 @@ import (
 	"io"
 	"path/filepath"
 	"reflect"
-	"strings"
 	"time"
 
 	"github.com/containers/storage/pkg/chunked/internal"
+	"github.com/containers/storage/pkg/chunked/internal/composefs"
+	"github.com/opencontainers/go-digest"
 	"golang.org/x/sys/unix"
 )
@@ -174,11 +175,16 @@ func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[
 		} else {
 			payload = sanitizeName(entry.Linkname)
 		}
-	} else {
-		if len(entry.Digest) > 10 {
-			d := strings.Replace(entry.Digest, "sha256:", "", 1)
-			payload = d[:2] + "/" + d[2:]
+	} else if entry.Digest != "" {
+		d, err := digest.Parse(entry.Digest)
+		if err != nil {
+			return fmt.Errorf("invalid digest %q for %q: %w", entry.Digest, entry.Name, err)
+		}
+		path, err := composefs.RegularFilePathForValidatedDigest(d)
+		if err != nil {
+			return fmt.Errorf("determining physical file path for %q: %w", entry.Name, err)
 		}
+		payload = path
 	}
 
 	if _, err := fmt.Fprint(out, escapedOptional([]byte(payload), ESCAPE_LONE_DASH)); err != nil {
diff --git a/pkg/chunked/dump/dump_test.go b/pkg/chunked/dump/dump_test.go
index d99693baf8..809be5bfac 100644
--- a/pkg/chunked/dump/dump_test.go
+++ b/pkg/chunked/dump/dump_test.go
@@ -59,7 +59,7 @@ func TestDumpNode(t *testing.T) {
 		Devminor: 0,
 		ModTime:  &modTime,
 		Linkname: "",
-		Digest:   "sha256:abcdef1234567890",
+		Digest:   "sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef",
 		Xattrs: map[string]string{
 			"user.key1": base64.StdEncoding.EncodeToString([]byte("value1")),
 		},
@@ -150,15 +150,15 @@ func TestDumpNode(t *testing.T) {
 			entries: []*internal.FileMetadata{
 				regularFileEntry,
 			},
-			expected: "/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n",
+			expected: "/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n",
 		},
 		{
 			name: "root entry with file",
 			entries: []*internal.FileMetadata{
 				rootEntry,
 				regularFileEntry,
 			},
-			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n",
+			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n",
 			skipAddingRootEntry: true,
 		},
 		{
@@ -196,8 +196,8 @@ func TestDumpNode(t *testing.T) {
 			entries: []*internal.FileMetadata{
 				regularFileEntry,
 				directoryEntry,
 			},
-			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n/mydir 0 40000 1 1000 1000 0 1672531200.0 - - - user.key2=value2\n",
+			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n/mydir 0 40000 1 1000 1000 0 1672531200.0 - - - user.key2=value2\n",
 			skipAddingRootEntry: true,
 		},
 	}
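Note on the layout used above: the flat path is simply the hex encoding of a validated sha256 digest, split after its first two characters. A standalone sketch (not part of the patch), reusing the test vector from dump_test.go:

```go
package main

import (
	"fmt"

	"github.com/opencontainers/go-digest"
)

func main() {
	// Same test vector as in dump_test.go above.
	d := digest.Digest("sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef")
	if err := d.Validate(); err != nil { // validate before slicing, as the helper's name demands
		panic(err)
	}
	e := d.Encoded() // hex portion, without the "sha256:" prefix
	// The first two hex characters become the directory, the rest the file name:
	fmt.Println(e[0:2] + "/" + e[2:]) // prints "01/23456789abcdef…"
}
```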
diff --git a/pkg/chunked/internal/composefs/composefs.go b/pkg/chunked/internal/composefs/composefs.go
new file mode 100644
index 0000000000..f486e6607b
--- /dev/null
+++ b/pkg/chunked/internal/composefs/composefs.go
@@ -0,0 +1,19 @@
+package composefs
+
+import (
+	"fmt"
+
+	"github.com/opencontainers/go-digest"
+)
+
+// RegularFilePathForValidatedDigest returns the path used in the composefs backing store for a
+// regular file with the provided content digest.
+//
+// The caller MUST ensure d is a valid digest (in particular, that it contains no path separators or ".." entries).
+func RegularFilePathForValidatedDigest(d digest.Digest) (string, error) {
+	if algo := d.Algorithm(); algo != digest.SHA256 {
+		return "", fmt.Errorf("unexpected digest algorithm %q", algo)
+	}
+	e := d.Encoded()
+	return e[0:2] + "/" + e[2:], nil
+}
diff --git a/pkg/chunked/internal/composefs/composefs_test.go b/pkg/chunked/internal/composefs/composefs_test.go
new file mode 100644
index 0000000000..7b1e34acb2
--- /dev/null
+++ b/pkg/chunked/internal/composefs/composefs_test.go
@@ -0,0 +1,22 @@
+package composefs
+
+import (
+	"testing"
+
+	"github.com/opencontainers/go-digest"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRegularFilePathForValidatedDigest(t *testing.T) {
+	d, err := digest.Parse("sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef")
+	require.NoError(t, err)
+	res, err := RegularFilePathForValidatedDigest(d)
+	require.NoError(t, err)
+	assert.Equal(t, "01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef", res)
+
+	d, err = digest.Parse("sha512:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef")
+	require.NoError(t, err)
+	_, err = RegularFilePathForValidatedDigest(d)
+	assert.Error(t, err)
+}
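The storage_linux.go changes below wire this helper into staging and add a tar-split based UncompressedDigest computation. As a sketch of how the new tar-split imports fit together (`digestFromTarSplit` is a hypothetical helper name; the patch inlines the equivalent code in ApplyDiff):

```go
package chunkedsketch // hypothetical package, for illustration only

import (
	"bytes"

	digest "github.com/opencontainers/go-digest"
	"github.com/vbatts/tar-split/tar/asm"
	tsStorage "github.com/vbatts/tar-split/tar/storage"
)

// digestFromTarSplit reassembles the uncompressed tar stream from a tar-split
// record plus file contents supplied by fg, and digests it as it streams,
// without buffering the whole layer.
func digestFromTarSplit(tarSplit []byte, fg tsStorage.FileGetter) (digest.Digest, error) {
	unpacker := tsStorage.NewJSONUnpacker(bytes.NewReader(tarSplit))
	digester := digest.Canonical.Digester()
	if err := asm.WriteOutputTarStream(fg, unpacker, digester.Hash()); err != nil {
		return "", err
	}
	return digester.Digest(), nil
}
```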
"github.com/vbatts/tar-split/tar/storage" "golang.org/x/sys/unix" ) @@ -1111,10 +1116,13 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions return false, nil } -func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) { +// makeEntriesFlat collects regular-file entries from mergedEntries, and produces a new list +// where each file content is only represented once, and uses composefs.RegularFilePathForValidatedDigest for its name. +// if flatPathNameMap is not nil, this function writes to it a mapping from filepath.Clean(originalName) to the composefs name. +func makeEntriesFlat(mergedEntries []fileMetadata, flatPathNameMap map[string]string) ([]fileMetadata, error) { var new []fileMetadata - hashes := make(map[string]string) + knownFlatPaths := make(map[string]struct{}) for i := range mergedEntries { if mergedEntries[i].Type != TypeReg { continue @@ -1124,16 +1132,22 @@ func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) { } digest, err := digest.Parse(mergedEntries[i].Digest) if err != nil { - return nil, err + return nil, fmt.Errorf("invalid digest %q for %q: %w", mergedEntries[i].Digest, mergedEntries[i].Name, err) + } + path, err := composefs.RegularFilePathForValidatedDigest(digest) + if err != nil { + return nil, fmt.Errorf("determining physical file path for %q: %w", mergedEntries[i].Name, err) + } + if flatPathNameMap != nil { + flatPathNameMap[filepath.Clean(mergedEntries[i].Name)] = path } - d := digest.Encoded() - if hashes[d] != "" { + if _, known := knownFlatPaths[path]; known { continue } - hashes[d] = d + knownFlatPaths[path] = struct{}{} - mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:]) + mergedEntries[i].Name = path mergedEntries[i].skipSetAttrs = true new = append(new, mergedEntries[i]) @@ -1415,16 +1429,20 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff if err != nil { return output, &fs.PathError{Op: "open", Path: dest, Err: err} } - defer unix.Close(dirfd) + dirFile := os.NewFile(uintptr(dirfd), dest) + defer dirFile.Close() + var flatPathNameMap map[string]string // = nil if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat { - mergedEntries, err = makeEntriesFlat(mergedEntries) + flatPathNameMap = map[string]string{} + mergedEntries, err = makeEntriesFlat(mergedEntries, flatPathNameMap) if err != nil { return output, err } createdDirs := make(map[string]struct{}) for _, e := range mergedEntries { - d := e.Name[0:2] + // This hard-codes an assumption that RegularFilePathForValidatedDigest creates paths with exactly one directory component. + d := filepath.Dir(e.Name) if _, found := createdDirs[d]; !found { if err := unix.Mkdirat(dirfd, d, 0o755); err != nil { return output, &fs.PathError{Op: "mkdirat", Path: d, Err: err} @@ -1687,6 +1705,30 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff } } + // To ensure that consumers of the layer who decompress and read the full tar stream, + // and consumers who consume the data via the TOC, both see exactly the same data and metadata, + // compute the UncompressedDigest. + // c/image will then ensure that this value matches the value in the image config’s RootFS.DiffID, i.e. the image must commit + // to one UncompressedDigest value for each layer, and that will avoid the ambiguity (in consumers who validate layers against DiffID). 
@@ -1687,6 +1705,30 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		}
 	}
 
+	// To ensure that consumers of the layer who decompress and read the full tar stream,
+	// and consumers who consume the data via the TOC, both see exactly the same data and metadata,
+	// compute the UncompressedDigest.
+	// c/image will then ensure that this value matches the value in the image config's RootFS.DiffID, i.e. the image must commit
+	// to one UncompressedDigest value per layer, which avoids ambiguity in consumers who validate layers against DiffID.
+	//
+	// c/image also uses the UncompressedDigest as a layer ID, allowing it to use the traditional layer and image IDs.
+	//
+	// This is, sadly, quite costly: up to now we might have had to write, and digest, only the new/modified files.
+	// Here we need to read, and digest, the whole layer, even if almost all of it was already present locally.
+	//
+	// Also, layers without a tar-split (estargz layers and old zstd:chunked layers) can't produce an UncompressedDigest that
+	// matches the expected RootFS.DiffID; they will need to use a special TOC-based layer ID, and we need to somehow (??)
+	// ensure unambiguity - either by always falling back to full pulls, or by never falling back.
+	if output.UncompressedDigest == "" && output.TarSplit != nil {
+		metadata := tsStorage.NewJSONUnpacker(bytes.NewReader(output.TarSplit))
+		fg := newStagedFileGetter(dirFile, flatPathNameMap)
+		digester := digest.Canonical.Digester()
+		if err := asm.WriteOutputTarStream(fg, metadata, digester.Hash()); err != nil {
+			return output, fmt.Errorf("digesting staged uncompressed stream: %w", err)
+		}
+		output.UncompressedDigest = digester.Digest()
+	}
+
 	if totalChunksSize > 0 {
 		logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
 	}
@@ -1812,3 +1854,33 @@ func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offs
 
 	return digester.Digest() == digest
 }
+
+// newStagedFileGetter returns an object usable as storage.FileGetter for rootDir.
+// If flatPathNameMap is not nil, it is used to map logical tar file names into the backing file paths.
+func newStagedFileGetter(rootDir *os.File, flatPathNameMap map[string]string) *stagedFileGetter {
+	return &stagedFileGetter{
+		rootDir:         rootDir,
+		flatPathNameMap: flatPathNameMap,
+	}
+}
+
+type stagedFileGetter struct {
+	rootDir         *os.File
+	flatPathNameMap map[string]string // nil, or a map from filepath.Clean()ed tar file names to expected on-filesystem names
+}
+
+func (fg *stagedFileGetter) Get(filename string) (io.ReadCloser, error) {
+	if fg.flatPathNameMap != nil {
+		path, ok := fg.flatPathNameMap[filepath.Clean(filename)]
+		if !ok {
+			return nil, fmt.Errorf("no path mapping exists for tar entry %q", filename)
+		}
+		filename = path
+	}
+	pathFD, err := securejoin.OpenatInRoot(fg.rootDir, filename)
+	if err != nil {
+		return nil, err
+	}
+	defer pathFD.Close()
+	return securejoin.Reopen(pathFD, unix.O_RDONLY)
+}
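The Get method above relies on filepath-securejoin to keep lookups inside the staging directory. A minimal standalone sketch of that OpenatInRoot+Reopen pattern (example paths assumed, not part of the patch):

```go
package main

import (
	"fmt"
	"io"
	"os"

	securejoin "github.com/cyphar/filepath-securejoin"
	"golang.org/x/sys/unix"
)

// readInRoot resolves name strictly inside rootDir (symlinks cannot escape
// the root), then reopens the O_PATH handle for reading.
func readInRoot(rootDir *os.File, name string) ([]byte, error) {
	handle, err := securejoin.OpenatInRoot(rootDir, name)
	if err != nil {
		return nil, err
	}
	defer handle.Close()
	f, err := securejoin.Reopen(handle, unix.O_RDONLY)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return io.ReadAll(f)
}

func main() {
	root, err := os.Open("/etc") // any directory serves as the root
	if err != nil {
		panic(err)
	}
	defer root.Close()
	data, err := readInRoot(root, "hostname") // assumes /etc/hostname exists
	if err != nil {
		panic(err)
	}
	fmt.Print(string(data))
}
```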