From ed0ab8c88909eb132f5adb88c05ac4a0b5eeed77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 10 Jun 2022 21:17:50 +0200 Subject: [PATCH] Introduce SourcedImage.CanChangeLayerCompression, use it in copy.Image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Don't compress/decompress layers with unknown MIME types, and layers in OCI artifacts. - Don't even change manifest MIME types in these situations, whatever happens. - Don't substitute compressed/uncompressed variants (via TryReusingBlobWithOptions) for OCI artifacts, if we discover the same variants when copying images that refer to the same blobs. Note that this does _not_ restrict compression to algorithms supported by the SourcedImage, because that would prohibit a single-pass conversion from v2s2 to OCI while compressing to zstd [1], and that's a feature we currently exercise in tests. So, this prevents us from failing to copy OCI artifacts, but users of zstd still need to be careful about choosing OCI manually. [1] We would need to ask the _destination_ format handler about zstd, not the source-format SourcedImage, and we don't currently have that infrastructure. It's also not immediately clear how to combine this with the sequence of alternative manifest formats returned by determineManifestConversion. Signed-off-by: Miloslav Trmač --- copy/compression.go | 17 +++++++++++++---- copy/copy.go | 20 +++++++++++++------- internal/image/docker_schema1.go | 9 +++++++++ internal/image/docker_schema1_test.go | 9 +++++++++ internal/image/docker_schema2.go | 9 +++++++++ internal/image/docker_schema2_test.go | 11 +++++++++++ internal/image/manifest.go | 15 ++++++++++++--- internal/image/oci.go | 9 +++++++++ internal/image/oci_test.go | 14 ++++++++++++++ manifest/common.go | 10 ++++++++++ manifest/docker_schema2.go | 8 ++++++++ manifest/docker_schema2_test.go | 8 ++++++++ manifest/oci.go | 17 +++++++++++++++++ manifest/oci_test.go | 11 +++++++++++ 14 files changed, 153 insertions(+), 14 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index f243838e13..99305a0398 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -65,7 +65,11 @@ func (ic *imageCopier) blobPipelineCompressionStep(stream *sourceStream, canModi detected bpDetectCompressionStepData) (*bpCompressionStepData, error) { // WARNING: If you are adding new reasons to change the blob, update also the OptimizeDestinationImageAlreadyExists // short-circuit conditions - if canModifyBlob { + layerCompressionChangeSupported := ic.src.CanChangeLayerCompression(stream.info.MediaType) + if !layerCompressionChangeSupported { + logrus.Debugf("Compression change for blob %s (%q) not supported", srcInfo.Digest, stream.info.MediaType) + } + if canModifyBlob && layerCompressionChangeSupported { for _, fn := range []func(*sourceStream, bpDetectCompressionStepData) (*bpCompressionStepData, error){ ic.bpcPreserveEncrypted, ic.bpcCompressUncompressed, @@ -81,7 +85,7 @@ func (ic *imageCopier) blobPipelineCompressionStep(stream *sourceStream, canModi } } } - return ic.bpcPreserveOriginal(stream, detected), nil + return ic.bpcPreserveOriginal(stream, detected, layerCompressionChangeSupported), nil } // bpcPreserveEncrypted checks if the input is encrypted, and returns a *bpCompressionStepData if so. @@ -194,14 +198,19 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp } // bpcPreserveOriginal returns a *bpCompressionStepData for not changing the original blob. -func (ic *imageCopier) bpcPreserveOriginal(stream *sourceStream, detected bpDetectCompressionStepData) *bpCompressionStepData { +func (ic *imageCopier) bpcPreserveOriginal(stream *sourceStream, detected bpDetectCompressionStepData, + layerCompressionChangeSupported bool) *bpCompressionStepData { logrus.Debugf("Using original blob without modification") // Remember if the original blob was compressed, and if so how, so that if // LayerInfosForCopy() returned something that differs from what was in the // source's manifest, and UpdatedImage() needs to call UpdateLayerInfos(), // it will be able to correctly derive the MediaType for the copied blob. + // + // But don’t touch blobs in objects where we can’t change compression, + // so that src.UpdatedImage() doesn’t fail; assume that for such blobs + // LayerInfosForCopy() should not be making any changes in the first place. var algorithm *compressiontypes.Algorithm - if detected.isCompressed { + if layerCompressionChangeSupported && detected.isCompressed { algorithm = &detected.format } else { algorithm = nil diff --git a/copy/copy.go b/copy/copy.go index 81d4597e27..0df5952375 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -678,12 +678,14 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli cannotModifyManifestReason: cannotModifyManifestReason, ociEncryptLayers: options.OciEncryptLayers, } - // Ensure _this_ copy sees exactly the intended data when either processing a signed image or signing it. - // This may be too conservative, but for now, better safe than sorry, _especially_ on the SignBy path: - // The signature makes the content non-repudiable, so it very much matters that the signature is made over exactly what the user intended. - // We do intend the RecordDigestUncompressedPair calls to only work with reliable data, but at least there’s a risk - // that the compressed version coming from a third party may be designed to attack some other decompressor implementation, - // and we would reuse and sign it. + // Decide whether we can substitute blobs with semantic equivalents: + // - Don’t do that if we can’t modify the manifest at all + // - Ensure _this_ copy sees exactly the intended data when either processing a signed image or signing it. + // This may be too conservative, but for now, better safe than sorry, _especially_ on the SignBy path: + // The signature makes the content non-repudiable, so it very much matters that the signature is made over exactly what the user intended. + // We do intend the RecordDigestUncompressedPair calls to only work with reliable data, but at least there’s a risk + // that the compressed version coming from a third party may be designed to attack some other decompressor implementation, + // and we would reuse and sign it. ic.canSubstituteBlobs = ic.cannotModifyManifestReason == "" && options.SignBy == "" if err := ic.updateEmbeddedDockerReference(); err != nil { @@ -1143,6 +1145,10 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // Don’t read the layer from the source if we already have the blob, and optimizations are acceptable. if canAvoidProcessingCompleteLayer { + canChangeLayerCompression := ic.src.CanChangeLayerCompression(srcInfo.MediaType) + logrus.Debugf("Checking if we can reuse blob %s: general substitution = %v, compression for MIME type %q = %v", + srcInfo.Digest, ic.canSubstituteBlobs, srcInfo.MediaType, canChangeLayerCompression) + canSubstitute := ic.canSubstituteBlobs && ic.src.CanChangeLayerCompression(srcInfo.MediaType) // TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm // that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing // a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause @@ -1151,7 +1157,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // the ImageDestination interface lets us pass in. reused, blobInfo, err := ic.c.dest.TryReusingBlobWithOptions(ctx, srcInfo, private.TryReusingBlobOptions{ Cache: ic.c.blobInfoCache, - CanSubstitute: ic.canSubstituteBlobs, + CanSubstitute: canSubstitute, EmptyLayer: emptyLayer, LayerIndex: &layerIndex, SrcRef: srcRef, diff --git a/internal/image/docker_schema1.go b/internal/image/docker_schema1.go index 5f24970c37..94f776224e 100644 --- a/internal/image/docker_schema1.go +++ b/internal/image/docker_schema1.go @@ -246,3 +246,12 @@ func (m *manifestSchema1) convertToManifestOCI1(ctx context.Context, options *ty func (m *manifestSchema1) SupportsEncryption(context.Context) bool { return false } + +// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image +// (and the code can handle that). +// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted +// algorithms depends not on the current format, but possibly on the target of a conversion (if UpdatedImage converts +// to a different manifest format). +func (m *manifestSchema1) CanChangeLayerCompression(mimeType string) bool { + return true // There are no MIME types in the manifest, so we must assume a valid image. +} diff --git a/internal/image/docker_schema1_test.go b/internal/image/docker_schema1_test.go index 6aa7405e08..58f222b3c7 100644 --- a/internal/image/docker_schema1_test.go +++ b/internal/image/docker_schema1_test.go @@ -667,3 +667,12 @@ func TestConvertSchema1ToManifestOCIWithAnnotations(t *testing.T) { require.NoError(t, err) assert.NotEqual(t, res.LayerInfos(), layerInfoOverwrites) } + +func TestManifestSchema1CanChangeLayerCompression(t *testing.T) { + for _, m := range []genericManifest{ + manifestSchema1FromFixture(t, "schema1.json"), + manifestSchema1FromComponentsLikeFixture(t), + } { + assert.True(t, m.CanChangeLayerCompression("")) + } +} diff --git a/internal/image/docker_schema2.go b/internal/image/docker_schema2.go index ca55d96c2c..7dfd3c5d8c 100644 --- a/internal/image/docker_schema2.go +++ b/internal/image/docker_schema2.go @@ -402,3 +402,12 @@ func v1ConfigFromConfigJSON(configJSON []byte, v1ID, parentV1ID string, throwawa func (m *manifestSchema2) SupportsEncryption(context.Context) bool { return false } + +// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image +// (and the code can handle that). +// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted +// algorithms depends not on the current format, but possibly on the target of a conversion (if UpdatedImage converts +// to a different manifest format). +func (m *manifestSchema2) CanChangeLayerCompression(mimeType string) bool { + return m.m.CanChangeLayerCompression(mimeType) +} diff --git a/internal/image/docker_schema2_test.go b/internal/image/docker_schema2_test.go index b20c99d4b2..a6dc014406 100644 --- a/internal/image/docker_schema2_test.go +++ b/internal/image/docker_schema2_test.go @@ -653,3 +653,14 @@ func TestConvertSchema2ToManifestOCIWithAnnotations(t *testing.T) { require.NoError(t, err) assert.NotEqual(t, res.LayerInfos(), layerInfoOverwrites) } + +func TestManifestSchema2CanChangeLayerCompression(t *testing.T) { + for _, m := range []genericManifest{ + manifestSchema2FromFixture(t, mocks.ForbiddenImageSource{}, "schema2.json", false), + manifestSchema2FromComponentsLikeFixture(nil), + } { + assert.True(t, m.CanChangeLayerCompression(manifest.DockerV2Schema2LayerMediaType)) + // Some projects like to use squashfs and other unspecified formats for layers; don’t touch those. + assert.False(t, m.CanChangeLayerCompression("a completely unknown and quite possibly invalid MIME type")) + } +} diff --git a/internal/image/manifest.go b/internal/image/manifest.go index 36d70b5c23..6b5f345388 100644 --- a/internal/image/manifest.go +++ b/internal/image/manifest.go @@ -12,9 +12,8 @@ import ( ) // genericManifest is an interface for parsing, modifying image manifests and related data. -// Note that the public methods are intended to be a subset of types.Image -// so that embedding a genericManifest into structs works. -// will support v1 one day... +// The public methods are related to types.Image so that embedding a genericManifest implements most of it, +// but there are also public methods that are only visible by packages that can import c/image/internal/image. type genericManifest interface { serialize() ([]byte, error) manifestMIMEType() string @@ -51,6 +50,16 @@ type genericManifest interface { // the process of updating a manifest between different manifest types was to update then convert. // This resulted in some fields in the update being lost. This has been fixed by: https://github.com/containers/image/pull/836 SupportsEncryption(ctx context.Context) bool + + // The following methods are not a part of types.Image: + // === + + // CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image + // (and the code can handle that). + // NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted + // algorithms depends not on the current format, but possibly on the target of a conversion (if UpdatedImage converts + // to a different manifest format). + CanChangeLayerCompression(mimeType string) bool } // manifestInstanceFromBlob returns a genericManifest implementation for (manblob, mt) in src. diff --git a/internal/image/oci.go b/internal/image/oci.go index b6b2e52147..af1a90e82d 100644 --- a/internal/image/oci.go +++ b/internal/image/oci.go @@ -260,3 +260,12 @@ func (m *manifestOCI1) convertToManifestSchema1(ctx context.Context, options *ty func (m *manifestOCI1) SupportsEncryption(context.Context) bool { return true } + +// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image +// (and the code can handle that). +// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted +// algorithms depends not on the current format, but possibly on the target of a conversion (if UpdatedImage converts +// to a different manifest format). +func (m *manifestOCI1) CanChangeLayerCompression(mimeType string) bool { + return m.m.CanChangeLayerCompression(mimeType) +} diff --git a/internal/image/oci_test.go b/internal/image/oci_test.go index bafc81d327..bb72601a87 100644 --- a/internal/image/oci_test.go +++ b/internal/image/oci_test.go @@ -542,3 +542,17 @@ func TestConvertToV2S2WithInvalidMIMEType(t *testing.T) { _, err = manifestOCI1FromManifest(originalSrc, manifest) require.NoError(t, err) } + +func TestManifestOCI1CanChangeLayerCompression(t *testing.T) { + for _, m := range []genericManifest{ + manifestOCI1FromFixture(t, mocks.ForbiddenImageSource{}, "oci1.json"), + manifestOCI1FromComponentsLikeFixture(nil), + } { + assert.True(t, m.CanChangeLayerCompression(imgspecv1.MediaTypeImageLayerGzip)) + // Some projects like to use squashfs and other unspecified formats for layers; don’t touch those. + assert.False(t, m.CanChangeLayerCompression("a completely unknown and quite possibly invalid MIME type")) + } + + artifact := manifestOCI1FromFixture(t, mocks.ForbiddenImageSource{}, "oci1-artifact.json") + assert.False(t, artifact.CanChangeLayerCompression(imgspecv1.MediaTypeImageLayerGzip)) +} diff --git a/manifest/common.go b/manifest/common.go index 2b2014831b..9cf7dd3a94 100644 --- a/manifest/common.go +++ b/manifest/common.go @@ -218,3 +218,13 @@ type ManifestLayerCompressionIncompatibilityError struct { func (m ManifestLayerCompressionIncompatibilityError) Error() string { return m.text } + +// compressionVariantsRecognizeMIMEType returns true if variantTable contains data about compressing/decompressing layers with mimeType +// Note that the caller still needs to worry about a specific algorithm not being supported. +func compressionVariantsRecognizeMIMEType(variantTable []compressionMIMETypeSet, mimeType string) bool { + if mimeType == mtsUnsupportedMIMEType { // Prevent matching against the {algo:mtsUnsupportedMIMEType} entries + return false + } + variants := findCompressionMIMETypeSet(variantTable, mimeType) + return variants != nil // Alternatively, this could be len(variants) > 1, but really the caller should ask about a specific algorithm. +} diff --git a/manifest/docker_schema2.go b/manifest/docker_schema2.go index 1f4db54eed..8b3fbdd399 100644 --- a/manifest/docker_schema2.go +++ b/manifest/docker_schema2.go @@ -295,3 +295,11 @@ func (m *Schema2) ImageID([]digest.Digest) (string, error) { } return m.ConfigDescriptor.Digest.Hex(), nil } + +// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image +// (and the code can handle that). +// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted +// algorithms depends not on the current format, but possibly on the target of a conversion. +func (m *Schema2) CanChangeLayerCompression(mimeType string) bool { + return compressionVariantsRecognizeMIMEType(schema2CompressionMIMETypeSets, mimeType) +} diff --git a/manifest/docker_schema2_test.go b/manifest/docker_schema2_test.go index cd89b97016..bdd10d0987 100644 --- a/manifest/docker_schema2_test.go +++ b/manifest/docker_schema2_test.go @@ -203,3 +203,11 @@ func TestSchema2ImageID(t *testing.T) { require.NoError(t, err) assert.Equal(t, "b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7", id) } + +func TestSchema2CanChangeLayerCompression(t *testing.T) { + m := manifestSchema2FromFixture(t, "v2s2.manifest.json") + + assert.True(t, m.CanChangeLayerCompression(DockerV2Schema2LayerMediaType)) + // Some projects like to use squashfs and other unspecified formats for layers; don’t touch those. + assert.False(t, m.CanChangeLayerCompression("a completely unknown and quite possibly invalid MIME type")) +} diff --git a/manifest/oci.go b/manifest/oci.go index 2fa23e69cb..11927ab5ec 100644 --- a/manifest/oci.go +++ b/manifest/oci.go @@ -116,6 +116,12 @@ var oci1CompressionMIMETypeSets = []compressionMIMETypeSet{ // UpdateLayerInfos replaces the original layers with the specified BlobInfos (size+digest+urls+mediatype), in order (the root layer first, and then successive layered layers) // The returned error will be a manifest.ManifestLayerCompressionIncompatibilityError if any of the layerInfos includes a combination of CompressionOperation and // CompressionAlgorithm that isn't supported by OCI. +// +// It’s generally the caller’s responsibility to determine whether a particular edit is acceptable, rather than relying on +// failures of this function, because the layer is typically created _before_ UpdateLayerInfos is called, because UpdateLayerInfos needs +// to know the final digest). See OCI1.CanChangeLayerCompression for some help in determining this; other aspects like compression +// algorithms that might not be supported by a format, or the limited set of MIME types accepted for encryption, are not currently +// handled — that logic should eventually also be provided as OCI1 methods, not hard-coded in callers. func (m *OCI1) UpdateLayerInfos(layerInfos []types.BlobInfo) error { if len(m.Layers) != len(layerInfos) { return errors.Errorf("Error preparing updated manifest: layer count changed from %d to %d", len(m.Layers), len(layerInfos)) @@ -247,3 +253,14 @@ func (m *OCI1) ImageID([]digest.Digest) (string, error) { } return m.Config.Digest.Hex(), nil } + +// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image +// (and the code can handle that). +// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted +// algorithms depends not on the current format, but possibly on the target of a conversion. +func (m *OCI1) CanChangeLayerCompression(mimeType string) bool { + if m.Config.MediaType != imgspecv1.MediaTypeImageConfig { + return false + } + return compressionVariantsRecognizeMIMEType(oci1CompressionMIMETypeSets, mimeType) +} diff --git a/manifest/oci_test.go b/manifest/oci_test.go index b752051490..4eabc0545f 100644 --- a/manifest/oci_test.go +++ b/manifest/oci_test.go @@ -373,3 +373,14 @@ func TestOCI1ImageID(t *testing.T) { var expected NonImageArtifactError assert.ErrorAs(t, err, &expected) } + +func TestOCI1CanChangeLayerCompression(t *testing.T) { + m := manifestOCI1FromFixture(t, "ociv1.manifest.json") + + assert.True(t, m.CanChangeLayerCompression(imgspecv1.MediaTypeImageLayerGzip)) + // Some projects like to use squashfs and other unspecified formats for layers; don’t touch those. + assert.False(t, m.CanChangeLayerCompression("a completely unknown and quite possibly invalid MIME type")) + + artifact := manifestOCI1FromFixture(t, "ociv1.artifact.json") + assert.False(t, artifact.CanChangeLayerCompression(imgspecv1.MediaTypeImageLayerGzip)) +}