[6.4.0] Support multiple remote execution digest functions #19042

Merged
@@ -201,17 +201,24 @@ public static ClientServerCompatibilityStatus checkClientServerCompatibility(
return result.build(); // No point checking other execution fields.
}

-    // Check execution digest function.
-    if (execCap.getDigestFunction() == DigestFunction.Value.UNKNOWN) {
-      // Server side error -- this is not supposed to happen.
-      result.addError("Remote server error: UNKNOWN execution digest function.");
-    }
-    if (execCap.getDigestFunction() != digestFunction) {
+    // Check execution digest function. The protocol only later added
+    // support for multiple digest functions for remote execution, so
+    // check both the singular and repeated field.
+    if (execCap.getDigestFunctionsList().isEmpty()
+        && execCap.getDigestFunction() != DigestFunction.Value.UNKNOWN) {
+      if (execCap.getDigestFunction() != digestFunction) {
+        result.addError(
+            String.format(
+                "Cannot use hash function %s with remote execution. "
+                    + "Server supported function is %s",
+                digestFunction, execCap.getDigestFunction()));
+      }
+    } else if (!execCap.getDigestFunctionsList().contains(digestFunction)) {
       result.addError(
           String.format(
               "Cannot use hash function %s with remote execution. "
-                  + "Server supported function is %s",
-              digestFunction, execCap.getDigestFunction()));
+                  + "Server supported functions are: %s",
+              digestFunction, execCap.getDigestFunctionsList()));
}

// Check execution priority is in the supported range.
@@ -582,6 +582,31 @@ public void testCheckClientServerCompatibility_executionCapsOnly() throws Exception
assertThat(st.isOk()).isTrue();
}

@Test
public void testCheckClientServerCompatibility_executionCapsDigestFunctionsList()
throws Exception {
ServerCapabilities caps =
ServerCapabilities.newBuilder()
.setLowApiVersion(ApiVersion.current.toSemVer())
.setHighApiVersion(ApiVersion.current.toSemVer())
.setExecutionCapabilities(
ExecutionCapabilities.newBuilder()
.addDigestFunctions(DigestFunction.Value.MD5)
.addDigestFunctions(DigestFunction.Value.SHA256)
.setExecEnabled(true)
.build())
.build();
RemoteOptions remoteOptions = Options.getDefaults(RemoteOptions.class);
remoteOptions.remoteExecutor = "server:port";
RemoteServerCapabilities.ClientServerCompatibilityStatus st =
RemoteServerCapabilities.checkClientServerCompatibility(
caps,
remoteOptions,
DigestFunction.Value.SHA256,
ServerCapabilitiesRequirement.EXECUTION);
assertThat(st.isOk()).isTrue();
}

@Test
public void testCheckClientServerCompatibility_cacheCapsOnly() throws Exception {
ServerCapabilities caps =
@@ -104,7 +104,12 @@ service Execution {
// send a [PreconditionFailure][google.rpc.PreconditionFailure] error detail
// where, for each requested blob not present in the CAS, there is a
// `Violation` with a `type` of `MISSING` and a `subject` of
// `"blobs/{hash}/{size}"` indicating the digest of the missing blob.
// `"blobs/{digest_function/}{hash}/{size}"` indicating the digest of the
// missing blob. The `subject` is formatted the same way as the
// `resource_name` provided to
// [ByteStream.Read][google.bytestream.ByteStream.Read], with the leading
// instance name omitted. `digest_function` MUST thus be omitted if its value
// is one of MD5, MURMUR3, SHA1, SHA256, SHA384, SHA512, or VSO.
//
// The server does not need to guarantee that a call to this method leads to
// at most one execution of the action. The server MAY execute the action
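
As a concrete illustration of the `subject` format above, here is a minimal client-side sketch; it is not part of this change, and the helper name `parseMissingBlobSubject` and its error handling are assumptions.

```java
import build.bazel.remote.execution.v2.Digest;

// Illustrative only: parse a MISSING violation subject of the form
// "blobs/{digest_function/}{hash}/{size}". The digest_function segment is
// omitted for MD5, MURMUR3, SHA1, SHA256, SHA384, SHA512, and VSO.
static Digest parseMissingBlobSubject(String subject) {
  String[] parts = subject.split("/");
  if (parts.length != 3 && parts.length != 4) {
    throw new IllegalArgumentException("unexpected subject: " + subject);
  }
  // With four segments the second names the digest function, e.g.
  // "blobs/sha256tree/<hash>/<size>"; with three segments the function must
  // be inferred from the hash length and the server's capabilities.
  int hashIndex = parts.length == 4 ? 2 : 1;
  return Digest.newBuilder()
      .setHash(parts[hashIndex])
      .setSizeBytes(Long.parseLong(parts[hashIndex + 1]))
      .build();
}
```
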
@@ -204,31 +209,38 @@ service ActionCache {
// [Write method][google.bytestream.ByteStream.Write] of the ByteStream API.
//
// For uncompressed data, the `WriteRequest.resource_name` is of the following form:
- // `{instance_name}/uploads/{uuid}/blobs/{hash}/{size}{/optional_metadata}`
+ // `{instance_name}/uploads/{uuid}/blobs/{digest_function/}{hash}/{size}{/optional_metadata}`
//
// Where:
// * `instance_name` is an identifier, possibly containing multiple path
// segments, used to distinguish between the various instances on the server,
// in a manner defined by the server. If it is the empty path, the leading
// slash is omitted, so that the `resource_name` becomes
- // `uploads/{uuid}/blobs/{hash}/{size}{/optional_metadata}`.
+ // `uploads/{uuid}/blobs/{digest_function/}{hash}/{size}{/optional_metadata}`.
// To simplify parsing, a path segment cannot equal any of the following
// keywords: `blobs`, `uploads`, `actions`, `actionResults`, `operations`,
// `capabilities` or `compressed-blobs`.
// * `uuid` is a version 4 UUID generated by the client, used to avoid
// collisions between concurrent uploads of the same data. Clients MAY
// reuse the same `uuid` for uploading different blobs.
// * `digest_function` is a lowercase string form of a `DigestFunction.Value`
// enum, indicating which digest function was used to compute `hash`. If the
// digest function used is one of MD5, MURMUR3, SHA1, SHA256, SHA384, SHA512,
// or VSO, this component MUST be omitted. In that case the server SHOULD
// infer the digest function using the length of the `hash` and the digest
// functions announced in the server's capabilities.
// * `hash` and `size` refer to the [Digest][build.bazel.remote.execution.v2.Digest]
// of the data being uploaded.
// * `optional_metadata` is implementation specific data, which clients MAY omit.
// Servers MAY ignore this metadata.
//
// Data can alternatively be uploaded in compressed form, with the following
// `WriteRequest.resource_name` form:
- // `{instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{uncompressed_hash}/{uncompressed_size}{/optional_metadata}`
+ // `{instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{uncompressed_hash}/{uncompressed_size}{/optional_metadata}`
//
// Where:
- // * `instance_name`, `uuid` and `optional_metadata` are defined as above.
+ // * `instance_name`, `uuid`, `digest_function` and `optional_metadata` are
+ //   defined as above.
// * `compressor` is a lowercase string form of a `Compressor.Value` enum
// other than `identity`, which is supported by the server and advertised in
// [CacheCapabilities.supported_compressor][build.bazel.remote.execution.v2.CacheCapabilities.supported_compressor].
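
To make the upload form concrete, here is a minimal sketch of building an uncompressed `WriteRequest.resource_name` under the rules above; the class and method names are made up for illustration and are not part of this change.

```java
import build.bazel.remote.execution.v2.Digest;
import build.bazel.remote.execution.v2.DigestFunction;
import java.util.EnumSet;
import java.util.Set;
import java.util.UUID;

// Illustrative sketch: construct an uncompressed upload resource name.
final class UploadResourceNames {
  // Digest functions whose name segment MUST be omitted from resource names.
  static final Set<DigestFunction.Value> OMITTED =
      EnumSet.of(
          DigestFunction.Value.MD5,
          DigestFunction.Value.MURMUR3,
          DigestFunction.Value.SHA1,
          DigestFunction.Value.SHA256,
          DigestFunction.Value.SHA384,
          DigestFunction.Value.SHA512,
          DigestFunction.Value.VSO);

  static String uploadResourceName(
      String instanceName, UUID uuid, DigestFunction.Value fn, Digest digest) {
    StringBuilder sb = new StringBuilder();
    if (!instanceName.isEmpty()) {
      sb.append(instanceName).append('/');
    }
    sb.append("uploads/").append(uuid).append("/blobs/");
    if (!OMITTED.contains(fn)) {
      // digest_function is the lowercase form of the enum value, e.g. "sha256tree".
      sb.append(fn.name().toLowerCase(java.util.Locale.ROOT)).append('/');
    }
    sb.append(digest.getHash()).append('/').append(digest.getSizeBytes());
    return sb.toString();
  }
}
```

For example, `uploadResourceName("", uuid, DigestFunction.Value.SHA256TREE, digest)` yields `uploads/<uuid>/blobs/sha256tree/<hash>/<size>`, while the same call with SHA256 omits the function segment.
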
@@ -271,15 +283,17 @@ service ActionCache {
// [Read method][google.bytestream.ByteStream.Read] of the ByteStream API.
//
// For uncompressed data, the `ReadRequest.resource_name` is of the following form:
- // `{instance_name}/blobs/{hash}/{size}`
- // Where `instance_name`, `hash` and `size` are defined as for uploads.
+ // `{instance_name}/blobs/{digest_function/}{hash}/{size}`
+ // Where `instance_name`, `digest_function`, `hash` and `size` are defined as
+ // for uploads.
//
// Data can alternatively be downloaded in compressed form, with the following
// `ReadRequest.resource_name` form:
- // `{instance_name}/compressed-blobs/{compressor}/{uncompressed_hash}/{uncompressed_size}`
+ // `{instance_name}/compressed-blobs/{compressor}/{digest_function/}{uncompressed_hash}/{uncompressed_size}`
//
// Where:
- // * `instance_name` and `compressor` are defined as for uploads.
+ // * `instance_name`, `compressor` and `digest_function` are defined as for
+ //   uploads.
// * `uncompressed_hash` and `uncompressed_size` refer to the
// [Digest][build.bazel.remote.execution.v2.Digest] of the data being
// downloaded, once uncompressed. Clients MUST verify that these match
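
The download form can be built the same way; a short sketch that would live alongside the upload sketch above and reuses its `OMITTED` set (again an illustrative helper, not part of this change):

```java
// Illustrative sketch: an uncompressed download resource name mirrors the
// upload form, minus the "uploads/{uuid}" prefix. OMITTED is the set defined
// in the upload sketch above.
static String readResourceName(
    String instanceName, DigestFunction.Value fn, Digest digest) {
  StringBuilder sb = new StringBuilder();
  if (!instanceName.isEmpty()) {
    sb.append(instanceName).append('/');
  }
  sb.append("blobs/");
  if (!OMITTED.contains(fn)) {
    sb.append(fn.name().toLowerCase(java.util.Locale.ROOT)).append('/');
  }
  sb.append(digest.getHash()).append('/').append(digest.getSizeBytes());
  return sb.toString();
}
```
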
@@ -1365,6 +1379,15 @@ message ExecuteRequest {
// The server will have a default policy if this is not provided.
// This may be applied to both the ActionResult and the associated blobs.
ResultsCachePolicy results_cache_policy = 8;

// The digest function that was used to compute the action digest.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the action digest hash and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 9;
}
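
A minimal sketch of a client populating the new field; always setting it is harmless even for the legacy functions, since servers that predate the field ignore it. The method and parameter names here are assumptions for illustration.

```java
import build.bazel.remote.execution.v2.Digest;
import build.bazel.remote.execution.v2.DigestFunction;
import build.bazel.remote.execution.v2.ExecuteRequest;

// Illustrative only: build an ExecuteRequest that announces its digest function.
// Setting the field is only strictly needed for functions whose hash length is
// ambiguous (e.g. SHA256TREE vs. SHA256); for the legacy functions it may be
// left unset and inferred by the server.
static ExecuteRequest newExecuteRequest(
    String instanceName, Digest actionDigest, DigestFunction.Value fn) {
  return ExecuteRequest.newBuilder()
      .setInstanceName(instanceName)
      .setActionDigest(actionDigest)
      .setSkipCacheLookup(false)
      // Announcing the digest function unconditionally avoids relying on
      // server-side inference.
      .setDigestFunction(fn)
      .build();
}
```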

// A `LogFile` is a log stored in the CAS.
@@ -1471,6 +1494,10 @@ message ExecuteOperationMetadata {
// [ByteStream.Read][google.bytestream.ByteStream.Read] to stream the
// standard error from the endpoint hosting streamed responses.
string stderr_stream_name = 4;

// The client can read this field to view details about the ongoing
// execution.
ExecutedActionMetadata partial_execution_metadata = 5;
}
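
A short sketch of how a client might surface this field from a streamed `Operation` update; the helper name is illustrative and not taken from this change.

```java
import build.bazel.remote.execution.v2.ExecuteOperationMetadata;
import build.bazel.remote.execution.v2.ExecutedActionMetadata;
import com.google.longrunning.Operation;
import com.google.protobuf.InvalidProtocolBufferException;

// Illustrative only: report in-progress execution details from the metadata
// attached to each streamed Operation update.
static void logPartialMetadata(Operation operation)
    throws InvalidProtocolBufferException {
  ExecuteOperationMetadata metadata =
      operation.getMetadata().unpack(ExecuteOperationMetadata.class);
  if (metadata.hasPartialExecutionMetadata()) {
    ExecutedActionMetadata partial = metadata.getPartialExecutionMetadata();
    System.out.println("executing on worker: " + partial.getWorker());
  }
}
```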

// A request message for
@@ -1508,6 +1535,15 @@ message GetActionResultRequest {
// `output_files` (DEPRECATED since v2.1) in the
// [Command][build.bazel.remote.execution.v2.Command] message.
repeated string inline_output_files = 5;

// The digest function that was used to compute the action digest.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the action digest hash and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 6;
}

// A request message for
@@ -1532,6 +1568,15 @@ message UpdateActionResultRequest {
// The server will have a default policy if this is not provided.
// This may be applied to both the ActionResult and the associated blobs.
ResultsCachePolicy results_cache_policy = 4;

// The digest function that was used to compute the action digest.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the action digest hash and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 5;
}

// A request message for
@@ -1544,8 +1589,18 @@ message FindMissingBlobsRequest {
// omitted.
string instance_name = 1;

- // A list of the blobs to check.
+ // A list of the blobs to check. All digests MUST use the same digest
+ // function.
repeated Digest blob_digests = 2;

// The digest function of the blobs whose existence is checked.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the blob digest hashes and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 3;
}
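
A rough server-side sketch of the inference described above; the hash-length table is an assumption that covers only a few functions, and it deliberately skips functions (such as SHA256TREE) that share a hash length with SHA256 and therefore cannot be inferred.

```java
import build.bazel.remote.execution.v2.DigestFunction;
import java.util.List;

// Illustrative only: when digest_function is unset, pick the announced digest
// function whose hex hash length matches the request.
static DigestFunction.Value inferDigestFunction(
    List<DigestFunction.Value> announced, String hash) {
  for (DigestFunction.Value fn : announced) {
    int hexLength;
    switch (fn) {
      case MD5: hexLength = 32; break;
      case SHA1: hexLength = 40; break;
      case SHA256: hexLength = 64; break;
      case SHA384: hexLength = 96; break;
      case SHA512: hexLength = 128; break;
      default: continue; // e.g. SHA256TREE must be spelled out explicitly.
    }
    if (hash.length() == hexLength) {
      return fn;
    }
  }
  return DigestFunction.Value.UNKNOWN;
}
```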

// A response message for
@@ -1560,7 +1615,8 @@ message FindMissingBlobsResponse {
message BatchUpdateBlobsRequest {
// A request corresponding to a single blob that the client wants to upload.
message Request {
- // The digest of the blob. This MUST be the digest of `data`.
+ // The digest of the blob. This MUST be the digest of `data`. All
+ // digests MUST use the same digest function.
Digest digest = 1;

// The raw binary data.
@@ -1582,6 +1638,16 @@ message BatchUpdateBlobsRequest {

// The individual upload requests.
repeated Request requests = 2;

// The digest function that was used to compute the digests of the
// blobs being uploaded.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the blob digest hashes and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 5;
}

// A response message for
@@ -1610,12 +1676,22 @@ message BatchReadBlobsRequest {
// omitted.
string instance_name = 1;

- // The individual blob digests.
+ // The individual blob digests. All digests MUST use the same digest
+ // function.
repeated Digest digests = 2;

// A list of acceptable encodings for the returned inlined data, in no
// particular order. `IDENTITY` is always allowed even if not specified here.
repeated Compressor.Value acceptable_compressors = 3;

// The digest function of the blobs being requested.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the blob digest hashes and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 4;
}

// A response message for
@@ -1668,6 +1744,16 @@ message GetTreeRequest {
// If present, the server will use that token as an offset, returning only
// that page and the ones that succeed it.
string page_token = 4;

// The digest function that was used to compute the digest of the root
// directory.
//
// If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
// SHA384, SHA512, or VSO, the client MAY leave this field unset. In
// that case the server SHOULD infer the digest function using the
// length of the root digest hash and the digest functions announced
// in the server's capabilities.
DigestFunction.Value digest_function = 5;
}

// A response message for
@@ -1743,6 +1829,62 @@ message DigestFunction {
// cryptographic hash function and its collision properties are not strongly guaranteed.
// See https://github.com/aappleby/smhasher/wiki/MurmurHash3 .
MURMUR3 = 7;

// The SHA-256 digest function, modified to use a Merkle tree for
// large objects. This permits implementations to store large blobs
// as a decomposed sequence of 2^j sized chunks, where j >= 10,
// while being able to validate integrity at the chunk level.
//
// Furthermore, on systems that do not offer dedicated instructions
// for computing SHA-256 hashes (e.g., the Intel SHA and ARMv8
// cryptographic extensions), SHA256TREE hashes can be computed more
// efficiently than plain SHA-256 hashes by using generic SIMD
// extensions, such as Intel AVX2 or ARM NEON.
//
// SHA256TREE hashes are computed as follows:
//
// - For blobs that are 1024 bytes or smaller, the hash is computed
// using the regular SHA-256 digest function.
//
// - For blobs that are more than 1024 bytes in size, the hash is
// computed as follows:
//
// 1. The blob is partitioned into a left (leading) and right
// (trailing) blob. These blobs have lengths m and n
// respectively, where m = 2^k and 0 < n <= m.
//
// 2. Hashes of the left and right blob, Hash(left) and
// Hash(right) respectively, are computed by recursively
// applying the SHA256TREE algorithm.
//
// 3. A single invocation is made to the SHA-256 block cipher with
// the following parameters:
//
// M = Hash(left) || Hash(right)
// H = {
// 0xcbbb9d5d, 0x629a292a, 0x9159015a, 0x152fecd8,
// 0x67332667, 0x8eb44a87, 0xdb0c2e0d, 0x47b5481d,
// }
//
// The values of H are the leading fractional parts of the
// square roots of the 9th to the 16th prime number (23 to 53).
// This differs from plain SHA-256, where the first eight prime
// numbers (2 to 19) are used, thereby preventing trivial hash
// collisions between small and large objects.
//
// 4. The hash of the full blob can then be obtained by
// concatenating the outputs of the block cipher:
//
// Hash(blob) = a || b || c || d || e || f || g || h
//
// Addition of the original values of H, as normally done
// through the use of the Davies-Meyer structure, is not
// performed. This isn't necessary, as the block cipher is only
// invoked once.
//
// Test vectors of this digest function can be found in the
// accompanying sha256tree_test_vectors.txt file.
SHA256TREE = 8;
}
}
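
The only non-obvious arithmetic in the recursion above is choosing the split point m; a minimal, runnable sketch of just that step (the modified compression function itself is not shown):

```java
// Illustrative sketch of step 1 of the SHA256TREE recursion: for a blob larger
// than 1024 bytes, the left part has length m = 2^k and the right part has
// length n = length - m with 0 < n <= m. Equivalently, m is the largest power
// of two that is strictly smaller than the blob length.
static long sha256TreeSplitPoint(long length) {
  if (length <= 1024) {
    throw new IllegalArgumentException("blobs of <= 1024 bytes are hashed directly");
  }
  return Long.highestOneBit(length - 1);
}

// For example: a 1025-byte blob splits into 1024 + 1, a 2048-byte blob into
// 1024 + 1024, and a 3000-byte blob into 2048 + 952.
```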

@@ -1803,6 +1945,9 @@ message Compressor {
// It is advised to use algorithms such as Zstandard instead, as
// those are faster and/or provide a better compression ratio.
DEFLATE = 2;

// Brotli compression.
BROTLI = 3;
}
}

@@ -1843,7 +1988,10 @@ message CacheCapabilities {

// Capabilities of the remote execution system.
message ExecutionCapabilities {
- // Remote execution may only support a single digest function.
+ // Legacy field for indicating which digest function is supported by the
+ // remote execution system. It MUST be set to a value other than UNKNOWN.
+ // Implementations should consider the repeated digest_functions field
+ // first, falling back to this singular field if digest_functions is unset.
DigestFunction.Value digest_function = 1;

// Whether remote execution is enabled for the particular server/instance.
@@ -1854,6 +2002,20 @@ message ExecutionCapabilities {

// Supported node properties.
repeated string supported_node_properties = 4;

// All the digest functions supported by the remote execution system.
// If this field is set, it MUST also contain digest_function.
//
// Even if the remote execution system announces support for multiple
// digest functions, individual execution requests may only reference
// CAS objects using a single digest function. For example, it is not
// permitted to execute actions having both MD5 and SHA-256 hashed
// files in their input root.
//
// The CAS objects referenced by action results generated by the
// remote execution system MUST use the same digest function as the
// one used to construct the action.
repeated DigestFunction.Value digest_functions = 5;
}
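
A minimal sketch of a server advertising both fields consistently, per the requirement that `digest_functions` also contain `digest_function`; the particular values chosen here are only an example.

```java
import build.bazel.remote.execution.v2.DigestFunction;
import build.bazel.remote.execution.v2.ExecutionCapabilities;

// Illustrative only: advertise several digest functions while remaining
// compatible with clients that only read the legacy singular field.
static ExecutionCapabilities executionCapabilities() {
  return ExecutionCapabilities.newBuilder()
      .setExecEnabled(true)
      // Legacy field, read by clients that predate digest_functions.
      .setDigestFunction(DigestFunction.Value.SHA256)
      // New repeated field; includes the legacy value plus SHA256TREE.
      .addDigestFunctions(DigestFunction.Value.SHA256)
      .addDigestFunctions(DigestFunction.Value.SHA256TREE)
      .build();
}
```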

// Details for the tool used to call the API.