From 0365a66af5a4fccf3c9400a3e675346e4ec35297 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 2 Feb 2024 09:37:44 -0800 Subject: [PATCH] [7.1.0] Teach ExecLogConverter to read the compact format. Share the conversion logic between ExecLogConverter and CompactSpawnLogContextTest. Also make the following improvements: * Cache entries by ID to avoid repeatedly parsing the same entry. * Avoid visiting the same entry multiple times when traversing a depset, as it has pathological behavior for large depsets with lots of cross edges. * Sort the `listed_outputs` field, per the proto definition. PiperOrigin-RevId: 603709859 Change-Id: I6e62ffff095802f96c0955ce05b9c45605784d93 --- .../com/google/devtools/build/lib/exec/BUILD | 8 +- .../build/lib/exec/SpawnLogReconstructor.java | 234 ++++++++++++++++++ .../lib/exec/CompactSpawnLogContextTest.java | 153 +----------- .../build/execlog/ConverterOptions.java | 3 +- .../build/execlog/ExecLogConverter.java | 5 + 5 files changed, 253 insertions(+), 150 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/exec/SpawnLogReconstructor.java diff --git a/src/main/java/com/google/devtools/build/lib/exec/BUILD b/src/main/java/com/google/devtools/build/lib/exec/BUILD index 6a02fee543dcac..ae365f3899a59d 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/BUILD +++ b/src/main/java/com/google/devtools/build/lib/exec/BUILD @@ -288,12 +288,18 @@ java_library( java_library( name = "spawn_log_context_utils", - srcs = ["StableSort.java"], + srcs = [ + "SpawnLogReconstructor.java", + "StableSort.java", + ], deps = [ "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/util/io", + "//src/main/java/com/google/devtools/build/lib/util:pair", "//src/main/protobuf:spawn_java_proto", "//third_party:guava", + "//third_party:jsr305", + "@zstd-jni", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnLogReconstructor.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnLogReconstructor.java new file mode 100644 index 00000000000000..7028a33e917024 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnLogReconstructor.java @@ -0,0 +1,234 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.exec; + +import com.github.luben.zstd.ZstdInputStream; +import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.exec.Protos.ExecLogEntry; +import com.google.devtools.build.lib.exec.Protos.File; +import com.google.devtools.build.lib.exec.Protos.SpawnExec; +import com.google.devtools.build.lib.util.Pair; +import com.google.devtools.build.lib.util.io.MessageInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayDeque; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import javax.annotation.Nullable; + +/** Reconstructs an execution log in expanded format from the compact format representation. */ +public final class SpawnLogReconstructor implements MessageInputStream { + private final ZstdInputStream in; + + private final HashMap fileMap = new HashMap<>(); + private final HashMap>> dirMap = new HashMap<>(); + private final HashMap symlinkMap = new HashMap<>(); + private final HashMap setMap = new HashMap<>(); + private String hashFunctionName = ""; + + public SpawnLogReconstructor(InputStream in) throws IOException { + this.in = new ZstdInputStream(in); + } + + @Override + @Nullable + public SpawnExec read() throws IOException { + ExecLogEntry entry; + while ((entry = ExecLogEntry.parseDelimitedFrom(in)) != null) { + switch (entry.getTypeCase()) { + case INVOCATION: + hashFunctionName = entry.getInvocation().getHashFunctionName(); + break; + case FILE: + fileMap.put(entry.getId(), reconstructFile(entry.getFile())); + break; + case DIRECTORY: + dirMap.put(entry.getId(), reconstructDir(entry.getDirectory())); + break; + case UNRESOLVED_SYMLINK: + symlinkMap.put(entry.getId(), reconstructSymlink(entry.getUnresolvedSymlink())); + break; + case INPUT_SET: + setMap.put(entry.getId(), entry.getInputSet()); + break; + case SPAWN: + return reconstructSpawnExec(entry.getSpawn()); + default: + throw new IOException( + String.format("unknown entry type %d", entry.getTypeCase().getNumber())); + } + } + return null; + } + + private SpawnExec reconstructSpawnExec(ExecLogEntry.Spawn entry) throws IOException { + SpawnExec.Builder builder = + SpawnExec.newBuilder() + .addAllCommandArgs(entry.getArgsList()) + .addAllEnvironmentVariables(entry.getEnvVarsList()) + .setTargetLabel(entry.getTargetLabel()) + .setMnemonic(entry.getMnemonic()) + .setExitCode(entry.getExitCode()) + .setStatus(entry.getStatus()) + .setRunner(entry.getRunner()) + .setCacheHit(entry.getCacheHit()) + .setRemotable(entry.getRemotable()) + .setCacheable(entry.getCacheable()) + .setRemoteCacheable(entry.getRemoteCacheable()) + .setTimeoutMillis(entry.getTimeoutMillis()) + .setMetrics(entry.getMetrics()); + + if (entry.hasPlatform()) { + builder.setPlatform(entry.getPlatform()); + } + + SortedMap inputs = reconstructInputs(entry.getInputSetId()); + SortedMap toolInputs = reconstructInputs(entry.getToolSetId()); + + for (Map.Entry e : inputs.entrySet()) { + File file = e.getValue(); + if (toolInputs.containsKey(e.getKey())) { + file = file.toBuilder().setIsTool(true).build(); + } + builder.addInputs(file); + } + + SortedSet listedOutputs = new TreeSet<>(); + + for (ExecLogEntry.Output output : entry.getOutputsList()) { + switch (output.getTypeCase()) { + case FILE_ID: + File file = getFromMap(fileMap, output.getFileId()); + listedOutputs.add(file.getPath()); + builder.addActualOutputs(file); + break; + case DIRECTORY_ID: + Pair> dir = getFromMap(dirMap, output.getDirectoryId()); + listedOutputs.add(dir.getFirst()); + for (File dirFile : dir.getSecond()) { + builder.addActualOutputs(dirFile); + } + break; + case UNRESOLVED_SYMLINK_ID: + File symlink = getFromMap(symlinkMap, output.getUnresolvedSymlinkId()); + listedOutputs.add(symlink.getPath()); + builder.addActualOutputs(symlink); + break; + case INVALID_OUTPUT_PATH: + listedOutputs.add(output.getInvalidOutputPath()); + break; + default: + throw new IOException( + String.format("unknown output type %d", output.getTypeCase().getNumber())); + } + } + + builder.addAllListedOutputs(listedOutputs); + + if (entry.hasDigest()) { + builder.setDigest(entry.getDigest().toBuilder().setHashFunctionName(hashFunctionName)); + } + + return builder.build(); + } + + private SortedMap reconstructInputs(int setId) throws IOException { + TreeMap inputs = new TreeMap<>(); + ArrayDeque setsToVisit = new ArrayDeque<>(); + HashSet visited = new HashSet<>(); + if (setId != 0) { + setsToVisit.addLast(setId); + visited.add(setId); + } + while (!setsToVisit.isEmpty()) { + ExecLogEntry.InputSet set = getFromMap(setMap, setsToVisit.removeFirst()); + for (int fileId : set.getFileIdsList()) { + if (visited.add(fileId)) { + File file = getFromMap(fileMap, fileId); + inputs.put(file.getPath(), file); + } + } + for (int dirId : set.getDirectoryIdsList()) { + if (visited.add(dirId)) { + Pair> dir = getFromMap(dirMap, dirId); + for (File dirFile : dir.getSecond()) { + inputs.put(dirFile.getPath(), dirFile); + } + } + } + for (int symlinkId : set.getUnresolvedSymlinkIdsList()) { + if (visited.add(symlinkId)) { + File symlink = getFromMap(symlinkMap, symlinkId); + inputs.put(symlink.getPath(), symlink); + } + } + for (int transitiveSetId : set.getTransitiveSetIdsList()) { + if (visited.add(transitiveSetId)) { + setsToVisit.addLast(transitiveSetId); + } + } + } + return inputs; + } + + private Pair> reconstructDir(ExecLogEntry.Directory dir) { + ImmutableList.Builder builder = + ImmutableList.builderWithExpectedSize(dir.getFilesCount()); + for (ExecLogEntry.File dirFile : dir.getFilesList()) { + builder.add(reconstructFile(dir, dirFile)); + } + return Pair.of(dir.getPath(), builder.build()); + } + + private File reconstructFile(ExecLogEntry.File entry) { + return reconstructFile(null, entry); + } + + private File reconstructFile( + @Nullable ExecLogEntry.Directory parentDir, ExecLogEntry.File entry) { + File.Builder builder = File.newBuilder(); + builder.setPath( + parentDir != null ? parentDir.getPath() + "/" + entry.getPath() : entry.getPath()); + if (entry.hasDigest()) { + builder.setDigest(entry.getDigest().toBuilder().setHashFunctionName(hashFunctionName)); + } + return builder.build(); + } + + private static File reconstructSymlink(ExecLogEntry.UnresolvedSymlink entry) { + return File.newBuilder() + .setPath(entry.getPath()) + .setSymlinkTargetPath(entry.getTargetPath()) + .build(); + } + + private static T getFromMap(Map map, int id) throws IOException { + T value = map.get(id); + if (value == null) { + throw new IOException(String.format("referenced entry %d is missing or has wrong type", id)); + } + return value; + } + + @Override + public void close() throws IOException { + in.close(); + } +} diff --git a/src/test/java/com/google/devtools/build/lib/exec/CompactSpawnLogContextTest.java b/src/test/java/com/google/devtools/build/lib/exec/CompactSpawnLogContextTest.java index 1e5d1afed35273..a13faeca807b6d 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/CompactSpawnLogContextTest.java +++ b/src/test/java/com/google/devtools/build/lib/exec/CompactSpawnLogContextTest.java @@ -13,17 +13,14 @@ // limitations under the License. package com.google.devtools.build.lib.exec; -import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.truth.Truth.assertThat; -import com.github.luben.zstd.ZstdInputStream; import com.google.common.collect.ImmutableMap; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.util.ActionsTestUtil; import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; -import com.google.devtools.build.lib.exec.Protos.ExecLogEntry; import com.google.devtools.build.lib.exec.Protos.File; import com.google.devtools.build.lib.exec.Protos.SpawnExec; import com.google.devtools.build.lib.exec.util.SpawnBuilder; @@ -35,14 +32,7 @@ import com.google.testing.junit.testparameterinjector.TestParameter; import com.google.testing.junit.testparameterinjector.TestParameterInjector; import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; -import javax.annotation.Nullable; import org.junit.Test; import org.junit.runner.RunWith; @@ -126,148 +116,15 @@ protected void closeAndAssertLog(SpawnLogContext context, SpawnExec... expected) throws IOException { context.close(); - HashMap entryMap = new HashMap<>(); - String hashFunctionName = ""; - ArrayList actual = new ArrayList<>(); - try (InputStream in = new ZstdInputStream(logPath.getInputStream())) { - ExecLogEntry e; - while ((e = ExecLogEntry.parseDelimitedFrom(in)) != null) { - entryMap.put(e.getId(), e); - - if (e.hasInvocation()) { - hashFunctionName = e.getInvocation().getHashFunctionName(); - } - - if (e.hasSpawn()) { - actual.add(reconstructSpawnExec(e.getSpawn(), hashFunctionName, entryMap)); - } + try (SpawnLogReconstructor reconstructor = + new SpawnLogReconstructor(logPath.getInputStream())) { + SpawnExec ex; + while ((ex = reconstructor.read()) != null) { + actual.add(ex); } } assertThat(actual).containsExactlyElementsIn(expected).inOrder(); } - - private SpawnExec reconstructSpawnExec( - ExecLogEntry.Spawn entry, String hashFunctionName, Map entryMap) { - SpawnExec.Builder builder = - SpawnExec.newBuilder() - .addAllCommandArgs(entry.getArgsList()) - .addAllEnvironmentVariables(entry.getEnvVarsList()) - .setTargetLabel(entry.getTargetLabel()) - .setMnemonic(entry.getMnemonic()) - .setExitCode(entry.getExitCode()) - .setStatus(entry.getStatus()) - .setRunner(entry.getRunner()) - .setCacheHit(entry.getCacheHit()) - .setRemotable(entry.getRemotable()) - .setCacheable(entry.getCacheable()) - .setRemoteCacheable(entry.getRemoteCacheable()) - .setTimeoutMillis(entry.getTimeoutMillis()) - .setMetrics(entry.getMetrics()); - - if (entry.hasPlatform()) { - builder.setPlatform(entry.getPlatform()); - } - - SortedMap inputs = - reconstructInputs(entry.getInputSetId(), hashFunctionName, entryMap); - SortedMap toolInputs = - reconstructInputs(entry.getToolSetId(), hashFunctionName, entryMap); - - for (Map.Entry e : inputs.entrySet()) { - File file = e.getValue(); - if (toolInputs.containsKey(e.getKey())) { - file = file.toBuilder().setIsTool(true).build(); - } - builder.addInputs(file); - } - - for (ExecLogEntry.Output output : entry.getOutputsList()) { - switch (output.getTypeCase()) { - case FILE_ID: - ExecLogEntry.File file = checkNotNull(entryMap.get(output.getFileId())).getFile(); - builder.addListedOutputs(file.getPath()); - builder.addActualOutputs(reconstructFile(/* parentDir= */ null, file, hashFunctionName)); - break; - case DIRECTORY_ID: - ExecLogEntry.Directory dir = - checkNotNull(entryMap.get(output.getDirectoryId())).getDirectory(); - builder.addListedOutputs(dir.getPath()); - for (ExecLogEntry.File dirFile : dir.getFilesList()) { - builder.addActualOutputs(reconstructFile(dir, dirFile, hashFunctionName)); - } - break; - case UNRESOLVED_SYMLINK_ID: - ExecLogEntry.UnresolvedSymlink symlink = - checkNotNull(entryMap.get(output.getUnresolvedSymlinkId())).getUnresolvedSymlink(); - builder.addListedOutputs(symlink.getPath()); - builder.addActualOutputs(reconstructSymlink(symlink)); - break; - case INVALID_OUTPUT_PATH: - builder.addListedOutputs(output.getInvalidOutputPath()); - break; - case TYPE_NOT_SET: - throw new AssertionError("malformed log entry"); - } - } - - if (entry.hasDigest()) { - builder.setDigest( - entry.getDigest().toBuilder().setHashFunctionName(hashFunctionName).build()); - } - - return builder.build(); - } - - private SortedMap reconstructInputs( - int setId, String hashFunctionName, Map entryMap) { - TreeMap inputs = new TreeMap<>(); - ArrayDeque setsToVisit = new ArrayDeque<>(); - if (setId != 0) { - setsToVisit.addLast(setId); - } - while (!setsToVisit.isEmpty()) { - ExecLogEntry.InputSet set = - checkNotNull(entryMap.get(setsToVisit.removeFirst())).getInputSet(); - for (int fileId : set.getFileIdsList()) { - ExecLogEntry.File file = checkNotNull(entryMap.get(fileId)).getFile(); - inputs.put(file.getPath(), reconstructFile(null, file, hashFunctionName)); - } - for (int dirId : set.getDirectoryIdsList()) { - ExecLogEntry.Directory dir = checkNotNull(entryMap.get(dirId)).getDirectory(); - for (ExecLogEntry.File dirFile : dir.getFilesList()) { - inputs.put(dirFile.getPath(), reconstructFile(dir, dirFile, hashFunctionName)); - } - } - for (int symlinkId : set.getUnresolvedSymlinkIdsList()) { - ExecLogEntry.UnresolvedSymlink symlink = - checkNotNull(entryMap.get(symlinkId)).getUnresolvedSymlink(); - inputs.put(symlink.getPath(), reconstructSymlink(symlink)); - } - setsToVisit.addAll(set.getTransitiveSetIdsList()); - } - return inputs; - } - - private File reconstructFile( - @Nullable ExecLogEntry.Directory parentDir, ExecLogEntry.File file, String hashFunctionName) { - File.Builder builder = File.newBuilder(); - - builder.setPath( - parentDir != null ? parentDir.getPath() + "/" + file.getPath() : file.getPath()); - - if (file.hasDigest()) { - builder.setDigest(file.getDigest().toBuilder().setHashFunctionName(hashFunctionName).build()); - } - - return builder.build(); - } - - private File reconstructSymlink(ExecLogEntry.UnresolvedSymlink symlink) { - return File.newBuilder() - .setPath(symlink.getPath()) - .setSymlinkTargetPath(symlink.getTargetPath()) - .build(); - } } diff --git a/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ConverterOptions.java b/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ConverterOptions.java index 61b515efb11203..1a717003f5221e 100644 --- a/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ConverterOptions.java +++ b/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ConverterOptions.java @@ -62,7 +62,8 @@ public class ConverterOptions extends OptionsBase { enum Format { BINARY, - JSON + JSON, + COMPACT } private static final ImmutableMap FORMAT_BY_NAME = diff --git a/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ExecLogConverter.java b/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ExecLogConverter.java index 4ab556d7c2b3af..e6052662a09ee1 100644 --- a/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ExecLogConverter.java +++ b/src/tools/execlog/src/main/java/com/google/devtools/build/execlog/ExecLogConverter.java @@ -15,6 +15,7 @@ import com.google.devtools.build.execlog.ConverterOptions.FormatAndPath; import com.google.devtools.build.lib.exec.Protos.SpawnExec; +import com.google.devtools.build.lib.exec.SpawnLogReconstructor; import com.google.devtools.build.lib.exec.StableSort; import com.google.devtools.build.lib.util.io.MessageInputStream; import com.google.devtools.build.lib.util.io.MessageInputStreamWrapper.BinaryInputStreamWrapper; @@ -40,6 +41,8 @@ private static MessageInputStream getMessageInputStream(FormatAndPath return new BinaryInputStreamWrapper<>(in, SpawnExec.getDefaultInstance()); case JSON: return new JsonInputStreamWrapper<>(in, SpawnExec.getDefaultInstance()); + case COMPACT: + return new SpawnLogReconstructor(in); } throw new AssertionError("unsupported input format"); } @@ -52,6 +55,8 @@ private static MessageOutputStream getMessageOutputStream(FormatAndPa return new BinaryOutputStreamWrapper<>(out); case JSON: return new JsonOutputStreamWrapper<>(out); + case COMPACT: + // unsupported } throw new AssertionError("unsupported output format"); }