Skip to content

Commit

Permalink
Add streamz to CPU Compiler::RunBackends to record callstacks
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 692227925
  • Loading branch information
changm authored and tensorflower-gardener committed Nov 1, 2024
1 parent b6e6753 commit 1ce2bbc
Show file tree
Hide file tree
Showing 6 changed files with 291 additions and 0 deletions.
51 changes: 51 additions & 0 deletions third_party/xla/xla/service/cpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ load(
"xla_cc_test",
"xla_internal",
)
load("//xla/tests:build_defs.bzl", "xla_test")
load("//xla/tsl:tsl.bzl", "internal_visibility", "tf_openmp_copts", "tsl_copts")
load("//xla/tsl:tsl.default.bzl", "filegroup", "get_compatible_with_portable")
load(
Expand Down Expand Up @@ -226,6 +227,7 @@ cc_library(
":ir_emission_utils",
":ir_emitter",
":ir_emitter2",
":metrics",
":onednn_contraction_rewriter",
":onednn_ops_rewriter",
":parallel_task_assignment",
Expand Down Expand Up @@ -410,6 +412,31 @@ cc_library(
]),
)

# Test target for cpu_compiler_test.cc; built only for the "cpu" backend.
# The monitoring deps (collected_metrics, collection_registry) are used by the
# test to read back the metric recorded during compilation.
xla_test(
name = "cpu_compiler_test",
srcs = ["cpu_compiler_test.cc"],
backends = [
"cpu",
],
deps = [
"//xla:shape_util",
"//xla/pjrt:pjrt_client",
"//xla/service:hlo_runner_interface",
"//xla/service:hlo_runner_pjrt",
"//xla/service:platform_util",
"//xla/tests:hlo_test_base",
"//xla/tests:new_hlo_test_base",
"//xla/tests:pjrt_client_registry",
"//xla/tests:pjrt_cpu_client_registry",
"//xla/tests:xla_internal_test_main",
"//xla/tsl/lib/core:status_test_util",
"//xla/tsl/lib/monitoring:collected_metrics",
"//xla/tsl/lib/monitoring:collection_registry",
"@local_tsl//tsl/platform:statusor",
"@local_tsl//tsl/platform:test",
],
)

cc_library(
# The old target name will still be used so that dependencies won't break.
# In the future, dependencies should be cleaned up and relinked to the above
Expand Down Expand Up @@ -2065,3 +2092,27 @@ cc_library(
hdrs = ["cpu_executable_run_options.h"],
deps = [":collectives_interface"],
)

# Library (metrics.h / metrics.cc) that records the CPU compiler's call stack
# into a tsl monitoring counter.
cc_library(
name = "metrics",
srcs = ["metrics.cc"],
hdrs = ["metrics.h"],
deps = [
"//xla/tsl/lib/monitoring:counter",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:string_view",
"@local_tsl//tsl/platform:stacktrace",
],
)

# Unit test for the ":metrics" library.
xla_cc_test(
name = "metrics_test",
srcs = ["metrics_test.cc"],
deps = [
":metrics",
"//xla/tests:xla_internal_test_main",
"//xla/tsl/lib/monitoring:collected_metrics",
"//xla/tsl/lib/monitoring:collection_registry",
"@local_tsl//tsl/platform:test",
],
)
2 changes: 2 additions & 0 deletions third_party/xla/xla/service/cpu/cpu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ limitations under the License.
#include "xla/service/cpu/executable.pb.h"
#include "xla/service/cpu/ir_emitter.h"
#include "xla/service/cpu/ir_emitter2.h"
#include "xla/service/cpu/metrics.h"
#include "xla/service/cpu/parallel_task_assignment.h"
#include "xla/service/cpu/simple_orc_jit.h"
#include "xla/service/cpu/target_machine_features.h"
Expand Down Expand Up @@ -1716,6 +1717,7 @@ absl::StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
[[maybe_unused]] se::StreamExecutor* stream_exec,
const CompileOptions& options) {
VLOG(1) << "Compiling: " << module->name();
RecordCpuCompilerStacktrace();
XLA_SCOPED_LOGGING_TIMER(
absl::StrFormat("Compiling [%s] for CPU using JIT", module->name()));
std::string slow_compilation_msg =
Expand Down
87 changes: 87 additions & 0 deletions third_party/xla/xla/service/cpu/cpu_compiler_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <string>
#include <utility>

#include "xla/pjrt/pjrt_client.h"
#include "xla/service/hlo_runner_interface.h"
#include "xla/service/hlo_runner_pjrt.h"
#include "xla/shape.h"
#include "xla/tests/new_hlo_test_base.h"
#include "xla/tests/pjrt_client_registry.h"
#include "xla/tsl/lib/monitoring/collected_metrics.h"
#include "xla/tsl/lib/monitoring/collection_registry.h"
#include "tsl/platform/statusor.h"
#include "tsl/platform/test.h"

namespace xla {
namespace cpu {
namespace {

// Creates an HloRunnerPjRt on top of the PjRt client produced by the factory
// registered in the global PjRt client test factory registry.
//
// The device shape representation is the identity (host shapes are passed
// through unchanged); the device shape size function is the one the registry
// provides for the created client.
std::unique_ptr<HloRunnerInterface> CreatePjrtHloRunner() {
  PjRtClientTestFactoryRegistry& pjrt_registry =
      GetGlobalPjRtClientTestFactory();
  // NOTE(review): `.value()` is unchecked here; presumably a failed client
  // creation aborts the test binary -- confirm this is the intended behavior.
  std::unique_ptr<PjRtClient> client = pjrt_registry.Get()().value();
  // Query the size function before `client` is moved into the runner below.
  PjRtClientTestFactoryRegistry::DeviceShapeSizeFn device_shape_size_fn =
      pjrt_registry.GetDeviceShapeSizeFn(client.get());
  return std::make_unique<HloRunnerPjRt>(
      std::move(client), [](const Shape& host_shape) { return host_shape; },
      device_shape_size_fn);
}

// Test fixture for CPU compiler tests. Hands NewHloTestBase two independently
// created PjRt-based HLO runners (see CreatePjrtHloRunner()).
class CpuCompilerTest : public NewHloTestBase {
public:
CpuCompilerTest()
: NewHloTestBase(CreatePjrtHloRunner(), CreatePjrtHloRunner()) {}
};

// Verifies that compiling and running a module through the CPU compiler
// records at least one point in the compiler stacktrace counter.
TEST_F(CpuCompilerTest, RecordsStreamzStackTrace) {
  const char* hlo_text = R"(
HloModule test
ENTRY main {
p = f32[10]{0} parameter(0)
ROOT neg = f32[10]{0} negate(p)
}
)";

  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseAndReturnVerifiedModule(hlo_text));
  EXPECT_TRUE(Run(std::move(module), /*run_hlo_passes=*/true));

  const std::string kCpuCompilerStacktraceMetricName =
      "/xla/service/cpu/compiler_stacktrace_count";

  tsl::monitoring::CollectionRegistry::CollectMetricsOptions options;
  std::unique_ptr<tsl::monitoring::CollectedMetrics> metrics =
      tsl::monitoring::CollectionRegistry::Default()->CollectMetrics(options);

  // Look the metric up once and stop the test if it is missing. Indexing the
  // map with operator[] would default-insert a null entry and crash on the
  // dereference instead of reporting a clean test failure.
  const auto point_set_it =
      metrics->point_set_map.find(kCpuCompilerStacktraceMetricName);
  ASSERT_TRUE(point_set_it != metrics->point_set_map.end());
  ASSERT_TRUE(point_set_it->second != nullptr);

  // The counter is incremented on every compiler invocation, so at least one
  // point must exist. Other callers in the same process may add more.
  EXPECT_FALSE(point_set_it->second->points.empty());
}

} // namespace
} // namespace cpu
} // namespace xla
68 changes: 68 additions & 0 deletions third_party/xla/xla/service/cpu/metrics.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "xla/service/cpu/metrics.h"

#include <deque>
#include <string>

#include "absl/strings/ascii.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "xla/tsl/lib/monitoring/counter.h"
#include "tsl/platform/stacktrace.h"

namespace xla {
namespace cpu {

// Counter with a single "stacktrace" label; each distinct (address-stripped)
// call stack gets its own cell. Internal linkage: the counter is an
// implementation detail of this file and is only reachable through the
// functions declared in metrics.h.
static auto* cpu_compiler_stacktrace_count = tsl::monitoring::Counter<1>::New(
    "/xla/service/cpu/compiler_stacktrace_count",
    "The number of times a compiler stacktrace was called.", "stacktrace");

// Captures the current call stack, normalizes it, and increments the
// stacktrace counter cell labelled with the normalized stack.
//
// Normalization:
//   1. Drop the first and last lines of the tsl::CurrentStackTrace() output
//      (prefix/postfix lines added by that function).
//   2. Keep at most the 10 innermost remaining frames.
//   3. Strip the leading address column and surrounding whitespace from every
//      frame so that identical call paths map to the same counter cell.
// The frames are then joined with ";\n" to form the label.
void RecordCpuCompilerStacktrace() {
  std::string tsl_stacktrace = tsl::CurrentStackTrace();

  // tsl::CurrentStackTrace() adds a prefix and postfix lines, so remove them.
  // The trace may contain fewer than two lines (e.g. when it is empty), so
  // guard each pop: popping from an empty deque is undefined behavior.
  std::deque<std::string> stack = absl::StrSplit(tsl_stacktrace, '\n');
  if (!stack.empty()) {
    stack.pop_front();
  }
  if (!stack.empty()) {
    stack.pop_back();
  }

  // Bound the label size by keeping only the innermost frames.
  constexpr unsigned kMaxStackDepth = 10;
  if (stack.size() > kMaxStackDepth) {
    stack.resize(kMaxStackDepth);
  }

  // Stack traces with addresses would make too many unique streamz cells.
  // We only care about the actual call stack.
  // Format chars added by tsl::CurrentStackTrace().
  constexpr unsigned kFormatChars = 8;
  constexpr unsigned kAddressFormat = kFormatChars + 2 * sizeof(void*);
  for (std::string& frame : stack) {
    // ClippedSubstr tolerates frames shorter than the address column.
    frame = std::string(absl::StripAsciiWhitespace(
        absl::ClippedSubstr(frame, kAddressFormat)));
  }

  std::string stacktrace = absl::StrJoin(stack, ";\n");
  cpu_compiler_stacktrace_count->GetCell(stacktrace)->IncrementBy(1);
}

// Returns the current value of the stacktrace counter cell whose label equals
// `stacktrace`.
int GetCpuCompilerStacktraceCount(absl::string_view stacktrace) {
  // The counter is keyed by std::string labels, so materialize the view.
  const std::string label(stacktrace);
  auto* cell = cpu_compiler_stacktrace_count->GetCell(label);
  return cell->value();
}

} // namespace cpu
} // namespace xla
34 changes: 34 additions & 0 deletions third_party/xla/xla/service/cpu/metrics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef XLA_SERVICE_CPU_METRICS_H_
#define XLA_SERVICE_CPU_METRICS_H_

#include "absl/strings/string_view.h"

namespace xla {
namespace cpu {

// Records the stacktrace of the CPU compiler.
void RecordCpuCompilerStacktrace();

// Returns the number of times the CPU compiler was called with the given
// stacktrace.
int GetCpuCompilerStacktraceCount(absl::string_view stacktrace);

} // namespace cpu
} // namespace xla

#endif // XLA_SERVICE_CPU_METRICS_H_
49 changes: 49 additions & 0 deletions third_party/xla/xla/service/cpu/metrics_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "xla/service/cpu/metrics.h"

#include <memory>
#include <string>
#include <vector>

#include "xla/tsl/lib/monitoring/collected_metrics.h"
#include "xla/tsl/lib/monitoring/collection_registry.h"
#include "tsl/platform/test.h"

namespace xla {
namespace cpu {
namespace {

// Verifies that a single RecordCpuCompilerStacktrace() call produces exactly
// one point in the compiler stacktrace counter.
TEST(MetricsTest, RecordsCpuCompilerStacktrace) {
  const std::string kCpuCompilerStacktraceMetricName =
      "/xla/service/cpu/compiler_stacktrace_count";

  RecordCpuCompilerStacktrace();

  tsl::monitoring::CollectionRegistry::CollectMetricsOptions options;
  std::unique_ptr<tsl::monitoring::CollectedMetrics> metrics =
      tsl::monitoring::CollectionRegistry::Default()->CollectMetrics(options);

  // Look the metric up once and stop the test if it is missing. Indexing the
  // map with operator[] would default-insert a null entry and crash on the
  // dereference instead of reporting a clean test failure.
  const auto point_set_it =
      metrics->point_set_map.find(kCpuCompilerStacktraceMetricName);
  ASSERT_TRUE(point_set_it != metrics->point_set_map.end());
  ASSERT_TRUE(point_set_it->second != nullptr);
  EXPECT_EQ(point_set_it->second->points.size(), 1u);
}

} // namespace
} // namespace cpu
} // namespace xla

0 comments on commit 1ce2bbc

Please sign in to comment.