forked from tensorflow/tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 95
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add streamz to CPU Compiler::RunBackends to record callstacks
PiperOrigin-RevId: 692227925
- Loading branch information
1 parent
b6e6753
commit 1ce2bbc
Showing
6 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* Copyright 2024 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <utility> | ||
|
||
#include "xla/pjrt/pjrt_client.h" | ||
#include "xla/service/hlo_runner_interface.h" | ||
#include "xla/service/hlo_runner_pjrt.h" | ||
#include "xla/shape.h" | ||
#include "xla/tests/new_hlo_test_base.h" | ||
#include "xla/tests/pjrt_client_registry.h" | ||
#include "xla/tsl/lib/monitoring/collected_metrics.h" | ||
#include "xla/tsl/lib/monitoring/collection_registry.h" | ||
#include "tsl/platform/statusor.h" | ||
#include "tsl/platform/test.h" | ||
|
||
namespace xla { | ||
namespace cpu { | ||
namespace { | ||
|
||
std::unique_ptr<HloRunnerInterface> CreatePjrtHloRunner() { | ||
PjRtClientTestFactoryRegistry& pjrt_registry = | ||
GetGlobalPjRtClientTestFactory(); | ||
std::unique_ptr<PjRtClient> client = pjrt_registry.Get()().value(); | ||
PjRtClientTestFactoryRegistry::DeviceShapeRepresentationFn | ||
device_shape_representation_fn = | ||
pjrt_registry.GetDeviceShapeRepresentationFn(client.get()); | ||
PjRtClientTestFactoryRegistry::DeviceShapeSizeFn device_shape_size_fn = | ||
pjrt_registry.GetDeviceShapeSizeFn(client.get()); | ||
return std::make_unique<HloRunnerPjRt>( | ||
std::move(client), [](const Shape& host_shape) { return host_shape; }, | ||
device_shape_size_fn); | ||
} | ||
|
||
class CpuCompilerTest : public NewHloTestBase { | ||
public: | ||
CpuCompilerTest() | ||
: NewHloTestBase(CreatePjrtHloRunner(), CreatePjrtHloRunner()) {} | ||
}; | ||
|
||
// Runs a trivial module through the fixture and checks that the CPU compiler
// stacktrace counter has been exported with at least one point.
TEST_F(CpuCompilerTest, RecordsStreamzStackTrace) {
  const char* hlo_text = R"(
    HloModule test
    ENTRY main {
      p = f32[10]{0} parameter(0)
      ROOT neg = f32[10]{0} negate(p)
    }
  )";

  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseAndReturnVerifiedModule(hlo_text));
  EXPECT_TRUE(Run(std::move(module), /*run_hlo_passes=*/true));

  const std::string kCpuCompilerStacktraceMetricName =
      "/xla/service/cpu/compiler_stacktrace_count";

  tsl::monitoring::CollectionRegistry::CollectMetricsOptions options;
  std::unique_ptr<tsl::monitoring::CollectedMetrics> metrics =
      tsl::monitoring::CollectionRegistry::Default()->CollectMetrics(options);

  // Look the metric up once and stop the test if it is missing. Indexing the
  // map with operator[] after a non-fatal failure would insert a null entry
  // and then dereference it.
  const auto metric_it =
      metrics->point_set_map.find(kCpuCompilerStacktraceMetricName);
  ASSERT_TRUE(metric_it != metrics->point_set_map.end());
  ASSERT_TRUE(metric_it->second != nullptr);

  // Since Streamz is recorded every call, we expect at least one point.
  // All other callers may increment the counter as well.
  EXPECT_FALSE(metric_it->second->points.empty());
}
|
||
} // namespace | ||
} // namespace cpu | ||
} // namespace xla |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* Copyright 2024 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "xla/service/cpu/metrics.h" | ||
|
||
#include <deque> | ||
#include <string> | ||
|
||
#include "absl/strings/ascii.h" | ||
#include "absl/strings/str_join.h" | ||
#include "absl/strings/str_split.h" | ||
#include "absl/strings/string_view.h" | ||
#include "xla/tsl/lib/monitoring/counter.h" | ||
#include "tsl/platform/stacktrace.h" | ||
|
||
namespace xla { | ||
namespace cpu { | ||
|
||
// Counter with a single "stacktrace" label, exported under
// "/xla/service/cpu/compiler_stacktrace_count". It is an implementation detail
// of this file (accessed only through the functions below), so it is given
// internal linkage to keep the symbol out of the xla::cpu namespace.
static auto* cpu_compiler_stacktrace_count = tsl::monitoring::Counter<1>::New(
    "/xla/service/cpu/compiler_stacktrace_count",
    "The number of times a compiler stacktrace was called.", "stacktrace");
|
||
void RecordCpuCompilerStacktrace() { | ||
std::string tsl_stacktrace = tsl::CurrentStackTrace(); | ||
|
||
// tsl::CurrentStackTrace() adds a prefix and postfix lines, so remove them. | ||
std::deque<std::string> stack = absl::StrSplit(tsl_stacktrace, '\n'); | ||
stack.pop_front(); | ||
stack.pop_back(); | ||
|
||
const int kMaxStackDepth = 10; | ||
while (stack.size() > kMaxStackDepth) { | ||
stack.pop_back(); | ||
} | ||
|
||
// Stack traces with addresses would make too many unique streamz cells. | ||
// We only care about the actual call stack. | ||
// Format chars added by tsl::CurrentStackTrace(). | ||
constexpr unsigned kFormatChars = 8; | ||
constexpr unsigned kAddressFormat = kFormatChars + 2 * sizeof(void*); | ||
for (int i = 0; i < stack.size(); ++i) { | ||
stack[i] = std::string(absl::StripAsciiWhitespace( | ||
absl::ClippedSubstr(stack[i], kAddressFormat))); | ||
} | ||
|
||
std::string stacktrace = absl::StrJoin(stack, ";\n"); | ||
cpu_compiler_stacktrace_count->GetCell(stacktrace)->IncrementBy(1); | ||
} | ||
|
||
// Returns the current value of the counter cell whose "stacktrace" label
// equals `stacktrace`.
int GetCpuCompilerStacktraceCount(absl::string_view stacktrace) {
  // The cell lookup is keyed by an owning string, so materialize the view.
  const std::string label(stacktrace);
  auto* cell = cpu_compiler_stacktrace_count->GetCell(label);
  return cell->value();
}
|
||
} // namespace cpu | ||
} // namespace xla |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* Copyright 2024 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#ifndef XLA_SERVICE_CPU_METRICS_H_ | ||
#define XLA_SERVICE_CPU_METRICS_H_ | ||
|
||
#include "absl/strings/string_view.h" | ||
|
||
namespace xla { | ||
namespace cpu { | ||
|
||
// Records the stacktrace of the CPU compiler. | ||
// | ||
// Captures the current stack trace, strips the leading address column from | ||
// each frame, keeps at most 10 lines, and increments by one the cell of the | ||
// "/xla/service/cpu/compiler_stacktrace_count" counter labeled with the | ||
// resulting frames joined by ";\n". | ||
void RecordCpuCompilerStacktrace(); | ||
|
||
// Returns the number of times the CPU compiler was called with the given | ||
// stacktrace, i.e. the current value of the counter cell whose label equals | ||
// `stacktrace`. The label must be in the normalized form produced by | ||
// RecordCpuCompilerStacktrace() (address-stripped frames joined by ";\n"). | ||
int GetCpuCompilerStacktraceCount(absl::string_view stacktrace); | ||
|
||
} // namespace cpu | ||
} // namespace xla | ||
|
||
#endif // XLA_SERVICE_CPU_METRICS_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* Copyright 2024 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "xla/service/cpu/metrics.h" | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "xla/tsl/lib/monitoring/collected_metrics.h" | ||
#include "xla/tsl/lib/monitoring/collection_registry.h" | ||
#include "tsl/platform/test.h" | ||
|
||
namespace xla { | ||
namespace cpu { | ||
namespace { | ||
|
||
// Records one stack trace and checks that the CPU compiler stacktrace counter
// is exported with exactly one point.
TEST(MetricsTest, RecordsCpuCompilerStacktrace) {
  const std::string kCpuCompilerStacktraceMetricName =
      "/xla/service/cpu/compiler_stacktrace_count";

  RecordCpuCompilerStacktrace();

  tsl::monitoring::CollectionRegistry::CollectMetricsOptions options;
  std::unique_ptr<tsl::monitoring::CollectedMetrics> metrics =
      tsl::monitoring::CollectionRegistry::Default()->CollectMetrics(options);

  // Look the metric up once and stop the test if it is missing. Indexing the
  // map with operator[] after a non-fatal failure would insert a null entry
  // and then dereference it.
  const auto metric_it =
      metrics->point_set_map.find(kCpuCompilerStacktraceMetricName);
  ASSERT_TRUE(metric_it != metrics->point_set_map.end());
  ASSERT_TRUE(metric_it->second != nullptr);

  // Unsigned literal avoids a signed/unsigned comparison with size().
  EXPECT_EQ(metric_it->second->points.size(), 1u);
}
|
||
} // namespace | ||
} // namespace cpu | ||
} // namespace xla |