diff --git a/xla/service/gpu/gpu_compiler_test.cc b/xla/service/gpu/gpu_compiler_test.cc index bb064b853b74d..9ab749e1d51ca 100644 --- a/xla/service/gpu/gpu_compiler_test.cc +++ b/xla/service/gpu/gpu_compiler_test.cc @@ -64,6 +64,10 @@ using ::testing::Not; using ::testing::TempDir; class GpuCompilerTest : public HloTestBase { + const auto& device_desc() { + return backend().default_stream_executor()->GetDeviceDescription(); + } + public: absl::Status Schedule(HloModule* module) { auto compiler = backend().compiler(); @@ -73,6 +77,10 @@ class GpuCompilerTest : public HloTestBase { return tensorflow::down_cast<GpuCompiler*>(compiler) ->RunPostSchedulingPipelines(module, 4 * 1024 * 1024, gpu_device_info); } + + const se::GpuComputeCapability& GpuComputeComp() { + return device_desc().gpu_compute_capability(); + } }; TEST_F(GpuCompilerTest, CompiledProgramsCount) { @@ -335,6 +343,10 @@ ENTRY main { TEST_F(GpuCompilerTest, GemmFusionIsNoOpWhenGemmFusionAutotunerFallsBackToCublas) { + if (std::holds_alternative<se::RocmComputeCapability>(GpuComputeComp())) { + GTEST_SKIP() << "Not using autotuner on ROCM yet."; + } + const absl::string_view hlo_string = R"( HloModule test