Skip to content

Commit 43382aa

Browse files
committed
Fix xla unit tests
1 parent d5a10eb commit 43382aa

File tree: 17 files changed (+50 additions, −124 deletions)

tensorflow/tools/ci_build/linux/rocm/run_xla.sh

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,23 @@ EXCLUDED_TESTS=(
6666

6767
# @local_xla//xla/backends/gpu/codegen/triton:fusion_emitter_device_test_amdgpu_any
6868
TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
69+
TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
70+
TritonEmitterTest.RocmWarpSizeIsSetCorrectly
6971
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
7072

7173
# @local_xla//xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_amdgpu_any
7274
TritonTest.FuseSubchannelDequantizationWithTranspose
7375

7476
# @local_xla//xla/backends/gpu/codegen/triton:fusion_emitter_parametrized_test_amdgpu_any
7577
TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
78+
ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
79+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
80+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
81+
82+
# @local_xla//xla/service/gpu/tests:command_buffer_test_amdgpu_any
83+
CommandBufferTests/CommandBufferTest.WhileLoop/*
84+
CommandBufferTests/CommandBufferTest.IndexConditional/*
85+
CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
7686

7787
# @local_xla//xla/backends/gpu/runtime:command_buffer_conversion_pass_test_amdgpu_any
7888
CommandBufferConversionPassTest.ConvertWhileThunk
@@ -88,14 +98,33 @@ EXCLUDED_TESTS=(
8898
DotTf32Tf32F32Tests/DotAlgorithmSupportTest.AlgorithmIsSupportedFromCudaCapability/dot_tf32_tf32_f32_*
8999
DotTf32Tf32F32X3Tests/DotAlgorithmSupportTest.AlgorithmIsSupportedFromCudaCapability/dot_tf32_tf32_f32_*
90100

101+
# @local_xla//xla/service/gpu/transforms:triton_fusion_numerics_verifier_test_amdgpu_any_notfrt
102+
# @local_xla//xla/service/gpu/transforms:triton_fusion_numerics_verifier_test_amdgpu_any
103+
TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
104+
TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
105+
91106
# @local_xla//xla/service/gpu/tests:gpu_cub_sort_test_amdgpu_any
92107
CubSortKeysTest.CompareToReferenceNumpyOrderGt
93108
CubSortKeysTest.CompareToReferenceTotalOrderLt
94109
CubSort/CubSortKeysTest.*
95110
CubSort/CubSortPairsTest.*
96111

112+
# @local_xla//xla/backends/gpu/runtime:cub_sort_thunk_test
113+
CubSortThunkTest.ProtoRoundTrip
114+
97115
# @local_xla//xla/service/gpu/transforms:cublas_gemm_rewriter_test_amdgpu_any
98116
CublasLtGemmRewriteTest.MatrixBiasSwishActivation
117+
CublasLtGemmRewriteTest.VectorBiasReluActivationF16Padded
118+
CublasLtGemmRewriteTest.VectorBiasF16Padded
119+
CublasLtGemmRewriteTest.ReluActivationF16Padded
120+
CublasLtGemmRewriteTest.VectorBiasReluActivationBF16Padded
121+
CublasLtGemmRewriteTest.BF16VectorBiasPadded
122+
CublasLtGemmRewriteTest.ApproxGeluActivationBF16
123+
CublasLtGemmRewriteTest.ReluActivationBF16Padded
124+
CublasLtGemmRewriteTest.VectorBiasBF16Padded
125+
126+
# @local_xla//xla/service/gpu:determinism_test_amdgpu_any
127+
DeterminismTest.Conv
99128

100129
# @local_xla//xla/tests:sample_file_test_amdgpu_any
101130
# @local_xla//xla/tests:sample_file_test_amdgpu_any_notfrt
@@ -107,8 +136,8 @@ EXCLUDED_TESTS=(
107136
# @local_xla//xla/tests:scatter_test_amdgpu_any_notfrt
108137
ScatterTest.TensorFlowScatterV1_UpdateTwice
109138

110-
# @local_xla//xla/service/gpu/llvm_gpu_backend:amdgpu_bitcode_link_test
111-
BitcodeLinkTest.TestLinkEmbeded
139+
# @local_xla//xla/tests:multioutput_fusion_test_amdgpu_any
140+
MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
112141
)
113142

114143
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \
@@ -125,5 +154,9 @@ bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.b
125154
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
126155
--test_filter=-$(IFS=: ; echo "${EXCLUDED_TESTS[*]}") \
127156
-- @local_xla//xla/... \
128-
-@local_xla//xla/service/gpu/tests:sorting.hlo.test_mi200
157+
-@local_xla//xla/service/gpu/tests:sorting_test_amdgpu_any \
158+
-@local_xla//xla/service/gpu/tests:sorting.hlo.test_mi200 \
159+
-@local_xla//xla/backends/gpu/codegen/emitters/tests:reduce_row/mof_scalar_variadic.hlo.test \
160+
-@local_xla//xla/backends/gpu/codegen/emitters/tests:reduce_row/side_output_broadcast.hlo.test \
161+
-@local_xla//xla/tools/hlo_opt:tests/gpu_hlo_llvm.hlo.test
129162
# ^^^ TODO (rocm) weekly-sync-20251021 excluded test files

third_party/xla/xla/backends/gpu/codegen/emitters/tests/BUILD

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ lit_test_suite(
2424
"@llvm-project//llvm:FileCheck",
2525
],
2626
tags_override = {
27-
"reduce_row/mof_scalar_variadic.hlo": ["cuda-only"], #TODO(rocm): weekly sync 25-07-14
28-
"reduce_row/side_output_broadcast.hlo": ["cuda-only"], #TODO(rocm): weekly sync 25-07-14
27+
"reduce_row/mof_scalar_variadic.hlo": [],
28+
"reduce_row/side_output_broadcast.hlo": [],
2929
},
3030
)

third_party/xla/xla/backends/gpu/codegen/triton/dot_algorithms_test.cc

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,6 @@ class BlasAlgorithmTest : public AlgorithmTest {
141141
using TritonAlgorithmTest = AlgorithmTest;
142142

143143
TEST_F(AlgorithmTest, Algorithm3xBF16) {
144-
if (std::holds_alternative<se::RocmComputeCapability>(GpuComputeComp())) {
145-
GTEST_SKIP() << "ALG_DOT_BF16_BF16_F32_X3 not supported on ROCM.";
146-
}
147144
constexpr absl::string_view kHloText = R"(
148145
HloModule Algorithm3xBF16
149146
@@ -160,9 +157,6 @@ TEST_F(AlgorithmTest, Algorithm3xBF16) {
160157
}
161158

162159
TEST_F(AlgorithmTest, Algorithm6xBF16) {
163-
if (std::holds_alternative<se::RocmComputeCapability>(GpuComputeComp())) {
164-
GTEST_SKIP() << "ALG_DOT_BF16_BF16_F32_X6 not supported on ROCM.";
165-
}
166160
constexpr absl::string_view kHloText = R"(
167161
HloModule Algorithm6xBF16
168162
@@ -870,9 +864,6 @@ TEST_F(TritonAlgorithmTest, Algorithm_TF32_TF32_F32_X3) {
870864
}
871865

872866
TEST_F(TritonAlgorithmTest, Algorithm_BF16_BF16_F32) {
873-
if (std::holds_alternative<se::RocmComputeCapability>(GpuComputeComp())) {
874-
GTEST_SKIP() << "Triton currently disabled on ROCM.";
875-
}
876867
if (!SupportsBF16(GpuComputeComp())) {
877868
GTEST_SKIP() << "BF16 not supported.";
878869
}
@@ -899,7 +890,7 @@ TEST_F(TritonAlgorithmTest, Algorithm_BF16_BF16_F32) {
899890
}
900891

901892
TEST_F(TritonAlgorithmTest, Dot_BF16_X6_WithConst) {
902-
constexpr std::string_view kHloText = R"(
893+
constexpr absl::string_view kHloText = R"(
903894
HloModule Dot_BF16_X6_WithConst
904895
905896
lhs {
@@ -1576,7 +1567,6 @@ TEST_P(TritonAndBlasSupportForDifferentTensorSizes, Regular2DDot) {
15761567

15771568
TEST_P(TritonAndBlasSupportForDifferentTensorSizes,
15781569
IsDotAlgorithmSupportedByTriton) {
1579-
15801570
// Here we test which dot algorithm is supported by triton.
15811571
// In case of a change you need to update the expected results.
15821572
constexpr absl::string_view kHloText = R"(

third_party/xla/xla/backends/gpu/codegen/triton/fusion_emitter_device_test.cc

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3205,7 +3205,6 @@ ENTRY entry_computation {
32053205
// Reproducer from b/384110192.
32063206
TEST_F(TritonEmitterTest,
32073207
FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly) {
3208-
// "issue with triton.";
32093208
// The point here is to check the output of the Triton fusion. The `slice` op
32103209
// at the end is inserted to allow the comparison of output to run in a
32113210
// reasonable amount of time, and has been proven to still correctly capture
@@ -3270,12 +3269,6 @@ TEST_F(TritonEmitterTest, ConvertF16ToF8E5M2Exhaustive) {
32703269
"always correct";
32713270
}
32723271

3273-
if (std::holds_alternative<se::RocmComputeCapability>(
3274-
GpuComputeCapability())) {
3275-
GTEST_SKIP() << "Skipping tests on Rocm, Triton's conversion isn't "
3276-
"always correct";
3277-
}
3278-
32793272
constexpr absl::string_view kHloTextTemplate = R"(
32803273
computation {
32813274
p0 = f16[65536]{0} parameter(0)
@@ -4448,22 +4441,9 @@ TEST_F(TritonEmitterTest, RocmWarpSizeIsSetCorrectly) {
44484441
GTEST_SKIP() << "Warp size is always 32 on CUDA";
44494442
}
44504443

4451-
// TODO (rocm) weekly-sync-20251021 Use legacy emitter otherwise test segfaults
4452-
constexpr absl::string_view kHloText = R"(
4453-
%gemm_fusion___computation.clone {
4454-
%parameter_0 = f16[30,30]{1,0} parameter(0)
4455-
%parameter_1 = s8[30,30]{1,0} parameter(1)
4456-
%cp1.1 = f16[30,30]{1,0} convert(%parameter_1)
4457-
ROOT %_.1 = f16[30,30]{1,0} dot(%parameter_0, %cp1.1), lhs_contracting_dims={0}, rhs_contracting_dims={1}
4458-
}
4459-
ENTRY %entry_computation {
4460-
%p1 = s8[30,30]{1,0} parameter(1)
4461-
%p0 = f16[30,30]{1,0} parameter(0)
4462-
ROOT %gemm_fusion__ = f16[30,30]{1,0} fusion(%p0, %p1), kind=kCustom, calls=%gemm_fusion___computation.clone, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"fusion_backend_config":{"kind":"__triton_gemm","triton_gemm_config":{"block_m":"16","block_n":"16","block_k":"256","split_k":"1","num_stages":"1","num_warps":"4","num_ctas":"1"}},"force_earliest_schedule":false,"reification_cost":[]}
4463-
})";
4464-
44654444
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> verified_module,
4466-
ParseAndReturnVerifiedModule(kHloText));
4445+
ParseAndReturnVerifiedModule(GetDotAlgorithmHlo(
4446+
F16, F16, PrecisionConfig::ALG_UNSET)));
44674447

44684448
std::string output_directory;
44694449
if (!tsl::io::GetTestUndeclaredOutputsDir(&output_directory)) {
@@ -4500,8 +4480,7 @@ TEST_F(TritonEmitterTest, RocmWarpSizeIsSetCorrectly) {
45004480
// CHECK: "ttg.threads-per-warp" = 64
45014481
)";
45024482
EXPECT_THAT(RunFileCheck(triton_passes_log, kPattern), true);
4503-
// TODO (rocm) weekly-sync-20251021 Enable this whence test pass
4504-
#if 0
4483+
45054484
// For RX7900 warp_size should be 32
45064485
const se::DeviceDescription dev_info_n =
45074486
TestGpuDeviceInfo::AMDRX7900DeviceInfo();
@@ -4518,7 +4497,6 @@ TEST_F(TritonEmitterTest, RocmWarpSizeIsSetCorrectly) {
45184497
// CHECK: "ttg.threads-per-warp" = 32
45194498
)";
45204499
EXPECT_THAT(RunFileCheck(triton_passes_log, kPattern_n), true);
4521-
#endif
45224500
}
45234501

45244502
TEST_F(TritonEmitterTest, EmitsCorrectlyForReshapeOfPad) {

third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd_test.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,6 @@ TEST(CommandBufferCmdTest, MemcpyCmd) {
302302
}
303303

304304
TEST(CommandBufferCmdTest, LaunchCmd) {
305-
// TODO(rocm): weekly sync 24-12-10
306-
GTEST_SKIP() << "CUDA graph conditionals are not supported";
307305
se::StreamExecutor* stream_executor = GpuExecutor();
308306

309307
auto stream = stream_executor->CreateStream().value();

third_party/xla/xla/backends/gpu/runtime/command_buffer_thunk_test.cc

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -471,10 +471,6 @@ TEST(CommandBufferThunkTest, Memset32CmdOnDifferentStreams) {
471471

472472
TEST(CommandBufferThunkTest, LaunchCmd) {
473473
se::StreamExecutor* stream_executor = GpuExecutor();
474-
if (!IsAtLeastCuda12300(stream_executor)) {
475-
// TODO(rocm): weekly sync 24-12-10
476-
GTEST_SKIP() << "CUDA graph conditionals are not supported";
477-
}
478474

479475
TF_ASSERT_OK_AND_ASSIGN(auto stream, stream_executor->CreateStream());
480476

@@ -569,10 +565,6 @@ TEST(CommandBufferThunkTest, LaunchCmd) {
569565

570566
TEST(CommandBufferThunkTest, CustomAddKernelLaunchCmd) {
571567
se::StreamExecutor* stream_executor = GpuExecutor();
572-
if (!IsAtLeastCuda12300(stream_executor)) {
573-
// TODO(rocm): weekly sync 24-12-10
574-
GTEST_SKIP() << "CUDA graph conditionals are not supported";
575-
}
576568

577569
TF_ASSERT_OK_AND_ASSIGN(auto stream, stream_executor->CreateStream());
578570

@@ -1237,10 +1229,6 @@ TEST(CommandBufferThunkTest, CublasLtCmd) {
12371229

12381230
TEST(CommandBufferThunkTest, MultipleLaunchCmd) {
12391231
se::StreamExecutor* stream_executor = GpuExecutor();
1240-
if (!IsAtLeastCuda12300(stream_executor)) {
1241-
// TODO(rocm): weekly sync 24-12-10
1242-
GTEST_SKIP() << "CUDA graph conditionals are not supported";
1243-
}
12441232

12451233
TF_ASSERT_OK_AND_ASSIGN(auto stream, stream_executor->CreateStream());
12461234

third_party/xla/xla/service/gpu/autotuning/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,6 @@ xla_cc_test(
676676
],
677677
tags = [
678678
"gpu",
679-
"cuda-only", #TODO(rocm): weekly sync 24-10-01
680679
],
681680
deps = [
682681
":autotune_cache_key",

third_party/xla/xla/service/gpu/determinism_test.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,6 @@ ENTRY e {
262262
}
263263

264264
TEST_F(DeterminismTest, Conv) {
265-
if (IsRocm()) {
266-
GTEST_SKIP() << "Test temporarily disabled for ROCm!"; //TODO(rocm): weekly sync 25-08-25
267-
}
268265
constexpr absl::string_view kHloText = R"(
269266
ENTRY e {
270267
input = f32[16,3,64,64] parameter(0)

third_party/xla/xla/service/gpu/tests/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,6 @@ xla_test(
238238
"gpu_too_many_blocks_test.cc",
239239
],
240240
backends = ["gpu"],
241-
#tags = ["cuda-only",], #(TODO)(rocm): weekly sync 24-11-05
242241
deps = [
243242
":gpu_codegen_test",
244243
"//xla/hlo/ir:hlo",

third_party/xla/xla/service/gpu/tests/command_buffer_test.cc

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,6 @@ TEST_P(CommandBufferTest, Fusions) {
233233
}
234234

235235
TEST_P(CommandBufferTest, TrueFalseConditional) {
236-
if (IsRocm()) {
237-
GTEST_SKIP() << "Test currently failing on ROCm"; //TODO(rocm): weekly sync 25-07-14
238-
}
239236
constexpr absl::string_view hlo_text = R"(
240237
HloModule m, is_scheduled=true
241238
@@ -295,9 +292,6 @@ TEST_P(CommandBufferTest, TrueFalseConditional) {
295292
}
296293

297294
TEST_P(CommandBufferTest, IndexConditional) {
298-
if (IsRocm()) {
299-
GTEST_SKIP() << "Test currently failing on ROCm"; //TODO(rocm): weekly sync 25-07-14
300-
}
301295
constexpr absl::string_view hlo_text = R"(
302296
HloModule m, is_scheduled=true
303297
@@ -365,9 +359,6 @@ TEST_P(CommandBufferTest, IndexConditional) {
365359
}
366360

367361
TEST_P(CommandBufferTest, WhileLoop) {
368-
if (IsRocm()) {
369-
GTEST_SKIP() << "Test currently failing on ROCm"; //TODO(rocm): weekly sync 25-07-14
370-
}
371362
constexpr absl::string_view hlo_text = R"(
372363
HloModule m, is_scheduled=true
373364

Comments (0)