Clean up: remove commented-out stream synchronize calls and update TODO notes

pearu · pearu · commit ceeb2dbfb34c · 2025-12-05T12:38:19.000+02:00
diff --git a/src/libtorchaudio/cuda_utils.h b/src/libtorchaudio/cuda_utils.h
@@ -5,6 +5,8 @@
 
 #include <cuda_runtime_api.h>
 
+// TODO: replace TA_CUDA_CHECK with STD_CUDA_CHECK after
+// https://github.com/pytorch/pytorch/pull/169385 has landed.
 #define TA_CUDA_CHECK(...) __VA_ARGS__
 
 namespace libtorchaudio::cuda {
diff --git a/src/libtorchaudio/forced_align/gpu/compute.cu b/src/libtorchaudio/forced_align/gpu/compute.cu
@@ -210,12 +210,10 @@ void forced_align_impl(
     C10_CUDA_KERNEL_LAUNCH_CHECK();
     ++backPtrBufferLen;
     if (backPtrBufferLen == kBackPtrBufferSize || t == T - 1) {
-      //cpuDataTranferStream.synchronize();
       libtorchaudio::cuda::synchronize(cpuDataTranferStream, device_index);
       // GPU -> GPU copy
       bufferCopy = torch::stable::clone(backPtrBuffer);
       STD_TORCH_CHECK(bufferCopy.is_contiguous(), "unexpected fail, need to implement stable::Tensor::contiguous()")
-      //defaultStream.synchronize();
       libtorchaudio::cuda::synchronize(defaultStream, device_index);
       libtorchaudio::cuda::setCurrentCUDAStream(cpuDataTranferStream, device_index);
       // Copy ASYNC from GPU to CPU
@@ -231,7 +229,6 @@ void forced_align_impl(
       backPtrBufferLen = 0;
     }
   }
-  //cpuDataTranferStream.synchronize();
   libtorchaudio::cuda::synchronize(cpuDataTranferStream, device_index);
   auto alphasCpu = torchaudio::stable::cpu(alphas);
   auto alphasCpu_a = torchaudio::accessor<scalar_t, 2>(alphasCpu);
diff --git a/src/libtorchaudio/shim_temporary.h b/src/libtorchaudio/shim_temporary.h
@@ -1,6 +1,6 @@
 #pragma once
 // TODO: remove this file once https://github.com/pytorch/pytorch/pull/169376
-// has landed.
+// has landed in nightly.
 
 #include <c10/cuda/CUDAStream.h>
 #include <torch/csrc/inductor/aoti_torch/utils.h>