Skip to content

Commit b68da9a

Browse files
committed
Eliminate new_zeros
1 parent 8b8fa6c commit b68da9a

File tree

4 files changed

+14
-67
lines changed

4 files changed

+14
-67
lines changed

src/libtorchaudio/forced_align/cpu/compute.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ std::tuple<Tensor, Tensor> compute(
208208
ScalarType::Long);
209209
const auto B = logProbs.size(0);
210210
const auto T = logProbs.size(1);
211-
Tensor paths = torchaudio::stable::new_zeros(targets, {B, T});
211+
Tensor paths = torch::stable::empty({B, T}, targets.scalar_type());
212+
torch::stable::zero_(paths);
212213
THO_DISPATCH_V2(
213214
logProbs.scalar_type(),
214215
"forced_align_impl",

src/libtorchaudio/forced_align/gpu/compute.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ std::tuple<Tensor, Tensor> compute(
294294
auto B = logProbs.size(0);
295295
auto T = logProbs.size(1); // num frames
296296

297-
Tensor paths = torchaudio::stable::new_zeros(targets, {B, T}, /*dtype=*/std::nullopt, /*layout=*/std::nullopt, /*device=*/torch::stable::DeviceType::CPU);
297+
Tensor paths = torch::stable::empty({B, T}, targets.scalar_type());
298+
torch::stable::zero_(paths);
298299

299300
THO_DISPATCH_V2(logProbs.scalar_type(), "forced_align_impl", AT_WRAP([&] {
300301
if (targets.scalar_type() == ScalarType::Long) {

src/libtorchaudio/stable/ops.h

Lines changed: 5 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@
1717
#include <c10/cuda/CUDAException.h>
1818
#endif
1919

20-
using torch::stable::Tensor;
21-
2220
namespace torchaudio::stable {
2321

24-
using Layout = int32_t;
22+
using torch::stable::Tensor;
2523

26-
// TODO: When cpu is implemented in torch::stable, eliminate
27-
// cpu function below.
24+
// TODO: When cpu op is implemented in torch::stable, eliminate cpu
25+
// function below.
2826
inline Tensor cpu(const Tensor& self) {
2927
auto sizes_ = self.sizes();
3028
int32_t cpu_type = static_cast<int32_t>(torch::stable::DeviceType::CPU);
@@ -48,7 +46,8 @@ inline Tensor cpu(const Tensor& self) {
4846
return result;
4947
}
5048

51-
// TODO:
49+
// TODO: When cuda op is implemented in torch::stable, eliminate cuda
50+
// function below.
5251
inline Tensor cuda(const Tensor& self, int32_t cuda_index) {
5352
auto sizes_ = self.sizes();
5453
int32_t cuda_type = static_cast<int32_t>(torch::stable::DeviceType::CUDA);
@@ -72,61 +71,6 @@ inline Tensor cuda(const Tensor& self, int32_t cuda_index) {
7271
return result;
7372
}
7473

75-
// TODO: remove when torch::stable provides new_zeros
76-
inline Tensor new_zeros(
77-
const Tensor& self,
78-
std::vector<int64_t> size,
79-
std::optional<c10::ScalarType> dtype = std::nullopt,
80-
std::optional<Layout> layout = std::nullopt,
81-
std::optional<torch::stable::Device> device = std::nullopt,
82-
std::optional<bool> pin_memory = std::nullopt) {
83-
int32_t target_dtype{};
84-
if (dtype.has_value()) {
85-
target_dtype = torch::stable::detail::to<int32_t>(
86-
torch::stable::detail::from(dtype.value()));
87-
} else {
88-
TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(self.get(), &target_dtype));
89-
}
90-
91-
Layout layout_;
92-
if (layout.has_value()) {
93-
layout_ = layout.value();
94-
} else {
95-
TORCH_ERROR_CODE_CHECK(aoti_torch_get_layout(self.get(), &layout_));
96-
}
97-
98-
int32_t device_type;
99-
torch::stable::DeviceIndex device_index = 0;
100-
if (device.has_value()) {
101-
auto device_ = device.value();
102-
device_type = static_cast<int32_t>(device_.type());
103-
device_index = device_.index();
104-
} else {
105-
TORCH_ERROR_CODE_CHECK(
106-
aoti_torch_get_device_type(self.get(), &device_type));
107-
TORCH_ERROR_CODE_CHECK(
108-
aoti_torch_get_device_index(self.get(), &device_index));
109-
}
110-
111-
// TODO: pin_memory
112-
113-
AtenTensorHandle ret0;
114-
TORCH_ERROR_CODE_CHECK(aoti_torch_aten_new_empty(
115-
self.get(),
116-
size.data(),
117-
static_cast<int64_t>(size.size()),
118-
&target_dtype,
119-
&layout_,
120-
&device_type,
121-
device_index,
122-
nullptr, // pin_memory (nullptr for default)
123-
&ret0));
124-
125-
auto result = Tensor(ret0);
126-
torch::stable::zero_(result);
127-
return result;
128-
}
129-
13074
// An analog of item template function defined in
13175
// ATen/templates/TensorBody.h
13276
template <typename T>

src/libtorchaudio/utils.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
// TODO: replace the include libtorchaudio/stable/ops.h with
66
// torch/stable/ops.h when torch::stable provides all required
7-
// features (torch::stable::item<T> or similar):
7+
// features (torch::stable::item<T> et al):
88
#include <libtorchaudio/stable/ops.h>
99

1010
namespace torchaudio {
@@ -25,7 +25,7 @@ using TensorAccessor = torch::headeronly::HeaderOnlyTensorAccessor<T, N>;
2525
// TODO: eliminate accessor<T, N>(t) in favor of t.accessor<T, N>
2626
// after Tensor::accessor is supported in stable ABI
2727
template <typename T, size_t N>
28-
inline TensorAccessor<T, N> accessor(Tensor t) {
28+
inline TensorAccessor<T, N> accessor(torch::stable::Tensor t) {
2929
return TensorAccessor<T, N>(
3030
reinterpret_cast<T*>(t.data_ptr()), t.sizes().data(), t.strides().data());
3131
}
@@ -42,7 +42,7 @@ using PackedTensorAccessor32 =
4242
// TODO: eliminate accessor<T, N>(t) in favor of t.accessor<T, N>
4343
// after Tensor::accessor is supported in stable ABI
4444
template <typename T, size_t N>
45-
inline PackedTensorAccessor32<T, N> packed_accessor32(Tensor t) {
45+
inline PackedTensorAccessor32<T, N> packed_accessor32(torch::stable::Tensor t) {
4646
return PackedTensorAccessor32<T, N>(
4747
static_cast<typename PackedTensorAccessor32<T, N>::PtrType>(t.data_ptr()),
4848
t.sizes().data(),
@@ -58,7 +58,8 @@ using PackedTensorAccessorSizeT =
5858
size_t>;
5959

6060
template <typename T, size_t N>
61-
inline PackedTensorAccessorSizeT<T, N> packed_accessor_size_t(Tensor t) {
61+
inline PackedTensorAccessorSizeT<T, N> packed_accessor_size_t(
62+
torch::stable::Tensor t) {
6263
return PackedTensorAccessorSizeT<T, N>(
6364
static_cast<typename PackedTensorAccessorSizeT<T, N>::PtrType>(
6465
t.data_ptr()),

0 commit comments

Comments (0)