csarofeen · naoyam · Nov 11, 2022 · Nov 5, 2022 · Nov 8, 2022 · Nov 8, 2022
diff --git a/torch/csrc/jit/codegen/cuda/kernel_ir.cpp b/torch/csrc/jit/codegen/cuda/kernel_ir.cpp
@@ -449,12 +449,14 @@ Allocate::Allocate(
     Val* buffer,
     MemoryType memory_type,
     std::vector<Val*> shape,
-    bool zero_init)
+    bool zero_init,
+    const Allocate* alias)
     : Expr(passkey, ExprType::Allocate),
       buffer_(buffer),
       memory_type_(memory_type),
       shape_(std::move(shape)),
-      zero_init_(zero_init) {
+      zero_init_(zero_init),
+      alias_(alias) {
   TORCH_INTERNAL_ASSERT(
       passkey.ir_container_->isA<kir::Kernel>(),
       "IR type only valid for Kernel container.");
@@ -484,6 +486,12 @@ Allocate::Allocate(
     size_ = FusionGuard::getCurFusion()->oneVal();
   }
 
+  if (alias_ != nullptr) {
+    TORCH_INTERNAL_ASSERT(alias_ != this, "Invalid alias");
+    TORCH_INTERNAL_ASSERT(
+        alias_->memoryType() == memory_type_, "Invalid alias");
+  }
+
   addInput(size_);
 }
 

diff --git a/torch/csrc/jit/codegen/cuda/kernel_ir.h b/torch/csrc/jit/codegen/cuda/kernel_ir.h
@@ -187,7 +187,8 @@ class TORCH_CUDA_CU_API Allocate final : public Expr {
       Val* buffer,
       MemoryType memory_type,
       std::vector<Val*> shape = {},
-      bool zero_init = false);
+      bool zero_init = false,
+      const Allocate* alias = nullptr);
 
   //! Allocation of a non-dimensional buffer
   //!
@@ -225,12 +226,6 @@ class TORCH_CUDA_CU_API Allocate final : public Expr {
     return alias_;
   }
 
-  void setAlias(const Allocate* alias) {
-    TORCH_INTERNAL_ASSERT(alias != this);
-    TORCH_INTERNAL_ASSERT(alias->memoryType() == memory_type_);
-    alias_ = alias;
-  }
-
  private:
   Val* buffer_ = nullptr;
   MemoryType memory_type_ = MemoryType::Local;

diff --git a/torch/csrc/jit/codegen/cuda/lower2device.cpp b/torch/csrc/jit/codegen/cuda/lower2device.cpp
@@ -396,6 +396,30 @@ bool GpuLower::hasCurrent() {
 
+//! Propagates lowering bookkeeping from old_expr to new_expr: the predicate
+//! removal info is always forwarded, and if old_expr is a kir::Allocate with
+//! an entry in localAllocationInfoMap(), a copy of it is added for new_expr.
 void GpuLower::propagateExprInfo(const Expr* old_expr, const Expr* new_expr) {
   pred_elimination_.propagateRemovalInfo(old_expr, new_expr);
+  if (!old_expr->isA<kir::Allocate>()) {
+    return;
+  }
+  auto alloc_info_it =
+      localAllocationInfoMap().find(old_expr->as<kir::Allocate>());
+  if (alloc_info_it == localAllocationInfoMap().end()) {
+    return;
+  }
+  // new_expr is about to be downcast and used as a map key. Check its type
+  // explicitly instead of relying on as<>() to detect a mismatch. Only done
+  // on this path so that the no-op cases above keep accepting any new_expr.
+  TORCH_INTERNAL_ASSERT(
+      new_expr->isA<kir::Allocate>(),
+      "Expected an Allocate as the replacement of an Allocate");
+  // The entry is duplicated rather than moved, so old_expr keeps its own.
+  // NOTE(review): the copy is verbatim; any member of LocalAllocationInfo
+  // that refers back to old_expr is not retargeted -- confirm intended.
+  auto alloc_info =
+      std::make_unique<LocalAllocationInfo>(*(alloc_info_it->second));
+  localAllocationInfoMap().emplace(
+      new_expr->as<kir::Allocate>(), std::move(alloc_info));
 }
 
 } // namespace cuda