diff --git a/oneflow/core/framework/tensor_methods.cpp b/oneflow/core/framework/tensor_methods.cpp
index d05d0f97797..aa119983f25 100644
--- a/oneflow/core/framework/tensor_methods.cpp
+++ b/oneflow/core/framework/tensor_methods.cpp
@@ -88,7 +88,9 @@ Maybe<Tensor> Reshape(const std::shared_ptr<Tensor>& input, const Shape& shape)
   int need_infer_axis = -1;
   size_t count = 1;
   for (int i = 0; i < shape.NumAxes(); ++i) {
-    if (shape.At(i) == -1) {
+    if (shape.At(i) < -1) {
+      return Error::RuntimeError() << "Invalid shape dimension " << shape.At(i);
+    } else if (shape.At(i) == -1) {
       CHECK_EQ_OR_RETURN(need_infer_axis, -1)
           << "Shape " << shape.ToString() << " has more than 1 axis that needs to be infered.";
       need_infer_axis = i;
diff --git a/oneflow/core/functional/impl/array_functor.cpp b/oneflow/core/functional/impl/array_functor.cpp
index caa45a20b0f..d7415915111 100644
--- a/oneflow/core/functional/impl/array_functor.cpp
+++ b/oneflow/core/functional/impl/array_functor.cpp
@@ -894,7 +894,9 @@ class ReshapeFunctor {
     int need_infer_axis = -1;
     size_t count = 1;
     for (int i = 0; i < shape.NumAxes(); ++i) {
-      if (shape.At(i) == -1) {
+      if (shape.At(i) < -1) {
+        return Error::RuntimeError() << "Invalid shape dimension " << shape.At(i);
+      } else if (shape.At(i) == -1) {
         CHECK_EQ_OR_RETURN(need_infer_axis, -1)
             << "Shape " << shape.ToString() << " has more than 1 axis that needs to be infered.";
         need_infer_axis = i;
diff --git a/oneflow/core/functional/impl/math_functor.cpp b/oneflow/core/functional/impl/math_functor.cpp
index 4a78e740811..d6873390857 100644
--- a/oneflow/core/functional/impl/math_functor.cpp
+++ b/oneflow/core/functional/impl/math_functor.cpp
@@ -575,19 +575,20 @@ class TransposeFunctor {
   Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input,
                            const std::vector<int32_t>& permute) const {
     MutableAttrMap attrs;
-    CHECK_EQ_OR_RETURN(input->ndim(), permute.size()) << "number of dims don't match in permute";
-    JUST(attrs.SetAttr<std::vector<int32_t>>("perm", permute));
-    int32_t ndims = input->shape()->NumAxes();
-    for (int i = 0; i < permute.size(); i++) {
-      int32_t dim = permute.at(i);
-      if (dim < 0) { dim += ndims; }
-      CHECK_GE_OR_RETURN(dim, 0)
-          << "IndexError: Dimension out of range (expected to be in range of [" << -ndims << ","
-          << ndims << " ] but got " << ndims;
-      CHECK_LT_OR_RETURN(dim, ndims)
-          << "IndexError: Dimension out of range (expected to be in range of [" << -ndims << ","
-          << ndims << " ] but got " << ndims;
+    auto ndim = input->ndim();
+    CHECK_EQ_OR_RETURN(ndim, permute.size()) << "number of dims don't match in permute";
+
+    // handle negative permute value here, because of permute is const,
+    // so copy it to local var and do modification.
+    auto positive_perm = permute;
+    for (auto i = 0; i < positive_perm.size(); i++) {
+      if (positive_perm[i] < 0) { positive_perm[i] += ndim; }
+      CHECK_OR_RETURN(positive_perm[i] >= 0 && positive_perm[i] < ndim)
+          << "IndexError: Dimension out of range (expected to be in range of [" << -ndim << ","
+          << ndim << " ) but got " << positive_perm[i];
     }
+
+    JUST(attrs.SetAttr<std::vector<int32_t>>("perm", positive_perm));
     return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs);
   }
diff --git a/oneflow/user/kernels/transpose_kernel.cpp b/oneflow/user/kernels/transpose_kernel.cpp
index bf1628119a7..f8438fbc102 100644
--- a/oneflow/user/kernels/transpose_kernel.cpp
+++ b/oneflow/user/kernels/transpose_kernel.cpp
@@ -14,12 +14,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 #include "oneflow/core/framework/framework.h"
+#include "oneflow/core/kernel/kernel_util.h"
 #include "oneflow/core/kernel/cuda_graph_support.h"
 #include "oneflow/core/ep/include/primitive/permute.h"
 
 namespace oneflow {
 namespace user_op {
 
+namespace {
+bool IsIdentity(const std::vector<int32_t>& perm) {
+  for (auto i = 0; i < perm.size(); i++) {
+    if (perm[i] != i) { return false; }
+  }
+  return true;
+}
+}  // namespace
+
 template<typename Context>
 std::unique_ptr<ep::primitive::Permute> NewPermutePrimitive(Context* ctx) {
   const int64_t num_dims = ctx->TensorDesc4ArgNameAndIndex("output", 0)->shape().NumAxes();
@@ -46,9 +56,18 @@ class TransposeKernel final : public OpKernel, public user_op::CudaGraphSupport
     const int64_t* src_dims = in_shape.ptr();
     int64_t elem_cnt = tensor_out->shape().elem_cnt();
+
     if (elem_cnt != 0) {
-      primitive->Launch(ctx->stream(), dtype, num_dims, src_dims, tensor_in->dptr(), perm.data(),
-                        tensor_out->mut_dptr());
+      if (IsIdentity(perm)) {
+        // if permute vector is 0,1,...,n, do data copy directly
+        AutoMemcpy(ctx->stream(), tensor_out->mut_dptr(), tensor_in->dptr(),
+                   elem_cnt * GetSizeOfDataType(dtype), tensor_out->mem_case(),
+                   tensor_in->mem_case());
+      } else {
+        primitive->Launch(ctx->stream(), dtype, num_dims, src_dims, tensor_in->dptr(), perm.data(),
+                          tensor_out->mut_dptr());
+      }
+    } else {  // For 0-d Tensor
+      return;