Merge pull request PaddlePaddle#6 from ckl117/adi_fm

atan2 precision in adfm-pnc
ckl117 · Jul 19, 2024 · 2343eb1 · 2343eb1
2 parents 47c3fbc + 9dedb7c
commit 2343eb1
Show file tree

Hide file tree

Showing 2 changed files with 147 additions and 59 deletions.
diff --git a/paddle/fluid/inference/tensorrt/convert/atan2_op.cc b/paddle/fluid/inference/tensorrt/convert/atan2_op.cc
@@ -33,69 +33,155 @@ class Atan2OpConverter : public OpConverter {
 
     auto* x = engine_->GetITensor(x_name);
     auto* y = engine_->GetITensor(y_name);
-
-    auto* intermediate_div = Div(x, y);
-    auto* atan2_layer = TRT_ENGINE_ADD_LAYER(
-        engine_, Unary, *intermediate_div, nvinfer1::UnaryOperation::kATAN);
-    auto* atan2_intermediate = atan2_layer->getOutput(0);
     auto* shape_tensor = Shape(x);
     auto rank = x->getDimensions().nbDims;
     auto* zero = FillConstantLayer(shape_tensor, rank, 0.f);
-    auto* one = FillConstantLayer(shape_tensor, rank, 1.f);
+    // auto* one = FillConstantLayer(shape_tensor, rank, 1.f);
     auto* two = FillConstantLayer(shape_tensor, rank, 2.f);
     auto* PI =
         FillConstantLayer(shape_tensor, rank, static_cast<float>(3.1415926535));
+    // Calculate x_zero, y_zero (whether inputs are zero)
+    auto* x_zero = TRT_ENGINE_ADD_LAYER(engine_,
+                                        ElementWise,
+                                        *x,
+                                        *zero,
+                                        nvinfer1::ElementWiseOperation::kEQUAL)
+                       ->getOutput(0);
+    auto* y_zero = TRT_ENGINE_ADD_LAYER(engine_,
+                                        ElementWise,
+                                        *y,
+                                        *zero,
+                                        nvinfer1::ElementWiseOperation::kEQUAL)
+                       ->getOutput(0);
+
+    // Get sign of inputs
+    auto* x_positive =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x,
+                             *zero,
+                             nvinfer1::ElementWiseOperation::kGREATER)
+            ->getOutput(0);
 
-    auto* x_mask =
-        Cast(TRT_ENGINE_ADD_LAYER(engine_,
-                                  ElementWise,
-                                  *x,
-                                  *zero,
-                                  nvinfer1::ElementWiseOperation::kLESS)
-                 ->getOutput(0),
-             nvinfer1::DataType::kFLOAT);
-
-    auto* y_mask =
-        Cast(TRT_ENGINE_ADD_LAYER(engine_,
-                                  ElementWise,
-                                  *y,
-                                  *zero,
-                                  nvinfer1::ElementWiseOperation::kLESS)
-                 ->getOutput(0),
-             nvinfer1::DataType::kFLOAT);
-
-    x_mask = TRT_ENGINE_ADD_LAYER(engine_,
-                                  ElementWise,
-                                  *x_mask,
-                                  *two,
-                                  nvinfer1::ElementWiseOperation::kPROD)
-                 ->getOutput(0);
-    x_mask = TRT_ENGINE_ADD_LAYER(engine_,
-                                  ElementWise,
-                                  *x_mask,
-                                  *one,
-                                  nvinfer1::ElementWiseOperation::kSUB)
-                 ->getOutput(0);
-    x_mask = TRT_ENGINE_ADD_LAYER(engine_,
-                                  ElementWise,
-                                  *x_mask,
-                                  *PI,
-                                  nvinfer1::ElementWiseOperation::kPROD)
-                 ->getOutput(0);
-
-    auto* correction_term =
+    auto* x_zero_positive =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_zero,
+                             *x_positive,
+                             nvinfer1::ElementWiseOperation::kOR)
+            ->getOutput(0);
+    auto* x_negative =
         TRT_ENGINE_ADD_LAYER(engine_,
                              ElementWise,
-                             *y_mask,
                              *x,
-                             nvinfer1::ElementWiseOperation::kPROD)
+                             *zero,
+                             nvinfer1::ElementWiseOperation::kLESS)
+            ->getOutput(0);
+    auto* y_positive =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *y,
+                             *zero,
+                             nvinfer1::ElementWiseOperation::kGREATER)
             ->getOutput(0);
 
-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
-                                       ElementWise,
-                                       *atan2_intermediate,
-                                       *correction_term,
-                                       nvinfer1::ElementWiseOperation::kSUB);
+    auto* y_negative =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *y,
+                             *zero,
+                             nvinfer1::ElementWiseOperation::kLESS)
+            ->getOutput(0);
+    // Calculate atan(x/y)
+    auto* intermediate_div = Div(x, y);
+    auto* atan2_layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Unary, *intermediate_div, nvinfer1::UnaryOperation::kATAN);
+    auto* atan_val = atan2_layer->getOutput(0);
+
+    // Candidate result atan(x/y)+π, selected below where x≥0 and y<0.
+    auto* atan_add_pi = Sum(atan_val, PI);
+    // Candidate result atan(x/y)-π, selected below where x<0 and y<0.
+    auto* atan_sub_pi = Sub(atan_val, PI);
+
+    // Select atan(x/y)+π where x≥0 and y<0.
+    auto* atan_corrected_indices =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_zero_positive,
+                             *y_negative,
+                             nvinfer1::ElementWiseOperation::kAND)
+            ->getOutput(0);
+    auto* atan_corrected =
+        TRT_ENGINE_ADD_LAYER(
+            engine_, Select, *atan_corrected_indices, *atan_add_pi, *atan_val)
+            ->getOutput(0);
+
+    // Select atan(x/y)-π where x<0 and y<0.
+    auto* atan_corrected_indices_2 =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_negative,
+                             *y_negative,
+                             nvinfer1::ElementWiseOperation::kAND)
+            ->getOutput(0);
+    auto* atan_corrected_2 = TRT_ENGINE_ADD_LAYER(engine_,
+                                                  Select,
+                                                  *atan_corrected_indices_2,
+                                                  *atan_sub_pi,
+                                                  *atan_corrected)
+                                 ->getOutput(0);
+
+    // atan(x/y) if y>0
+    auto* atan_output =
+        TRT_ENGINE_ADD_LAYER(
+            engine_, Select, *y_positive, *atan_val, *atan_corrected_2)
+            ->getOutput(0);
+
+    // Constant tensors holding π/2 and -π/2, used when y is exactly zero.
+    auto* pi_over_2_tensor = Div(PI, two);
+    auto* minus_pi_over_2_tensor = Div(Sub(zero, PI), two);
+
+    // Output π/2 where x>0 and y=0.
+    auto* pi_over_2_output_indices =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_positive,
+                             *y_zero,
+                             nvinfer1::ElementWiseOperation::kAND)
+            ->getOutput(0);
+    auto* pi_over_2_output = TRT_ENGINE_ADD_LAYER(engine_,
+                                                  Select,
+                                                  *pi_over_2_output_indices,
+                                                  *pi_over_2_tensor,
+                                                  *atan_output)
+                                 ->getOutput(0);
+
+    // Output -π/2 where x<0 and y=0.
+    auto* minus_pi_over_2_output_indices =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_negative,
+                             *y_zero,
+                             nvinfer1::ElementWiseOperation::kAND)
+            ->getOutput(0);
+    auto* minus_pi_over_2_output =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             Select,
+                             *minus_pi_over_2_output_indices,
+                             *minus_pi_over_2_tensor,
+                             *pi_over_2_output)
+            ->getOutput(0);
+
+    // Output 0 where x=0 and y=0.
+    auto* zero_output_indices =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *x_zero,
+                             *y_zero,
+                             nvinfer1::ElementWiseOperation::kAND)
+            ->getOutput(0);
+    auto* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Select, *zero_output_indices, *zero, *minus_pi_over_2_output);
 
     ReplenishLayerAndOutput(layer, "atan2", {output_name}, test_mode);
   }

diff --git a/test/ir/inference/test_trt_convert_atan2.py b/test/ir/inference/test_trt_convert_atan2.py
@@ -29,10 +29,12 @@ def is_program_valid(self, program_config: ProgramConfig) -> bool:
 
     def sample_program_configs(self):
         def generate_input1():
-            return np.random.random([1, 80, 1]).astype(np.float32)
+            x = 2 * np.random.random([1, 800, 1]).astype(np.float32) - 1
+            return x
 
         def generate_input2():
-            return np.random.random([1, 80, 1]).astype(np.float32)
+            x = 2 * np.random.random([1, 800, 1]).astype(np.float32) - 1
+            return x
 
         ops_config = [
             {
@@ -76,16 +78,16 @@ def clear_dynamic_shape():
 
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {
-                "input_data1": [1, 80, 1],
-                "input_data2": [1, 80, 1],
+                "input_data1": [1, 800, 1],
+                "input_data2": [1, 800, 1],
             }
             self.dynamic_shape.max_input_shape = {
-                "input_data1": [2, 80, 1],
-                "input_data2": [2, 80, 1],
+                "input_data1": [2, 800, 1],
+                "input_data2": [2, 800, 1],
             }
             self.dynamic_shape.opt_input_shape = {
-                "input_data1": [1, 80, 1],
-                "input_data2": [1, 80, 1],
+                "input_data1": [1, 800, 1],
+                "input_data2": [1, 800, 1],
             }
 
         attrs = [