Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#6 from ckl117/adi_fm
Browse files Browse the repository at this point in the history
atan2 precision in adfm-pnc
  • Loading branch information
ming1753 authored Jul 19, 2024
2 parents 47c3fbc + 9dedb7c commit 2343eb1
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 59 deletions.
188 changes: 137 additions & 51 deletions paddle/fluid/inference/tensorrt/convert/atan2_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,69 +33,155 @@ class Atan2OpConverter : public OpConverter {

auto* x = engine_->GetITensor(x_name);
auto* y = engine_->GetITensor(y_name);

auto* intermediate_div = Div(x, y);
auto* atan2_layer = TRT_ENGINE_ADD_LAYER(
engine_, Unary, *intermediate_div, nvinfer1::UnaryOperation::kATAN);
auto* atan2_intermediate = atan2_layer->getOutput(0);
auto* shape_tensor = Shape(x);
auto rank = x->getDimensions().nbDims;
auto* zero = FillConstantLayer(shape_tensor, rank, 0.f);
auto* one = FillConstantLayer(shape_tensor, rank, 1.f);
// auto* one = FillConstantLayer(shape_tensor, rank, 1.f);
auto* two = FillConstantLayer(shape_tensor, rank, 2.f);
auto* PI =
FillConstantLayer(shape_tensor, rank, static_cast<float>(3.1415926535));
// Calculate x_zero, y_zero (whether inputs are zero)
auto* x_zero = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x,
*zero,
nvinfer1::ElementWiseOperation::kEQUAL)
->getOutput(0);
auto* y_zero = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*y,
*zero,
nvinfer1::ElementWiseOperation::kEQUAL)
->getOutput(0);

// Get sign of inputs
auto* x_positive =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x,
*zero,
nvinfer1::ElementWiseOperation::kGREATER)
->getOutput(0);

auto* x_mask =
Cast(TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x,
*zero,
nvinfer1::ElementWiseOperation::kLESS)
->getOutput(0),
nvinfer1::DataType::kFLOAT);

auto* y_mask =
Cast(TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*y,
*zero,
nvinfer1::ElementWiseOperation::kLESS)
->getOutput(0),
nvinfer1::DataType::kFLOAT);

x_mask = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_mask,
*two,
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
x_mask = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_mask,
*one,
nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0);
x_mask = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_mask,
*PI,
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);

auto* correction_term =
auto* x_zero_positive =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_zero,
*x_positive,
nvinfer1::ElementWiseOperation::kOR)
->getOutput(0);
auto* x_negative =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*y_mask,
*x,
nvinfer1::ElementWiseOperation::kPROD)
*zero,
nvinfer1::ElementWiseOperation::kLESS)
->getOutput(0);
auto* y_positive =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*y,
*zero,
nvinfer1::ElementWiseOperation::kGREATER)
->getOutput(0);

auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*atan2_intermediate,
*correction_term,
nvinfer1::ElementWiseOperation::kSUB);
auto* y_negative =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*y,
*zero,
nvinfer1::ElementWiseOperation::kLESS)
->getOutput(0);
// Calculate atan(x/y)
auto* intermediate_div = Div(x, y);
auto* atan2_layer = TRT_ENGINE_ADD_LAYER(
engine_, Unary, *intermediate_div, nvinfer1::UnaryOperation::kATAN);
auto* atan_val = atan2_layer->getOutput(0);

// Candidate value atan(x/y)+π; selected further down where x≥0 and y<0.
auto* atan_add_pi = Sum(atan_val, PI);
// Candidate value atan(x/y)-π; selected further down where x<0 and y<0.
auto* atan_sub_pi = Sub(atan_val, PI);

// atan(x/y)+π if x≥0 and y<0,
auto* atan_corrected_indices =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_zero_positive,
*y_negative,
nvinfer1::ElementWiseOperation::kAND)
->getOutput(0);
auto* atan_corrected =
TRT_ENGINE_ADD_LAYER(
engine_, Select, *atan_corrected_indices, *atan_add_pi, *atan_val)
->getOutput(0);

// atan(x/y)-π if x<0 and y<0,
auto* atan_corrected_indices_2 =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_negative,
*y_negative,
nvinfer1::ElementWiseOperation::kAND)
->getOutput(0);
auto* atan_corrected_2 = TRT_ENGINE_ADD_LAYER(engine_,
Select,
*atan_corrected_indices_2,
*atan_sub_pi,
*atan_corrected)
->getOutput(0);

// atan(x/y) if y>0
auto* atan_output =
TRT_ENGINE_ADD_LAYER(
engine_, Select, *y_positive, *atan_val, *atan_corrected_2)
->getOutput(0);

// Constant tensors holding π/2 and -π/2, used for the y=0 cases.
auto* pi_over_2_tensor = Div(PI, two);
auto* minus_pi_over_2_tensor = Div(Sub(zero, PI), two);

// π/2 if x>0 and y=0,
auto* pi_over_2_output_indices =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_positive,
*y_zero,
nvinfer1::ElementWiseOperation::kAND)
->getOutput(0);
auto* pi_over_2_output = TRT_ENGINE_ADD_LAYER(engine_,
Select,
*pi_over_2_output_indices,
*pi_over_2_tensor,
*atan_output)
->getOutput(0);

// -π/2 if x<0 and y=0,
auto* minus_pi_over_2_output_indices =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_negative,
*y_zero,
nvinfer1::ElementWiseOperation::kAND)
->getOutput(0);
auto* minus_pi_over_2_output =
TRT_ENGINE_ADD_LAYER(engine_,
Select,
*minus_pi_over_2_output_indices,
*minus_pi_over_2_tensor,
*pi_over_2_output)
->getOutput(0);

// 0 if x=0 and y=0,
auto* zero_output_indices =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*x_zero,
*y_zero,
nvinfer1::ElementWiseOperation::kAND)
->getOutput(0);
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, Select, *zero_output_indices, *zero, *minus_pi_over_2_output);

ReplenishLayerAndOutput(layer, "atan2", {output_name}, test_mode);
}
Expand Down
18 changes: 10 additions & 8 deletions test/ir/inference/test_trt_convert_atan2.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ def is_program_valid(self, program_config: ProgramConfig) -> bool:

def sample_program_configs(self):
def generate_input1():
return np.random.random([1, 80, 1]).astype(np.float32)
x = 2 * np.random.random([1, 800, 1]).astype(np.float32) - 1
return x

def generate_input2():
return np.random.random([1, 80, 1]).astype(np.float32)
x = 2 * np.random.random([1, 800, 1]).astype(np.float32) - 1
return x

ops_config = [
{
Expand Down Expand Up @@ -76,16 +78,16 @@ def clear_dynamic_shape():

def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {
"input_data1": [1, 80, 1],
"input_data2": [1, 80, 1],
"input_data1": [1, 800, 1],
"input_data2": [1, 800, 1],
}
self.dynamic_shape.max_input_shape = {
"input_data1": [2, 80, 1],
"input_data2": [2, 80, 1],
"input_data1": [2, 800, 1],
"input_data2": [2, 800, 1],
}
self.dynamic_shape.opt_input_shape = {
"input_data1": [1, 80, 1],
"input_data2": [1, 80, 1],
"input_data1": [1, 800, 1],
"input_data2": [1, 800, 1],
}

attrs = [
Expand Down

0 comments on commit 2343eb1

Please sign in to comment.