Skip to content

Commit

Permalink
inference gets cutlass info & improve coding efficiency (#63211)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhink authored Apr 3, 2024
1 parent 5f32f92 commit d3f3c22
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 23 deletions.
20 changes: 12 additions & 8 deletions paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
${element_c} *bias = (${element_c} *)(params.bias);
${element_c} *output = (${element_c} *)(params.output);
// only used by conv2d_bias_residual
auto residual = (${element_c} *)(params.residual);
auto residual = (${element_c} *)(params.residual);
int batch = params.batch;
int ic = params.ic;
Expand Down Expand Up @@ -96,8 +96,8 @@
ImplicitGemm implicit_gemm_op;
size_t bytes = implicit_gemm_op.get_workspace_size(arguments);
auto stream = params.stream;
void *workspace = params.workspace;
auto stream = params.stream;
void *workspace = params.workspace;
cutlass::Status status = implicit_gemm_op.can_implement(arguments);
CUTLASS_CHECK(status);
Expand Down Expand Up @@ -125,7 +125,7 @@
std::map<std::vector<int>, int> map_problem_${func_name};
std::mutex ${func_name}_mutex;
void ${func_name}(ConvAllParams params) {
bool ${func_name}(ConvAllParams params) {
int batch = params.batch;
int ic = params.ic;
int ih = params.ih;
Expand All @@ -145,7 +145,7 @@
if (map_problem_${func_name}.count(problem_size)) {
${func_name}_all_func[map_problem_${func_name}.at(problem_size)](
params);
return;
return true;
}
int best_config_index = ProfileToGetBestConfig(
Expand All @@ -155,6 +155,7 @@
map_problem_${func_name}[problem_size] = best_config_index;
${func_name}_all_func[best_config_index](params);
return true;
}
"""

Expand All @@ -164,8 +165,8 @@
# this function is invoked by phi kernel

CommonWrapperForPhi = """
void ${op_name}(ConvAllParams params) {
${dispatch_body}
bool ${op_name}(ConvAllParams params) {
${dispatch_body}
}
"""

Expand All @@ -177,12 +178,14 @@ def convert_c_data_type(dtype):
return "Conv2dDataType::bf16"
elif dtype == "fp32":
return "Conv2dDataType::fp32"
else:
return None


CommonDispatchTemp = '''
if (params.sm_version == ${sm_code} && params.data_type == ${data_type})
{
${op_name_with_sm}(params);
return ${op_name_with_sm}(params);
}
'''

Expand Down Expand Up @@ -215,6 +218,7 @@ def GenerateFunctionForPhi(
+ data_type
)
dispatch_body += SubstituteTemplate(CommonDispatchTemp, sm_dicts)
dispatch_body += ''' return false;'''
op_dicts = {}
op_dicts["dispatch_body"] = dispatch_body
op_dicts["op_name"] = camel_names[epi_func]
Expand Down
22 changes: 10 additions & 12 deletions paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_decl.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,17 @@ typedef struct {
} ConvAllParams;

// Below functions are provided by cutlass, they are called by phi.
extern "C" void Conv2dBiasAddRelu(ConvAllParams params);
extern "C" void Conv2dBiasRelu(ConvAllParams params);
extern "C" void Conv2dBiasLeakyRelu(ConvAllParams params);
extern "C" void Conv2dBiasSilu(ConvAllParams params);
extern "C" void Conv2dBias(ConvAllParams params);
extern "C" void Conv2dBiasSigmoid(ConvAllParams params);
extern "C" bool Conv2dBiasAddRelu(ConvAllParams params);
extern "C" bool Conv2dBiasRelu(ConvAllParams params);
extern "C" bool Conv2dBiasLeakyRelu(ConvAllParams params);
extern "C" bool Conv2dBiasSilu(ConvAllParams params);
extern "C" bool Conv2dBias(ConvAllParams params);
extern "C" bool Conv2dBiasSigmoid(ConvAllParams params);

extern "C" void Conv2dDepthwiseBias(ConvAllParams params);
extern "C" void Conv2dDepthwiseBiasRelu(ConvAllParams params);
extern "C" void Conv2dDepthwiseBiasSigmoid(ConvAllParams params);
extern "C" void Conv2dDepthwiseBiasSilu(ConvAllParams params);

extern "C" int HelloFromCutlassConv2d(int a, int b);
extern "C" bool Conv2dDepthwiseBias(ConvAllParams params);
extern "C" bool Conv2dDepthwiseBiasRelu(ConvAllParams params);
extern "C" bool Conv2dDepthwiseBiasSigmoid(ConvAllParams params);
extern "C" bool Conv2dDepthwiseBiasSilu(ConvAllParams params);

} // namespace cutlass_internal
} // namespace fusion
Expand Down
15 changes: 12 additions & 3 deletions paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace phi {
namespace fusion {
namespace cutlass_internal {

typedef void (*func)(phi::fusion::cutlass_internal::ConvAllParams);
typedef bool (*func)(phi::fusion::cutlass_internal::ConvAllParams);

template <typename T, typename Context>
void FusedConv2dAddActKernel(const Context& ctx,
Expand Down Expand Up @@ -230,7 +230,12 @@ void FusedConv2dAddActKernel(const Context& ctx,
"Cutlass conv2d_depthwise does not support this activation: %s.",
activation.c_str()));
}
conv_func(params);

if (!conv_func(params)) {
PADDLE_THROW(
phi::errors::Fatal("no fused_conv2d_add_act cutlass kernel "));
}

output->set_layout(DataLayout::NHWC);
return;
}
Expand Down Expand Up @@ -265,7 +270,11 @@ void FusedConv2dAddActKernel(const Context& ctx,
PADDLE_THROW(phi::errors::InvalidArgument(
"Cutlass does not support this activation: %s.", activation.c_str()));
}
conv_func(params);

if (!conv_func(params)) {
PADDLE_THROW(phi::errors::Fatal("no fused_conv2d_add_act cutlass kernel "));
}

output->set_layout(DataLayout::NHWC);
}
} // namespace cutlass_internal
Expand Down

0 comments on commit d3f3c22

Please sign in to comment.