Skip to content

Commit

Permalink
Merge pull request #27 from lincolnhua/paddlebox
Browse files Browse the repository at this point in the history
添加cvrq一致性策略逻辑
  • Loading branch information
qingshui authored Jan 20, 2022
2 parents 56c50c2 + e3b0aab commit 4da95e5
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 5 deletions.
2 changes: 2 additions & 0 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,11 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(0.0);
AddAttr<bool>("use_cvm", "bool, use cvm or not").SetDefault(true);
AddAttr<bool>("need_filter", "(bool, default false)").SetDefault(false);
AddAttr<bool>("embed_threshold_filter", "(bool, default false)").SetDefault(false);
AddAttr<float>("show_coeff", "(float, default 0.2)").SetDefault(0.2);
AddAttr<float>("clk_coeff", "(float, default 1)").SetDefault(1);
AddAttr<float>("threshold", "(float, default 0.96)").SetDefault(0.96);
AddAttr<float>("embed_threshold", "(float, default 0)").SetDefault(0);
AddAttr<int>("cvm_offset", "(int, default 2)").SetDefault(2);
AddAttr<int>("quant_ratio", "(int, default 128)").SetDefault(0);
AddAttr<bool>("clk_filter", "(bool, default false)").SetDefault(false);
Expand Down
116 changes: 111 additions & 5 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,104 @@ __global__ void FusedSeqpoolKernelQuantFilter(
*(seqpool_output_values[x] + y * embedding_size + offset) = val;
}
}
// Sequence sum-pool with two row filters plus quantization.
// One thread handles one (slot, instance, embedding column) cell: it scans the
// instance's rows [start, end) from the LoD table, skips rows whose weighted
// show/click score is below `threshold` or whose embed weight score is below
// `embed_threshold`, and accumulates the surviving rows.  Show/click columns
// (offset < cvm_offset) are summed as-is; embedding columns are quantized to
// steps of 1/quant_ratio before summing.  Grid-stride via CUDA_KERNEL_LOOP,
// so any launch configuration covering N = batch * slots * embedding works.
template <typename T>
__global__ void FusedSeqpoolKernelEmbedQuantFilter(
    const size_t N, T **input_values, T **seqpool_output_values,
    size_t **lods_values, const int batch_size, const int embedding_size,
    const float pad_value, const int cvm_offset, const float show_coeff,
    const float clk_coeff, const float threshold, const int quant_ratio,
    const float embed_threshold) {
  CUDA_KERNEL_LOOP(i, N) {
    int key = i / embedding_size;
    int offset = i % embedding_size;  // embedx id (column within embedding)
    int x = key / batch_size;         // slot id
    int y = key % batch_size;         // ins id
    auto &start = *(lods_values[x] + y);
    auto &end = *(lods_values[x] + y + 1);

    // Accumulate in double to reduce rounding error over long sequences.
    double val = pad_value;
    for (auto k = start; k < end; ++k) {
      T &show = *(input_values[x] + k * embedding_size);
      T &click = *(input_values[x] + k * embedding_size + 1);
      // Filter 1: weighted show/click score must reach `threshold`.
      if ((show - click) * show_coeff + click * clk_coeff < threshold) {
        continue;
      }
      // Filter 2: sqrt(sum of squared embedx weights) + |embedw| must reach
      // `embed_threshold`.  Use `j` (not `i`) so the CUDA_KERNEL_LOOP index is
      // not shadowed, and `w * w` instead of pow(w, 2) to avoid a
      // double-precision libm call per element.
      T &embedw = *(input_values[x] + k * embedding_size + cvm_offset);
      T embedx_weight_score = 0.0;
      for (int j = cvm_offset + 1; j < embedding_size; ++j) {
        T w = *(input_values[x] + k * embedding_size + j);
        embedx_weight_score += w * w;
      }
      embedx_weight_score = std::sqrt(embedx_weight_score) + std::abs(embedw);
      if (embedx_weight_score < embed_threshold) {
        continue;
      }
      if (offset < cvm_offset) {  // show & click: plain sum
        val += *(input_values[x] + k * embedding_size + offset);
      } else {  // embedding column: round to nearest 1/quant_ratio step
        val += ((static_cast<int>(
                    *(input_values[x] + k * embedding_size + offset) *
                        quant_ratio +
                    0.5)) /
                static_cast<float>(quant_ratio));
      }
    }
    *(seqpool_output_values[x] + y * embedding_size + offset) = val;
  }
}
// "Opt" variant of the embed quant filter pooling kernel.
// NOTE(fix): the previous implementation cached per-row filter decisions in
// `bool is_filter[end - start]` — a runtime-sized array, which device code
// does not support; `is_filter[end - start] = {false}` wrote one element past
// the array and left the rest uninitialized; and because local memory is
// per-thread, only threads with offset == 0 ever filled it while every other
// thread read garbage.  Each thread now evaluates the filter predicate for
// its own scan, which is the same decision EmbedQuantFilter makes: a row is
// kept only if its weighted show/click score reaches `threshold` AND its
// embed weight score reaches `embed_threshold`.
template <typename T>
__global__ void FusedSeqpoolKernelEmbedQuantOptFilter(
    const size_t N, T **input_values, T **seqpool_output_values,
    size_t **lods_values, const int batch_size, const int embedding_size,
    const float pad_value, const int cvm_offset, const float show_coeff,
    const float clk_coeff, const float threshold, const int quant_ratio,
    const float embed_threshold) {
  CUDA_KERNEL_LOOP(i, N) {
    int key = i / embedding_size;
    int offset = i % embedding_size;  // embedx id (column within embedding)
    int x = key / batch_size;         // slot id
    int y = key % batch_size;         // ins id
    auto &start = *(lods_values[x] + y);
    auto &end = *(lods_values[x] + y + 1);

    // Accumulate in double to reduce rounding error over long sequences.
    double val = pad_value;
    for (auto k = start; k < end; ++k) {
      T &show = *(input_values[x] + k * embedding_size);
      T &click = *(input_values[x] + k * embedding_size + 1);
      T &embedw = *(input_values[x] + k * embedding_size + cvm_offset);
      T show_click_score = (show - click) * show_coeff + click * clk_coeff;
      // `j` (not `i`) so the CUDA_KERNEL_LOOP index is not shadowed; `w * w`
      // instead of pow(w, 2) to avoid a double-precision libm call.
      T embedx_weight_score = 0.0;
      for (int j = cvm_offset + 1; j < embedding_size; ++j) {
        T w = *(input_values[x] + k * embedding_size + j);
        embedx_weight_score += w * w;
      }
      embedx_weight_score = std::sqrt(embedx_weight_score) + std::abs(embedw);
      if (show_click_score < threshold ||
          embedx_weight_score < embed_threshold) {
        continue;  // row filtered out
      }
      if (offset < cvm_offset) {  // show & click: plain sum
        val += *(input_values[x] + k * embedding_size + offset);
      } else {  // embedding column: round to nearest 1/quant_ratio step
        val += ((static_cast<int>(
                    *(input_values[x] + k * embedding_size + offset) *
                        quant_ratio +
                    0.5)) /
                static_cast<float>(quant_ratio));
      }
    }
    *(seqpool_output_values[x] + y * embedding_size + offset) = val;
  }
}
// join need show click input
template <typename T>
__global__ void FusedCVMKernelWithCVM(const size_t N, T **output_values,
Expand Down Expand Up @@ -190,8 +288,8 @@ void FusedSeqpoolCVM(const paddle::platform::Place &place,
std::vector<const size_t *> lods, const int batch_size,
const int slot_num, const int embedding_size,
const float padding_value, const bool use_cvm,
const int cvm_offset, float need_filter, float show_coeff,
float clk_coeff, float threshold, const int quant_ratio,
const int cvm_offset, float need_filter, const bool embed_threshold_filter, float show_coeff,
float clk_coeff, float threshold, float embed_threshold, const int quant_ratio,
const bool clk_filter) {
auto stream = dynamic_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(
Expand Down Expand Up @@ -224,7 +322,13 @@ void FusedSeqpoolCVM(const paddle::platform::Place &place,

size_t N = static_cast<size_t>(batch_size * slot_num * embedding_size);
// first sum pool
if (need_filter) { // quant need filter
if (need_filter && embed_threshold_filter) { // embed quant filter
FusedSeqpoolKernelEmbedQuantFilter<<<GET_BLOCK(N), PADDLE_CUDA_NUM_THREADS, 0,
stream>>>(
N, gpu_input_values, gpu_seqpool_output_values, lods_values, batch_size,
embedding_size, padding_value, cvm_offset, show_coeff, clk_coeff,
threshold, quant_ratio, embed_threshold);
} else if (need_filter) { // quant need filter
FusedSeqpoolKernelQuantFilter<<<GET_BLOCK(N), PADDLE_CUDA_NUM_THREADS, 0,
stream>>>(
N, gpu_input_values, gpu_seqpool_output_values, lods_values, batch_size,
Expand Down Expand Up @@ -414,9 +518,11 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
auto padding_value = ctx.Attr<float>("pad_value");
auto use_cvm = ctx.Attr<bool>("use_cvm");
bool need_filter = ctx.Attr<bool>("need_filter");
bool embed_threshold_filter = ctx.Attr<bool>("embed_threshold_filter");
float show_coeff = ctx.Attr<float>("show_coeff");
float clk_coeff = ctx.Attr<float>("clk_coeff");
float threshold = ctx.Attr<float>("threshold");
float embed_threshold = ctx.Attr<float>("embed_threshold");
const int cvm_offset = ctx.Attr<int>("cvm_offset");
const int quant_ratio = ctx.Attr<int>("quant_ratio");
bool clk_filter = ctx.Attr<bool>("clk_filter");
Expand Down Expand Up @@ -458,8 +564,8 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
FusedSeqpoolCVM(ctx.GetPlace(), input_data, output_data,
seqpool_output_data, lods_data, batch_size, slot_size,
embedding_size, padding_value, use_cvm, cvm_offset,
need_filter, show_coeff, clk_coeff, threshold, quant_ratio,
clk_filter);
need_filter, embed_threshold_filter, show_coeff, clk_coeff,
threshold, embed_threshold, quant_ratio, clk_filter);
}
};

Expand Down
4 changes: 4 additions & 0 deletions python/paddle/fluid/contrib/layers/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1549,9 +1549,11 @@ def fused_seqpool_cvm(input,
pad_value=0.0,
use_cvm=True,
need_filter=False,
embed_threshold_filter=False,
show_coeff=0.2,
clk_coeff=1.0,
threshold=0.96,
embed_threshold=0,
cvm_offset=2,
quant_ratio=0,
clk_filter=False):
Expand Down Expand Up @@ -1603,9 +1605,11 @@ def fused_seqpool_cvm(input,
"use_cvm": use_cvm,
"cvm_offset": cvm_offset,
"need_filter": need_filter,
"embed_threshold_filter": embed_threshold_filter,
"show_coeff": show_coeff,
"clk_coeff": clk_coeff,
"threshold": threshold,
"embed_threshold": embed_threshold,
"quant_ratio": quant_ratio,
"clk_filter": clk_filter
})
Expand Down

0 comments on commit 4da95e5

Please sign in to comment.