diff --git a/paddle/fluid/framework/details/all_reduce_deps_pass.cc b/paddle/fluid/framework/details/all_reduce_deps_pass.cc
index 878b950858a71..c44793cd11d22 100644
--- a/paddle/fluid/framework/details/all_reduce_deps_pass.cc
+++ b/paddle/fluid/framework/details/all_reduce_deps_pass.cc
@@ -13,125 +13,186 @@
 // limitations under the License.
 
 #include
-#include
+#include
 #include
 #include
 #include
+#include
 #include
 
-#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
 #include "paddle/fluid/framework/details/all_reduce_op_handle.h"
+#include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/details/op_graph_view.h"
-#include "paddle/fluid/framework/details/var_handle.h"
+#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 
 namespace paddle {
 namespace framework {
 namespace details {
 
-VarHandle* GetValidInput(const OpHandleBase* a) {
-  for (auto p : a->Inputs()) {
-    VarHandle* b = dynamic_cast<VarHandle*>(p);
-    if (b) {
-      return b;
+class AllReduceDepsPass : public ir::Pass {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override {
+    std::vector<AllReduceOpHandle*> all_reduce_op_handles =
+        GetSortedAllReduceOps(*graph);
+
+    for (size_t i = 1; i < all_reduce_op_handles.size(); ++i) {
+      auto* dep_var = new DummyVarHandle(graph->CreateControlDepVar());
+      graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
+      all_reduce_op_handles[i - 1]->AddOutput(dep_var);
+      all_reduce_op_handles[i]->AddInput(dep_var);
     }
-  }
-  return nullptr;
-}
-
-void AllReduceDepsPass::ApplyImpl(ir::Graph* graph) const {
-  auto graph_ops = ir::FilterByNodeWrapper<OpHandleBase>(*graph);
-
-  // get vars order
-  int order = 0;
-  std::unordered_map<std::string, int> vars;
-  // TODO(gongwb): use graph topology sort to find the order of operators.
-  //               Note that must assert topology sort is stable
-  auto& ops = graph->Get<const std::vector<OpDesc*>>(kStaleProgramOpDescs);
-  for (auto* op_desc : ops) {
-    try {
-      bool is_bk_op =
-          static_cast<bool>(boost::get<int>(op_desc->GetAttr(
-                                OpProtoAndCheckerMaker::OpRoleAttrName())) &
-                            static_cast<int>(OpRole::kBackward));
-      if (!is_bk_op) continue;
-
-      auto backward_vars =
-          boost::get<std::vector<std::string>>(op_desc->GetNullableAttr(
-              OpProtoAndCheckerMaker::OpRoleVarAttrName()));
-      PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);
-
-      auto outputs = op_desc->Outputs();
-      for (auto& o_it : outputs) {
-        for (auto& v : o_it.second) {  // values
-          vars[v] = order;
-          VLOG(10) << "in all_reduce_deps_pass:" << v;
-        }
-      }
-      order++;
-    } catch (boost::bad_get e) {
+    if (VLOG_IS_ON(10)) {
+      DebugString(*graph, all_reduce_op_handles);
     }
   }
 
-  std::vector<OpHandleBase*> dist_ops;
-  // get allreduce ops.
-  for (auto& op : graph_ops) {
-    // FIXME(gongwb):add broad cast.
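The rewritten ApplyImpl above serializes the sorted allreduce ops by threading a dummy control-dependency variable between each consecutive pair, so a scheduler that waits on op inputs can only run them in that fixed order. A minimal standalone sketch of the chaining idea (Op, Var, and ChainWithControlDeps are hypothetical illustration-only names, not the Paddle handle types):

    #include <cstddef>
    #include <vector>

    struct Var {};  // carries no data; it only encodes an execution-order edge

    struct Op {
      std::vector<Var*> inputs, outputs;
      void AddInput(Var* v) { inputs.push_back(v); }
      void AddOutput(Var* v) { outputs.push_back(v); }
    };

    // Chain ops[0] -> ops[1] -> ... so any scheduler that waits for all
    // inputs of an op must execute the ops in exactly this order.
    void ChainWithControlDeps(const std::vector<Op*>& ops) {
      for (size_t i = 1; i < ops.size(); ++i) {
        Var* dep = new Var();        // stands in for DummyVarHandle
        ops[i - 1]->AddOutput(dep);  // becomes ready when the previous op finishes
        ops[i]->AddInput(dep);       // the next op must wait for it
      }
    }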
-    if (op->Name() == "all_reduce" || op->Name() == "reduce") {
-      dist_ops.push_back(op);
+  std::vector<AllReduceOpHandle*> GetSortedAllReduceOps(
+      const ir::Graph& graph) const {
+    std::vector<AllReduceOpHandle*> all_reduce_op_handles;
+    std::unordered_map<OpHandleBase*, size_t> pending_ops;
+    std::unordered_set<OpHandleBase*> ready_ops;
+    std::unordered_set<OpHandleBase*> next_ready_ops;
+
+    auto op_handles = ir::FilterByNodeWrapper<OpHandleBase>(graph);
+    size_t num_of_ops = op_handles.size();
+    for (OpHandleBase* op : op_handles) {
+      size_t not_ready_vars = op->NotReadyInputSize();
+      if (not_ready_vars) {
+        pending_ops.insert({op, not_ready_vars});
+      } else {
+        ready_ops.insert(op);
+      }
     }
-  }
-
-  VLOG(10) << "dist_ops size:" << dist_ops.size()
-           << ", outputs size:" << vars.size() << ", ops size:" << ops.size();
-
-  std::sort(dist_ops.begin(), dist_ops.end(), [&](OpHandleBase* op1,
-                                                  OpHandleBase* op2) {
-    VarHandle* i0 = dynamic_cast<VarHandle*>(GetValidInput(op1));
-    VarHandle* i1 = dynamic_cast<VarHandle*>(GetValidInput(op2));
-
-    PADDLE_ENFORCE(i0 != nullptr && i1 != nullptr, "%s convert to %s error",
-                   op1->DebugString(), op2->DebugString());
-    auto l_it = vars.find(i0->name());
-    auto r_it = vars.find(i1->name());
-
-    PADDLE_ENFORCE(l_it != vars.end() && r_it != vars.end(),
-                   "can't find var's name %s and %s in opdesc", i0->name(),
-                   i1->name());
-
-    if (l_it->second < r_it->second) return true;
+    GetSortedAllReduceOps(ready_ops, &all_reduce_op_handles);
+
+    size_t has_run_ops = ready_ops.size();
+    while (has_run_ops != num_of_ops) {
+      for (auto* op : ready_ops) {
+        for (auto& ready_var : op->Outputs()) {
+          for (auto* pend_op : ready_var->PendingOps()) {
+            auto& deps = --pending_ops[pend_op];
+            if (deps == 0) {
+              next_ready_ops.insert(pend_op);
+            }
+          }
+        }
+      }
 
-    if (l_it->second == r_it->second) {
-      return i0->name() < i1->name();
+      PADDLE_ENFORCE_NE(next_ready_ops.size(), 0, "There may be a cycle.");
+      ready_ops.clear();
+      std::swap(ready_ops, next_ready_ops);
+      GetSortedAllReduceOps(ready_ops, &all_reduce_op_handles);
+      has_run_ops += ready_ops.size();
     }
+    return all_reduce_op_handles;
+  }
 
-    return false;
-  });
-
-  // add dependency.
-  auto& sorted_ops = dist_ops;
-  for (size_t i = 1; i < sorted_ops.size(); ++i) {
-    auto* dep_var = new DummyVarHandle(graph->CreateControlDepVar());
-
-    auto* pre_op = sorted_ops[i - 1];
-    auto* op = sorted_ops[i];
-
-    pre_op->AddOutput(dep_var);
-    op->AddInput(dep_var);
-    graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
+  void GetSortedAllReduceOps(
+      const std::unordered_set<OpHandleBase*>& ready_ops,
+      std::vector<AllReduceOpHandle*>* all_reduce_op_handles) const {
+    std::vector<AllReduceOpHandle*> current_all_reduce_op_handles;
+    for (auto& op_handle : ready_ops) {
+      auto all_reduce_op_handle = dynamic_cast<AllReduceOpHandle*>(op_handle);
+      if (all_reduce_op_handle) {
+        current_all_reduce_op_handles.emplace_back(all_reduce_op_handle);
+      }
+    }
 
-    VLOG(10) << "add all_reduce sequential dependencies between " << pre_op
-             << " and " << op;
+    // NOTE(zcd): For distributed training, it is important to keep the order
+    // of allReduce ops on each node consistent. Otherwise, a hang may occur.
+    // Sort current_all_reduce_op_handles by the name of their first input,
+    // as the std::sort below does.
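GetSortedAllReduceOps above is a level-by-level (Kahn-style) topological traversal: ops whose inputs are all ready form one level, and finishing a level decrements the pending-input counts of the downstream ops. A self-contained sketch of the same bookkeeping, with generic names instead of the Paddle handle types:

    #include <cstddef>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<Node*> consumers;  // ops fed by this op's outputs
      size_t num_pending_inputs = 0;
    };

    // Returns the nodes level by level; mirrors the ready_ops/pending_ops
    // loop in GetSortedAllReduceOps above.
    std::vector<Node*> TopoOrder(const std::vector<Node*>& nodes) {
      std::unordered_map<Node*, size_t> pending;
      std::unordered_set<Node*> ready;
      for (Node* n : nodes) {
        if (n->num_pending_inputs) {
          pending[n] = n->num_pending_inputs;
        } else {
          ready.insert(n);
        }
      }
      std::vector<Node*> order;
      while (!ready.empty()) {
        std::unordered_set<Node*> next;
        for (Node* n : ready) {
          order.push_back(n);
          for (Node* c : n->consumers) {
            if (--pending[c] == 0) next.insert(c);
          }
        }
        ready.swap(next);
      }
      return order;  // shorter than nodes.size() iff the graph has a cycle
    }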
+    std::sort(current_all_reduce_op_handles.begin(),
+              current_all_reduce_op_handles.end(),
+              [](const AllReduceOpHandle* left,
+                 const AllReduceOpHandle* right) -> bool {
+                auto left_in_vars = DynamicCast<VarHandle>(left->Inputs());
+                auto right_in_vars = DynamicCast<VarHandle>(right->Inputs());
+                PADDLE_ENFORCE_GT(left_in_vars.size(), 0);
+                PADDLE_ENFORCE_EQ(left_in_vars.size(), right_in_vars.size());
+                return left_in_vars[0]->Name() > right_in_vars[0]->Name();
+              });
+
+    all_reduce_op_handles->insert(all_reduce_op_handles->end(),
+                                  current_all_reduce_op_handles.begin(),
+                                  current_all_reduce_op_handles.end());
+  }
 
-    VLOG(10) << "pre_op:" << pre_op->DebugString()
-             << ", op:" << op->DebugString();
+  void DebugString(
+      const ir::Graph& graph,
+      const std::vector<AllReduceOpHandle*>& all_reduce_op_handles) const {
+    // get vars order
+    std::map<int, std::vector<std::string>> vars =
+        GetSortedGradientsFromStaleProgram(graph);
+    std::stringstream out;
+    size_t grads_of_stale_program = 0;
+    out << "Get Order From kStaleProgramOpDescs: ";
+    for (auto& var : vars) {
+      out << "Order " << var.first << " [";
+      for (auto& var_name : var.second) {
+        out << var_name << ", ";
+        ++grads_of_stale_program;
+      }
+      out << "], ";
+    }
+    VLOG(10) << out.str();
+
+    std::stringstream out2;
+    out2 << "Get Order From Topological order: ";
+    for (auto& op : all_reduce_op_handles) {
+      bool find_valid_input = false;
+      for (auto& in_var : op->Inputs()) {
+        if (dynamic_cast<VarHandle*>(in_var)) {
+          out2 << in_var->Name() << ", ";
+          find_valid_input = true;
+          break;
+        }
+      }
+      PADDLE_ENFORCE(find_valid_input, "Cannot find a valid input.");
+    }
+    VLOG(10) << out2.str();
+    if (grads_of_stale_program != all_reduce_op_handles.size()) {
+      VLOG(10) << "The number of gradients in the stale program and in the "
+                  "graph is not equal.";
+    }
   }
-}
 
+  std::map<int, std::vector<std::string>> GetSortedGradientsFromStaleProgram(
+      const ir::Graph& graph) const {
+    std::map<int, std::vector<std::string>> vars;
+    auto ops = graph.Get<const std::vector<OpDesc*>>(kStaleProgramOpDescs);
+    int order = 0;
+    for (auto* op_desc : ops) {
+      try {
+        bool is_bk_op =
+            static_cast<bool>(boost::get<int>(op_desc->GetAttr(
+                                  OpProtoAndCheckerMaker::OpRoleAttrName())) &
+                              static_cast<int>(OpRole::kBackward));
+        if (!is_bk_op) continue;
+
+        auto backward_vars =
+            boost::get<std::vector<std::string>>(op_desc->GetNullableAttr(
+                OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+        if (backward_vars.empty()) continue;
+
+        PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);
+        for (size_t i = 1; i < backward_vars.size(); i += 2) {
+          vars[order].emplace_back(backward_vars[i]);
+          VLOG(1) << "get parameter and gradient: " << backward_vars[i - 1]
+                  << ", " << backward_vars[i];
+        }
+        order++;
+      } catch (boost::bad_get e) {
+      }
+    }
+    return vars;
+  }
+};
 
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/details/all_reduce_deps_pass.h b/paddle/fluid/framework/details/all_reduce_deps_pass.h
deleted file mode 100644
index 4ed3736587aa3..0000000000000
--- a/paddle/fluid/framework/details/all_reduce_deps_pass.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/pass.h"
-
-namespace paddle {
-namespace framework {
-namespace details {
-
-// TODO(gongwb): overlap allreduce with backward computation.
-class AllReduceDepsPass : public ir::Pass {
- protected:
-  void ApplyImpl(ir::Graph* graph) const override;
-};
-
-}  // namespace details
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
index 6e477cd297756..ed75b48090b27 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -28,7 +28,7 @@
 // asynchronous nccl allreduce or synchronous issue:
 // https://github.com/PaddlePaddle/Paddle/issues/15049
 DEFINE_bool(
-    sync_nccl_allreduce, false,
+    sync_nccl_allreduce, true,
     "If set true, will call `cudaStreamSynchronize(nccl_stream)`"
    "after allreduce, this mode can get better performance in some scenarios.");
 
diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index 027de4cda4101..f8bf43bcb4822 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -163,14 +163,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
                  "graph_printer", new details::GraphvizSSAGraphPrinter);
     }
 
-    // Verify that the graph is correct for multi-device executor.
-    AppendPass("multi_devices_check_pass");
-
-    if (VLOG_IS_ON(2)) {
-      AppendPass("all_reduce_deps_pass");
-    }
-
-    if (SeqOnlyAllReduceOps(strategy_)) {
+    // Experiments show that the program runs faster if the
+    // all_reduce_deps_pass is appended here.
+    if (!strategy_.enable_parallel_graph_ &&
+        (SeqOnlyAllReduceOps(strategy_) ||
+         strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce)) {
       VLOG(10) << "Add all_reduce_deps_pass";
       AppendPass("all_reduce_deps_pass");
     }
@@ -179,6 +176,9 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
       VLOG(10) << "Add modify_op_lock_and_record_event_pass";
       AppendPass("modify_op_lock_and_record_event_pass");
     }
+
+    // Verify that the graph is correct for multi-device executor.
+    AppendPass("multi_devices_check_pass");
   }
 
   // Convert graph to run on multi-devices.
diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc
index 413b14961631b..69cd84ebf2d67 100644
--- a/paddle/fluid/framework/details/op_handle_base.cc
+++ b/paddle/fluid/framework/details/op_handle_base.cc
@@ -68,7 +68,7 @@ void OpHandleBase::Run(bool use_cuda) {
       if (out_var_handle) {
         PADDLE_ENFORCE(
             platform::is_same_place(place, out_var_handle->place()),
-            "The place of input(%s) is not consistent with the "
+            "The place of output(%s) is not consistent with the "
             "place of current op(%s).",
             out_var_handle->Name(), Name());
         out_var_handle->SetGenerateEvent(events_.at(dev_id));
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 353db435213c7..e6f5b15af8cd4 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -617,6 +617,25 @@ void OpDesc::Flush() {
 
 static std::once_flag init_infer_shape_funcs;
 
+/**
+ * NOTE(paddle-dev): Very tricky code here. Maybe we should find a
+ * better way to register the compile-time infershape method gently.
+ * + * Normally, we can register a class derived from InferShapeBase, so that + * we can set the field of `infer_shape_` inside OpInfo when registering op. + * + * However, there is another way we can set the field of `infer_shape_` inside + * OpInfo. Usually, we overload InferShape method of OperatorWithKernel. After + * running the following method InitInferShapeFuncs, `infer_shape_` would be set + * to be the InferShape method of OperatorWithKernel. That is to say, we borrow + * the run-time InferShape method of OperatorWithKernel to be the compile-time + * InferShape method. + * + * However, during compiling time, we may not know inputs, outputs and attrs of + * run-time OperatorWithKernel. So the following code creates a fake + * OperatorWithKernel object. That is why the field info_ of OperatorBase + * would be null. + */ static void InitInferShapeFuncs() { std::call_once(init_infer_shape_funcs, [] { auto &map = OpInfoMap::Instance(); @@ -628,11 +647,16 @@ static void InitInferShapeFuncs() { PADDLE_ENFORCE(it != info_map.end(), "%s has not been registered", op_type); auto &op_info = it->second; - auto op = static_cast(op_info.Creator()( - "", VariableNameMap{}, VariableNameMap{}, AttributeMap{})); if (op_info.infer_shape_) { // infer_shape has been registered. continue; } + + auto op = dynamic_cast(op_info.Creator()( + "", VariableNameMap{}, VariableNameMap{}, AttributeMap{})); + + PADDLE_ENFORCE_NOT_NULL( + op, "InferShapeBase is not registered to Operator %s", op_type); + op_info.infer_shape_ = [op](InferShapeContext *ctx) { op->InferShape(ctx); }; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index a2a8083da955c..4245caf1689c7 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -19,11 +19,6 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/framework/ir/graph_helper.h" - -#include "paddle/fluid/framework/ir/graph.h" - -#include "paddle/fluid/framework/details/all_reduce_deps_pass.h" #include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -31,6 +26,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/details/reference_count_pass_helper.h" #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/profiler.h" #ifdef WITH_GPERFTOOLS diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index e5036d940197e..0109b4a4fa761 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -142,7 +142,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { void AnalysisConfig::EnableMKLDNN() { #ifdef PADDLE_WITH_MKLDNN - pass_builder()->EnableMKLDNN(); use_mkldnn_ = true; #else LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; @@ -235,16 +234,13 @@ void AnalysisConfig::Update() { } if (use_mkldnn_) { +#ifdef PADDLE_WITH_MKLDNN if (!enable_ir_optim_) { LOG(ERROR) << "EnableMKLDNN() only works when IR optimization is enabled."; + } else { + pass_builder()->EnableMKLDNN(); } -#ifdef PADDLE_WITH_MKLDNN - pass_builder()->EnableMKLDNN(); - use_mkldnn_ = true; -#else - LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; - use_mkldnn_ = false; #endif } @@ -256,9 +252,6 @@ void AnalysisConfig::Update() { } #ifdef PADDLE_WITH_MKLDNN pass_builder()->EnableMkldnnQuantizer(); -#else - LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnQuantizer"; - use_mkldnn_quantizer_ = false; #endif } diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index 258a79fa4e884..c89dd41e0a628 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -27,6 +27,7 @@ #include #include #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/printf.h" @@ -266,17 +267,17 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) { } static void PrintTime(int batch_size, int repeat, int num_threads, int tid, - double latency, int epoch = 1) { - LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat - << ", threads: " << num_threads << ", thread id: " << tid - << ", latency: " << latency << "ms, fps: " << 1 / (latency / 1000.f) + double batch_latency, int epoch = 1) { + PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size."); + double sample_latency = batch_latency / batch_size; + LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid << " ======"; - if (epoch > 1) { - int samples = batch_size * epoch; - LOG(INFO) << "====== sample number: " << samples - << ", average latency of each sample: " << latency / samples - << "ms ======"; - } + LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch + << ", repetitions: " << repeat << " ======"; + LOG(INFO) << "====== batch latency: " << batch_latency + << "ms, number of samples: " << batch_size * epoch + << ", sample latency: " << sample_latency + << "ms, fps: " << 1000.f / sample_latency << " ======"; } static bool IsFileExists(const std::string &path) { diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 1d1d39e44096b..e765c078aa838 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ 
-64,10 +64,12 @@ void PaddlePassBuilder::DeletePass(size_t idx) { passes_.erase(std::begin(passes_) + idx); } -void GpuPassStrategy::EnableMKLDNN() { - LOG(ERROR) << "GPU not support MKLDNN yet"; +void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { + analysis_passes_.push_back(pass); } +void PaddlePassBuilder::ClearPasses() { passes_.clear(); } + // The following passes works for Anakin sub-graph engine. const std::vector kAnakinSubgraphPasses({ "infer_clean_graph_pass", // @@ -102,12 +104,12 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { use_gpu_ = true; } -void GpuPassStrategy::EnableMkldnnQuantizer() { - LOG(ERROR) << "GPU not support MKL-DNN quantization"; +void GpuPassStrategy::EnableMKLDNN() { + LOG(ERROR) << "GPU not support MKLDNN yet"; } -void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { - analysis_passes_.push_back(pass); +void GpuPassStrategy::EnableMkldnnQuantizer() { + LOG(ERROR) << "GPU not support MKL-DNN quantization"; } CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { @@ -130,10 +132,44 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { "conv_bn_fuse_pass", // "conv_eltwiseadd_bn_fuse_pass", // "is_test_pass", // - "identity_scale_op_clean_pass", // "runtime_context_cache_pass", // }); + use_gpu_ = false; } -void PaddlePassBuilder::ClearPasses() { passes_.clear(); } + +void CpuPassStrategy::EnableMKLDNN() { +// TODO(Superjomn) Consider the way to mix CPU with GPU. +#ifdef PADDLE_WITH_MKLDNN + if (!use_mkldnn_) { + passes_.insert(passes_.begin(), "mkldnn_placement_pass"); + + for (auto &pass : std::vector( + {"depthwise_conv_mkldnn_pass", // + "conv_bn_fuse_pass", // Execute BN passes again to + "conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order + "conv_bias_mkldnn_fuse_pass", // + "conv3d_bias_mkldnn_fuse_pass", // + "conv_elementwise_add_mkldnn_fuse_pass", + "conv_relu_mkldnn_fuse_pass"})) { + passes_.push_back(pass); + } + } + use_mkldnn_ = true; +#else + use_mkldnn_ = false; +#endif +} + +void CpuPassStrategy::EnableMkldnnQuantizer() { +#ifdef PADDLE_WITH_MKLDNN + if (!use_mkldnn_quantizer_) { + passes_.push_back("cpu_quantize_placement_pass"); + } + use_mkldnn_quantizer_ = true; +#else + use_mkldnn_quantizer_ = false; +#endif +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 48da8c156f426..09ef195d5e66a 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -109,43 +109,16 @@ class CpuPassStrategy : public PassStrategy { CpuPassStrategy(); explicit CpuPassStrategy(const CpuPassStrategy &other) - : PassStrategy(other.AllPasses()) {} + : PassStrategy(other.AllPasses()) { + use_gpu_ = other.use_gpu_; + use_mkldnn_ = other.use_mkldnn_; + use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_; + } virtual ~CpuPassStrategy() = default; - void EnableMKLDNN() override { -// TODO(Superjomn) Consider the way to mix CPU with GPU. 
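Both MKLDNN toggles now live in the .cc file; callers are unaffected and still go through AnalysisConfig, which routes to these CpuPassStrategy methods from Update() (as the analysis_config.cc hunk above shows). A usage sketch (the model path is a placeholder):

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    int main() {
      paddle::AnalysisConfig cfg;
      cfg.SetModel("/path/to/model");  // placeholder path
      cfg.EnableMKLDNN();              // adds mkldnn_placement_pass plus the conv fuse passes
      cfg.EnableMkldnnQuantizer();     // adds cpu_quantize_placement_pass
      return 0;
    }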
-#ifdef PADDLE_WITH_MKLDNN - if (!use_mkldnn_) { - passes_.insert(passes_.begin(), "mkldnn_placement_pass"); - - for (auto &pass : std::vector( - {"depthwise_conv_mkldnn_pass", // - "conv_bn_fuse_pass", // Execute BN passes again to - "conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order - "conv_bias_mkldnn_fuse_pass", // - "conv3d_bias_mkldnn_fuse_pass", // - "conv_relu_mkldnn_fuse_pass", // - "conv_elementwise_add_mkldnn_fuse_pass"})) { - passes_.push_back(pass); - } - } - use_mkldnn_ = true; -#else - use_mkldnn_ = false; -#endif - } - - void EnableMkldnnQuantizer() override { -#ifdef PADDLE_WITH_MKLDNN - if (!use_mkldnn_quantizer_) { - passes_.push_back("cpu_quantize_placement_pass"); - } - use_mkldnn_quantizer_ = true; -#else - use_mkldnn_quantizer_ = false; -#endif - } + void EnableMKLDNN() override; + void EnableMkldnnQuantizer() override; protected: bool use_mkldnn_quantizer_{false}; diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 647913cc80727..8ecb0310c9775 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -26,7 +26,11 @@ endfunction() function(inference_analysis_api_int8_test target model_dir data_dir filename) inference_analysis_test(${target} SRCS ${filename} EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark - ARGS --infer_model=${model_dir}/model --infer_data=${data_dir}/data.bin --batch_size=100) + ARGS --infer_model=${model_dir}/model + --infer_data=${data_dir}/data.bin + --warmup_batch_size=100 + --batch_size=50 + --iterations=2) endfunction() function(inference_analysis_api_test_with_fake_data target install_dir filename model_name) @@ -146,22 +150,22 @@ inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_con # int8 image classification tests if(WITH_MKLDNN) - set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8") + set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2") if (NOT EXISTS ${INT8_DATA_DIR}) - inference_download_and_uncompress(${INT8_DATA_DIR} ${INFERENCE_URL}"/int8" "imagenet_val_100.tar.gz") + inference_download_and_uncompress(${INT8_DATA_DIR} "${INFERENCE_URL}/int8" "imagenet_val_100_tail.tar.gz") endif() #resnet50 int8 set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50") if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR}) - inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} ${INFERENCE_URL}"/int8" "resnet50_int8_model.tar.gz" ) + inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "${INFERENCE_URL}/int8" "resnet50_int8_model.tar.gz" ) endif() inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL) #mobilenet int8 set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet") if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR}) - inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} ${INFERENCE_URL}"/int8" "mobilenetv1_int8_model.tar.gz" ) + inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "${INFERENCE_URL}/int8" "mobilenetv1_int8_model.tar.gz" ) endif() inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL) endif() diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc index e73358d8827a4..9b2e74ec16eb3 100644 --- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc +++ 
b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc @@ -154,7 +154,7 @@ void profile(bool use_mkldnn = false) { config.EnableMKLDNN(); } - std::vector outputs; + std::vector> outputs; std::vector> inputs; LoadInputData(&inputs); TestPrediction(reinterpret_cast(&config), diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index 735e4fb563788..e10d239a5d1b3 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -197,7 +197,7 @@ void profile(bool use_mkldnn = false) { cfg.SetMKLDNNOp(op_list); } - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -206,9 +206,11 @@ void profile(bool use_mkldnn = false) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { PADDLE_ENFORCE_GT(outputs.size(), 0); - size_t size = GetSize(outputs[0]); + auto output = outputs.back(); + PADDLE_ENFORCE_GT(output.size(), 0); + size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0); - float *result = static_cast(outputs[0].data.data()); + float *result = static_cast(output[0].data.data()); for (size_t i = 0; i < size; i++) { EXPECT_NEAR(result[i], result_data[i], 1e-3); } diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc index 5a4f9a31a164a..ece094717b807 100644 --- a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc @@ -17,8 +17,6 @@ limitations under the License. */ #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" -DEFINE_int32(iterations, 0, "Number of iterations"); - namespace paddle { namespace inference { namespace analysis { @@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) { cfg->SwitchIrOptim(); cfg->SwitchSpecifyInputNames(false); cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads); - cfg->EnableMKLDNN(); + cfg->pass_builder()->SetPasses( + {"infer_clean_graph_pass", "mkldnn_placement_pass", + "depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass", + "conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass", + "conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass", + "fc_fuse_pass", "is_test_pass"}); } template @@ -40,8 +43,8 @@ class TensorReader { TensorReader(std::ifstream &file, size_t beginning_offset, std::vector shape, std::string name) : file_(file), position(beginning_offset), shape_(shape), name_(name) { - numel = - std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); + numel = std::accumulate(shape_.begin(), shape_.end(), size_t{1}, + std::multiplies()); } PaddleTensor NextBatch() { @@ -71,10 +74,14 @@ class TensorReader { }; std::shared_ptr> GetWarmupData( - const std::vector> &test_data, int num_images) { + const std::vector> &test_data, + int num_images = FLAGS_warmup_batch_size) { int test_data_batch_size = test_data[0][0].shape[0]; - CHECK_LE(static_cast(num_images), - test_data.size() * test_data_batch_size); + auto iterations_max = test_data.size(); + PADDLE_ENFORCE( + static_cast(num_images) <= iterations_max * test_data_batch_size, + "The requested quantization warmup data size " + + std::to_string(num_images) + " is bigger than all test data size."); PaddleTensor images; images.name = "input"; @@ -120,20 +127,17 @@ void 
SetInput(std::vector> *inputs, std::vector image_batch_shape{batch_size, 3, 224, 224}; std::vector label_batch_shape{batch_size, 1}; + auto images_offset_in_file = static_cast(file.tellg()); auto labels_offset_in_file = - static_cast(file.tellg()) + - sizeof(float) * total_images * - std::accumulate(image_batch_shape.begin() + 1, - image_batch_shape.end(), 1, std::multiplies()); + images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224; - TensorReader image_reader(file, 0, image_batch_shape, "input"); + TensorReader image_reader(file, images_offset_in_file, + image_batch_shape, "input"); TensorReader label_reader(file, labels_offset_in_file, label_batch_shape, "label"); - auto iterations = total_images / batch_size; - if (FLAGS_iterations > 0 && FLAGS_iterations < iterations) - iterations = FLAGS_iterations; - for (auto i = 0; i < iterations; i++) { + auto iterations_max = total_images / batch_size; + for (auto i = 0; i < iterations_max; i++) { auto images = image_reader.NextBatch(); auto labels = label_reader.NextBatch(); inputs->emplace_back( @@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) { AnalysisConfig q_cfg; SetConfig(&q_cfg); + // read data from file and prepare batches with test data std::vector> input_slots_all; - SetInput(&input_slots_all, 100); + SetInput(&input_slots_all); + // prepare warmup batch from input data read earlier + // warmup batch size can be different than batch size std::shared_ptr> warmup_data = - GetWarmupData(input_slots_all, 100); + GetWarmupData(input_slots_all); + // configure quantizer q_cfg.EnableMkldnnQuantizer(); q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data); - q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100); + q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size); - CompareQuantizedAndAnalysis( - reinterpret_cast(&cfg), - reinterpret_cast(&q_cfg), - input_slots_all); + CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all); } } // namespace analysis diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc index 347672eaae314..142905dcd8d99 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc @@ -124,7 +124,7 @@ void SetInput(std::vector> *inputs) { TEST(Analyzer_LAC, profile) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -137,11 +137,13 @@ TEST(Analyzer_LAC, profile) { 24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; - PADDLE_ENFORCE_EQ(outputs.size(), 1UL); - size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(outputs.size(), 0); + auto output = outputs.back(); + PADDLE_ENFORCE_EQ(output.size(), 1UL); + size_t size = GetSize(output[0]); size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); PADDLE_ENFORCE_GE(size, batch1_size); - int64_t *pdata = static_cast(outputs[0].data.data()); + int64_t *pdata = static_cast(output[0].data.data()); for (size_t i = 0; i < batch1_size; ++i) { EXPECT_EQ(pdata[i], lac_ref_data[i]); } diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 089f655c180d7..2eb347a44b394 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ 
b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -96,7 +96,7 @@ void SetInput(std::vector> *inputs) { void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; if (use_mkldnn) { cfg.EnableMKLDNN(); @@ -108,8 +108,9 @@ void profile(bool use_mkldnn = false) { input_slots_all, &outputs, FLAGS_num_threads); if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { - PADDLE_ENFORCE_EQ(outputs.size(), 2UL); - for (auto &output : outputs) { + PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_EQ(outputs.back().size(), 2UL); + for (auto &output : outputs.back()) { size_t size = GetSize(output); PADDLE_ENFORCE_GT(size, 0); float *result = static_cast(output.data.data()); diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index a70aa7a6ac411..36e07d5f55600 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -106,7 +106,7 @@ void SetInput(std::vector> *inputs) { void profile(bool memory_load = false) { AnalysisConfig cfg; SetConfig(&cfg, memory_load); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -117,10 +117,12 @@ void profile(bool memory_load = false) { // the first inference result const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, 48, 39, 38, 16, 25}; - PADDLE_ENFORCE_EQ(outputs.size(), 1UL); - size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(outputs.size(), 0); + auto output = outputs.back(); + PADDLE_ENFORCE_EQ(output.size(), 1UL); + size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0); - int64_t *result = static_cast(outputs[0].data.data()); + int64_t *result = static_cast(output[0].data.data()); for (size_t i = 0; i < std::min(11UL, size); i++) { EXPECT_EQ(result[i], chinese_ner_result_data[i]); } diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index 5157bd280d0f3..9443b08063b8f 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -127,7 +127,7 @@ void SetInput(std::vector> *inputs) { TEST(Analyzer_Pyramid_DNN, profile) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -135,10 +135,12 @@ TEST(Analyzer_Pyramid_DNN, profile) { input_slots_all, &outputs, FLAGS_num_threads); if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) { - PADDLE_ENFORCE_EQ(outputs.size(), 1UL); - size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(outputs.size(), 0); + auto output = outputs.back(); + PADDLE_ENFORCE_EQ(output.size(), 1UL); + size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0); - float *result = static_cast(outputs[0].data.data()); + float *result = static_cast(output[0].data.data()); // output is probability, which is in (0, 1). 
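These testers now collect one std::vector<PaddleTensor> per iteration instead of a single flat vector, so outputs.back() is the result of the last batch and outputs.back()[0] its first output tensor. A minimal sketch of reading the new layout (LastOutput is a hypothetical helper, not part of the test code):

    #include <vector>
    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    // outputs[i] holds the output tensors produced by batch/iteration i.
    using PerIterationOutputs = std::vector<std::vector<paddle::PaddleTensor>>;

    // Fetch the first output tensor of the last iteration, mirroring the
    // outputs.back()[0] accesses in the updated testers.
    const paddle::PaddleTensor& LastOutput(const PerIterationOutputs& outputs) {
      return outputs.back().front();
    }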
for (size_t i = 0; i < size; i++) { EXPECT_GT(result[i], 0); diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc index 629981d565f1b..d4330e6cddf88 100644 --- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -40,7 +40,7 @@ void profile(bool use_mkldnn = false) { if (use_mkldnn) { cfg.EnableMKLDNN(); } - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index dcf4b38ce8a92..54fd3a4a4caba 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -229,7 +229,7 @@ TEST(Analyzer_rnn1, profile) { SetConfig(&cfg); cfg.DisableGpu(); cfg.SwitchIrDebug(); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -280,7 +280,7 @@ TEST(Analyzer_rnn1, compare_determine) { TEST(Analyzer_rnn1, multi_thread) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc index 007f9f0b66a7b..9ccbf58cbd2bb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc @@ -126,7 +126,7 @@ void SetInput(std::vector> *inputs) { TEST(Analyzer_rnn2, profile) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -136,9 +136,11 @@ TEST(Analyzer_rnn2, profile) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { // the first inference result PADDLE_ENFORCE_GT(outputs.size(), 0); - size_t size = GetSize(outputs[0]); + auto output = outputs.back(); + PADDLE_ENFORCE_GT(output.size(), 0); + size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0); - float *result = static_cast(outputs[0].data.data()); + float *result = static_cast(output[0].data.data()); for (size_t i = 0; i < size; i++) { EXPECT_NEAR(result[i], result_data[i], 1e-3); } diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index 47c1d7375843e..9f23b9f037bca 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -110,7 +110,7 @@ void SetInput(std::vector> *inputs) { TEST(Analyzer_seq_conv1, profile) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -119,10 +119,12 @@ TEST(Analyzer_seq_conv1, profile) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { // the first inference result - PADDLE_ENFORCE_EQ(outputs.size(), 1UL); - size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(outputs.size(), 0); + auto output = outputs.back(); + PADDLE_ENFORCE_EQ(output.size(), 1UL); + size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0); - float *result = static_cast(outputs[0].data.data()); + float *result = static_cast(output[0].data.data()); // output is probability, which is in (0, 1). 
for (size_t i = 0; i < size; i++) { EXPECT_GT(result[i], 0); diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc index 19fa5528da4d1..d6f7f468a6c83 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc @@ -156,7 +156,7 @@ void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg, use_mkldnn); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); TestPrediction(reinterpret_cast(&cfg), diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc index 2003be8201933..54492dbc238bb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc @@ -70,7 +70,7 @@ TEST(Analyzer_Text_Classification, profile) { AnalysisConfig cfg; SetConfig(&cfg); cfg.SwitchIrDebug(); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -79,8 +79,9 @@ TEST(Analyzer_Text_Classification, profile) { if (FLAGS_num_threads == 1) { // Get output - LOG(INFO) << "get outputs " << outputs.size(); - for (auto &output : outputs) { + PADDLE_ENFORCE_GT(outputs.size(), 0); + LOG(INFO) << "get outputs " << outputs.back().size(); + for (auto &output : outputs.back()) { LOG(INFO) << "output.shape: " << to_string(output.shape); // no lod ? CHECK_EQ(output.lod.size(), 0UL); diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc index a925da312cde3..bd4f1b61973fb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc @@ -186,7 +186,7 @@ void SetInput(std::vector> *inputs) { void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - std::vector outputs; + std::vector> outputs; if (use_mkldnn) { cfg.EnableMKLDNN(); } diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index ca04c1365cbbf..fb47048cd0ccc 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -87,7 +87,7 @@ void profile(bool use_mkldnn = false) { cfg.EnableMKLDNN(); } // cfg.pass_builder()->TurnOnDebug(); - std::vector outputs; + std::vector> outputs; std::vector> input_slots_all; SetInput(&input_slots_all); @@ -100,7 +100,8 @@ void profile(bool use_mkldnn = false) { auto refer = ProcessALine(line); file.close(); - auto &output = outputs.front(); + PADDLE_ENFORCE_GT(outputs.size(), 0); + auto &output = outputs.back().front(); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); CHECK_EQ(numel, refer.data.size()); for (size_t i = 0; i < numel; ++i) { diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 33f1d02548588..9a0dcc722cf00 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -41,7 +41,10 @@ DEFINE_string(model_name, "", "model name"); DEFINE_string(infer_model, "", "model path"); DEFINE_string(infer_data, "", "data file"); DEFINE_string(refer_result, "", "reference result for 
comparison"); -DEFINE_int32(batch_size, 1, "batch size."); +DEFINE_int32(batch_size, 1, "batch size"); +DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup"); +// setting iterations to 0 means processing the whole dataset +DEFINE_int32(iterations, 0, "number of batches to process"); DEFINE_int32(repeat, 1, "Running the inference program repeat times."); DEFINE_bool(test_all_data, false, "Test the all dataset in data file."); DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads."); @@ -239,7 +242,7 @@ void SetFakeImageInput(std::vector> *inputs, } input.shape = shape; input.dtype = PaddleDType::FLOAT32; - size_t len = std::accumulate(shape.begin(), shape.end(), 1, + size_t len = std::accumulate(shape.begin(), shape.end(), size_t{1}, [](int a, int b) { return a * b; }); input.data.Resize(len * sizeof(float)); input.lod.assign({{0, static_cast(FLAGS_batch_size)}}); @@ -286,17 +289,18 @@ void ConvertPaddleTensorToZeroCopyTensor( void PredictionWarmUp(PaddlePredictor *predictor, const std::vector> &inputs, - std::vector *outputs, int num_threads, - int tid) { + std::vector> *outputs, + int num_threads, int tid) { int batch_size = FLAGS_batch_size; LOG(INFO) << "Running thread " << tid << ", warm up run..."; if (FLAGS_zero_copy) { ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[0]); } + outputs->resize(1); Timer warmup_timer; warmup_timer.tic(); if (!FLAGS_zero_copy) { - predictor->Run(inputs[0], outputs, batch_size); + predictor->Run(inputs[0], &(*outputs)[0], batch_size); } else { predictor->ZeroCopyRun(); } @@ -308,11 +312,16 @@ void PredictionWarmUp(PaddlePredictor *predictor, void PredictionRun(PaddlePredictor *predictor, const std::vector> &inputs, - std::vector *outputs, int num_threads, - int tid) { - int batch_size = FLAGS_batch_size; + std::vector> *outputs, + int num_threads, int tid) { int num_times = FLAGS_repeat; - LOG(INFO) << "Thread " << tid << " run " << num_times << " times..."; + int iterations = inputs.size(); // process the whole dataset ... + if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size()) + iterations = + FLAGS_iterations; // ... 
unless the number of iterations is set + outputs->resize(iterations); + LOG(INFO) << "Thread " << tid << ", number of threads " << num_threads + << ", run " << num_times << " times..."; Timer run_timer; double elapsed_time = 0; #ifdef WITH_GPERFTOOLS @@ -320,14 +329,14 @@ void PredictionRun(PaddlePredictor *predictor, #endif if (!FLAGS_zero_copy) { run_timer.tic(); - for (size_t i = 0; i < inputs.size(); i++) { + for (size_t i = 0; i < iterations; i++) { for (int j = 0; j < num_times; j++) { - predictor->Run(inputs[i], outputs, batch_size); + predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size); } } elapsed_time = run_timer.toc(); } else { - for (size_t i = 0; i < inputs.size(); i++) { + for (size_t i = 0; i < iterations; i++) { ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]); run_timer.tic(); for (int j = 0; j < num_times; j++) { @@ -340,13 +349,14 @@ void PredictionRun(PaddlePredictor *predictor, ProfilerStop(); #endif - PrintTime(batch_size, num_times, num_threads, tid, elapsed_time / num_times, - inputs.size()); + auto batch_latency = elapsed_time / (iterations * num_times); + PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency, + iterations); if (FLAGS_record_benchmark) { Benchmark benchmark; benchmark.SetName(FLAGS_model_name); - benchmark.SetBatchSize(batch_size); - benchmark.SetLatency(elapsed_time / num_times); + benchmark.SetBatchSize(FLAGS_batch_size); + benchmark.SetLatency(batch_latency); benchmark.PersistToFile("benchmark_record.txt"); } } @@ -354,16 +364,17 @@ void PredictionRun(PaddlePredictor *predictor, void TestOneThreadPrediction( const PaddlePredictor::Config *config, const std::vector> &inputs, - std::vector *outputs, bool use_analysis = true) { + std::vector> *outputs, bool use_analysis = true) { auto predictor = CreateTestPredictor(config, use_analysis); - PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0); - PredictionRun(predictor.get(), inputs, outputs, 1, 0); + PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, + 0); + PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0); } void TestMultiThreadPrediction( const PaddlePredictor::Config *config, const std::vector> &inputs, - std::vector *outputs, int num_threads, + std::vector> *outputs, int num_threads, bool use_analysis = true) { std::vector threads; std::vector> predictors; @@ -376,7 +387,7 @@ void TestMultiThreadPrediction( threads.emplace_back([&, tid]() { // Each thread should have local inputs and outputs. // The inputs of each thread are all the same. 
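PredictionRun above times iterations * repeat predictor calls and hands PrintTime the average batch latency; PrintTime then derives per-sample latency and fps. The arithmetic as a standalone sketch (the elapsed time is an assumed measurement):

    #include <cstdio>

    int main() {
      double elapsed_time = 1200.0;  // ms over iterations * repeat runs (assumed)
      int iterations = 2, repeat = 3, batch_size = 50;
      double batch_latency = elapsed_time / (iterations * repeat);  // 200 ms
      double sample_latency = batch_latency / batch_size;           // 4 ms
      double fps = 1000.0 / sample_latency;                         // 250 samples/s
      std::printf("batch %.1f ms, sample %.1f ms, fps %.1f\n", batch_latency,
                  sample_latency, fps);
      return 0;
    }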
- std::vector outputs_tid; + std::vector> outputs_tid; auto &predictor = predictors[tid]; #ifdef PADDLE_WITH_MKLDNN if (use_analysis) { @@ -384,8 +395,8 @@ void TestMultiThreadPrediction( ->SetMkldnnThreadID(static_cast(tid) + 1); } #endif - PredictionWarmUp(predictor.get(), inputs, outputs, num_threads, tid); - PredictionRun(predictor.get(), inputs, outputs, num_threads, tid); + PredictionWarmUp(predictor.get(), inputs, &outputs_tid, num_threads, tid); + PredictionRun(predictor.get(), inputs, &outputs_tid, num_threads, tid); }); } for (int i = 0; i < num_threads; ++i) { @@ -395,8 +406,8 @@ void TestMultiThreadPrediction( void TestPrediction(const PaddlePredictor::Config *config, const std::vector> &inputs, - std::vector *outputs, int num_threads, - bool use_analysis = FLAGS_use_analysis) { + std::vector> *outputs, + int num_threads, bool use_analysis = FLAGS_use_analysis) { PrintConfig(config, use_analysis); if (num_threads == 1) { TestOneThreadPrediction(config, inputs, outputs, use_analysis); @@ -406,30 +417,41 @@ void TestPrediction(const PaddlePredictor::Config *config, } } -void CompareTopAccuracy(const std::vector &output_slots1, - const std::vector &output_slots2) { - // first output: avg_cost - if (output_slots1.size() == 0 || output_slots2.size() == 0) +void CompareTopAccuracy( + const std::vector> &output_slots_quant, + const std::vector> &output_slots_ref) { + if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0) throw std::invalid_argument( "CompareTopAccuracy: output_slots vector is empty."); - PADDLE_ENFORCE(output_slots1.size() >= 2UL); - PADDLE_ENFORCE(output_slots2.size() >= 2UL); - // second output: acc_top1 - if (output_slots1[1].lod.size() > 0 || output_slots2[1].lod.size() > 0) - throw std::invalid_argument( - "CompareTopAccuracy: top1 accuracy output has nonempty LoD."); - if (output_slots1[1].dtype != paddle::PaddleDType::FLOAT32 || - output_slots2[1].dtype != paddle::PaddleDType::FLOAT32) - throw std::invalid_argument( - "CompareTopAccuracy: top1 accuracy output is of a wrong type."); - float *top1_quantized = static_cast(output_slots1[1].data.data()); - float *top1_reference = static_cast(output_slots2[1].data.data()); - LOG(INFO) << "top1 INT8 accuracy: " << *top1_quantized; - LOG(INFO) << "top1 FP32 accuracy: " << *top1_reference; + float total_accs1_quant{0}; + float total_accs1_ref{0}; + for (size_t i = 0; i < output_slots_quant.size(); ++i) { + PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL); + PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL); + // second output: acc_top1 + if (output_slots_quant[i][1].lod.size() > 0 || + output_slots_ref[i][1].lod.size() > 0) + throw std::invalid_argument( + "CompareTopAccuracy: top1 accuracy output has nonempty LoD."); + if (output_slots_quant[i][1].dtype != paddle::PaddleDType::FLOAT32 || + output_slots_ref[i][1].dtype != paddle::PaddleDType::FLOAT32) + throw std::invalid_argument( + "CompareTopAccuracy: top1 accuracy output is of a wrong type."); + total_accs1_quant += + *static_cast(output_slots_quant[i][1].data.data()); + total_accs1_ref += + *static_cast(output_slots_ref[i][1].data.data()); + } + float avg_acc1_quant = total_accs1_quant / output_slots_quant.size(); + float avg_acc1_ref = total_accs1_ref / output_slots_ref.size(); + + LOG(INFO) << "Avg top1 INT8 accuracy: " << std::fixed << std::setw(6) + << std::setprecision(4) << avg_acc1_quant; + LOG(INFO) << "Avg top1 FP32 accuracy: " << std::fixed << std::setw(6) + << std::setprecision(4) << avg_acc1_ref; LOG(INFO) << "Accepted accuracy drop 
threshold: " << FLAGS_quantized_accuracy; - CHECK_LE(std::abs(*top1_quantized - *top1_reference), - FLAGS_quantized_accuracy); + CHECK_LE(std::abs(avg_acc1_quant - avg_acc1_ref), FLAGS_quantized_accuracy); } void CompareDeterministic( @@ -455,20 +477,35 @@ void CompareNativeAndAnalysis( const PaddlePredictor::Config *config, const std::vector> &inputs) { PrintConfig(config, true); - std::vector native_outputs, analysis_outputs; + std::vector> native_outputs, analysis_outputs; TestOneThreadPrediction(config, inputs, &native_outputs, false); TestOneThreadPrediction(config, inputs, &analysis_outputs, true); - CompareResult(analysis_outputs, native_outputs); + PADDLE_ENFORCE(native_outputs.size() > 0, "Native output is empty."); + PADDLE_ENFORCE(analysis_outputs.size() > 0, "Analysis output is empty."); + CompareResult(analysis_outputs.back(), native_outputs.back()); } void CompareQuantizedAndAnalysis( - const PaddlePredictor::Config *config, - const PaddlePredictor::Config *qconfig, + const AnalysisConfig *config, const AnalysisConfig *qconfig, const std::vector> &inputs) { - PrintConfig(config, true); - std::vector analysis_outputs, quantized_outputs; - TestOneThreadPrediction(config, inputs, &analysis_outputs, true); - TestOneThreadPrediction(qconfig, inputs, &quantized_outputs, true); + PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size, + "Input data has to be packed batch by batch."); + LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size + << ", warmup batch size " << FLAGS_warmup_batch_size << "."; + + LOG(INFO) << "--- FP32 prediction start ---"; + auto *cfg = reinterpret_cast(config); + PrintConfig(cfg, true); + std::vector> analysis_outputs; + TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true); + + LOG(INFO) << "--- INT8 prediction start ---"; + auto *qcfg = reinterpret_cast(qconfig); + PrintConfig(qcfg, true); + std::vector> quantized_outputs; + TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true); + + LOG(INFO) << "--- comparing outputs --- "; CompareTopAccuracy(quantized_outputs, analysis_outputs); } @@ -578,9 +615,9 @@ static bool CompareTensorData(const framework::LoDTensor &a, const framework::LoDTensor &b) { auto a_shape = framework::vectorize(a.dims()); auto b_shape = framework::vectorize(b.dims()); - size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), 1, + size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), size_t{1}, [](int a, int b) { return a * b; }); - size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), 1, + size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), size_t{1}, [](int a, int b) { return a * b; }); if (a_size != b_size) { LOG(ERROR) << string::Sprintf("tensor data size not match, %d != %d", diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index cb668a4174134..98ce225a0476b 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -74,7 +74,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) { SetFakeImageInput(&inputs_all, model_dir, false, "__model__", ""); } - std::vector outputs; + std::vector> outputs; if (use_analysis || use_tensorrt) { AnalysisConfig config; config.EnableUseGpu(100, 0); diff --git a/paddle/fluid/op_use_default_grad_op_maker.spec b/paddle/fluid/op_use_default_grad_op_maker.spec index 712449f6be87a..f0e3d3e86f24c 100644 --- a/paddle/fluid/op_use_default_grad_op_maker.spec 
diff --git a/paddle/fluid/op_use_default_grad_op_maker.spec b/paddle/fluid/op_use_default_grad_op_maker.spec
index 712449f6be87a..f0e3d3e86f24c 100644
--- a/paddle/fluid/op_use_default_grad_op_maker.spec
+++ b/paddle/fluid/op_use_default_grad_op_maker.spec
@@ -8,9 +8,6 @@ conv_shift
 cos
 cos_sim
 dequantize
-elementwise_div
-elementwise_max
-elementwise_min
 elu
 fc
 flatten
@@ -28,8 +25,6 @@ gelu
 gru
 hard_shrink
 hierarchical_sigmoid
-hinge_loss
-huber_loss
 leaky_relu
 log
 logsigmoid
@@ -57,7 +52,6 @@ requantize
 reshape
 rnn_memory_helper
 round
-row_conv
 sequence_softmax
 sin
 softplus
diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h
index fc15d56891cf7..7e2740f148f1d 100644
--- a/paddle/fluid/operators/batch_size_like.h
+++ b/paddle/fluid/operators/batch_size_like.h
@@ -74,5 +74,8 @@ class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
   virtual void Apply() = 0;
 };
 
+DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(BatchSizeLikeNoNeedBufferVarsInference,
+                                      "Input");
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
index 85612ba47448a..530a54b7ca186 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -13,10 +13,47 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseDivOpMaker : public ElementwiseOpMaker {
+ protected:
+  std::string GetName() const override { return "Div"; }
+  std::string GetEquation() const override { return "Out = X / Y"; }
+};
+
+class ElementwiseDivGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_div_grad");
+    op->SetInput("Y", Input("Y"));
+    op->SetInput("Out", Output("Out"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_div, "Div", "Out = X / Y");
+
+REGISTER_OPERATOR(elementwise_div, ops::ElementwiseOp,
+                  ops::ElementwiseDivOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseDivGradOpDescMaker);
+
+REGISTER_OPERATOR(elementwise_div_grad, ops::ElementwiseOpGrad);
 
 REGISTER_OP_CPU_KERNEL(
     elementwise_div,
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h
index 8a07339077aea..0f0ad86373017 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h
@@ -47,7 +47,7 @@ struct DivGradDX {
 template <typename T>
 struct DivGradDY {
   HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
-    return -dout * x / (y * y);
+    return -dout * out / y;
   }
 };
 
@@ -58,13 +58,15 @@ class ElementwiseDivGradKernel : public ElemwiseGradKernel<T> {
     ElemwiseGradKernel<T>::Compute(ctx);
     using Tensor = framework::Tensor;
 
-    auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
     auto* out = ctx.Input<Tensor>("Out");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
+
+    auto* x = dout;  // Fake x, not used
+
     ElemwiseGradCompute<DeviceContext, T, DivGradDX<T>, DivGradDY<T>>(
         ctx, *x, *y, *out, *dout, axis, dx, dy, DivGradDX<T>(), DivGradDY<T>());
   }
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cc b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
index ea0dcd736e570..b7df9c6f845df 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMaxOpMaker : public ElementwiseOpMaker {
+ protected:
+  std::string GetName() const override { return "Max"; }
+  std::string GetEquation() const override { return "Out = max(X, Y)"; }
+};
+
+class ElementwiseMaxGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_max_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_max, "Max", "Out = max(X, Y)");
+
+REGISTER_OPERATOR(elementwise_max, ops::ElementwiseOp,
+                  ops::ElementwiseMaxOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMaxGradOpDescMaker);
+
+REGISTER_OPERATOR(elementwise_max_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_max,
     ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.h b/paddle/fluid/operators/elementwise/elementwise_max_op.h
index 3ee0c32e0d5d5..abdb1b9671de8 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.h
@@ -63,10 +63,10 @@ class ElementwiseMaxGradKernel : public ElemwiseGradKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    auto* out = dout;  // Fake out, not used
     int axis = ctx.Attr<int>("axis");
     ElemwiseGradCompute<DeviceContext, T, MaxGradDx<T>, MaxGradDy<T>>(
         ctx, *x, *y, *out, *dout, axis, dx, dy, MaxGradDx<T>(), MaxGradDy<T>());
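The grad-kernel rewrites above all serve one goal: the backward pass stops reading buffers it does not mathematically need. For elementwise_div, DivGradDY reuses Out, since out = x / y implies -dout * x / (y * y) == -dout * out / y; for max and min, Out is never read by the gradient functors, so a dout alias is passed instead. A quick numpy check of the div identity (shapes are arbitrary):

    import numpy as np

    rng = np.random.RandomState(0)
    x = rng.rand(4, 5) + 1.0   # offsets keep y away from zero
    y = rng.rand(4, 5) + 1.0
    dout = rng.rand(4, 5)

    out = x / y
    dy_old = -dout * x / (y * y)   # formula removed by the patch
    dy_new = -dout * out / y       # formula added by the patch
    assert np.allclose(dy_old, dy_new)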
*/ #include "paddle/fluid/operators/elementwise/elementwise_min_op.h" +#include +#include #include "paddle/fluid/operators/elementwise/elementwise_op.h" + +namespace paddle { +namespace operators { + +class ElementwiseMinOpMaker : public ElementwiseOpMaker { + protected: + std::string GetName() const override { return "Min"; } + std::string GetEquation() const override { return "Out = min(X, Y)"; } +}; + +class ElementwiseMinGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("elementwise_min_grad"); + op->SetInput("X", Input("X")); + op->SetInput("Y", Input("Y")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetOutput(framework::GradVarName("Y"), InputGrad("Y")); + op->SetAttrMap(Attrs()); + return op; + } +}; + +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; -REGISTER_ELEMWISE_OP(elementwise_min, "Min", "Out = min(X, Y)"); + +REGISTER_OPERATOR(elementwise_min, ops::ElementwiseOp, + ops::ElementwiseMinOpMaker, ops::ElementwiseOpInferVarType, + ops::ElementwiseMinGradOpDescMaker); + +REGISTER_OPERATOR(elementwise_min_grad, ops::ElementwiseOpGrad); + REGISTER_OP_CPU_KERNEL( elementwise_min, ops::ElementwiseMinKernel, diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.h b/paddle/fluid/operators/elementwise/elementwise_min_op.h index d04e372faaa4e..1a49a6013987a 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_min_op.h @@ -62,10 +62,10 @@ class ElementwiseMinGradKernel : public ElemwiseGradKernel { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); - auto* out = ctx.Input("Out"); auto* dout = ctx.Input(framework::GradVarName("Out")); auto* dx = ctx.Output(framework::GradVarName("X")); auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* out = dout; // Fake out, not used int axis = ctx.Attr("axis"); ElemwiseGradCompute, MinGradDy>( ctx, *x, *y, *out, *dout, axis, dx, dy, MinGradDx(), MinGradDy()); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index 6dbb9072495f7..95246b38f530f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -173,12 +173,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { using Tensor = framework::Tensor; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); + auto out_grad_name = framework::GradVarName("Out"); PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + PADDLE_ENFORCE(ctx->HasInput(out_grad_name), "Input(Out@GRAD) should not be null"); - auto x_dims = ctx->GetInputDim("X"); + auto x_dims = ctx->GetInputDim(out_grad_name); auto y_dims = ctx->GetInputDim("Y"); PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(), @@ -187,8 +187,8 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { auto x_grad_name = framework::GradVarName("X"); auto y_grad_name = framework::GradVarName("Y"); if (ctx->HasOutput(x_grad_name)) { - ctx->ShareDim("X", /*->*/ x_grad_name); - ctx->ShareLoD("X", /*->*/ x_grad_name); + 
diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
index 453a1b32a0171..b8921b171cf37 100644
--- a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
+++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
@@ -46,6 +46,7 @@ obtained from the `input` tensor.
 )DOC");
   }
 };
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -53,7 +54,8 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(fill_constant_batch_size_like,
                   ops::FillConstantBatchSizeLikeOp,
                   paddle::framework::EmptyGradOpMaker,
-                  ops::FillConstantBatchSizeLikeOpMaker);
+                  ops::FillConstantBatchSizeLikeOpMaker,
+                  ops::BatchSizeLikeNoNeedBufferVarsInference);
 REGISTER_OP_CPU_KERNEL(
     fill_constant_batch_size_like,
     ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
diff --git a/paddle/fluid/operators/fill_zeros_like_op.cc b/paddle/fluid/operators/fill_zeros_like_op.cc
--- a/paddle/fluid/operators/fill_zeros_like_op.cc
+++ b/paddle/fluid/operators/fill_zeros_like_op.cc
@@ ... @@
+class FillZerosLikeOp2 : public FillZerosLikeOp {
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
+        ctx.GetPlace());
+  }
+};
+
+class FillZerosLikeOp2Maker : public FillZerosLikeOpMaker {
+ protected:
+  void ExtraMake() override {
+    this->AddAttr<int>("dtype",
+                       "(int, default 5(FP32)) "
+                       "Output data type.")
+        .SetDefault(framework::proto::VarType::FP32);
+  }
+};
+
+DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(FillZerosLikeOp2NoNeedBufferVarsInference,
+                                      "X");
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp,
                              ops::FillZerosLikeOpMaker);
+
+REGISTER_OPERATOR(fill_zeros_like2, ops::FillZerosLikeOp2,
+                  ops::FillZerosLikeOp2Maker,
+                  ops::FillZerosLikeOp2NoNeedBufferVarsInference,
+                  paddle::framework::EmptyGradOpMaker);
+
 REGISTER_OP_CPU_KERNEL(
     fill_zeros_like,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int>,
@@ -58,3 +95,11 @@ REGISTER_OP_CPU_KERNEL(
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, double>,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
+
+REGISTER_OP_CPU_KERNEL(
+    fill_zeros_like2,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int64_t>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc
index e80a703c30c03..1831635def79b 100644
--- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc
+++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc
@@ -26,3 +26,13 @@ REGISTER_OP_CUDA_KERNEL(
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>,
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
                              paddle::platform::float16>);
+
+REGISTER_OP_CUDA_KERNEL(
+    fill_zeros_like2,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
+                             paddle::platform::float16>);
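fill_zeros_like2 exists so that backward() can materialize a zero gradient from the shape and an explicit dtype attribute alone, without keeping the forward variable's data buffer alive, which is exactly what the no-need-buffer declaration on "X" expresses. A sketch of the op's semantics in numpy (the helper is hypothetical, not the real kernel):

    import numpy as np

    def fill_zeros_like2(x, dtype):
        # Only x's shape is consulted; its data is never read, mirroring
        # DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(..., "X") on the C++ side.
        return np.zeros(np.shape(x), dtype=dtype)

    grad = fill_zeros_like2(np.empty((219, 232), dtype=np.float32), np.float16)
    assert grad.dtype == np.float16 and not grad.any()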
diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
index 98ebe1fdf4bb3..01302687a4211 100644
--- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -65,17 +65,13 @@ by input arguments.
   }
 };
 
-DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
-    GaussianRandomBatchSizeLikeNoNeedBufferVarsInference, "Input");
-
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OPERATOR(
-    gaussian_random_batch_size_like,
-    paddle::operators::GaussianRandomBatchSizeLikeOp,
-    paddle::operators::GaussianRandomBatchSizeLikeOpMaker,
-    paddle::framework::EmptyGradOpMaker,
-    paddle::operators::GaussianRandomBatchSizeLikeNoNeedBufferVarsInference);
+REGISTER_OPERATOR(gaussian_random_batch_size_like,
+                  paddle::operators::GaussianRandomBatchSizeLikeOp,
+                  paddle::operators::GaussianRandomBatchSizeLikeOpMaker,
+                  paddle::framework::EmptyGradOpMaker,
+                  paddle::operators::BatchSizeLikeNoNeedBufferVarsInference);
 
 // Kernels are registered in gaussian_random_op.cc and gaussian_random_op.cu
diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc
index f458ce6c83bfc..b6cfa9cc43c31 100644
--- a/paddle/fluid/operators/hinge_loss_op.cc
+++ b/paddle/fluid/operators/hinge_loss_op.cc
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/hinge_loss_op.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -97,12 +100,29 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
   }
 };
 
+class HingeLossGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("hinge_loss_grad");
+    op->SetInput("Logits", Input("Logits"));
+    op->SetInput("Labels", Input("Labels"));
+    op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
+    op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::HingeLossGradOpDescMaker);
 REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     hinge_loss,
diff --git a/paddle/fluid/operators/huber_loss_op.cc b/paddle/fluid/operators/huber_loss_op.cc
index 253b65a5f3330..a72db384c1f09 100644
--- a/paddle/fluid/operators/huber_loss_op.cc
+++ b/paddle/fluid/operators/huber_loss_op.cc
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/huber_loss_op.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -90,38 +93,45 @@ class HuberLossGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Residual"),
-                   "Input(Residual) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                    "Input(Out@GRAD) should not be null.");
 
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
     auto residual_dims = ctx->GetInputDim("Residual");
-    auto out_grad_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-
-    PADDLE_ENFORCE_EQ(residual_dims, x_dims);
-    PADDLE_ENFORCE_EQ(out_grad_dims, x_dims);
 
     auto x_grad_name = framework::GradVarName("X");
     auto y_grad_name = framework::GradVarName("Y");
     if (ctx->HasOutput(x_grad_name)) {
-      ctx->SetOutputDim(x_grad_name, x_dims);
+      ctx->SetOutputDim(x_grad_name, residual_dims);
     }
     if (ctx->HasOutput(y_grad_name)) {
-      ctx->SetOutputDim(y_grad_name, y_dims);
+      ctx->SetOutputDim(y_grad_name, residual_dims);
     }
   }
 };
 
+class HuberLossGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("huber_loss_grad");
+    op->SetInput("Residual", Output("Residual"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::HuberLossGradOpDescMaker);
 REGISTER_OPERATOR(huber_loss_grad, ops::HuberLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     huber_loss, ops::HuberLossKernel<paddle::platform::CPUDeviceContext, float>,
*/ #include "paddle/fluid/operators/row_conv_op.h" +#include +#include +#include + #include "paddle/fluid/framework/eigen.h" namespace paddle { @@ -54,7 +58,6 @@ class RowConvGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); PADDLE_ENFORCE(ctx->HasInput("Filter"), "Input(Filter) should not be null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), @@ -62,8 +65,8 @@ class RowConvGradOp : public framework::OperatorWithKernel { auto x_grad_name = framework::GradVarName("X"); if (ctx->HasOutput(x_grad_name)) { - auto x_dims = ctx->GetInputDim("X"); - ctx->SetOutputDim(x_grad_name, x_dims); + auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out")); + ctx->SetOutputDim(x_grad_name, dout_dims); } auto filter_grad_name = framework::GradVarName("Filter"); @@ -259,12 +262,31 @@ class RowConvGradKernel } } }; + +class RowConvGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("row_conv_grad"); + op->SetAttrMap(Attrs()); + op->SetInput("X", Input("X")); + op->SetInput("Filter", Input("Filter")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter")); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(row_conv, ops::RowConvOp, ops::RowConvOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::RowConvGradOpDescMaker); REGISTER_OPERATOR(row_conv_grad, ops::RowConvGradOp); REGISTER_OP_CPU_KERNEL( row_conv, ops::RowConvKernel); diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 75d6181749e4e..7260fe25d6ebb 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -64,8 +64,9 @@ with random values sampled from a uniform distribution. 
} // namespace operators } // namespace paddle -REGISTER_OP_WITHOUT_GRADIENT( - uniform_random_batch_size_like, - paddle::operators::UniformRandomBatchSizeLikeOp, - paddle::operators::UniformRandomBatchSizeLikeOpMaker); +REGISTER_OPERATOR(uniform_random_batch_size_like, + paddle::operators::UniformRandomBatchSizeLikeOp, + paddle::operators::UniformRandomBatchSizeLikeOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::operators::BatchSizeLikeNoNeedBufferVarsInference); // Kernels are registered in uniform_random_op.cc and uniform_random_op.cu diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 6303be003a701..9fd53a74bf519 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -231,9 +231,16 @@ def _op_can_be_removed_(op_desc, no_grad_set): for idx, op_desc in enumerate(op_descs): for arg in op_desc.input_arg_names(): if core.grad_var_suffix() in arg and arg in no_grad_set: - to_insert.append((_create_op_desc_("fill_zeros_like", { - "X": [_strip_grad_suffix_(arg)] - }, {"Out": [arg]}, {}), idx)) + x_in = _strip_grad_suffix_(arg) + x_in_var_desc = op_desc.block().find_var_recursive( + cpt.to_bytes(x_in)) + assert x_in_var_desc is not None, "Variable {} not found".format( + x_in) + dtype = x_in_var_desc.dtype() + + to_insert.append( + (_create_op_desc_("fill_zeros_like2", {"X": [x_in]}, + {"Out": [arg]}, {"dtype": dtype}), idx)) list([op_descs.insert(p[1], p[0]) for p in reversed(to_insert)]) diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 04da8561a3700..527c37cb2c4f1 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -15,19 +15,20 @@ from __future__ import print_function from six.moves import reduce -import numpy as np from .. import core from ..layers import utils from . import layers -from ..framework import Variable, OpProtoHolder, Parameter -from ..layers import layer_function_generator +from ..framework import Variable, _in_dygraph_mode, OpProtoHolder, Parameter from ..param_attr import ParamAttr from ..initializer import Normal, Constant, NumpyArrayInitializer +import numpy as np __all__ = [ - 'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm', - 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', 'SequenceConv' + 'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', + 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', + 'Conv3DTranspose', 'SequenceConv', 'RowConv', 'GroupNorm', 'SpectralNorm', + 'TreeConv' ] @@ -137,6 +138,303 @@ def forward(self, input): return self._helper.append_activation(pre_act, act=self._act) +class Conv3D(layers.Layer): + """ + **Convlution3D Layer** + + The convolution3D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. Input(Input) and + Output(Output) are in NCDHW format. Where N is batch size C is the number of + channels, D is the depth of the feature, H is the height of the feature, + and W is the width of the feature. Convlution3D is similar with Convlution2D + but adds one dimension(depth). If bias attribution and activation type are + provided, bias is added to the output of the convolution, and the + corresponding activation function is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + In the above equation: + + * :math:`X`: Input value, a tensor with NCDHW format. 
+ * :math:`W`: Filter value, a tensor with MCDHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` + + Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)` + + - Output: + Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\ + H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 + + Args: + input (Variable): The input image with [N, C, D, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size (int|tuple|None): The filter size. If filter_size is a tuple, + it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride (int|tuple): The stride size. If stride is a tuple, it must + contain three integers, (stride_D, stride_H, stride_W). Otherwise, the + stride_D = stride_H = stride_W = stride. Default: stride = 1. + padding (int|tuple): The padding size. If padding is a tuple, it must + contain three integers, (padding_D, padding_H, padding_W). Otherwise, the + padding_D = padding_H = padding_W = padding. Default: padding = 0. + dilation (int|tuple): The dilation size. If dilation is a tuple, it must + contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the + dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1. + groups (int): The groups number of the Conv3d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of conv3d. If it is set to None or one attribute of ParamAttr, conv3d + will create ParamAttr as param_attr. If it is set to None, the parameter + is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is + :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv3d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act (str): Activation type, if it is set to None, activation is not appended. + Default: None. + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: None. + + Returns: + Variable: The tensor variable storing the convolution and \ + non-linearity activation result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + + Examples: + .. 
code-block:: python + + data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32') + conv3d = fluid.layers.conv3d(input=data, num_filters=2, filter_size=3, act="relu") + """ + + def __init__(self, + name_scope, + num_filters, + filter_size, + stride=1, + padding=0, + dilation=1, + groups=None, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None): + assert param_attr is not False, "param_attr should not be False here." + super(Conv3D, self).__init__(name_scope) + self._groups = groups + self._stride = utils.convert_to_list(stride, 3, 'stride') + self._padding = utils.convert_to_list(padding, 3, 'padding') + self._dilation = utils.convert_to_list(dilation, 3, 'dilation') + self._act = act + if not isinstance(use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + self._use_cudnn = use_cudnn + self._filter_size = filter_size + self._num_filters = num_filters + self._param_attr = param_attr + self._bias_attr = bias_attr + + def _build_once(self, input): + num_channels = input.shape[1] + self._dtype = self._helper.input_dtype(input) + + if self._groups is None: + num_filter_channels = num_channels + else: + if num_channels % self._groups != 0: + raise ValueError("num_channels must be divisible by groups.") + num_filter_channels = num_channels // self._groups + + filter_size = utils.convert_to_list(self._filter_size, 3, 'filter_size') + + filter_shape = [self._num_filters, num_filter_channels] + filter_size + + def _get_default_param_initializer(): + filter_elem_num = filter_size[0] * filter_size[1] * filter_size[ + 2] * num_channels + std = (2.0 / filter_elem_num)**0.5 + return Normal(0.0, std, 0) + + self._filter_param = self.create_parameter( + attr=self._param_attr, + shape=filter_shape, + dtype=self._dtype, + default_initializer=_get_default_param_initializer()) + + self._bias_param = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type='conv3d', + inputs={ + 'Input': input, + 'Filter': self._filter_param, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn, + 'use_mkldnn': False + }) + + pre_act = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type='elementwise_add', + inputs={'X': [pre_bias], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) + + return self._helper.append_activation(pre_act, act=self._act) + + +class Conv3DTranspose(layers.Layer): + def __init__(self, + name_scope, + num_filters, + output_size=None, + filter_size=None, + padding=0, + stride=1, + dilation=1, + groups=None, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None, + name=None): + super(Conv3DTranspose, self).__init__(name_scope) + if not isinstance(use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + assert param_attr is not False, "param_attr should not be False in conv3d_transpose." 
+ self._padding = utils.convert_to_list(padding, 3, 'padding') + self._stride = utils.convert_to_list(stride, 3, 'stride') + self._dilation = utils.convert_to_list(dilation, 3, 'dilation') + self._param_attr = param_attr + self._filter_size = filter_size + self._output_size = output_size + self._groups = 1 if groups is None else groups + self._num_filters = num_filters + self._use_cudnn = use_cudnn + self._bias_attr = bias_attr + self._act = act + + def _build_once(self, input): + self._dtype = self._helper.input_dtype(input) + self._input_channel = input.shape[1] + + if self._filter_size is None: + if self._output_size is None: + raise ValueError( + "output_size must be set when filter_size is None") + if isinstance(self._output_size, int): + self._output_size = [self._output_size, self._output_size] + + d_in = input.shape[2] + h_in = input.shape[3] + w_in = input.shape[4] + + filter_size_d = (self._output_size[0] - + (d_in - 1) * self._stride[0] + 2 * self._padding[0] + - 1) // self._dilation[0] + 1 + filter_size_h = (self._output_size[1] - + (h_in - 1) * self._stride[1] + 2 * self._padding[1] + - 1) // self._dilation[1] + 1 + filter_size_w = (self._output_size[2] - + (w_in - 1) * self._stride[2] + 2 * self._padding[2] + - 1) // self._dilation[2] + 1 + self._filter_size = [filter_size_d, filter_size_h, filter_size_w] + else: + self._filter_size = utils.convert_to_list( + self._filter_size, 3, 'conv3d_transpose.filter_size') + + filter_shape = [ + self._input_channel, self._num_filters // self._groups + ] + self._filter_size + self._img_filter = self.create_parameter( + dtype=self._dtype, shape=filter_shape, attr=self._param_attr) + if self._bias_attr: + self._bias_param = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type="conv3d_transpose", + inputs={'Input': [input], + 'Filter': [self._img_filter]}, + outputs={'Output': pre_bias}, + attrs={ + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn + }) + + if self._bias_attr: + pre_act = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type='elementwise_add', + inputs={'X': [pre_bias], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) + else: + pre_act = pre_bias + + # Currently, we don't support inplace in imperative mode + return self._helper.append_activation(pre_act, act=self._act) + + class Pool2D(layers.Layer): def __init__(self, name_scope, @@ -1388,6 +1686,8 @@ def __init__(self, bias_attr=None, param_attr=None, act=None): + assert not _in_dygraph_mode( + ), "SequenceConv is not supported by dynamic graph mode yet!" 
         super(SequenceConv, self).__init__(name_scope)
         self._num_filters = num_filters
         self._filter_size = filter_size
@@ -1397,12 +1697,10 @@ def __init__(self,
         self._param_attr = param_attr
 
     def _build_once(self, input):
-        self._dtype = self._helper.input_dtype(input)
-        print(self._filter_size)
         filter_shape = [self._filter_size * input.shape[1], self._num_filters]
         self._filter_param = self.create_parameter(
-            attr=self.param_attr, shape=filter_shape, dtype=self._dtype)
+            attr=self._param_attr, shape=filter_shape, dtype=self._dtype)
 
     def forward(self, input):
         pre_bias = self._helper.create_variable_for_type_inference(self._dtype)
@@ -1420,3 +1718,237 @@ def forward(self, input):
             })
         pre_act = self._helper.append_bias_op(pre_bias)
         return self._helper.append_activation(pre_act)
+
+
+class RowConv(layers.Layer):
+    def __init__(self,
+                 name_scope,
+                 future_context_size,
+                 param_attr=None,
+                 act=None):
+        assert not _in_dygraph_mode(
+        ), "RowConv is not supported by dynamic graph mode yet!"
+        super(RowConv, self).__init__(name_scope)
+        self._act = act
+        self._param_attr = param_attr
+        self._future_context_size = future_context_size
+
+    def _build_once(self, input):
+        self._dtype = self._helper.input_dtype(input)
+        filter_shape = [self._future_context_size + 1, input.shape[1]]
+        self._filter_param = self.create_parameter(
+            attr=self._param_attr,
+            shape=filter_shape,
+            dtype=self._dtype,
+            is_bias=False)
+
+    def forward(self, input):
+        out = self._helper.create_variable_for_type_inference(self._dtype)
+        self._helper.append_op(
+            type='row_conv',
+            inputs={'X': [input],
+                    'Filter': [self._filter_param]},
+            outputs={'Out': [out]})
+        return self._helper.append_activation(out, act=self._act)
+
+
+class GroupNorm(layers.Layer):
+    """
+    **Group Normalization Layer**
+
+    Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .
+
+    Args:
+        name_scope (str): See base class.
+        groups(int): The number of groups that divided from channels.
+        epsilon(float): The small value added to the variance to prevent
+            division by zero.
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
+            scale :math:`g`. If it is set to False, no scale will be added to the output units.
+            If it is set to None, the scale is initialized to one. Default: None.
+        bias_attr(ParamAttr|None): The parameter attribute for the learnable
+            bias :math:`b`. If it is set to False, no bias will be added to the output units.
+            If it is set to None, the bias is initialized to zero. Default: None.
+        act(str): Activation to be applied to the output of group normalization.
+        data_layout(string|NCHW): Only NCHW is supported.
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of data: float32, float16, int, etc.
+
+    Returns:
+        Variable: A tensor variable which is the result after applying group normalization on the input.
+ + + """ + + def __init__(self, + name_scope, + groups, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + act=None, + data_layout='NCHW'): + super(GroupNorm, self).__init__(name_scope) + self._param_attr = param_attr + self._bias_attr = bias_attr + self._epsilon = epsilon + self._groups = groups + self._act = act + if data_layout != 'NCHW': + raise ValueError("unsupported data layout:" + data_layout) + + def _build_once(self, input): + self._dtype = self._helper.input_dtype(input) + param_shape = [input.shape[1]] + if self._bias_attr: + self._bias = self.create_parameter( + attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True) + + if self._param_attr: + self._scale = self.create_parameter( + attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(1.0)) + + def forward(self, input): + inputs = {'X': input} + if self._bias: + inputs['Bias'] = self._bias + if self._scale: + inputs['Scale'] = self._scale + + # create output + mean_out = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + variance_out = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + group_norm_out = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={"epsilon": self._epsilon, + "groups": self._groups}) + + return self._helper.append_activation(group_norm_out, self._act) + + +class SpectralNorm(layers.Layer): + def __init__(self, name_scope, dim=0, power_iters=1, eps=1e-12, name=None): + super(SpectralNorm, self).__init__(name_scope) + self._power_iters = power_iters + self._eps = eps + self._dim = dim + + def _build_once(self, weight): + self._dtype = self._helper.input_dtype(weight) + input_shape = weight.shape + h = input_shape[self._dim] + w = np.prod(input_shape) // h + + self.u = self.create_parameter( + attr=ParamAttr(), + shape=[h], + dtype=self._dtype, + default_initializer=Normal(0., 1.)) + self.u.stop_gradient = True + + self.v = self.create_parameter( + attr=ParamAttr(), + shape=[w], + dtype=self._dtype, + default_initializer=Normal(0., 1.)) + self.v.stop_gradient = True + + def forward(self, weight): + inputs = {'Weight': weight, 'U': self.u, 'V': self.v} + out = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type="spectral_norm", + inputs=inputs, + outputs={"Out": out, }, + attrs={ + "dim": self._dim, + "power_iters": self._power_iters, + "eps": self._eps, + }) + + return out + + +class TreeConv(layers.Layer): + def __init__(self, + name_scope, + output_size, + num_filters=1, + max_depth=2, + act='tanh', + param_attr=None, + bias_attr=None, + name=None): + super(TreeConv, self).__init__(name_scope) + self._name = name + self._output_size = output_size + self._act = act + self._max_depth = max_depth + self._num_filters = num_filters + self._bias_attr = bias_attr + self._param_attr = param_attr + + def _build_once(self, nodes_vector, edge_set): + assert isinstance(nodes_vector, Variable) + assert isinstance(edge_set, Variable) + self._dtype = self._helper.input_dtype(nodes_vector) + + feature_size = nodes_vector.shape[2] + w_shape = [feature_size, 3, self._output_size, self._num_filters] + if self._bias_attr: + self._bias_param = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + 
self.W = self.create_parameter( + attr=self._param_attr, + shape=w_shape, + dtype=self._dtype, + is_bias=False) + + def forward(self, nodes_vector, edge_set): + if self._name: + out = self.create_variable( + name=self._name, dtype=self._dtype, persistable=False) + else: + out = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type='tree_conv', + inputs={ + 'NodesVector': nodes_vector, + 'EdgeSet': edge_set, + 'Filter': self.W + }, + outputs={'Out': out, }, + attrs={'max_depth': self._max_depth}) + if self._bias_attr: + pre_activation = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type='elementwise_add', + inputs={'X': [out], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_activation]}, + attrs={'axis': 1}) + else: + pre_activation = out + return self._helper.append_activation(pre_activation, act=self._act) diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index fd07ff0ba3d21..c7c82f28e7c44 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -227,7 +227,7 @@ class Precision(MetricBase): metric.reset() for data in train_reader(): loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) - metric.update(preds=preds, labels=labels) + metric.update(preds=preds, labels=labels) numpy_precision = metric.eval() """ @@ -241,9 +241,11 @@ def update(self, preds, labels): raise ValueError("The 'preds' must be a numpy ndarray.") if not _is_numpy_(labels): raise ValueError("The 'labels' must be a numpy ndarray.") - sample_num = labels[0] + sample_num = labels.shape[0] + preds = np.rint(preds).astype("int32") + for i in range(sample_num): - pred = preds[i].astype("int32") + pred = preds[i] label = labels[i] if label == 1: if pred == label: diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 1390e759d7e30..0291bc25ed314 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -81,6 +81,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_se_resnext) list(REMOVE_ITEM TEST_OPS test_imperative_mnist) list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) +list(REMOVE_ITEM TEST_OPS test_layers) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -118,7 +119,7 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL) - +py_test_modules(test_layers MODULES test_layers ENVS FLAGS_cudnn_deterministic=1) if(NOT WIN32) py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL) endif() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py index 4844d930daca7..a84ff1fd6d46c 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py @@ -23,6 +23,8 @@ from test_concat_op import * from test_gather_op 
import * from test_gaussian_random_batch_size_like_op import * +from test_uniform_random_batch_size_like_op import * +from test_fill_constant_batch_size_like_op import * from test_lod_reset_op import * from test_scatter_op import * from test_mean_op import * @@ -40,6 +42,7 @@ from test_sequence_scatter_op import * from test_sequence_slice_op import * from test_pad2d_op import * +from test_fill_zeros_like2_op import * if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py new file mode 100644 index 0000000000000..935653b07a6a4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py @@ -0,0 +1,50 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from paddle.fluid.framework import convert_np_dtype_to_dtype_ +from op_test import OpTest + + +class TestFillZerosLike2Op(OpTest): + def setUp(self): + self.op_type = "fill_zeros_like2" + self.dtype = np.float32 + self.init_dtype() + self.inputs = {'X': np.random.random((219, 232)).astype(self.dtype)} + self.outputs = {'Out': np.zeros_like(self.inputs["X"])} + self.attrs = {'dtype': convert_np_dtype_to_dtype_(self.dtype)} + + def init_dtype(self): + pass + + def test_check_output(self): + self.check_output() + + +class TestFillZerosLike2OpFp16(TestFillZerosLike2Op): + def init_dtype(self): + self.dtype = np.float16 + + +class TestFillZerosLike2OpFp64(TestFillZerosLike2Op): + def init_dtype(self): + self.dtype = np.float64 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 13f2d662178c7..8b3094cb2a3a6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -348,6 +348,55 @@ def test_mlp(self): self.assertEqual(mlp._fc2, sublayers[1]) self.assertEqual(len(sublayers), 2) + def test_dygraph_vs_static(self): + inp1 = np.random.rand(4, 3, 3) + inp2 = np.random.rand(4, 3, 3) + + # dynamic graph + with fluid.dygraph.guard(): + if np.sum(inp1) < np.sum(inp2): + x = fluid.layers.elementwise_add(inp1, inp2) + else: + x = fluid.layers.elementwise_sub(inp1, inp2) + dygraph_result = x._numpy() + + # static graph + with new_program_scope(): + inp_data1 = fluid.layers.data( + name='inp1', shape=[3, 3], dtype=np.float32) + inp_data2 = fluid.layers.data( + name='inp2', shape=[3, 3], dtype=np.float32) + + a = fluid.layers.expand( + fluid.layers.reshape( + fluid.layers.reduce_sum(inp_data1), [1, 1]), [4, 1]) + b = fluid.layers.expand( + fluid.layers.reshape( + fluid.layers.reduce_sum(inp_data2), [1, 1]), [4, 1]) + cond = fluid.layers.less_than(x=a, y=b) + + ie = fluid.layers.IfElse(cond) + with ie.true_block(): + d1 = ie.input(inp_data1) + d2 = 
ie.input(inp_data2) + d3 = fluid.layers.elementwise_add(d1, d2) + ie.output(d3) + + with ie.false_block(): + d1 = ie.input(inp_data1) + d2 = ie.input(inp_data2) + d3 = fluid.layers.elementwise_sub(d1, d2) + ie.output(d3) + out = ie() + + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + static_result = exe.run(fluid.default_main_program(), + feed={'inp1': inp1, + 'inp2': inp2}, + fetch_list=out)[0] + self.assertTrue(np.allclose(dygraph_result, static_result)) + def test_rnn(self): np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 552eb019500b1..82eb61ba65463 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -16,6 +16,7 @@ import unittest import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.dygraph.nn import Embedding import paddle.fluid.framework as framework from paddle.fluid.optimizer import SGDOptimizer @@ -278,7 +279,8 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - exe = fluid.Executor(fluid.CPUPlace()) + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) sgd = SGDOptimizer(learning_rate=1e-3) x = fluid.layers.data( name="x", shape=[-1, num_steps, 1], dtype='int64') @@ -331,18 +333,16 @@ def test_ptb_rnn_cpu_float32(self): for k in range(3, len(out)): static_param_updated[static_param_name_list[k - 3]] = out[k] - self.assertTrue(np.allclose(static_loss_value, dy_loss._numpy())) - self.assertTrue(np.allclose(static_last_cell_value, last_cell._numpy())) + + self.assertTrue(np.array_equal(static_loss_value, dy_loss._numpy())) + self.assertTrue( + np.array_equal(static_last_cell_value, last_cell._numpy())) self.assertTrue( - np.allclose(static_last_hidden_value, last_hidden._numpy())) + np.array_equal(static_last_hidden_value, last_hidden._numpy())) for key, value in six.iteritems(static_param_init): - # print("static_init name: {}, value {}".format(key, value)) - # print("dy_init name: {}, value {}".format(key, dy_param_init[key])) - self.assertTrue(np.allclose(value, dy_param_init[key])) + self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): - # print("static name: {}, value {}".format(key, value)) - # print("dy name: {}, value {}".format(key, dy_param_updated[key])) - self.assertTrue(np.allclose(value, dy_param_updated[key])) + self.assertTrue(np.array_equal(value, dy_param_updated[key])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 674965882d76e..89ba17f8b940d 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -595,6 +595,280 @@ def test_nce(self): self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) + def test_conv3d(self): + with self.static_graph(): + images = layers.data( + name='pixel', shape=[3, 6, 6, 6], dtype='float32') + ret = layers.conv3d(input=images, num_filters=3, filter_size=2) + static_ret = self.get_static_graph_result( + feed={'pixel': np.ones( + [2, 3, 6, 6, 6], dtype='float32')}, + fetch_list=[ret])[0] + + with 
self.static_graph(): + images = layers.data( + name='pixel', shape=[3, 6, 6, 6], dtype='float32') + conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) + ret = conv3d(images) + static_ret2 = self.get_static_graph_result( + feed={'pixel': np.ones( + [2, 3, 6, 6, 6], dtype='float32')}, + fetch_list=[ret])[0] + + with self.dynamic_graph(): + images = np.ones([2, 3, 6, 6, 6], dtype='float32') + conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) + dy_ret = conv3d(base.to_variable(images)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_row_conv(self): + input = np.arange(15).reshape([3, 5]).astype('float32') + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + with self.static_graph(): + x = layers.data( + name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.row_conv(input=x, future_context_size=2) + static_ret = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.static_graph(): + x = layers.data( + name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) + rowConv = nn.RowConv('RowConv', future_context_size=2) + ret = rowConv(x) + static_ret2 = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + # TODO: dygraph can't support LODTensor + + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_group_norm(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + shape = (2, 4, 3, 3) + + input = np.random.random(shape).astype('float32') + + with self.static_graph(): + X = fluid.layers.data( + name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.group_norm(input=X, groups=2) + static_ret = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.static_graph(): + X = fluid.layers.data( + name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + groupNorm = nn.GroupNorm('GroupNorm', groups=2) + ret = groupNorm(X) + static_ret2 = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.dynamic_graph(): + groupNorm = nn.GroupNorm('GroupNorm', groups=2) + dy_ret = groupNorm(base.to_variable(input)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_spectral_norm(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + shape = (2, 4, 3, 3) + + input = np.random.random(shape).astype('float32') + + with self.static_graph(): + Weight = fluid.layers.data( + name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.spectral_norm(weight=Weight, dim=1, power_iters=2) + static_ret = self.get_static_graph_result( + feed={ + 'Weight': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place), + }, + fetch_list=[ret], + with_lod=True)[0] + + 
with self.static_graph(): + Weight = fluid.layers.data( + name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) + ret = spectralNorm(Weight) + static_ret2 = self.get_static_graph_result( + feed={ + 'Weight': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.dynamic_graph(): + spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) + dy_ret = spectralNorm(base.to_variable(input)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_tree_conv(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + adj_array = [1, 2, 1, 3, 1, 4, 1, 5, 2, 6, 2, 7, 2, 8, 4, 9, 4, 10] + adj = np.array(adj_array).reshape((1, 9, 2)).astype('int32') + adj = np.tile(adj, (1, 1, 1)) + vectors = np.random.random((1, 10, 5)).astype('float32') + with self.static_graph(): + NodesVector = fluid.layers.data( + name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data( + name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + ret = layers.tree_conv( + nodes_vector=NodesVector, + edge_set=EdgeSet, + output_size=6, + num_filters=1, + max_depth=2) + static_ret = self.get_static_graph_result( + feed={ + 'NodesVector': fluid.create_lod_tensor( + data=vectors, recursive_seq_lens=[[1]], place=place), + 'EdgeSet': fluid.create_lod_tensor( + data=adj, recursive_seq_lens=[[1]], place=place) + }, + fetch_list=[ret], + with_lod=False)[0] + + with self.static_graph(): + NodesVector = fluid.layers.data( + name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data( + name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + treeConv = nn.TreeConv( + 'TreeConv', output_size=6, num_filters=1, max_depth=2) + ret = treeConv(NodesVector, EdgeSet) + static_ret2 = self.get_static_graph_result( + feed={ + 'NodesVector': fluid.create_lod_tensor( + data=vectors, recursive_seq_lens=[[1]], place=place), + 'EdgeSet': fluid.create_lod_tensor( + data=adj, recursive_seq_lens=[[1]], place=place) + }, + fetch_list=[ret], + with_lod=False)[0] + + with self.dynamic_graph(): + treeConv = nn.TreeConv( + 'SpectralNorm', output_size=6, num_filters=1, max_depth=2) + dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) + + self.assertTrue(np.allclose(static_ret, static_ret2)) + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + + def test_conv3d_transpose(self): + input_array = np.arange(0, 48).reshape( + [2, 3, 2, 2, 2]).astype('float32') + + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + out = layers.conv3d_transpose( + input=img, num_filters=12, filter_size=12, use_cudnn=False) + static_rlt = self.get_static_graph_result( + feed={'pixel': input_array}, fetch_list=[out])[0] + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + conv3d_transpose = nn.Conv3DTranspose( + 'Conv3DTranspose', + num_filters=12, + filter_size=12, + use_cudnn=False) + out = conv3d_transpose(img) + static_rlt2 = self.get_static_graph_result( + feed={'pixel': input_array}, fetch_list=[out])[0] + with 
self.dynamic_graph(): + conv3d_transpose = nn.Conv3DTranspose( + 'Conv3DTranspose', + num_filters=12, + filter_size=12, + use_cudnn=False) + dy_rlt = conv3d_transpose(base.to_variable(input_array)) + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + class TestBook(unittest.TestCase): def test_fit_a_line(self):
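Each new test above instantiates a dygraph layer and checks its output against the functional static-graph result. A hedged usage sketch of the new Conv3D layer under dygraph, mirroring test_conv3d (a 2x2x2 filter over a 6x6x6 input with stride 1 and no padding yields 5x5x5 spatially; the to_variable import path follows the tests in this branch):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import nn
    from paddle.fluid.dygraph.base import to_variable

    with fluid.dygraph.guard():
        images = np.ones([2, 3, 6, 6, 6], dtype='float32')
        conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
        out = conv3d(to_variable(images))
        print(out.shape)  # expected [2, 3, 5, 5, 5]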