diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc deleted file mode 100644 index 1d2ebec27334cf..00000000000000 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/chunk_eval_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class ChunkEvalOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Inference"), "Input", "Inference", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "chunk_eval"); - - OP_INOUT_CHECK( - ctx->HasOutput("Precision"), "Output", "Precision", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("Recall"), "Output", "Recall", "chunk_eval"); - OP_INOUT_CHECK( - ctx->HasOutput("F1-Score"), "Output", "F1-Score", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumInferChunks"), - "Output", - "NumInferChunks", - "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumLabelChunks"), - "Output", - "NumLabelChunks", - "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumCorrectChunks"), - "Output", - "NumCorrectChunks", - "chunk_eval"); - - auto inference_dim = ctx->GetInputDim("Inference"); - auto label_dim = ctx->GetInputDim("Label"); - - PADDLE_ENFORCE_EQ( - inference_dim, - label_dim, - phi::errors::InvalidArgument( - "Input(Inference)'s shape must be the same as Input(Label)'s " - "shape, but received [%s] (Inference) vs [%s] (Label).", - inference_dim, - label_dim)); - - bool use_padding = ctx->HasInput("SeqLength"); - if (use_padding) { - PADDLE_ENFORCE_EQ( - (inference_dim.size() == 3 && inference_dim[2] == 1) || - inference_dim.size() == 2, - true, - phi::errors::InvalidArgument( - "when Input(SeqLength) is provided, Input(Inference) " - "should be of dim 3 (batch_size, bucket, 1) or dim 2 " - "(batch_size, bucket), but received [%s].", - inference_dim)); - auto seq_length_dim = ctx->GetInputDim("SeqLength"); - PADDLE_ENFORCE_LE(seq_length_dim.size(), - 2, - phi::errors::InvalidArgument( - "Input(SeqLength)'s rank should not be greater " - "than 2, but received %d.", - seq_length_dim.size())); - } - - ctx->SetOutputDim("Precision", {1}); - ctx->SetOutputDim("Recall", {1}); - ctx->SetOutputDim("F1-Score", {1}); - ctx->SetOutputDim("NumInferChunks", {1}); - ctx->SetOutputDim("NumLabelChunks", {1}); - ctx->SetOutputDim("NumCorrectChunks", {1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, - platform::CPUPlace()); - } -}; - -class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Inference", - "(Tensor, default: Tensor). " - "Predictions from the network."); - AddInput("Label", - "(Tensor, default: Tensor). The true tag sequences."); - AddInput("SeqLength", - "(Tensor, default: Tensor). The length of each sequence, " - "used when Inference and Label are Tensor type .") - .AsDispensable(); - AddOutput("Precision", - "(float). The evaluated precision (called positive predictive " - "value) of chunks on the given mini-batch."); - AddOutput("Recall", - "(float). The evaluated recall (true positive rate or " - "sensitivity) of chunks on the given mini-batch."); - AddOutput("F1-Score", - "(float). The evaluated F1-Score on the given mini-batch."); - AddOutput("NumInferChunks", - "(int64_t). The number of chunks in Inference on the given " - "mini-batch."); - AddOutput( - "NumLabelChunks", - "(int64_t). The number of chunks in Label on the given mini-batch."); - AddOutput( - "NumCorrectChunks", - "(int64_t). The number of chunks both in Inference and Label on the " - "given mini-batch."); - AddAttr("num_chunk_types", - "The number of chunk type. See the description for details."); - AddAttr("chunk_scheme", - "The labeling scheme indicating " - "how to encode the chunks. Must be IOB, IOE, IOBES or " - "plain. See the description" - "for details.") - .SetDefault("IOB"); - AddAttr>("excluded_chunk_types", - "A list including chunk type ids " - "indicating chunk types that are not counted. " - "See the description for details.") - .SetDefault(std::vector{}); - AddComment(R"DOC( -For some basics of chunking, please refer to -'Chunking with Support Vector Machines '. - -ChunkEvalOp computes the precision, recall, and F1-score of chunk detection, -and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. -Here is a NER example of labeling for these tagging schemes: - - Li Ming works at Agricultural Bank of China in Beijing. - IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC - IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC - IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC - IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC - -There are three chunk types(named entity types) including PER(person), ORG(organization) -and LOC(LOCATION), and we can see that the labels have the form -. - -Since the calculations actually use label ids rather than labels, extra attention -should be paid when mapping labels to ids to make CheckEvalOp work. The key point -is that the listed equations are satisfied by ids. - - tag_type = label % num_tag_type - chunk_type = label / num_tag_type - -where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type` -is the num of chunk types, and `tag_type` get its value from the following table. - - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 - -Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG, -PER and LOC. To satisfy the above equations, the label map can be like this: - - B-ORG 0 - I-ORG 1 - B-PER 2 - I-PER 3 - B-LOC 4 - I-LOC 5 - O 6 - -It's not hard to verify the equations noting that the num of chunk types -is 3 and the num of tag types in IOB scheme is 2. For example, the label -id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of -I-LOC is 2, which consistent with the results from the equations. -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(chunk_eval, - ops::ChunkEvalOp, - ops::ChunkEvalOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - chunk_eval, CPU, ALL_LAYOUT, ops::ChunkEvalKernel, float) {} diff --git a/paddle/fluid/operators/chunk_eval_op.h b/paddle/fluid/operators/chunk_eval_op.h deleted file mode 100644 index 4b146176a43bc8..00000000000000 --- a/paddle/fluid/operators/chunk_eval_op.h +++ /dev/null @@ -1,358 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class ChunkEvalKernel : public framework::OpKernel { - public: - struct Segment { - int begin; - int end; - int type; - bool operator==(const Segment& y) const { - return begin == y.begin && end == y.end && type == y.type; - } - }; - - void GetSegments(const int64_t* label, - int length, - std::vector* segments, - int num_chunk_types, - int num_tag_types, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single) const { - segments->clear(); - segments->reserve(length); - int chunk_start = 0; - bool in_chunk = false; - int tag = -1; - int type = other_chunk_type; - for (int i = 0; i < length; ++i) { - int prev_tag = tag; - int prev_type = type; - PADDLE_ENFORCE_LE( - label[i], - num_chunk_types * num_tag_types, - phi::errors::InvalidArgument( - "The value of Input(Label) should be less than the number of " - "chunk types times the number of tag types, but received %d " - "(Label) vs %d (chunk types) * %d (tag types).", - label[i], - num_chunk_types, - num_tag_types)); - tag = label[i] % num_tag_types; - type = label[i] / num_tag_types; - if (in_chunk && ChunkEnd(prev_tag, - prev_type, - tag, - type, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single)) { - Segment segment{ - chunk_start, // begin - i - 1, // end - prev_type, - }; - segments->push_back(segment); - in_chunk = false; - } - if (ChunkBegin(prev_tag, - prev_type, - tag, - type, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single)) { - chunk_start = i; - in_chunk = true; - } - } - if (in_chunk) { - Segment segment{ - chunk_start, // begin - length - 1, // end - type, - }; - segments->push_back(segment); - } - } - - bool ChunkEnd(int prev_tag, - int prev_type, - int tag, - int type, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single) const { - if (prev_type == other_chunk_type) return false; - if (type == other_chunk_type) return true; - if (type != prev_type) return true; - if (prev_tag == tag_begin) return tag == tag_begin || tag == tag_single; - if (prev_tag == tag_inside) return tag == tag_begin || tag == tag_single; - if (prev_tag == tag_end) return true; - if (prev_tag == tag_single) return true; - return false; - } - - bool ChunkBegin(int prev_tag, - int prev_type, - int tag, - int type, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single) const { - if (prev_type == other_chunk_type) return type != other_chunk_type; - if (type == other_chunk_type) return false; - if (type != prev_type) return true; - if (tag == tag_begin) return true; - if (tag == tag_inside) return prev_tag == tag_end || prev_tag == tag_single; - if (tag == tag_end) return prev_tag == tag_end || prev_tag == tag_single; - if (tag == tag_single) return true; - return false; - } - - void Compute(const framework::ExecutionContext& context) const override { - // initialize to parse configurations - int num_chunk_types, num_tag_types; - int other_chunk_type; - int tag_begin, tag_inside, tag_end, tag_single; - std::vector label_segments; - std::vector output_segments; - std::set excluded_chunk_types; - - if (context.Attr("chunk_scheme") == "IOB") { - num_tag_types = 2; - tag_begin = 0; - tag_inside = 1; - tag_end = -1; - tag_single = -1; - } else if (context.Attr("chunk_scheme") == "IOE") { - num_tag_types = 2; - tag_begin = -1; - tag_inside = 0; - tag_end = 1; - tag_single = -1; - } else if (context.Attr("chunk_scheme") == "IOBES") { - num_tag_types = 4; - tag_begin = 0; - tag_inside = 1; - tag_end = 2; - tag_single = 3; - } else if (context.Attr("chunk_scheme") == "plain") { - num_tag_types = 1; - tag_begin = -1; - tag_inside = -1; - tag_end = -1; - tag_single = -1; - } else { - PADDLE_THROW(phi::errors::InvalidArgument("Unknown chunk scheme.")); - } - other_chunk_type = num_chunk_types = context.Attr("num_chunk_types"); - excluded_chunk_types.insert( - context.Attr>("excluded_chunk_types").begin(), - context.Attr>("excluded_chunk_types").end()); - - auto* inference = context.Input("Inference"); - auto place = inference->place(); - auto* label = context.Input("Label"); - auto* precision = context.Output("Precision"); - auto* recall = context.Output("Recall"); - auto* f1 = context.Output("F1-Score"); - auto* num_infer_chunks = context.Output("NumInferChunks"); - auto* num_label_chunks = context.Output("NumLabelChunks"); - auto* num_correct_chunks = - context.Output("NumCorrectChunks"); - - const int64_t* inference_data = inference->data(); - const int64_t* label_data = label->data(); - T* precision_data = precision->mutable_data(place); - T* recall_data = recall->mutable_data(place); - T* f1_data = f1->mutable_data(place); - int64_t* num_infer_chunks_data = - num_infer_chunks->mutable_data(place); - int64_t* num_label_chunks_data = - num_label_chunks->mutable_data(place); - int64_t* num_correct_chunks_data = - num_correct_chunks->mutable_data(place); - *num_infer_chunks_data = 0; - *num_label_chunks_data = 0; - *num_correct_chunks_data = 0; - - auto lod = label->lod(); - bool use_padding = lod.empty(); - int num_sequences = 0; - - if (use_padding) { - auto dim1 = inference->dims()[1]; - auto* seq_length_t = context.Input("SeqLength"); - auto* seq_length_data = seq_length_t->data(); - num_sequences = seq_length_t->dims()[0]; - - for (int i = 0; i < num_sequences; ++i) { - int seq_length = seq_length_data[i]; - EvalOneSeq(inference_data + i * dim1, - label_data + i * dim1, - seq_length, - &output_segments, - &label_segments, - num_infer_chunks_data, - num_label_chunks_data, - num_correct_chunks_data, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single, - excluded_chunk_types); - } - } else { - PADDLE_ENFORCE_EQ( - lod.size(), - 1UL, - phi::errors::InvalidArgument( - "Only support one level LoD sequence now, but received %d.", - lod.size())); - PADDLE_ENFORCE_EQ( - lod, - inference->lod(), - phi::errors::InvalidArgument( - "Input(Inference) and Input(Label) of Op(chunk_eval) should have " - "same LoD information.")); - num_sequences = lod[0].size() - 1; - - for (int i = 0; i < num_sequences; ++i) { - int seq_length = lod[0][i + 1] - lod[0][i]; - EvalOneSeq(inference_data + lod[0][i], - label_data + lod[0][i], - seq_length, - &output_segments, - &label_segments, - num_infer_chunks_data, - num_label_chunks_data, - num_correct_chunks_data, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single, - excluded_chunk_types); - } - } - - *precision_data = !(*num_infer_chunks_data) - ? 0 - : static_cast(*num_correct_chunks_data) / - (*num_infer_chunks_data); - *recall_data = !(*num_label_chunks_data) - ? 0 - : static_cast(*num_correct_chunks_data) / - (*num_label_chunks_data); - *f1_data = !(*num_correct_chunks_data) - ? 0 - : 2 * (*precision_data) * (*recall_data) / - ((*precision_data) + (*recall_data)); - } - - void EvalOneSeq(const int64_t* output, - const int64_t* label, - int length, - std::vector* output_segments, - std::vector* label_segments, - int64_t* num_output_segments, - int64_t* num_label_segments, - int64_t* num_correct, - int num_chunk_types, - int num_tag_types, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single, - const std::set& excluded_chunk_types) const { - GetSegments(output, - length, - output_segments, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single); - GetSegments(label, - length, - label_segments, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single); - size_t i = 0, j = 0; - while (i < output_segments->size() && j < label_segments->size()) { - if (output_segments->at(i) == label_segments->at(j) && - excluded_chunk_types.count(output_segments->at(i).type) != 1) { - ++(*num_correct); - } - if (output_segments->at(i).end < label_segments->at(j).end) { - ++i; - } else if (output_segments->at(i).end > label_segments->at(j).end) { - ++j; - } else { - ++i; - ++j; - } - } - for (auto& segment : (*label_segments)) { - if (excluded_chunk_types.count(segment.type) != 1) { - ++(*num_label_segments); - } - } - for (auto& segment : (*output_segments)) { - if (excluded_chunk_types.count(segment.type) != 1) { - ++(*num_output_segments); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/test/cpp/inference/api/CMakeLists.txt b/test/cpp/inference/api/CMakeLists.txt index 14392a60feaf77..a9dc2189322de7 100644 --- a/test/cpp/inference/api/CMakeLists.txt +++ b/test/cpp/inference/api/CMakeLists.txt @@ -792,32 +792,16 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) # build test binary to be used in subsequent tests inference_analysis_api_test_build(${LEXICAL_TEST_APP} ${LEXICAL_TEST_APP_SRC}) - # run lexcial analysis test - inference_analysis_api_lexical_test_run( - test_analyzer_lexical_gru ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} - ${GRU_DATA_PATH}) - # run bfloat16 lexical analysis test - inference_analysis_api_lexical_bfloat16_test_run( - test_analyzer_lexical_gru_bfloat16 ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} - ${GRU_DATA_PATH}) - # run post-training quantization lexical analysis test - inference_analysis_api_lexical_int8_test_run( - test_analyzer_lexical_gru_int8 - ${LEXICAL_TEST_APP} - ${GRU_MODEL_PATH} - ${GRU_DATA_PATH} - true # enable_int8_ptq - false # enable_int8_qat - false) # fuse_multi_gru + # run post-training quantization lexical analysis test with multi_gru fuse - inference_analysis_api_lexical_int8_test_run( - test_analyzer_lexical_gru_int8_multi_gru - ${LEXICAL_TEST_APP} - ${GRU_MODEL_PATH} - ${GRU_DATA_PATH} - true # enable_int8_ptq - false # enable_int8_qat - true) # fuse_multi_gru + # inference_analysis_api_lexical_int8_test_run( + # test_analyzer_lexical_gru_int8_multi_gru + # ${LEXICAL_TEST_APP} + # ${GRU_MODEL_PATH} + # ${GRU_DATA_PATH} + # true # enable_int8_ptq + # false # enable_int8_qat + # true) # fuse_multi_gru # run qat gru test set(QAT_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz") diff --git a/test/legacy_test/test_chunk_eval_op.py b/test/legacy_test/test_chunk_eval_op.py deleted file mode 100644 index b9db50079b4b3d..00000000000000 --- a/test/legacy_test/test_chunk_eval_op.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -class Segment: - def __init__(self, chunk_type, start_idx, end_idx): - self.chunk_type = chunk_type - self.start_idx = start_idx - self.end_idx = end_idx - - def __str__(self): - return f'(Segment: {self.chunk_type}, {self.start_idx}, {self.end_idx})' - - __repr__ = __str__ - - -class TestChunkEvalOp(OpTest): - num_sequences = 5 - batch_size = 50 - - def parse_scheme(self): - if self.scheme == 'IOB': - self.num_tag_types = 2 - elif self.scheme == 'IOE': - self.num_tag_types = 2 - - def fill_with_chunks(self, data, chunks): - for chunk in chunks: - if self.scheme == 'IOB': - data[chunk.start_idx] = chunk.chunk_type * self.num_tag_types - data[ - chunk.start_idx + 1 : chunk.end_idx - ] = chunk.chunk_type * self.num_tag_types + ( - self.num_tag_types - 1 - ) - data[chunk.end_idx] = ( - chunk.chunk_type * self.num_tag_types - + (self.num_tag_types - 1) - if chunk.start_idx < chunk.end_idx - else data[chunk.start_idx] - ) - elif self.scheme == 'IOE': - data[chunk.start_idx : chunk.end_idx] = ( - chunk.chunk_type * self.num_tag_types - ) - data[chunk.end_idx] = chunk.chunk_type * self.num_tag_types + ( - self.num_tag_types - 1 - ) - - def rand_chunks(self, starts, num_chunks): - if num_chunks < 0: - num_chunks = np.random.randint(starts[-1]) - chunks = [] - # generate chunk beginnings - chunk_begins = sorted( - np.random.choice(list(range(starts[-1])), num_chunks, replace=False) - ) - seq_chunk_begins = [] - begin_idx = 0 - # divide chunks into sequences - for i in range(len(starts) - 1): - tmp_chunk_begins = [] - while ( - begin_idx < len(chunk_begins) - and chunk_begins[begin_idx] < starts[i + 1] - ): - tmp_chunk_begins.append(chunk_begins[begin_idx]) - begin_idx += 1 - seq_chunk_begins.append(tmp_chunk_begins) - # generate chunk ends - chunk_ends = [] - for i in range(len(seq_chunk_begins)): - for j in range(len(seq_chunk_begins[i])): - low = seq_chunk_begins[i][j] - high = ( - seq_chunk_begins[i][j + 1] - if j < len(seq_chunk_begins[i]) - 1 - else starts[i + 1] - ) - chunk_ends.append(np.random.randint(low, high)) - # generate chunks - for chunk_pos in zip(chunk_begins, chunk_ends): - chunk_type = np.random.randint(self.num_chunk_types) - chunks.append(Segment(chunk_type, *chunk_pos)) - return chunks - - def gen_chunks(self, infer, label, starts): - chunks = self.rand_chunks( - starts, - self.num_infer_chunks - + self.num_label_chunks - - self.num_correct_chunks, - ) - correct_chunks = np.random.choice( - list(range(len(chunks))), self.num_correct_chunks, replace=False - ) - infer_chunks = np.random.choice( - [x for x in range(len(chunks)) if x not in correct_chunks], - self.num_infer_chunks - self.num_correct_chunks, - replace=False, - ) - infer_chunks = sorted(correct_chunks.tolist() + infer_chunks.tolist()) - label_chunks = np.random.choice( - [x for x in range(len(chunks)) if x not in infer_chunks], - self.num_label_chunks - self.num_correct_chunks, - replace=False, - ) - label_chunks = sorted(correct_chunks.tolist() + label_chunks.tolist()) - self.fill_with_chunks(infer, [chunks[idx] for idx in infer_chunks]) - self.fill_with_chunks(label, [chunks[idx] for idx in label_chunks]) - # exclude types in excluded_chunk_types - if len(self.excluded_chunk_types) > 0: - for idx in correct_chunks: - if chunks[idx].chunk_type in self.excluded_chunk_types: - self.num_correct_chunks -= 1 - for idx in infer_chunks: - if chunks[idx].chunk_type in self.excluded_chunk_types: - self.num_infer_chunks -= 1 - for idx in label_chunks: - if chunks[idx].chunk_type in self.excluded_chunk_types: - self.num_label_chunks -= 1 - return ( - self.num_correct_chunks, - self.num_infer_chunks, - self.num_label_chunks, - ) - - def set_confs(self): - # Use the IOB scheme and labels with 2 chunk types - self.scheme = 'IOB' - self.num_chunk_types = 2 - self.excluded_chunk_types = [] - self.other_chunk_type = self.num_chunk_types - self.attrs = { - 'num_chunk_types': self.num_chunk_types, - 'chunk_scheme': self.scheme, - 'excluded_chunk_types': self.excluded_chunk_types, - } - self.parse_scheme() - ( - self.num_correct_chunks, - self.num_infer_chunks, - self.num_label_chunks, - ) = (4, 5, 9) - - def set_data(self): - infer = np.zeros((self.batch_size,)).astype('int64') - infer.fill(self.num_chunk_types * self.num_tag_types) - label = np.copy(infer) - starts = np.random.choice( - list(range(1, self.batch_size)), - self.num_sequences - 1, - replace=False, - ).tolist() - starts.extend([0, self.batch_size]) - starts = sorted(starts) - ( - self.num_correct_chunks, - self.num_infer_chunks, - self.num_label_chunks, - ) = self.gen_chunks(infer, label, starts) - lod = [] - for i in range(len(starts) - 1): - lod.append(starts[i + 1] - starts[i]) - self.set_input(infer, label, lod) - precision = ( - float(self.num_correct_chunks) / self.num_infer_chunks - if self.num_infer_chunks - else 0 - ) - recall = ( - float(self.num_correct_chunks) / self.num_label_chunks - if self.num_label_chunks - else 0 - ) - f1 = ( - float(2 * precision * recall) / (precision + recall) - if self.num_correct_chunks - else 0 - ) - self.outputs = { - 'Precision': np.asarray([precision], dtype='float32'), - 'Recall': np.asarray([recall], dtype='float32'), - 'F1-Score': np.asarray([f1], dtype='float32'), - 'NumInferChunks': np.asarray( - [self.num_infer_chunks], dtype='int64' - ), - 'NumLabelChunks': np.asarray( - [self.num_label_chunks], dtype='int64' - ), - 'NumCorrectChunks': np.asarray( - [self.num_correct_chunks], dtype='int64' - ), - } - - def set_input(self, infer, label, lod): - self.inputs = {'Inference': (infer, [lod]), 'Label': (label, [lod])} - - def setUp(self): - self.op_type = 'chunk_eval' - self.set_confs() - self.set_data() - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - -class TestChunkEvalOpWithExclude(TestChunkEvalOp): - def set_confs(self): - # Use the IOE scheme and labels with 3 chunk types - self.scheme = 'IOE' - self.num_chunk_types = 3 - self.excluded_chunk_types = [1] - self.other_chunk_type = self.num_chunk_types - self.attrs = { - 'num_chunk_types': self.num_chunk_types, - 'chunk_scheme': self.scheme, - 'excluded_chunk_types': self.excluded_chunk_types, - } - self.parse_scheme() - ( - self.num_correct_chunks, - self.num_infer_chunks, - self.num_label_chunks, - ) = (15, 18, 20) - - -class TestChunkEvalOpWithTensorInput(TestChunkEvalOp): - def set_input(self, infer, label, lod): - max_len = np.max(lod) - pad_infer = [] - pad_label = [] - start = 0 - for i in range(len(lod)): - end = lod[i] + start - pad_infer.append( - np.pad( - infer[start:end], - (0, max_len - lod[i]), - 'constant', - constant_values=(-1,), - ) - ) - pad_label.append( - np.pad( - label[start:end], - (0, max_len - lod[i]), - 'constant', - constant_values=(-1,), - ) - ) - start = end - - pad_infer = np.expand_dims(np.array(pad_infer, dtype='int64'), 2) - pad_label = np.expand_dims(np.array(pad_label, dtype='int64'), 2) - lod = np.array(lod, dtype='int64') - self.inputs = { - 'Inference': pad_infer, - 'Label': pad_label, - 'SeqLength': lod, - } - - -if __name__ == '__main__': - unittest.main()