examples/text_generation/ernie-gen/model.py

#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import numpy as np


class StackModel(nn.Layer):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, src_ids, src_sids, src_pids, tgt_ids, tgt_sids, tgt_pids,
                attn_ids, mask_src_2_src, mask_tgt_2_srctgt,
                mask_attn_2_srctgtattn, tgt_labels, tgt_pos):
        _, __, info = self.model(
            src_ids,
            sent_ids=src_sids,
            pos_ids=src_pids,
            attn_bias=mask_src_2_src,
            encode_only=True)
        cached_k, cached_v = info['caches']
        _, __, info = self.model(
            tgt_ids,
            sent_ids=tgt_sids,
            pos_ids=tgt_pids,
            attn_bias=mask_tgt_2_srctgt,
            past_cache=(cached_k, cached_v),
            encode_only=True)
        cached_k2, cached_v2 = info['caches']
        past_cache_k = [
            paddle.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)
        ]
        past_cache_v = [
            paddle.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)
        ]
        loss, _, __ = self.model(
            attn_ids,
            sent_ids=tgt_sids,
            pos_ids=tgt_pids,
            attn_bias=mask_attn_2_srctgtattn,
            past_cache=(past_cache_k, past_cache_v),
            tgt_labels=tgt_labels,
            tgt_pos=tgt_pos)
        loss = loss.mean()
        return loss