Skip to content

Commit

Permalink
fix pegasus convert (#109)
Browse files Browse the repository at this point in the history
* fix pegasus convert

* add line

---------

Co-authored-by: janinezhao <janinezhao@tencent.com>
  • Loading branch information
JINGZIjingzi and janinezhao authored Oct 27, 2023
1 parent d72dcbe commit 669d46c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@
help=".")
parser.add_argument("--output_model_path", type=str, default="models/output_model.bin",
help=".")
parser.add_argument("--layers_num", type=int, default=6, help=".")
parser.add_argument("--decoder_layers_num", type=int, default=6, help=".")
parser.add_argument("--layers_num", type=int, default=12, help=".")
parser.add_argument("--decoder_layers_num", type=int, default=12, help=".")

args = parser.parse_args()

input_model = torch.load(args.input_model_path, map_location="cpu")

output_model = collections.OrderedDict()

output_model["embedding.sinusoidalpos.pe"] = input_model["model.encoder.embed_positions.weight"].unsqueeze(1)
output_model["tgt_embedding.sinusoidalpos.pe"] = input_model["model.decoder.embed_positions.weight"].unsqueeze(1)
output_model["embedding.word.embedding.weight"] = input_model["model.encoder.embed_tokens.weight"]
output_model["tgt_embedding.word.embedding.weight"] = input_model["model.decoder.embed_tokens.weight"]
output_model["target.lm.output_layer.weight"] = input_model["lm_head.weight"]
Expand Down
19 changes: 17 additions & 2 deletions scripts/convert_pegasus_from_tencentpretrain_to_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import collections
import torch
import math

tencentpretrain_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, tencentpretrain_dir)
Expand All @@ -18,6 +19,7 @@
help=".")
parser.add_argument("--layers_num", type=int, default=12, help=".")
parser.add_argument("--decoder_layers_num", type=int, default=12, help=".")
parser.add_argument("--max_seq_length", type=int, default=1024, help=".")

args = parser.parse_args()

Expand All @@ -26,8 +28,21 @@
output_model = collections.OrderedDict()

output_model["model.shared.weight"] = input_model["embedding.word.embedding.weight"]
output_model["model.encoder.embed_positions.weight"] = input_model["embedding.sinusoidalpos.pe"].squeeze(1)
output_model["model.decoder.embed_positions.weight"] = input_model["tgt_embedding.sinusoidalpos.pe"].squeeze(1)

# Build the sinusoidal position table here rather than reading it from the
# input checkpoint; its length is controlled by --max_seq_length.
# The embedding width is the second dimension of the word-embedding matrix.
emb_size = input_model["embedding.word.embedding.weight"].shape[1]
# One row per position, one column per embedding dimension.
pe = torch.zeros(args.max_seq_length, emb_size)
# Position indices 0 .. max_seq_length - 1 as a column, so they broadcast
# against the per-frequency row computed below.
position = torch.arange(0, args.max_seq_length).unsqueeze(1)
# For each even index k in [0, emb_size): exp(-k * ln(10000) / emb_size),
# i.e. 10000 ** (-k / emb_size).
div_term = torch.exp(
(
torch.arange(0, emb_size, 2, dtype=torch.float)
*- (math.log(10000.0) / emb_size)
)
)
# Interleaved layout: even columns hold the sine, odd columns the cosine.
# NOTE(review): if emb_size is odd, the odd-column slice is one column
# narrower than div_term and the second assignment will not fit — presumably
# emb_size is always even; confirm.
# NOTE(review): confirm that the consumer of this checkpoint expects the
# interleaved sin/cos layout rather than a [all sin | all cos] layout.
pe[:, 0::2] = torch.sin(position.float() * div_term)
pe[:, 1::2] = torch.cos(position.float() * div_term)

# Both keys are bound to the same tensor object (no copy is made).
output_model["model.encoder.embed_positions.weight"] = pe
output_model["model.decoder.embed_positions.weight"] = pe
output_model["model.encoder.embed_tokens.weight"] = input_model["embedding.word.embedding.weight"]
output_model["model.decoder.embed_tokens.weight"] = input_model["tgt_embedding.word.embedding.weight"]
output_model["lm_head.weight"] = input_model["target.lm.output_layer.weight"]
Expand Down

0 comments on commit 669d46c

Please sign in to comment.