From 6d0c84e6026e41df13bf5c577e0437d553aeb23b Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Sun, 28 Aug 2022 20:11:21 +0530 Subject: [PATCH 1/8] use tokenizer to output tensor --- docs/source/en/model_doc/t5.mdx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/en/model_doc/t5.mdx b/docs/source/en/model_doc/t5.mdx index 5a1928923476cb..72b7a76b66d79d 100644 --- a/docs/source/en/model_doc/t5.mdx +++ b/docs/source/en/model_doc/t5.mdx @@ -187,12 +187,15 @@ ignored. The code example below illustrates all of this. >>> # encode the targets >>> target_encoding = tokenizer( -... [output_sequence_1, output_sequence_2], padding="longest", max_length=max_target_length, truncation=True +... [output_sequence_1, output_sequence_2], +... padding="longest", +... max_length=max_target_length, +... truncation=True, +... return_tensors="pt", ... ) >>> labels = target_encoding.input_ids >>> # replace padding token id's of the labels by -100 so it's ignored by the loss ->>> labels = torch.tensor(labels) >>> labels[labels == tokenizer.pad_token_id] = -100 >>> # forward pass From e2bf88bdba6b5ca91aadc9a232ddd4dbf7cb53d7 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Sun, 28 Aug 2022 20:20:35 +0530 Subject: [PATCH 2/8] add preprocessing for decoder_input_ids for bare T5Model --- src/transformers/models/t5/modeling_t5.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index e4c36109bd7709..5fbffcfdce22b7 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -1376,6 +1376,10 @@ def forward( ... ).input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> decoder_input_ids = model._shift_right(decoder_input_ids) + >>> # forward pass >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) >>> last_hidden_states = outputs.last_hidden_state From 8fbd5581d3630c85d76e33c399cc9bf8d4896447 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Sun, 28 Aug 2022 20:38:43 +0530 Subject: [PATCH 3/8] add preprocessing to tf and flax --- src/transformers/models/t5/modeling_flax_t5.py | 4 ++++ src/transformers/models/t5/modeling_tf_t5.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index 918a605fc4813a..aca1c82bb5f0d9 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -1388,6 +1388,10 @@ class FlaxT5Model(FlaxT5PreTrainedModel): ... ).input_ids >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="np").input_ids + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> decoder_input_ids = model._shift_right(decoder_input_ids) + >>> # forward pass >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) >>> last_hidden_states = outputs.last_hidden_state diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 091cb9d63eb42d..a0ff80d98bd12d 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -1180,6 +1180,10 @@ def call( ... ).input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1 + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> decoder_input_ids = model._shift_right(decoder_input_ids) + >>> # forward pass >>> outputs = model(input_ids, decoder_input_ids=decoder_input_ids) >>> last_hidden_states = outputs.last_hidden_state From 6978581421f34b99ff320ab249781186af4d16d0 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Sun, 28 Aug 2022 20:55:47 +0530 Subject: [PATCH 4/8] linting --- src/transformers/models/t5/modeling_flax_t5.py | 2 +- src/transformers/models/t5/modeling_t5.py | 2 +- src/transformers/models/t5/modeling_tf_t5.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index aca1c82bb5f0d9..d39e370aab0544 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -1388,7 +1388,7 @@ class FlaxT5Model(FlaxT5PreTrainedModel): ... ).input_ids >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="np").input_ids - >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 5fbffcfdce22b7..e0ddf18762dab4 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -1376,7 +1376,7 @@ def forward( ... ).input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 - >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index a0ff80d98bd12d..78c07a7f46e049 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -1180,7 +1180,7 @@ def call( ... ).input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1 - >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. + >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) From 92670b1fc5a7de419991149006bf2a2f63873447 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Sun, 28 Aug 2022 21:06:00 +0530 Subject: [PATCH 5/8] linting --- docs/source/en/model_doc/t5.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/en/model_doc/t5.mdx b/docs/source/en/model_doc/t5.mdx index 72b7a76b66d79d..92cd753b645767 100644 --- a/docs/source/en/model_doc/t5.mdx +++ b/docs/source/en/model_doc/t5.mdx @@ -187,9 +187,9 @@ ignored. The code example below illustrates all of this. >>> # encode the targets >>> target_encoding = tokenizer( -... [output_sequence_1, output_sequence_2], -... padding="longest", -... max_length=max_target_length, +... [output_sequence_1, output_sequence_2], +... padding="longest", +... max_length=max_target_length, ... truncation=True, ... return_tensors="pt", ... ) From 785d4d0a99bc46165bbd02a8420fdfd7b48e6de0 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Mon, 5 Sep 2022 20:34:33 +0530 Subject: [PATCH 6/8] Update src/transformers/models/t5/modeling_flax_t5.py Co-authored-by: Patrick von Platen --- src/transformers/models/t5/modeling_flax_t5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index d39e370aab0544..2732bf591690f7 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -1389,7 +1389,7 @@ class FlaxT5Model(FlaxT5PreTrainedModel): >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="np").input_ids >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. - >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> # This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) >>> # forward pass From 8af355c94a91eccd4fb0f2107e35a9b3b5ae198c Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Mon, 5 Sep 2022 20:34:39 +0530 Subject: [PATCH 7/8] Update src/transformers/models/t5/modeling_tf_t5.py Co-authored-by: Patrick von Platen --- src/transformers/models/t5/modeling_tf_t5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 78c07a7f46e049..dc909c8d8f3349 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -1181,7 +1181,7 @@ def call( >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1 >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. - >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> # This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) >>> # forward pass From e78d0ef4416c1b06cf082cc2e243c9acc361ca6b Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan Date: Mon, 5 Sep 2022 20:35:22 +0530 Subject: [PATCH 8/8] Update src/transformers/models/t5/modeling_t5.py Co-authored-by: Patrick von Platen --- src/transformers/models/t5/modeling_t5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index e0ddf18762dab4..0ec4c74d1d8da5 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -1377,7 +1377,7 @@ def forward( >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 >>> # preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model. - >>> # This is not needed for T5ForConditionalGeneration as it does this internally using labels arg. + >>> # This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg. >>> decoder_input_ids = model._shift_right(decoder_input_ids) >>> # forward pass