diff --git a/.github/container/manifest.yaml b/.github/container/manifest.yaml index b9c06e2e6..7ffa7a685 100644 --- a/.github/container/manifest.yaml +++ b/.github/container/manifest.yaml @@ -31,6 +31,7 @@ t5x: mirror/patch/partial-checkpoint-restore: file://patches/t5x/mirror-patch-partial-checkpoint-restore.patch # pull/1392/head # https://github.com/google-research/t5x/pull/1392: Add support for partial checkpoint restore mirror/patch/dali-support: file://patches/t5x/mirror-patch-dali-support.patch # pull/1393/head # https://github.com/google-research/t5x/pull/1393: Adds DALI support to t5x mirror/patch/t5x_te_in_contrib_noindent: file://patches/t5x/mirror-patch-t5x_te_in_contrib_noindent.patch # pull/1391/head # https://github.com/google-research/t5x/pull/1391: Adds transformer engine support and GPU optimizations to T5x (enables H100) + mirror/patch/fix-default-vocab: file://patches/t5x/mirror-patch-fix-default-vocab.patch # pull/1609/head # https://github.com/google-research/t5x/pull/1609: Fixes seqio vocab mismatch paxml: url: https://github.com/google/paxml.git mirror_url: https://github.com/nvjax-svc-0/paxml.git diff --git a/.github/container/test-t5x.sh b/.github/container/test-t5x.sh index 942e4b2c4..554ba7003 100755 --- a/.github/container/test-t5x.sh +++ b/.github/container/test-t5x.sh @@ -175,10 +175,10 @@ seqio.TaskRegistry.add( ], output_features=dict( inputs=seqio.Feature( - vocabulary=t5.data.get_default_vocabulary(), add_eos=True, required=False + vocabulary=seqio.SentencePieceVocabulary(sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model"), add_eos=True, required=False ), targets=seqio.Feature( - vocabulary=t5.data.get_default_vocabulary(), add_eos=True + vocabulary=seqio.SentencePieceVocabulary(sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model"), add_eos=True ) ), metric_fns=[]