Promote t5 and variants (#2064)
* Promote T5 from prototype to beta

* Add models to README

* Move T5 tests in integration tests

* Fix linting

* Fix formatting

* Actually add t5

* Add the rest of the files you absolute donkey

* Fix linting

* Modify paths for generation tests

* Fix linting
joecummings committed Feb 17, 2023
1 parent 2cd5e12 commit 670e52a
Showing 20 changed files with 21 additions and 39 deletions.
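
For orientation, the practical effect of this promotion is that the T5 entry points move out of the prototype namespace and become importable from torchtext.models. A minimal sketch of the promoted API, mirroring the bundle usage exercised in the tests below (the output dict key is an assumption based on the bundle documentation, not shown in this diff):

    import torch
    from torchtext.models import T5_BASE_ENCODER  # previously torchtext.prototype.models

    # Bundles lazily download pre-trained weights on first use.
    transform = T5_BASE_ENCODER.transform()
    model = T5_BASE_ENCODER.get_model()
    model.eval()

    model_input = transform(["Hello world", "Attention rocks!"])
    with torch.no_grad():
        output = model(model_input)["encoder_output"]  # key name assumed
    print(output.shape)  # (batch, sequence length, embedding dim)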
2 changes: 2 additions & 0 deletions README.rst
@@ -121,6 +121,8 @@ The library currently consist of following pre-trained models:
 * RoBERTa: `Base and Large Architecture <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`_
 * `DistilRoBERTa <https://github.com/huggingface/transformers/blob/main/examples/research_projects/distillation/README.md>`_
 * XLM-RoBERTa: `Base and Large Architure <https://github.com/pytorch/fairseq/tree/main/examples/xlmr#pre-trained-models>`_
+* T5: `Small, Base, Large, 3B, and 11B Architecture <https://github.com/google-research/text-to-text-transfer-transformer>`_
+* Flan-T5: `Small, Base, Large, XL, and XXL Architecture <https://github.com/google-research/t5x>`_

 Tokenizers
 ==========
File renamed without changes.
@@ -3,7 +3,8 @@
 import pytest  # noqa: F401
 import torch
 from parameterized import parameterized_class
-from torchtext.prototype.models import (
+from torchtext.models import T5Bundle
+from torchtext.models import (
     T5_BASE,
     T5_BASE_ENCODER,
     T5_BASE_GENERATION,
@@ -14,7 +15,6 @@
     T5_SMALL_ENCODER,
     T5_SMALL_GENERATION,
 )
-from torchtext.prototype.models.t5.bundler import T5Bundle
 from torchtext_unittest.common.assets import get_asset_path
 from torchtext_unittest.common.parameterized_utils import nested_params
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
5 changes: 3 additions & 2 deletions test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
@@ -3,11 +3,12 @@
 import pytest
 import torch
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
-from torchtext_unittest.models.models_test_impl import BaseTestModels
+from torchtext_unittest.models.roberta_models_test_impl import RobertaBaseTestModels
+from torchtext_unittest.models.t5_models_test_impl import T5BaseTestModels


 @pytest.mark.gpu_test
 @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
-class TestModels32GPU(BaseTestModels, TorchtextTestCase):
+class TestModels32GPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cuda")
5 changes: 3 additions & 2 deletions test/torchtext_unittest/models/models_cpu_test.py
@@ -1,9 +1,10 @@
 import torch

 from ..common.torchtext_test_case import TorchtextTestCase
-from .models_test_impl import BaseTestModels
+from .roberta_models_test_impl import RobertaBaseTestModels
+from .t5_models_test_impl import T5BaseTestModels


-class TestModels32CPU(BaseTestModels, TorchtextTestCase):
+class TestModels32CPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cpu")
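
The CPU and GPU test classes above are deliberately thin: each mixes a model-specific test implementation into TorchtextTestCase and pins a dtype/device pair. A simplified illustration of the pattern (not torchtext's actual TestBaseMixin, just the shape of it):

    import unittest

    import torch


    class TestBaseMixin:
        # Concrete test classes override these two knobs.
        dtype = torch.float32
        device = torch.device("cpu")


    class T5BaseTestModels(TestBaseMixin):
        def test_tensor_placement(self) -> None:
            x = torch.zeros(2, 4, dtype=self.dtype, device=self.device)
            self.assertEqual(x.dtype, self.dtype)


    # One concrete class per dtype/device combination, as in the diffs above.
    class TestModels32CPU(T5BaseTestModels, unittest.TestCase):
        dtype = torch.float32
        device = torch.device("cpu")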
@@ -7,7 +7,7 @@
 from ..common.case_utils import TestBaseMixin


-class BaseTestModels(TestBaseMixin):
+class RobertaBaseTestModels(TestBaseMixin):
     def get_model(self, encoder_conf, head=None, freeze_encoder=False, checkpoint=None, override_checkpoint_head=False):
         from torchtext.models import RobertaBundle
@@ -6,9 +6,9 @@
 from torchtext_unittest.common.case_utils import TestBaseMixin


-class BaseTestModels(TestBaseMixin):
+class T5BaseTestModels(TestBaseMixin):
     def test_t5_bundler_build_model(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Model, T5Bundle
+        from torchtext.models import T5Conf, T5Model, T5Bundle

         # case: user provides encoder checkpoint state dict
         dummy_encoder_conf = T5Conf(
@@ -57,7 +57,7 @@ def test_t5_bundler_build_model(self) -> None:

     @patch("logging.Logger.warning")
     def test_t5_bundler_get_model(self, mock):
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle

         # encoder-decoder with generation
         dummy_t5_generation_conf = T5Conf(
@@ -77,7 +77,7 @@ def test_t5_bundler_get_model(self, mock):
         )

     def test_t5_bundler_raise_checkpoint(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle

         # encoder-only
         with self.assertRaises(TypeError):
@@ -132,7 +132,7 @@ def test_t5_bundler_raise_checkpoint(self) -> None:
         )

     def test_t5_bundler_conf_property(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle

         dummy_t5_conf = T5Conf(
             encoder_only=False,
@@ -148,7 +148,7 @@

     def test_t5_bundler_train(self) -> None:
         from torch.optim import SGD
-        from torchtext.prototype.models import T5Conf, T5Model, T5Bundle
+        from torchtext.models import T5Conf, T5Model, T5Bundle

         torch.manual_seed(123)
@@ -1,5 +1,5 @@
 import torch
-from torchtext.prototype.models import T5Transform
+from torchtext.models import T5Transform
 from torchtext_unittest.common.assets import get_asset_path
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
Empty file.

This file was deleted.

This file was deleted.

2 changes: 1 addition & 1 deletion test/torchtext_unittest/prototype/test_generate.py
@@ -1,8 +1,8 @@
 from unittest.mock import patch

 import torch
+from torchtext.models import T5_BASE_GENERATION
 from torchtext.prototype.generate import DEFAULT_MAX_SEQ_LEN, GenerationUtil
-from torchtext.prototype.models import T5_BASE_GENERATION
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
1 change: 1 addition & 0 deletions torchtext/models/__init__.py
@@ -1 +1,2 @@
 from .roberta import *  # noqa: F401, F403
+from .t5 import *  # noqa: F401, F403
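
With the star re-export in place, the T5 names resolve both from the package root and from the renamed t5 subpackage; a quick sanity check, assuming the subpackage exports the same symbols the tests import:

    from torchtext.models import T5Bundle, T5Conf, T5Model, T5Transform
    from torchtext.models.t5 import T5Conf as T5ConfFromSubpackage

    # Both import paths point at the same class object after the re-export.
    assert T5Conf is T5ConfFromSubpackage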
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion torchtext/prototype/generate.py
@@ -25,7 +25,7 @@ class GenerationUtil:
     This means that popular HuggingFace implementation of T5, Bart, and GPT-2 can all be used with these generation utils!

     >>> from transformers import T5Model
     >>> model = T5Model.from_pretrained("t5-base")
-    >>> generative_model = GenerationUtil(model=model, is_huggingface_model=True)
+    >>> generative_model = GenerationUtils(model=model, is_huggingface_model=True)
     >>> generative_model.generate(input_ids, num_beams=1, max_len=100)
     More examples can be found in the `notebooks` directory of this repository.
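
The docstring example above shows the HuggingFace path; the same utility also wraps the newly promoted torchtext bundles. A hedged sketch (the generate call follows the docstring above; transform.decode is an assumption based on the T5Transform interface):

    from torchtext.models import T5_BASE_GENERATION
    from torchtext.prototype.generate import GenerationUtil

    transform = T5_BASE_GENERATION.transform()
    model = T5_BASE_GENERATION.get_model()
    model.eval()

    input_ids = transform(["summarize: studies have shown that owning a dog is good for you"])

    # Native torchtext model, so is_huggingface_model stays at its default (False).
    generator = GenerationUtil(model=model)
    output_ids = generator.generate(input_ids, num_beams=1, max_len=100)
    print(transform.decode(output_ids.tolist()))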
1 change: 0 additions & 1 deletion torchtext/prototype/models/__init__.py

This file was deleted.
