From 670e52a3df658f6332f2904cfed67308f3f5adce Mon Sep 17 00:00:00 2001
From: Joe Cummings <jrcummings27@gmail.com>
Date: Fri, 17 Feb 2023 15:36:16 -0500
Subject: [PATCH] Promote t5 and variants (#2064)

* Promote T5 from prototype to beta

* Add models to README

* Move T5 tests into integration tests

* Fix linting

* Fix formatting

* Actually add t5

* Add the rest of the files

* Fix linting

* Modify paths for generation tests

* Fix linting
---
 README.rst                                          |  2 ++
 .../{test_models.py => test_roberta_models.py}      |  0
 .../{prototype/test_models.py => test_t5_models.py} |  4 ++--
 .../models/gpu_tests/models_gpu_test.py             |  5 +++--
 test/torchtext_unittest/models/models_cpu_test.py   |  5 +++--
 ...els_test_impl.py => roberta_models_test_impl.py} |  2 +-
 .../t5_models_test_impl.py}                         | 12 ++++++------
 .../t5_test_transforms.py}                          |  2 +-
 .../torchtext_unittest/prototype/models/__init__.py |  0
 .../models/gpu_tests/prototype_models_gpu_test.py   | 13 -------------
 .../prototype/models/prototype_models_cpu_test.py   |  9 ---------
 test/torchtext_unittest/prototype/test_generate.py  |  2 +-
 torchtext/models/__init__.py                        |  1 +
 torchtext/{prototype => }/models/t5/__init__.py     |  0
 torchtext/{prototype => }/models/t5/bundler.py      |  0
 torchtext/{prototype => }/models/t5/model.py        |  0
 torchtext/{prototype => }/models/t5/modules.py      |  0
 torchtext/{prototype => }/models/t5/t5_transform.py |  0
 torchtext/prototype/generate.py                     |  2 +-
 torchtext/prototype/models/__init__.py              |  1 -
 20 files changed, 21 insertions(+), 39 deletions(-)
 rename test/integration_tests/{test_models.py => test_roberta_models.py} (100%)
 rename test/integration_tests/{prototype/test_models.py => test_t5_models.py} (98%)
 rename test/torchtext_unittest/models/{models_test_impl.py => roberta_models_test_impl.py} (99%)
 rename test/torchtext_unittest/{prototype/models/models_test_impl.py => models/t5_models_test_impl.py} (94%)
 rename test/torchtext_unittest/{prototype/models/test_transforms.py => models/t5_test_transforms.py} (97%)
 delete mode 100644 test/torchtext_unittest/prototype/models/__init__.py
 delete mode 100644 test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py
 delete mode 100644 test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py
 rename torchtext/{prototype => }/models/t5/__init__.py (100%)
 rename torchtext/{prototype => }/models/t5/bundler.py (100%)
 rename torchtext/{prototype => }/models/t5/model.py (100%)
 rename torchtext/{prototype => }/models/t5/modules.py (100%)
 rename torchtext/{prototype => }/models/t5/t5_transform.py (100%)
 delete mode 100644 torchtext/prototype/models/__init__.py

diff --git a/README.rst b/README.rst
index b9651ba55b..6b757af521 100644
--- a/README.rst
+++ b/README.rst
@@ -121,6 +121,8 @@ The library currently consist of following pre-trained models:
 * RoBERTa: `Base and Large Architecture <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`_
 * `DistilRoBERTa <https://github.com/huggingface/transformers/blob/main/examples/research_projects/distillation/README.md>`_
 * XLM-RoBERTa: `Base and Large Architure <https://github.com/pytorch/fairseq/tree/main/examples/xlmr#pre-trained-models>`_
+* T5: `Small, Base, Large, 3B, and 11B Architecture <https://github.com/google-research/text-to-text-transfer-transformer>`_
+* Flan-T5: `Small, Base, Large, XL, and XXL Architecture <https://github.com/google-research/t5x>`_
 
 Tokenizers
 ==========
diff --git a/test/integration_tests/test_models.py b/test/integration_tests/test_roberta_models.py
similarity index 100%
rename from test/integration_tests/test_models.py
rename to test/integration_tests/test_roberta_models.py
diff --git a/test/integration_tests/prototype/test_models.py b/test/integration_tests/test_t5_models.py
similarity index 98%
rename from test/integration_tests/prototype/test_models.py
rename to test/integration_tests/test_t5_models.py
index 82894a819e..c7ea3b794f 100644
--- a/test/integration_tests/prototype/test_models.py
+++ b/test/integration_tests/test_t5_models.py
@@ -3,7 +3,8 @@
 import pytest  # noqa: F401
 import torch
 from parameterized import parameterized_class
-from torchtext.prototype.models import (
+from torchtext.models import T5Bundle
+from torchtext.models import (
     T5_BASE,
     T5_BASE_ENCODER,
     T5_BASE_GENERATION,
@@ -14,7 +15,6 @@
     T5_SMALL_ENCODER,
     T5_SMALL_GENERATION,
 )
-from torchtext.prototype.models.t5.bundler import T5Bundle
 from torchtext_unittest.common.assets import get_asset_path
 from torchtext_unittest.common.parameterized_utils import nested_params
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
index d33bc89075..58faf6e634 100644
--- a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
+++ b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
@@ -3,11 +3,12 @@
 import pytest
 import torch
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
-from torchtext_unittest.models.models_test_impl import BaseTestModels
+from torchtext_unittest.models.roberta_models_test_impl import RobertaBaseTestModels
+from torchtext_unittest.models.t5_models_test_impl import T5BaseTestModels
 
 
 @pytest.mark.gpu_test
 @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
-class TestModels32GPU(BaseTestModels, TorchtextTestCase):
+class TestModels32GPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cuda")
diff --git a/test/torchtext_unittest/models/models_cpu_test.py b/test/torchtext_unittest/models/models_cpu_test.py
index 3bcd3e4eb5..6f130f81c9 100644
--- a/test/torchtext_unittest/models/models_cpu_test.py
+++ b/test/torchtext_unittest/models/models_cpu_test.py
@@ -1,9 +1,10 @@
 import torch
 
 from ..common.torchtext_test_case import TorchtextTestCase
-from .models_test_impl import BaseTestModels
+from .roberta_models_test_impl import RobertaBaseTestModels
+from .t5_models_test_impl import T5BaseTestModels
 
 
-class TestModels32CPU(BaseTestModels, TorchtextTestCase):
+class TestModels32CPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cpu")
diff --git a/test/torchtext_unittest/models/models_test_impl.py b/test/torchtext_unittest/models/roberta_models_test_impl.py
similarity index 99%
rename from test/torchtext_unittest/models/models_test_impl.py
rename to test/torchtext_unittest/models/roberta_models_test_impl.py
index cdfd196268..b5b25fedef 100644
--- a/test/torchtext_unittest/models/models_test_impl.py
+++ b/test/torchtext_unittest/models/roberta_models_test_impl.py
@@ -7,7 +7,7 @@
 from ..common.case_utils import TestBaseMixin
 
 
-class BaseTestModels(TestBaseMixin):
+class RobertaBaseTestModels(TestBaseMixin):
     def get_model(self, encoder_conf, head=None, freeze_encoder=False, checkpoint=None, override_checkpoint_head=False):
         from torchtext.models import RobertaBundle
 
diff --git a/test/torchtext_unittest/prototype/models/models_test_impl.py b/test/torchtext_unittest/models/t5_models_test_impl.py
similarity index 94%
rename from test/torchtext_unittest/prototype/models/models_test_impl.py
rename to test/torchtext_unittest/models/t5_models_test_impl.py
index 48dd47626a..bd36f32715 100644
--- a/test/torchtext_unittest/prototype/models/models_test_impl.py
+++ b/test/torchtext_unittest/models/t5_models_test_impl.py
@@ -6,9 +6,9 @@
 from torchtext_unittest.common.case_utils import TestBaseMixin
 
 
-class BaseTestModels(TestBaseMixin):
+class T5BaseTestModels(TestBaseMixin):
     def test_t5_bundler_build_model(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Model, T5Bundle
+        from torchtext.models import T5Conf, T5Model, T5Bundle
 
         # case: user provides encoder checkpoint state dict
         dummy_encoder_conf = T5Conf(
@@ -57,7 +57,7 @@ def test_t5_bundler_build_model(self) -> None:
 
     @patch("logging.Logger.warning")
     def test_t5_bundler_get_model(self, mock):
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle
 
         # encoder-decoder with generation
         dummy_t5_generation_conf = T5Conf(
@@ -77,7 +77,7 @@ def test_t5_bundler_get_model(self, mock):
         )
 
     def test_t5_bundler_raise_checkpoint(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle
 
         # encoder-only
         with self.assertRaises(TypeError):
@@ -132,7 +132,7 @@ def test_t5_bundler_raise_checkpoint(self) -> None:
             )
 
     def test_t5_bundler_conf_property(self) -> None:
-        from torchtext.prototype.models import T5Conf, T5Bundle
+        from torchtext.models import T5Conf, T5Bundle
 
         dummy_t5_conf = T5Conf(
             encoder_only=False,
@@ -148,7 +148,7 @@ def test_t5_bundler_conf_property(self) -> None:
 
     def test_t5_bundler_train(self) -> None:
         from torch.optim import SGD
-        from torchtext.prototype.models import T5Conf, T5Model, T5Bundle
+        from torchtext.models import T5Conf, T5Model, T5Bundle
 
         torch.manual_seed(123)
 
diff --git a/test/torchtext_unittest/prototype/models/test_transforms.py b/test/torchtext_unittest/models/t5_test_transforms.py
similarity index 97%
rename from test/torchtext_unittest/prototype/models/test_transforms.py
rename to test/torchtext_unittest/models/t5_test_transforms.py
index 82d70a4719..a3d3a58e18 100644
--- a/test/torchtext_unittest/prototype/models/test_transforms.py
+++ b/test/torchtext_unittest/models/t5_test_transforms.py
@@ -1,5 +1,5 @@
 import torch
-from torchtext.prototype.models import T5Transform
+from torchtext.models import T5Transform
 from torchtext_unittest.common.assets import get_asset_path
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
 
diff --git a/test/torchtext_unittest/prototype/models/__init__.py b/test/torchtext_unittest/prototype/models/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py b/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py
deleted file mode 100644
index 65eccc35a5..0000000000
--- a/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import unittest
-
-import pytest
-import torch
-from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
-from torchtext_unittest.prototype.models.models_test_impl import BaseTestModels
-
-
-@pytest.mark.gpu_test
-@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
-class TestModels32GPU(BaseTestModels, TorchtextTestCase):
-    dtype = torch.float32
-    device = torch.device("cuda")
diff --git a/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py b/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py
deleted file mode 100644
index 57c5e4bbfd..0000000000
--- a/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
-
-from .models_test_impl import BaseTestModels
-
-
-class TestModels32CPU(BaseTestModels, TorchtextTestCase):
-    dtype = torch.float32
-    device = torch.device("cpu")
diff --git a/test/torchtext_unittest/prototype/test_generate.py b/test/torchtext_unittest/prototype/test_generate.py
index 7b8e0bf287..02ee687001 100644
--- a/test/torchtext_unittest/prototype/test_generate.py
+++ b/test/torchtext_unittest/prototype/test_generate.py
@@ -1,8 +1,8 @@
 from unittest.mock import patch
 
 import torch
+from torchtext.models import T5_BASE_GENERATION
 from torchtext.prototype.generate import DEFAULT_MAX_SEQ_LEN, GenerationUtil
-from torchtext.prototype.models import T5_BASE_GENERATION
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
 
 
diff --git a/torchtext/models/__init__.py b/torchtext/models/__init__.py
index a7cbc0c88a..7b80407811 100644
--- a/torchtext/models/__init__.py
+++ b/torchtext/models/__init__.py
@@ -1 +1,2 @@
 from .roberta import *  # noqa: F401, F403
+from .t5 import *  # noqa: F401, F403
diff --git a/torchtext/prototype/models/t5/__init__.py b/torchtext/models/t5/__init__.py
similarity index 100%
rename from torchtext/prototype/models/t5/__init__.py
rename to torchtext/models/t5/__init__.py
diff --git a/torchtext/prototype/models/t5/bundler.py b/torchtext/models/t5/bundler.py
similarity index 100%
rename from torchtext/prototype/models/t5/bundler.py
rename to torchtext/models/t5/bundler.py
diff --git a/torchtext/prototype/models/t5/model.py b/torchtext/models/t5/model.py
similarity index 100%
rename from torchtext/prototype/models/t5/model.py
rename to torchtext/models/t5/model.py
diff --git a/torchtext/prototype/models/t5/modules.py b/torchtext/models/t5/modules.py
similarity index 100%
rename from torchtext/prototype/models/t5/modules.py
rename to torchtext/models/t5/modules.py
diff --git a/torchtext/prototype/models/t5/t5_transform.py b/torchtext/models/t5/t5_transform.py
similarity index 100%
rename from torchtext/prototype/models/t5/t5_transform.py
rename to torchtext/models/t5/t5_transform.py
diff --git a/torchtext/prototype/generate.py b/torchtext/prototype/generate.py
index 53e1e003be..80afebe90e 100644
--- a/torchtext/prototype/generate.py
+++ b/torchtext/prototype/generate.py
@@ -25,7 +25,7 @@ class GenerationUtil:
     This means that popular HuggingFace implementation of T5, Bart, and GPT-2 can all be used with these generation utils!
     >>> from transformers import T5Model
     >>> model = T5Model.from_pretrained("t5-base")
-    >>> generative_model = GenerationUtil(model=model, is_huggingface_model=True)
+    >>> generative_model = GenerationUtil(model=model, is_huggingface_model=True)
     >>> generative_model.generate(input_ids, num_beams=1, max_len=100)
 
     More examples can be found in the `notebooks` directory of this repository.
diff --git a/torchtext/prototype/models/__init__.py b/torchtext/prototype/models/__init__.py
deleted file mode 100644
index ab659dda3d..0000000000
--- a/torchtext/prototype/models/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .t5 import *  # noqa: F401, F403