From 670e52a3df658f6332f2904cfed67308f3f5adce Mon Sep 17 00:00:00 2001 From: Joe Cummings Date: Fri, 17 Feb 2023 15:36:16 -0500 Subject: [PATCH] Promote t5 and variants (#2064) * Promote T5 from prototype to beta * Add models to README * Move T5 tests in integration tests * Fix linting * Fix formatting * Actually add t5 * Add the rest of the files you absolute donkey * Fix linting * Modify paths for generation tests * Fix linting --- README.rst | 2 ++ .../{test_models.py => test_roberta_models.py} | 0 .../{prototype/test_models.py => test_t5_models.py} | 4 ++-- .../models/gpu_tests/models_gpu_test.py | 5 +++-- test/torchtext_unittest/models/models_cpu_test.py | 5 +++-- ...els_test_impl.py => roberta_models_test_impl.py} | 2 +- .../t5_models_test_impl.py} | 12 ++++++------ .../t5_test_transforms.py} | 2 +- .../torchtext_unittest/prototype/models/__init__.py | 0 .../models/gpu_tests/prototype_models_gpu_test.py | 13 ------------- .../prototype/models/prototype_models_cpu_test.py | 9 --------- test/torchtext_unittest/prototype/test_generate.py | 2 +- torchtext/models/__init__.py | 1 + torchtext/{prototype => }/models/t5/__init__.py | 0 torchtext/{prototype => }/models/t5/bundler.py | 0 torchtext/{prototype => }/models/t5/model.py | 0 torchtext/{prototype => }/models/t5/modules.py | 0 torchtext/{prototype => }/models/t5/t5_transform.py | 0 torchtext/prototype/generate.py | 2 +- torchtext/prototype/models/__init__.py | 1 - 20 files changed, 21 insertions(+), 39 deletions(-) rename test/integration_tests/{test_models.py => test_roberta_models.py} (100%) rename test/integration_tests/{prototype/test_models.py => test_t5_models.py} (98%) rename test/torchtext_unittest/models/{models_test_impl.py => roberta_models_test_impl.py} (99%) rename test/torchtext_unittest/{prototype/models/models_test_impl.py => models/t5_models_test_impl.py} (94%) rename test/torchtext_unittest/{prototype/models/test_transforms.py => models/t5_test_transforms.py} (97%) delete mode 100644 
test/torchtext_unittest/prototype/models/__init__.py delete mode 100644 test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py delete mode 100644 test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py rename torchtext/{prototype => }/models/t5/__init__.py (100%) rename torchtext/{prototype => }/models/t5/bundler.py (100%) rename torchtext/{prototype => }/models/t5/model.py (100%) rename torchtext/{prototype => }/models/t5/modules.py (100%) rename torchtext/{prototype => }/models/t5/t5_transform.py (100%) delete mode 100644 torchtext/prototype/models/__init__.py diff --git a/README.rst b/README.rst index b9651ba55b..6b757af521 100644 --- a/README.rst +++ b/README.rst @@ -121,6 +121,8 @@ The library currently consist of following pre-trained models: * RoBERTa: `Base and Large Architecture `_ * `DistilRoBERTa `_ * XLM-RoBERTa: `Base and Large Architure `_ +* T5: `Small, Base, Large, 3B, and 11B Architecture `_ +* Flan-T5: `Small, Base, Large, XL, and XXL Architecture `_ Tokenizers ========== diff --git a/test/integration_tests/test_models.py b/test/integration_tests/test_roberta_models.py similarity index 100% rename from test/integration_tests/test_models.py rename to test/integration_tests/test_roberta_models.py diff --git a/test/integration_tests/prototype/test_models.py b/test/integration_tests/test_t5_models.py similarity index 98% rename from test/integration_tests/prototype/test_models.py rename to test/integration_tests/test_t5_models.py index 82894a819e..c7ea3b794f 100644 --- a/test/integration_tests/prototype/test_models.py +++ b/test/integration_tests/test_t5_models.py @@ -3,7 +3,8 @@ import pytest # noqa: F401 import torch from parameterized import parameterized_class -from torchtext.prototype.models import ( +from torchtext.models import T5Bundle +from torchtext.models import ( T5_BASE, T5_BASE_ENCODER, T5_BASE_GENERATION, @@ -14,7 +15,6 @@ T5_SMALL_ENCODER, T5_SMALL_GENERATION, ) -from 
torchtext.prototype.models.t5.bundler import T5Bundle from torchtext_unittest.common.assets import get_asset_path from torchtext_unittest.common.parameterized_utils import nested_params from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py index d33bc89075..58faf6e634 100644 --- a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py +++ b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py @@ -3,11 +3,12 @@ import pytest import torch from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase -from torchtext_unittest.models.models_test_impl import BaseTestModels +from torchtext_unittest.models.roberta_models_test_impl import RobertaBaseTestModels +from torchtext_unittest.models.t5_models_test_impl import T5BaseTestModels @pytest.mark.gpu_test @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available") -class TestModels32GPU(BaseTestModels, TorchtextTestCase): +class TestModels32GPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase): dtype = torch.float32 device = torch.device("cuda") diff --git a/test/torchtext_unittest/models/models_cpu_test.py b/test/torchtext_unittest/models/models_cpu_test.py index 3bcd3e4eb5..6f130f81c9 100644 --- a/test/torchtext_unittest/models/models_cpu_test.py +++ b/test/torchtext_unittest/models/models_cpu_test.py @@ -1,9 +1,10 @@ import torch from ..common.torchtext_test_case import TorchtextTestCase -from .models_test_impl import BaseTestModels +from .roberta_models_test_impl import RobertaBaseTestModels +from .t5_models_test_impl import T5BaseTestModels -class TestModels32CPU(BaseTestModels, TorchtextTestCase): +class TestModels32CPU(RobertaBaseTestModels, T5BaseTestModels, TorchtextTestCase): dtype = torch.float32 device = torch.device("cpu") diff --git a/test/torchtext_unittest/models/models_test_impl.py 
b/test/torchtext_unittest/models/roberta_models_test_impl.py similarity index 99% rename from test/torchtext_unittest/models/models_test_impl.py rename to test/torchtext_unittest/models/roberta_models_test_impl.py index cdfd196268..b5b25fedef 100644 --- a/test/torchtext_unittest/models/models_test_impl.py +++ b/test/torchtext_unittest/models/roberta_models_test_impl.py @@ -7,7 +7,7 @@ from ..common.case_utils import TestBaseMixin -class BaseTestModels(TestBaseMixin): +class RobertaBaseTestModels(TestBaseMixin): def get_model(self, encoder_conf, head=None, freeze_encoder=False, checkpoint=None, override_checkpoint_head=False): from torchtext.models import RobertaBundle diff --git a/test/torchtext_unittest/prototype/models/models_test_impl.py b/test/torchtext_unittest/models/t5_models_test_impl.py similarity index 94% rename from test/torchtext_unittest/prototype/models/models_test_impl.py rename to test/torchtext_unittest/models/t5_models_test_impl.py index 48dd47626a..bd36f32715 100644 --- a/test/torchtext_unittest/prototype/models/models_test_impl.py +++ b/test/torchtext_unittest/models/t5_models_test_impl.py @@ -6,9 +6,9 @@ from torchtext_unittest.common.case_utils import TestBaseMixin -class BaseTestModels(TestBaseMixin): +class T5BaseTestModels(TestBaseMixin): def test_t5_bundler_build_model(self) -> None: - from torchtext.prototype.models import T5Conf, T5Model, T5Bundle + from torchtext.models import T5Conf, T5Model, T5Bundle # case: user provides encoder checkpoint state dict dummy_encoder_conf = T5Conf( @@ -57,7 +57,7 @@ def test_t5_bundler_build_model(self) -> None: @patch("logging.Logger.warning") def test_t5_bundler_get_model(self, mock): - from torchtext.prototype.models import T5Conf, T5Bundle + from torchtext.models import T5Conf, T5Bundle # encoder-decoder with generation dummy_t5_generation_conf = T5Conf( @@ -77,7 +77,7 @@ def test_t5_bundler_get_model(self, mock): ) def test_t5_bundler_raise_checkpoint(self) -> None: - from 
torchtext.prototype.models import T5Conf, T5Bundle + from torchtext.models import T5Conf, T5Bundle # encoder-only with self.assertRaises(TypeError): @@ -132,7 +132,7 @@ def test_t5_bundler_raise_checkpoint(self) -> None: ) def test_t5_bundler_conf_property(self) -> None: - from torchtext.prototype.models import T5Conf, T5Bundle + from torchtext.models import T5Conf, T5Bundle dummy_t5_conf = T5Conf( encoder_only=False, @@ -148,7 +148,7 @@ def test_t5_bundler_conf_property(self) -> None: def test_t5_bundler_train(self) -> None: from torch.optim import SGD - from torchtext.prototype.models import T5Conf, T5Model, T5Bundle + from torchtext.models import T5Conf, T5Model, T5Bundle torch.manual_seed(123) diff --git a/test/torchtext_unittest/prototype/models/test_transforms.py b/test/torchtext_unittest/models/t5_test_transforms.py similarity index 97% rename from test/torchtext_unittest/prototype/models/test_transforms.py rename to test/torchtext_unittest/models/t5_test_transforms.py index 82d70a4719..a3d3a58e18 100644 --- a/test/torchtext_unittest/prototype/models/test_transforms.py +++ b/test/torchtext_unittest/models/t5_test_transforms.py @@ -1,5 +1,5 @@ import torch -from torchtext.prototype.models import T5Transform +from torchtext.models import T5Transform from torchtext_unittest.common.assets import get_asset_path from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase diff --git a/test/torchtext_unittest/prototype/models/__init__.py b/test/torchtext_unittest/prototype/models/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py b/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py deleted file mode 100644 index 65eccc35a5..0000000000 --- a/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py +++ /dev/null @@ -1,13 +0,0 @@ -import unittest - -import pytest -import torch -from 
torchtext_unittest.common.torchtext_test_case import TorchtextTestCase -from torchtext_unittest.prototype.models.models_test_impl import BaseTestModels - - -@pytest.mark.gpu_test -@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available") -class TestModels32GPU(BaseTestModels, TorchtextTestCase): - dtype = torch.float32 - device = torch.device("cuda") diff --git a/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py b/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py deleted file mode 100644 index 57c5e4bbfd..0000000000 --- a/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase - -from .models_test_impl import BaseTestModels - - -class TestModels32CPU(BaseTestModels, TorchtextTestCase): - dtype = torch.float32 - device = torch.device("cpu") diff --git a/test/torchtext_unittest/prototype/test_generate.py b/test/torchtext_unittest/prototype/test_generate.py index 7b8e0bf287..02ee687001 100644 --- a/test/torchtext_unittest/prototype/test_generate.py +++ b/test/torchtext_unittest/prototype/test_generate.py @@ -1,8 +1,8 @@ from unittest.mock import patch import torch +from torchtext.models import T5_BASE_GENERATION from torchtext.prototype.generate import DEFAULT_MAX_SEQ_LEN, GenerationUtil -from torchtext.prototype.models import T5_BASE_GENERATION from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase diff --git a/torchtext/models/__init__.py b/torchtext/models/__init__.py index a7cbc0c88a..7b80407811 100644 --- a/torchtext/models/__init__.py +++ b/torchtext/models/__init__.py @@ -1 +1,2 @@ from .roberta import * # noqa: F401, F403 +from .t5 import * # noqa: F401, F403 diff --git a/torchtext/prototype/models/t5/__init__.py b/torchtext/models/t5/__init__.py similarity index 100% rename from torchtext/prototype/models/t5/__init__.py rename to 
torchtext/models/t5/__init__.py diff --git a/torchtext/prototype/models/t5/bundler.py b/torchtext/models/t5/bundler.py similarity index 100% rename from torchtext/prototype/models/t5/bundler.py rename to torchtext/models/t5/bundler.py diff --git a/torchtext/prototype/models/t5/model.py b/torchtext/models/t5/model.py similarity index 100% rename from torchtext/prototype/models/t5/model.py rename to torchtext/models/t5/model.py diff --git a/torchtext/prototype/models/t5/modules.py b/torchtext/models/t5/modules.py similarity index 100% rename from torchtext/prototype/models/t5/modules.py rename to torchtext/models/t5/modules.py diff --git a/torchtext/prototype/models/t5/t5_transform.py b/torchtext/models/t5/t5_transform.py similarity index 100% rename from torchtext/prototype/models/t5/t5_transform.py rename to torchtext/models/t5/t5_transform.py diff --git a/torchtext/prototype/generate.py b/torchtext/prototype/generate.py index 53e1e003be..80afebe90e 100644 --- a/torchtext/prototype/generate.py +++ b/torchtext/prototype/generate.py @@ -25,7 +25,7 @@ class GenerationUtil: This means that popular HuggingFace implementation of T5, Bart, and GPT-2 can all be used with these generation utils! >>> from transformers import T5Model >>> model = T5Model.from_pretrained("t5-base") - >>> generative_model = GenerationUtil(model=model, is_huggingface_model=True) + >>> generative_model = GenerationUtil(model=model, is_huggingface_model=True) >>> generative_model.generate(input_ids, num_beams=1, max_len=100) More examples can be found in the `notebooks` directory of this repository. diff --git a/torchtext/prototype/models/__init__.py b/torchtext/prototype/models/__init__.py deleted file mode 100644 index ab659dda3d..0000000000 --- a/torchtext/prototype/models/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .t5 import * # noqa: F401, F403