pytorch · joecummings · Feb 13, 2023 · Feb 13, 2023
diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
@@ -70,4 +70,4 @@ jobs:
         # Run Tests
         python3 -m torch.utils.collect_env
         cd test
-        python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest/models/gpu_tests
+        python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 -m gpu_test torchtext_unittest
diff --git a/pytest.ini b/pytest.ini
@@ -1,3 +1,5 @@
 [pytest]
 testpaths = test/
 python_paths = ./
+markers =
+    gpu_test: marks tests that require a CUDA device
diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
@@ -1,10 +1,12 @@
 import unittest
 
+import pytest
 import torch
 from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
 from torchtext_unittest.models.models_test_impl import BaseTestModels
 
 
+@pytest.mark.gpu_test
 @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
 class TestModels32GPU(BaseTestModels, TorchtextTestCase):
     dtype = torch.float32

diff --git a/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py b/test/torchtext_unittest/prototype/models/gpu_tests/prototype_models_gpu_test.py
@@ -0,0 +1,13 @@
+import unittest
+
+import pytest
+import torch
+from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
+from torchtext_unittest.prototype.models.models_test_impl import BaseTestModels
+
+
+@pytest.mark.gpu_test  # lets the GPU workflow select this class via "-m gpu_test"
+@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
+class TestModels32GPU(BaseTestModels, TorchtextTestCase):  # shared BaseTestModels tests, run on CUDA
+    dtype = torch.float32  # shared tests pass this to model.to(dtype=...)
+    device = torch.device("cuda")  # shared tests move the model and input tensors here
diff --git a/..._unittest/prototype/models/test_models.py → ...test/prototype/models/models_test_impl.py b/..._unittest/prototype/models/test_models.py → ...test/prototype/models/models_test_impl.py
@@ -3,10 +3,10 @@
 
 import torch
 from torch.nn import functional as F
-from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
+from torchtext_unittest.common.case_utils import TestBaseMixin
 
 
-class TestModels(TorchtextTestCase):
+class BaseTestModels(TestBaseMixin):
     def test_t5_bundler_build_model(self) -> None:
         from torchtext.prototype.models import T5Conf, T5Model, T5Bundle
 
@@ -152,8 +152,8 @@ def test_t5_bundler_train(self) -> None:
 
         def _train(model):
             optim = SGD(model.parameters(), lr=1)
-            model_input = torch.tensor([[1, 2, 3, 4, 5]])
-            target = torch.tensor([1])
+            model_input = torch.tensor([[1, 2, 3, 4, 5]]).to(device=self.device)
+            target = torch.tensor([1]).to(device=self.device)
             output = model(model_input)["decoder_output"]
             logits = F.log_softmax(output[:, -1], dim=-1)
             loss = F.cross_entropy(logits, target)
@@ -177,6 +177,7 @@ def _train(model):
             freeze_model=False,
             checkpoint=dummy_model.state_dict(),
         )
+        model.to(device=self.device, dtype=self.dtype)
         current_state_dict = copy.deepcopy(model.state_dict())
 
         _train(model)

diff --git a/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py b/test/torchtext_unittest/prototype/models/prototype_models_cpu_test.py
@@ -0,0 +1,9 @@
+import torch
+from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
+
+from .models_test_impl import BaseTestModels
+
+
+class TestModels32CPU(BaseTestModels, TorchtextTestCase):  # shared BaseTestModels tests, run on CPU
+    dtype = torch.float32  # shared tests pass this to model.to(dtype=...)
+    device = torch.device("cpu")  # shared tests move the model and input tensors here