From 966af5ac663b85edea8c9ae28567b17de3a66a5e Mon Sep 17 00:00:00 2001
From: Academich <mihandronov@gmail.com>
Date: Wed, 30 Oct 2024 17:09:22 +0100
Subject: [PATCH] Added a test that checks if the forward pass of the CRF
 module works on a GPU

---
 tests/test_crf.py | 323 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 319 insertions(+), 4 deletions(-)

diff --git a/tests/test_crf.py b/tests/test_crf.py
index 9b5c449..ce0ac6a 100644
--- a/tests/test_crf.py
+++ b/tests/test_crf.py
@@ -34,18 +34,23 @@ def compute_score(crf, emission, tag):
     return score
 
 
-def make_crf(num_tags=5, batch_first=False):
-    return CRF(num_tags, batch_first=batch_first)
+def make_crf(num_tags=5, batch_first=False, device=None):
+    model = CRF(num_tags, batch_first=batch_first)
+    if device is not None:
+        model = model.to(device)
+    return model
 
 
-def make_emissions(crf, seq_length=3, batch_size=2):
+def make_emissions(crf, seq_length=3, batch_size=2, device=None):
     em = torch.randn(seq_length, batch_size, crf.num_tags)
     if crf.batch_first:
         em = em.transpose(0, 1)
+    if device is not None:
+        em = em.to(device)
     return em
 
 
-def make_tags(crf, seq_length=3, batch_size=2):
+def make_tags(crf, seq_length=3, batch_size=2, device=None):
     # shape: (seq_length, batch_size)
     ts = torch.tensor([[random.randrange(crf.num_tags)
                         for b in range(batch_size)]
@@ -53,6 +58,8 @@ def make_tags(crf, seq_length=3, batch_size=2):
                       dtype=torch.long)
     if crf.batch_first:
         ts = ts.transpose(0, 1)
+    if device is not None:
+        ts = ts.to(device)
     return ts
 
 
@@ -340,6 +347,314 @@ def test_invalid_reduction(self):
         assert 'invalid reduction: foo' in str(excinfo.value)
 
 
+class TestForwardOnCuda:
+
+    def test_works_with_mask(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        seq_length, batch_size = 3, 2
+
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, seq_length, batch_size, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, seq_length, batch_size, device=device)
+        # mask should have size of (seq_length, batch_size)
+        mask = torch.tensor([[1, 1, 1], [1, 1, 0]], dtype=torch.bool, device=device).transpose(0, 1)
+
+        # shape: ()
+        llh = crf(emissions, tags, mask=mask)
+
+        # shape: (batch_size, seq_length, num_tags)
+        emissions = emissions.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        tags = tags.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        mask = mask.transpose(0, 1)
+
+        # Compute log likelihood manually
+        manual_llh = 0.
+        for emission, tag, mask_ in zip(emissions, tags, mask):
+            seq_len = mask_.sum()
+            emission, tag = emission[:seq_len], tag[:seq_len]
+            numerator = compute_score(crf, emission, tag)
+            all_scores = [
+                compute_score(crf, emission, t)
+                for t in itertools.product(range(crf.num_tags), repeat=seq_len)
+            ]
+            denominator = math.log(sum(math.exp(s) for s in all_scores))
+            manual_llh += numerator - denominator
+
+        assert_close(llh, manual_llh)
+        llh.backward()  # ensure gradients can be computed
+
+    def test_works_without_mask(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, device=device)
+
+        llh_no_mask = crf(emissions, tags)
+        # No mask means the mask is all ones
+        llh_mask = crf(emissions, tags, mask=torch.ones_like(tags).bool())
+
+        assert_close(llh_no_mask, llh_mask)
+
+    def test_batched_loss(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        batch_size = 10
+
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, batch_size=batch_size, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, batch_size=batch_size, device=device)
+
+        llh = crf(emissions, tags)
+        assert torch.is_tensor(llh)
+        assert llh.shape == ()
+
+        total_llh = 0.
+        for i in range(batch_size):
+            # shape: (seq_length, 1, num_tags)
+            emissions_ = emissions[:, i, :].unsqueeze(1)
+            # shape: (seq_length, 1)
+            tags_ = tags[:, i].unsqueeze(1)
+            # shape: ()
+            total_llh += crf(emissions_, tags_)
+
+        assert_close(llh, total_llh)
+
+    def test_reduction_none(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, device=device)
+
+        seq_length, batch_size = tags.shape
+
+        llh = crf(emissions, tags, reduction='none')
+
+        assert torch.is_tensor(llh)
+        assert llh.shape == (batch_size,)
+
+        # shape: (batch_size, seq_length, num_tags)
+        emissions = emissions.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        tags = tags.transpose(0, 1)
+
+        # Compute log likelihood manually
+        manual_llh = []
+        for emission, tag in zip(emissions, tags):
+            numerator = compute_score(crf, emission, tag)
+            all_scores = [
+                compute_score(crf, emission, t)
+                for t in itertools.product(range(crf.num_tags), repeat=seq_length)
+            ]
+            denominator = math.log(sum(math.exp(s) for s in all_scores))
+            manual_llh.append(numerator - denominator)
+
+        assert_close(llh, torch.tensor(manual_llh, device=device))
+
+    def test_reduction_mean(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, device=device)
+
+        seq_length, batch_size = tags.shape
+
+        llh = crf(emissions, tags, reduction='mean')
+
+        assert torch.is_tensor(llh)
+        assert llh.shape == ()
+
+        # shape: (batch_size, seq_length, num_tags)
+        emissions = emissions.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        tags = tags.transpose(0, 1)
+
+        # Compute log likelihood manually
+        manual_llh = 0
+        for emission, tag in zip(emissions, tags):
+            numerator = compute_score(crf, emission, tag)
+            all_scores = [
+                compute_score(crf, emission, t)
+                for t in itertools.product(range(crf.num_tags), repeat=seq_length)
+            ]
+            denominator = math.log(sum(math.exp(s) for s in all_scores))
+            manual_llh += numerator - denominator
+
+        assert_close(llh, manual_llh / batch_size)
+
+    def test_reduction_token_mean(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        seq_length, batch_size = 3, 2
+
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, seq_length, batch_size, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, seq_length, batch_size, device=device)
+        # mask should have size of (seq_length, batch_size)
+        mask = torch.tensor([[1, 1, 1], [1, 1, 0]], dtype=torch.bool, device=device).transpose(0, 1)
+
+        llh = crf(emissions, tags, mask=mask, reduction='token_mean')
+
+        assert torch.is_tensor(llh)
+        assert llh.shape == ()
+
+        # shape: (batch_size, seq_length, num_tags)
+        emissions = emissions.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        tags = tags.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        mask = mask.transpose(0, 1)
+
+        # Compute log likelihood manually
+        manual_llh, n_tokens = 0, 0
+        for emission, tag, mask_ in zip(emissions, tags, mask):
+            seq_len = mask_.sum()
+            emission, tag = emission[:seq_len], tag[:seq_len]
+            numerator = compute_score(crf, emission, tag)
+            all_scores = [
+                compute_score(crf, emission, t)
+                for t in itertools.product(range(crf.num_tags), repeat=seq_len)
+            ]
+            denominator = math.log(sum(math.exp(s) for s in all_scores))
+            manual_llh += numerator - denominator
+            n_tokens += seq_len
+
+        assert_close(llh, manual_llh / n_tokens)
+
+    def test_batch_first(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        # shape: (seq_length, batch_size, num_tags)
+        emissions = make_emissions(crf, device=device)
+        # shape: (seq_length, batch_size)
+        tags = make_tags(crf, device=device)
+        llh = crf(emissions, tags)
+
+        crf_bf = make_crf(batch_first=True, device=device)
+        # Copy parameter values from non-batch-first CRF; requires_grad must be False
+        # to avoid runtime error of in-place operation on a leaf variable
+        crf_bf.start_transitions.requires_grad_(False).copy_(crf.start_transitions)
+        crf_bf.end_transitions.requires_grad_(False).copy_(crf.end_transitions)
+        crf_bf.transitions.requires_grad_(False).copy_(crf.transitions)
+
+        # shape: (batch_size, seq_length, num_tags)
+        emissions = emissions.transpose(0, 1)
+        # shape: (batch_size, seq_length)
+        tags = tags.transpose(0, 1)
+        llh_bf = crf_bf(emissions, tags)
+
+        assert_close(llh, llh_bf)
+
+    def test_emissions_has_bad_number_of_dimension(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        emissions = torch.randn(1, 2).to(device)
+        tags = torch.empty(2, 2, dtype=torch.long, device=device)
+        crf = make_crf(device=device)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags)
+        assert 'emissions must have dimension of 3, got 2' in str(excinfo.value)
+
+    def test_emissions_and_tags_size_mismatch(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        emissions = torch.randn(1, 2, 3).to(device)
+        tags = torch.empty(2, 2, dtype=torch.long, device=device)
+        crf = make_crf(3, device=device)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags)
+        assert (
+                   'the first two dimensions of emissions and tags must match, '
+                   'got (1, 2) and (2, 2)') in str(excinfo.value)
+
+    def test_emissions_last_dimension_not_equal_to_number_of_tags(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        emissions = torch.randn(1, 2, 3).to(device)
+        tags = torch.empty(1, 2, dtype=torch.long, device=device)
+        crf = make_crf(10, device=device)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags)
+        assert 'expected last dimension of emissions is 10, got 3' in str(excinfo.value)
+
+    def test_first_timestep_mask_is_not_all_on(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        emissions = torch.randn(3, 2, 4).to(device)
+        tags = torch.empty(3, 2, dtype=torch.long, device=device)
+        mask = torch.tensor([[1, 1, 1], [0, 0, 0]], dtype=torch.bool, device=device).transpose(0, 1)
+        crf = make_crf(4, device=device)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags, mask=mask)
+        assert 'mask of the first timestep must all be on' in str(excinfo.value)
+
+        emissions = emissions.transpose(0, 1)
+        tags = tags.transpose(0, 1)
+        mask = mask.transpose(0, 1)
+        crf = make_crf(4, batch_first=True)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags, mask=mask)
+        assert 'mask of the first timestep must all be on' in str(excinfo.value)
+
+    def test_invalid_reduction(self):
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        else:
+            raise RuntimeError("No GPU with CUDA to test on.")
+        crf = make_crf(device=device)
+        emissions = make_emissions(crf, device=device)
+        tags = make_tags(crf, device=device)
+
+        with pytest.raises(ValueError) as excinfo:
+            crf(emissions, tags, reduction='foo')
+        assert 'invalid reduction: foo' in str(excinfo.value)
+
+
 class TestDecode:
     def test_works_with_mask(self):
         crf = make_crf()