@@ -90,54 +90,27 @@ class SeedOssIntegrationTest(unittest.TestCase):
     input_text = ["How to make pasta?", "Hi ByteDance-Seed"]
     model_id = "ByteDance-Seed/Seed-OSS-36B-Base"

-    def tearDown(self):
+    def setUp(self):
         cleanup(torch_device, gc_collect=True)

-    def test_model_36b_fp16(self):
-        EXPECTED_TEXTS = [
-            "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
-        ]
-
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16, device_map="auto")
-
-        tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
-            model.model.embed_tokens.weight.device
-        )
-
-        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
-        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
-
-        self.assertEqual(output_text, EXPECTED_TEXTS)
+    def tearDown(self):
+        cleanup(torch_device, gc_collect=True)

-    def test_model_36b_bf16(self):
+    def test_model_36b_eager(self):
         EXPECTED_TEXTS = [
             "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed ",
         ]

-        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto")
-
-        tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
-            model.model.embed_tokens.weight.device
-        )
-
-        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
-        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
-
-        self.assertEqual(output_text, EXPECTED_TEXTS)
-
-    def test_model_36b_eager(self):
-        EXPECTED_TEXTS = ""
-
         model = AutoModelForCausalLM.from_pretrained(
-            self.model_id, torch_dtype=torch.bfloat16, attn_implementation="eager", device_map="auto"
+            "ByteDance-Seed/Seed-OSS-36B-Base",
+            torch_dtype=torch.bfloat16,
+            attn_implementation="eager",
+            device_map="auto",
         )

         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

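A note on the setUp/tearDown change in this hunk: both hooks now call cleanup(torch_device, gc_collect=True), so accelerator state is reset before and after every test. cleanup comes from transformers.testing_utils; as a point of reference, here is a rough sketch of what such a helper does (this approximation is an assumption, not the library's actual code):

import gc

import torch


def cleanup(device: str, gc_collect: bool = False) -> None:
    # Hypothetical stand-in for transformers.testing_utils.cleanup: run the
    # garbage collector and release cached CUDA memory so one test's leftover
    # allocations don't bleed into the next test's memory accounting.
    if gc_collect:
        gc.collect()
    if device == "cuda" and torch.cuda.is_available():
        torch.cuda.empty_cache()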
@@ -149,15 +122,14 @@ def test_model_36b_eager(self):
     def test_model_36b_sdpa(self):
         EXPECTED_TEXTS = [
             "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed ",
         ]

-        model = AutoModelForCausalLM.from_pretrained(
-            self.model_id, torch_dtype=torch.bfloat16, attn_implementation="sdpa", device_map="auto"
-        )
+        # default attention is `sdpa` (and this model repo doesn't specify one explicitly) --> we get `sdpa` here
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto")

         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

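On the `sdpa` comment in this hunk: when from_pretrained receives no attn_implementation, recent transformers releases resolve an attention backend automatically, preferring SDPA when the installed torch supports it. A minimal way to confirm what was selected, assuming the private config._attn_implementation attribute keeps its current name:

import torch
from transformers import AutoModelForCausalLM

# Load without requesting an attention backend, then inspect the one that
# was auto-resolved; "sdpa" is expected here per the comment in the test.
model = AutoModelForCausalLM.from_pretrained(
    "ByteDance-Seed/Seed-OSS-36B-Base", torch_dtype=torch.bfloat16, device_map="auto"
)
print(model.config._attn_implementation)  # expected: "sdpa"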
@@ -170,15 +142,16 @@ def test_model_36b_sdpa(self):
     @require_torch_large_gpu
     @pytest.mark.flash_attn_test
     def test_model_36b_flash_attn(self):
-        EXPECTED_TEXTS = ""
+        EXPECTED_TEXTS = [
+            "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed",
+        ]

         model = AutoModelForCausalLM.from_pretrained(
             self.model_id, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", device_map="auto"
         )
-        model.to(torch_device)
-
         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

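Finally, the change repeated in every hunk is the added return_token_type_ids=False. The likely reason (an assumption inferred from the diff, not stated in it) is that this tokenizer returns a token_type_ids tensor by default, which model.generate(**inputs) would then forward to a forward() that doesn't accept it. A small sketch of the safe call, reusing the test's model id:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Base")

# Drop token_type_ids from the encoding so that model.generate(**inputs)
# only passes kwargs the model's forward() understands.
inputs = tokenizer(
    ["How to make pasta?", "Hi ByteDance-Seed"],
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)
print(sorted(inputs.keys()))  # expected: ['attention_mask', 'input_ids']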