@@ -90,54 +90,27 @@ class SeedOssIntegrationTest(unittest.TestCase):
     input_text = ["How to make pasta?", "Hi ByteDance-Seed"]
     model_id = "ByteDance-Seed/Seed-OSS-36B-Base"

-    def tearDown(self):
+    def setUp(self):
         cleanup(torch_device, gc_collect=True)

-    def test_model_36b_fp16(self):
-        EXPECTED_TEXTS = [
-            "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
-        ]
-
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16, device_map="auto")
-
-        tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
-            model.model.embed_tokens.weight.device
-        )
-
-        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
-        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
-
-        self.assertEqual(output_text, EXPECTED_TEXTS)
+    def tearDown(self):
+        cleanup(torch_device, gc_collect=True)

-    def test_model_36b_bf16(self):
+    def test_model_36b_eager(self):
         EXPECTED_TEXTS = [
             "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed ",
         ]

-        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto")
-
-        tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
-            model.model.embed_tokens.weight.device
-        )
-
-        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
-        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
-
-        self.assertEqual(output_text, EXPECTED_TEXTS)
-
-    def test_model_36b_eager(self):
-        EXPECTED_TEXTS = ""
-
         model = AutoModelForCausalLM.from_pretrained(
-            self.model_id, torch_dtype=torch.bfloat16, attn_implementation="eager", device_map="auto"
+            "ByteDance-Seed/Seed-OSS-36B-Base",
+            torch_dtype=torch.bfloat16,
+            attn_implementation="eager",
+            device_map="auto",
         )

         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

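A note on the setUp/tearDown change in this hunk: both hooks now call cleanup(torch_device, gc_collect=True), so accelerator state is reset before and after every test. cleanup comes from transformers.testing_utils; as a point of reference, here is a rough sketch of what such a helper does (this approximation is an assumption, not the library's actual code):

import gc

import torch


def cleanup(device: str, gc_collect: bool = False) -> None:
    # Hypothetical stand-in for transformers.testing_utils.cleanup: run the
    # garbage collector and release cached CUDA memory so one test's leftover
    # allocations don't bleed into the next test's memory accounting.
    if gc_collect:
        gc.collect()
    if device == "cuda" and torch.cuda.is_available():
        torch.cuda.empty_cache()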
@@ -149,15 +122,14 @@ def test_model_36b_eager(self):
     def test_model_36b_sdpa(self):
         EXPECTED_TEXTS = [
             "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
-            "Hi ByteDance-Seed team,\nI am trying to run the code on my local machine. I have installed all the",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed ",
         ]

-        model = AutoModelForCausalLM.from_pretrained(
-            self.model_id, torch_dtype=torch.bfloat16, attn_implementation="sdpa", device_map="auto"
-        )
+        # default attention is `sdpa` (and this model repo doesn't specify one explicitly) --> we get `sdpa` here
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto")

         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

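On the `sdpa` comment in this hunk: when from_pretrained receives no attn_implementation, recent transformers releases resolve an attention backend automatically, preferring SDPA when the installed torch supports it. A minimal way to confirm what was selected, assuming the private config._attn_implementation attribute keeps its current name:

import torch
from transformers import AutoModelForCausalLM

# Load without requesting an attention backend, then inspect the one that
# was auto-resolved; "sdpa" is expected here per the comment in the test.
model = AutoModelForCausalLM.from_pretrained(
    "ByteDance-Seed/Seed-OSS-36B-Base", torch_dtype=torch.bfloat16, device_map="auto"
)
print(model.config._attn_implementation)  # expected: "sdpa"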
@@ -170,15 +142,16 @@ def test_model_36b_sdpa(self):
     @require_torch_large_gpu
     @pytest.mark.flash_attn_test
     def test_model_36b_flash_attn(self):
-        EXPECTED_TEXTS = ""
+        EXPECTED_TEXTS = [
+            "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
+            "Hi ByteDance-Seed team,\nI am trying to run the code on the <beginning of the code>seed",
+        ]

         model = AutoModelForCausalLM.from_pretrained(
             self.model_id, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", device_map="auto"
         )
-        model.to(torch_device)
-
         tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(
+        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
             model.model.embed_tokens.weight.device
         )

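Finally, the change repeated in every hunk is the added return_token_type_ids=False. The likely reason (an assumption inferred from the diff, not stated in it) is that this tokenizer returns a token_type_ids tensor by default, which model.generate(**inputs) would then forward to a forward() that doesn't accept it. A small sketch of the safe call, reusing the test's model id:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Base")

# Drop token_type_ids from the encoding so that model.generate(**inputs)
# only passes kwargs the model's forward() understands.
inputs = tokenizer(
    ["How to make pasta?", "Hi ByteDance-Seed"],
    return_tensors="pt",
    padding=True,
    return_token_type_ids=False,
)
print(sorted(inputs.keys()))  # expected: ['attention_mask', 'input_ids']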