@@ -27,7 +27,7 @@ def server():
         "bfloat16",
         "--enforce-eager",
         "--max-model-len",
-        "8192",
+        "512",
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
     ]
@@ -60,10 +60,10 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):

    assert embeddings.id is not None
    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 9
-    assert embeddings.usage.total_tokens == 9
+    assert embeddings.usage.prompt_tokens == 11
+    assert embeddings.usage.total_tokens == 11

    # test using token IDs
    input_tokens = [1, 1, 1, 1, 1]
@@ -77,7 +77,7 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):

    assert embeddings.id is not None
    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
    assert embeddings.usage.prompt_tokens == 5
    assert embeddings.usage.total_tokens == 5
@@ -101,10 +101,10 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):

    assert embeddings.id is not None
    assert len(embeddings.data) == 3
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 32
-    assert embeddings.usage.total_tokens == 32
+    assert embeddings.usage.prompt_tokens == 33
+    assert embeddings.usage.total_tokens == 33

    # test List[List[int]]
    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
@@ -119,7 +119,7 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):

    assert embeddings.id is not None
    assert len(embeddings.data) == 4
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
    assert embeddings.usage.prompt_tokens == 17
    assert embeddings.usage.total_tokens == 17
@@ -234,7 +234,7 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,

    assert embeddings.id is not None
    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
    assert embeddings.usage.prompt_tokens == 10
    assert embeddings.usage.total_tokens == 10
@@ -252,7 +252,7 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,

    assert embeddings.id is not None
    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
+    assert len(embeddings.data[0].embedding) == 384
    assert embeddings.usage.completion_tokens == 0
    assert embeddings.usage.prompt_tokens == 10
    assert embeddings.usage.total_tokens == 10