Skip to content

Commit 6721806

Browse files
DarkLight1337 and amitm02
authored and committed
[Deprecation] Remove fallbacks for Embeddings API (vllm-project#18795)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: amit <amit.man@gmail.com>
1 parent ca91a87 commit 6721806

File tree

3 files changed

+12
-60
lines changed

3 files changed

+12
-60
lines changed

vllm/config.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -797,17 +797,12 @@ def _resolve_task(
797797
else:
798798
# Aliases
799799
if task_option == "embedding":
800-
preferred_task = self._get_preferred_task(
801-
architectures, supported_tasks)
802-
if preferred_task != "embed":
803-
msg = ("The 'embedding' task will be restricted to "
804-
"embedding models in a future release. Please "
805-
"pass `--task classify`, `--task score`, or "
806-
"`--task reward` explicitly for other pooling "
807-
"models.")
808-
warnings.warn(msg, DeprecationWarning, stacklevel=2)
809-
810-
task_option = preferred_task or "embed"
800+
msg = ("The 'embedding' task has been renamed to "
801+
"'embed', please use the new name. The old name "
802+
"will be removed in v1.0.")
803+
warnings.warn(msg, DeprecationWarning, stacklevel=2)
804+
805+
task_option = "embed"
811806

812807
if task_option not in supported_tasks:
813808
msg = (

vllm/entrypoints/openai/api_server.py

Lines changed: 5 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from functools import partial
1818
from http import HTTPStatus
1919
from json import JSONDecodeError
20-
from typing import Annotated, Optional, Union
20+
from typing import Annotated, Optional
2121

2222
import prometheus_client
2323
import regex as re
@@ -59,9 +59,7 @@
5959
EmbeddingChatRequest,
6060
EmbeddingCompletionRequest,
6161
EmbeddingRequest,
62-
EmbeddingResponse,
63-
EmbeddingResponseData,
64-
ErrorResponse,
62+
EmbeddingResponse, ErrorResponse,
6563
LoadLoRAAdapterRequest,
6664
PoolingChatRequest,
6765
PoolingCompletionRequest,
@@ -627,37 +625,10 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
627625
async def create_embedding(request: EmbeddingRequest, raw_request: Request):
628626
handler = embedding(raw_request)
629627
if handler is None:
630-
fallback_handler = pooling(raw_request)
631-
if fallback_handler is None:
632-
return base(raw_request).create_error_response(
633-
message="The model does not support Embeddings API")
628+
return base(raw_request).create_error_response(
629+
message="The model does not support Embeddings API")
634630

635-
logger.warning(
636-
"Embeddings API will become exclusive to embedding models "
637-
"in a future release. To return the hidden states directly, "
638-
"use the Pooling API (`/pooling`) instead.")
639-
640-
res = await fallback_handler.create_pooling(request, raw_request)
641-
642-
generator: Union[ErrorResponse, EmbeddingResponse]
643-
if isinstance(res, PoolingResponse):
644-
generator = EmbeddingResponse(
645-
id=res.id,
646-
object=res.object,
647-
created=res.created,
648-
model=res.model,
649-
data=[
650-
EmbeddingResponseData(
651-
index=d.index,
652-
embedding=d.data, # type: ignore
653-
) for d in res.data
654-
],
655-
usage=res.usage,
656-
)
657-
else:
658-
generator = res
659-
else:
660-
generator = await handler.create_embedding(request, raw_request)
631+
generator = await handler.create_embedding(request, raw_request)
661632

662633
if isinstance(generator, ErrorResponse):
663634
return JSONResponse(content=generator.model_dump(),

vllm/outputs.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any, Generic, Optional, Union
88

99
import torch
10-
from typing_extensions import TypeVar, deprecated
10+
from typing_extensions import TypeVar
1111

1212
from vllm.logger import init_logger
1313
from vllm.lora.request import LoRARequest
@@ -76,14 +76,6 @@ def __eq__(self, other: object) -> bool:
7676
return (isinstance(other, self.__class__) and bool(
7777
(self.data == other.data).all()))
7878

79-
@property
80-
@deprecated("`LLM.encode()` now stores raw outputs in the `data` "
81-
"attribute. To return embeddings, use `LLM.embed()`. "
82-
"To return class probabilities, use `LLM.classify()` "
83-
"and access the `probs` attribute. ")
84-
def embedding(self) -> list[float]:
85-
return self.data.tolist()
86-
8779

8880
class RequestOutput:
8981
"""The output data of a completion request to the LLM.
@@ -506,12 +498,6 @@ def from_base(pooling_output: PoolingOutput):
506498
def __repr__(self) -> str:
507499
return f"ScoringOutput(score={self.score})"
508500

509-
@property
510-
@deprecated("`LLM.score()` now returns scalar scores. "
511-
"Please access it via the `score` attribute. ")
512-
def embedding(self) -> list[float]:
513-
return [self.score]
514-
515501

516502
class ScoringRequestOutput(PoolingRequestOutput[ScoringOutput]):
517503

0 commit comments

Comments (0)