|
17 | 17 | from functools import partial |
18 | 18 | from http import HTTPStatus |
19 | 19 | from json import JSONDecodeError |
20 | | -from typing import Annotated, Optional, Union |
| 20 | +from typing import Annotated, Optional |
21 | 21 |
|
22 | 22 | import prometheus_client |
23 | 23 | import regex as re |
|
59 | 59 | EmbeddingChatRequest, |
60 | 60 | EmbeddingCompletionRequest, |
61 | 61 | EmbeddingRequest, |
62 | | - EmbeddingResponse, |
63 | | - EmbeddingResponseData, |
64 | | - ErrorResponse, |
| 62 | + EmbeddingResponse, ErrorResponse, |
65 | 63 | LoadLoRAAdapterRequest, |
66 | 64 | PoolingChatRequest, |
67 | 65 | PoolingCompletionRequest, |
@@ -627,37 +625,10 @@ async def create_completion(request: CompletionRequest, raw_request: Request): |
627 | 625 | async def create_embedding(request: EmbeddingRequest, raw_request: Request): |
628 | 626 | handler = embedding(raw_request) |
629 | 627 | if handler is None: |
630 | | - fallback_handler = pooling(raw_request) |
631 | | - if fallback_handler is None: |
632 | | - return base(raw_request).create_error_response( |
633 | | - message="The model does not support Embeddings API") |
| 628 | + return base(raw_request).create_error_response( |
| 629 | + message="The model does not support Embeddings API") |
634 | 630 |
|
635 | | - logger.warning( |
636 | | - "Embeddings API will become exclusive to embedding models " |
637 | | - "in a future release. To return the hidden states directly, " |
638 | | - "use the Pooling API (`/pooling`) instead.") |
639 | | - |
640 | | - res = await fallback_handler.create_pooling(request, raw_request) |
641 | | - |
642 | | - generator: Union[ErrorResponse, EmbeddingResponse] |
643 | | - if isinstance(res, PoolingResponse): |
644 | | - generator = EmbeddingResponse( |
645 | | - id=res.id, |
646 | | - object=res.object, |
647 | | - created=res.created, |
648 | | - model=res.model, |
649 | | - data=[ |
650 | | - EmbeddingResponseData( |
651 | | - index=d.index, |
652 | | - embedding=d.data, # type: ignore |
653 | | - ) for d in res.data |
654 | | - ], |
655 | | - usage=res.usage, |
656 | | - ) |
657 | | - else: |
658 | | - generator = res |
659 | | - else: |
660 | | - generator = await handler.create_embedding(request, raw_request) |
| 631 | + generator = await handler.create_embedding(request, raw_request) |
661 | 632 |
|
662 | 633 | if isinstance(generator, ErrorResponse): |
663 | 634 | return JSONResponse(content=generator.model_dump(), |
|
0 commit comments