From 24165ceef642df2d5026af6ea1b5370f603580d0 Mon Sep 17 00:00:00 2001 From: Lev Vereshchagin Date: Thu, 5 Dec 2024 16:04:04 +0300 Subject: [PATCH 1/2] Revert "Use niquests (#7)" This reverts commit bfaec30ec9ef68d3258bb6c927da193090762b2f. --- .github/workflows/publish.yml | 2 +- .github/workflows/test.yml | 7 +- Justfile | 10 --- README.md | 12 ++- any_llm_client/clients/openai.py | 68 +++++++++-------- any_llm_client/clients/yandexgpt.py | 66 ++++++++++------- any_llm_client/core.py | 24 +++--- any_llm_client/http.py | 111 +++++++++------------------- any_llm_client/main.py | 14 ++-- any_llm_client/retry.py | 2 +- any_llm_client/sse.py | 11 --- pyproject.toml | 11 +-- tests/conftest.py | 27 ------- tests/test_http.py | 47 +++++------- tests/test_openai_client.py | 73 ++++++++++-------- tests/test_static.py | 8 -- tests/test_yandexgpt_client.py | 71 +++++++++--------- tests/testing_app.py | 28 ------- 18 files changed, 241 insertions(+), 351 deletions(-) delete mode 100644 any_llm_client/sse.py delete mode 100644 tests/testing_app.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index ed65661..787069e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: extractions/setup-just@v2 - - uses: astral-sh/setup-uv@v4 + - uses: astral-sh/setup-uv@v3 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1c8f8c6..7bc9343 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: extractions/setup-just@v2 - - uses: astral-sh/setup-uv@v4 + - uses: astral-sh/setup-uv@v3 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" @@ -32,12 +32,13 @@ jobs: - "3.10" - "3.11" - "3.12" + - "3.13" steps: - uses: actions/checkout@v4 - uses: extractions/setup-just@v2 - - uses: astral-sh/setup-uv@v4 + - uses: astral-sh/setup-uv@v3 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" - - run: uv venv --python ${{ matrix.python-version }} + - run: uv python install ${{ matrix.python-version }} - run: just test -vv diff --git a/Justfile b/Justfile index a29b3b6..bae709c 100644 --- a/Justfile +++ b/Justfile @@ -10,18 +10,8 @@ lint: uv run --group lint ruff format uv run --group lint mypy . -_test-no-http *args: - uv run pytest --ignore tests/test_http.py {{ args }} - test *args: - #!/bin/bash - uv run litestar --app tests.testing_app:app run & - APP_PID=$! uv run pytest {{ args }} - TEST_RESULT=$? - kill $APP_PID - wait $APP_PID 2>/dev/null - exit $TEST_RESULT publish: rm -rf dist diff --git a/README.md b/README.md index c8345bd..395ac8e 100644 --- a/README.md +++ b/README.md @@ -162,25 +162,23 @@ async with any_llm_client.OpenAIClient(config, ...) as client: #### Timeouts, proxy & other HTTP settings -Pass custom [niquests](https://niquests.readthedocs.io) kwargs to `any_llm_client.get_client()`: +Pass custom [HTTPX](https://www.python-httpx.org) kwargs to `any_llm_client.get_client()`: ```python -import urllib3 +import httpx import any_llm_client async with any_llm_client.get_client( ..., - proxies={"https://api.openai.com": "http://localhost:8030"}, - timeout=urllib3.Timeout(total=10.0, connect=5.0), + mounts={"https://api.openai.com": httpx.AsyncHTTPTransport(proxy="http://localhost:8030")}, + timeout=httpx.Timeout(None, connect=5.0), ) as client: ... ``` -`timeout` and `proxies` parameters are special cased here: `niquests.AsyncSession` doesn't receive them by default. - -Default timeout is `urllib3.Timeout(total=None, connect=5.0)`. +Default timeout is `httpx.Timeout(None, connect=5.0)` (5 seconds on connect, unlimited on read, write or pool). #### Retries diff --git a/any_llm_client/clients/openai.py b/any_llm_client/clients/openai.py index c9ab16e..65bb5d2 100644 --- a/any_llm_client/clients/openai.py +++ b/any_llm_client/clients/openai.py @@ -6,7 +6,8 @@ from http import HTTPStatus import annotated_types -import niquests +import httpx +import httpx_sse import pydantic import typing_extensions @@ -19,9 +20,8 @@ OutOfTokensOrSymbolsError, UserMessage, ) -from any_llm_client.http import HttpClient, HttpStatusError +from any_llm_client.http import get_http_client_from_kwargs, make_http_request, make_streaming_http_request from any_llm_client.retry import RequestRetryConfig -from any_llm_client.sse import parse_sse_events OPENAI_AUTH_TOKEN_ENV_NAME: typing.Final = "ANY_LLM_CLIENT_OPENAI_AUTH_TOKEN" @@ -99,18 +99,16 @@ def _make_user_assistant_alternate_messages( yield ChatCompletionsMessage(role=current_message_role, content="\n\n".join(current_message_content_chunks)) -def _handle_status_error(error: HttpStatusError) -> typing.NoReturn: - if ( - error.status_code == HTTPStatus.BAD_REQUEST and b"Please reduce the length of the messages" in error.content - ): # vLLM - raise OutOfTokensOrSymbolsError(response_content=error.content) - raise LLMError(response_content=error.content) +def _handle_status_error(*, status_code: int, content: bytes) -> typing.NoReturn: + if status_code == HTTPStatus.BAD_REQUEST and b"Please reduce the length of the messages" in content: # vLLM + raise OutOfTokensOrSymbolsError(response_content=content) + raise LLMError(response_content=content) @dataclasses.dataclass(slots=True, init=False) class OpenAIClient(LLMClient): config: OpenAIConfig - http_client: HttpClient + httpx_client: httpx.AsyncClient request_retry: RequestRetryConfig def __init__( @@ -118,15 +116,14 @@ def __init__( config: OpenAIConfig, *, request_retry: RequestRetryConfig | None = None, - **niquests_kwargs: typing.Any, # noqa: ANN401 + **httpx_kwargs: typing.Any, # noqa: ANN401 ) -> None: self.config = config - self.http_client = HttpClient( - request_retry=request_retry or RequestRetryConfig(), niquests_kwargs=niquests_kwargs - ) + self.request_retry = request_retry or RequestRetryConfig() + self.httpx_client = get_http_client_from_kwargs(httpx_kwargs) - def _build_request(self, payload: dict[str, typing.Any]) -> niquests.Request: - return niquests.Request( + def _build_request(self, payload: dict[str, typing.Any]) -> httpx.Request: + return self.httpx_client.build_request( method="POST", url=str(self.config.url), json=payload, @@ -155,17 +152,24 @@ async def request_llm_message( **extra or {}, ).model_dump(mode="json") try: - response: typing.Final = await self.http_client.request(self._build_request(payload)) - except HttpStatusError as exception: - _handle_status_error(exception) - return ChatCompletionsNotStreamingResponse.model_validate_json(response).choices[0].message.content + response: typing.Final = await make_http_request( + httpx_client=self.httpx_client, + request_retry=self.request_retry, + build_request=lambda: self._build_request(payload), + ) + except httpx.HTTPStatusError as exception: + _handle_status_error(status_code=exception.response.status_code, content=exception.response.content) + try: + return ChatCompletionsNotStreamingResponse.model_validate_json(response.content).choices[0].message.content + finally: + await response.aclose() - async def _iter_partial_responses(self, response: typing.AsyncIterable[bytes]) -> typing.AsyncIterable[str]: + async def _iter_partial_responses(self, response: httpx.Response) -> typing.AsyncIterable[str]: text_chunks: typing.Final = [] - async for one_event in parse_sse_events(response): - if one_event.data == "[DONE]": + async for event in httpx_sse.EventSource(response).aiter_sse(): + if event.data == "[DONE]": break - validated_response = ChatCompletionsStreamingEvent.model_validate_json(one_event.data) + validated_response = ChatCompletionsStreamingEvent.model_validate_json(event.data) if not (one_chunk := validated_response.choices[0].delta.content): continue text_chunks.append(one_chunk) @@ -183,13 +187,19 @@ async def stream_llm_partial_messages( **extra or {}, ).model_dump(mode="json") try: - async with self.http_client.stream(request=self._build_request(payload)) as response: + async with make_streaming_http_request( + httpx_client=self.httpx_client, + request_retry=self.request_retry, + build_request=lambda: self._build_request(payload), + ) as response: yield self._iter_partial_responses(response) - except HttpStatusError as exception: - _handle_status_error(exception) + except httpx.HTTPStatusError as exception: + content: typing.Final = await exception.response.aread() + await exception.response.aclose() + _handle_status_error(status_code=exception.response.status_code, content=content) async def __aenter__(self) -> typing_extensions.Self: - await self.http_client.__aenter__() + await self.httpx_client.__aenter__() return self async def __aexit__( @@ -198,4 +208,4 @@ async def __aexit__( exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> None: - await self.http_client.__aexit__(exc_type=exc_type, exc_value=exc_value, traceback=traceback) + await self.httpx_client.__aexit__(exc_type=exc_type, exc_value=exc_value, traceback=traceback) diff --git a/any_llm_client/clients/yandexgpt.py b/any_llm_client/clients/yandexgpt.py index 305a3f5..10c8818 100644 --- a/any_llm_client/clients/yandexgpt.py +++ b/any_llm_client/clients/yandexgpt.py @@ -6,12 +6,12 @@ from http import HTTPStatus import annotated_types -import niquests +import httpx import pydantic import typing_extensions from any_llm_client.core import LLMClient, LLMConfig, LLMError, Message, OutOfTokensOrSymbolsError, UserMessage -from any_llm_client.http import HttpClient, HttpStatusError +from any_llm_client.http import get_http_client_from_kwargs, make_http_request, make_streaming_http_request from any_llm_client.retry import RequestRetryConfig @@ -61,34 +61,34 @@ class YandexGPTResponse(pydantic.BaseModel): result: YandexGPTResult -def _handle_status_error(error: HttpStatusError) -> typing.NoReturn: - if error.status_code == HTTPStatus.BAD_REQUEST and ( - b"number of input tokens must be no more than" in error.content - or (b"text length is" in error.content and b"which is outside the range" in error.content) +def _handle_status_error(*, status_code: int, content: bytes) -> typing.NoReturn: + if status_code == HTTPStatus.BAD_REQUEST and ( + b"number of input tokens must be no more than" in content + or (b"text length is" in content and b"which is outside the range" in content) ): - raise OutOfTokensOrSymbolsError(response_content=error.content) - raise LLMError(response_content=error.content) + raise OutOfTokensOrSymbolsError(response_content=content) + raise LLMError(response_content=content) @dataclasses.dataclass(slots=True, init=False) class YandexGPTClient(LLMClient): config: YandexGPTConfig - http_client: HttpClient + httpx_client: httpx.AsyncClient + request_retry: RequestRetryConfig def __init__( self, config: YandexGPTConfig, *, request_retry: RequestRetryConfig | None = None, - **niquests_kwargs: typing.Any, # noqa: ANN401 + **httpx_kwargs: typing.Any, # noqa: ANN401 ) -> None: self.config = config - self.http_client = HttpClient( - request_retry=request_retry or RequestRetryConfig(), niquests_kwargs=niquests_kwargs - ) + self.request_retry = request_retry or RequestRetryConfig() + self.httpx_client = get_http_client_from_kwargs(httpx_kwargs) - def _build_request(self, payload: dict[str, typing.Any]) -> niquests.Request: - return niquests.Request( + def _build_request(self, payload: dict[str, typing.Any]) -> httpx.Request: + return self.httpx_client.build_request( method="POST", url=str(self.config.url), json=payload, @@ -121,14 +121,18 @@ async def request_llm_message( ) try: - response: typing.Final = await self.http_client.request(self._build_request(payload)) - except HttpStatusError as exception: - _handle_status_error(exception) - - return YandexGPTResponse.model_validate_json(response).result.alternatives[0].message.text - - async def _iter_completion_messages(self, response: typing.AsyncIterable[bytes]) -> typing.AsyncIterable[str]: - async for one_line in response: + response: typing.Final = await make_http_request( + httpx_client=self.httpx_client, + request_retry=self.request_retry, + build_request=lambda: self._build_request(payload), + ) + except httpx.HTTPStatusError as exception: + _handle_status_error(status_code=exception.response.status_code, content=exception.response.content) + + return YandexGPTResponse.model_validate_json(response.content).result.alternatives[0].message.text + + async def _iter_completion_messages(self, response: httpx.Response) -> typing.AsyncIterable[str]: + async for one_line in response.aiter_lines(): validated_response = YandexGPTResponse.model_validate_json(one_line) yield validated_response.result.alternatives[0].message.text @@ -141,13 +145,19 @@ async def stream_llm_partial_messages( ) try: - async with self.http_client.stream(request=self._build_request(payload)) as response: + async with make_streaming_http_request( + httpx_client=self.httpx_client, + request_retry=self.request_retry, + build_request=lambda: self._build_request(payload), + ) as response: yield self._iter_completion_messages(response) - except HttpStatusError as exception: - _handle_status_error(exception) + except httpx.HTTPStatusError as exception: + content: typing.Final = await exception.response.aread() + await exception.response.aclose() + _handle_status_error(status_code=exception.response.status_code, content=content) async def __aenter__(self) -> typing_extensions.Self: - await self.http_client.__aenter__() + await self.httpx_client.__aenter__() return self async def __aexit__( @@ -156,4 +166,4 @@ async def __aexit__( exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> None: - await self.http_client.__aexit__(exc_type=exc_type, exc_value=exc_value, traceback=traceback) + await self.httpx_client.__aexit__(exc_type=exc_type, exc_value=exc_value, traceback=traceback) diff --git a/any_llm_client/core.py b/any_llm_client/core.py index 6acb9d2..e0ced36 100644 --- a/any_llm_client/core.py +++ b/any_llm_client/core.py @@ -48,6 +48,18 @@ def AssistantMessage(text: str) -> Message: # noqa: N802 return Message(role=MessageRole.assistant, text=text) +@dataclasses.dataclass +class LLMError(Exception): + response_content: bytes + + def __str__(self) -> str: + return self.__repr__().removeprefix(self.__class__.__name__) + + +@dataclasses.dataclass +class OutOfTokensOrSymbolsError(LLMError): ... + + class LLMConfig(pydantic.BaseModel): model_config = pydantic.ConfigDict(protected_namespaces=()) api_type: str @@ -71,15 +83,3 @@ async def __aexit__( exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> None: ... - - -@dataclasses.dataclass -class LLMError(Exception): - response_content: bytes - - def __str__(self) -> str: - return self.__repr__().removeprefix(self.__class__.__name__) - - -@dataclasses.dataclass -class OutOfTokensOrSymbolsError(LLMError): ... diff --git a/any_llm_client/http.py b/any_llm_client/http.py index bb7f23a..7d05030 100644 --- a/any_llm_client/http.py +++ b/any_llm_client/http.py @@ -1,95 +1,52 @@ import contextlib import dataclasses -import types import typing -import niquests +import httpx import stamina -import typing_extensions -import urllib3 from any_llm_client.retry import RequestRetryConfig -DEFAULT_HTTP_TIMEOUT: typing.Final = urllib3.Timeout(total=None, connect=5.0) +DEFAULT_HTTP_TIMEOUT: typing.Final = httpx.Timeout(None, connect=5.0) -@dataclasses.dataclass -class HttpStatusError(Exception): - status_code: int - content: bytes +def get_http_client_from_kwargs(kwargs: dict[str, typing.Any]) -> httpx.AsyncClient: + kwargs_with_defaults: typing.Final = kwargs.copy() + kwargs_with_defaults.setdefault("timeout", DEFAULT_HTTP_TIMEOUT) + return httpx.AsyncClient(**kwargs_with_defaults) -@dataclasses.dataclass(slots=True, init=False) -class HttpClient: - client: niquests.AsyncSession - timeout: urllib3.Timeout - _make_not_streaming_request_with_retries: typing.Callable[[niquests.Request], typing.Awaitable[niquests.Response]] - _make_streaming_request_with_retries: typing.Callable[[niquests.Request], typing.Awaitable[niquests.AsyncResponse]] - _retried_exceptions: typing.ClassVar = (niquests.HTTPError, HttpStatusError) - - def __init__(self, request_retry: RequestRetryConfig, niquests_kwargs: dict[str, typing.Any]) -> None: - modified_kwargs: typing.Final = niquests_kwargs.copy() - self.timeout = modified_kwargs.pop("timeout", DEFAULT_HTTP_TIMEOUT) - proxies: typing.Final = modified_kwargs.pop("proxies", None) - - self.client = niquests.AsyncSession(**modified_kwargs) - if proxies: - self.client.proxies = proxies - - request_retry_dict: typing.Final = dataclasses.asdict(request_retry) - - self._make_not_streaming_request_with_retries = stamina.retry( - on=self._retried_exceptions, **request_retry_dict - )(self._make_not_streaming_request) - self._make_streaming_request_with_retries = stamina.retry(on=self._retried_exceptions, **request_retry_dict)( - self._make_streaming_request - ) - - async def _make_not_streaming_request(self, request: niquests.Request) -> niquests.Response: - response: typing.Final = await self.client.send(self.client.prepare_request(request), timeout=self.timeout) - try: - response.raise_for_status() - except niquests.HTTPError as exception: - raise HttpStatusError(status_code=response.status_code, content=response.content) from exception # type: ignore[arg-type] - finally: - response.close() +async def make_http_request( + *, + httpx_client: httpx.AsyncClient, + request_retry: RequestRetryConfig, + build_request: typing.Callable[[], httpx.Request], +) -> httpx.Response: + @stamina.retry(on=httpx.HTTPError, **dataclasses.asdict(request_retry)) + async def make_request_with_retries() -> httpx.Response: + response: typing.Final = await httpx_client.send(build_request()) + response.raise_for_status() return response - async def request(self, request: niquests.Request) -> bytes: - response: typing.Final = await self._make_not_streaming_request_with_retries(request) - return response.content # type: ignore[return-value] - - async def _make_streaming_request(self, request: niquests.Request) -> niquests.AsyncResponse: - response: typing.Final = await self.client.send( - self.client.prepare_request(request), stream=True, timeout=self.timeout - ) - try: - response.raise_for_status() - except niquests.HTTPError as exception: - status_code: typing.Final = response.status_code - content: typing.Final = await response.content # type: ignore[misc] - await response.close() # type: ignore[misc] - raise HttpStatusError(status_code=status_code, content=content) from exception # type: ignore[arg-type] - return response # type: ignore[return-value] + return await make_request_with_retries() - @contextlib.asynccontextmanager - async def stream(self, request: niquests.Request) -> typing.AsyncIterator[typing.AsyncIterable[bytes]]: - response: typing.Final = await self._make_streaming_request_with_retries(request) - try: - response.__aenter__() - yield response.iter_lines() # type: ignore[misc] - finally: - await response.raw.close() # type: ignore[union-attr] - async def __aenter__(self) -> typing_extensions.Self: - await self.client.__aenter__() # type: ignore[no-untyped-call] - return self +@contextlib.asynccontextmanager +async def make_streaming_http_request( + *, + httpx_client: httpx.AsyncClient, + request_retry: RequestRetryConfig, + build_request: typing.Callable[[], httpx.Request], +) -> typing.AsyncIterator[httpx.Response]: + @stamina.retry(on=httpx.HTTPError, **dataclasses.asdict(request_retry)) + async def make_request_with_retries() -> httpx.Response: + response: typing.Final = await httpx_client.send(build_request(), stream=True) + response.raise_for_status() + return response - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc_value: BaseException | None, - traceback: types.TracebackType | None, - ) -> None: - await self.client.__aexit__(exc_type, exc_value, traceback) # type: ignore[no-untyped-call] + response: typing.Final = await make_request_with_retries() + try: + yield response + finally: + await response.aclose() diff --git a/any_llm_client/main.py b/any_llm_client/main.py index d9c6c76..d734e67 100644 --- a/any_llm_client/main.py +++ b/any_llm_client/main.py @@ -19,7 +19,7 @@ def get_client( config: AnyLLMConfig, *, request_retry: RequestRetryConfig | None = None, - **niquests_kwargs: typing.Any, # noqa: ANN401 + **httpx_kwargs: typing.Any, # noqa: ANN401 ) -> LLMClient: ... else: @@ -28,7 +28,7 @@ def get_client( config: typing.Any, # noqa: ANN401, ARG001 *, request_retry: RequestRetryConfig | None = None, # noqa: ARG001 - **niquests_kwargs: typing.Any, # noqa: ANN401, ARG001 + **httpx_kwargs: typing.Any, # noqa: ANN401, ARG001 ) -> LLMClient: raise AssertionError("unknown LLM config type") @@ -37,24 +37,24 @@ def _( config: YandexGPTConfig, *, request_retry: RequestRetryConfig | None = None, - **niquests_kwargs: typing.Any, # noqa: ANN401 + **httpx_kwargs: typing.Any, # noqa: ANN401 ) -> LLMClient: - return YandexGPTClient(config=config, request_retry=request_retry, **niquests_kwargs) + return YandexGPTClient(config=config, request_retry=request_retry, **httpx_kwargs) @get_client.register def _( config: OpenAIConfig, *, request_retry: RequestRetryConfig | None = None, - **niquests_kwargs: typing.Any, # noqa: ANN401 + **httpx_kwargs: typing.Any, # noqa: ANN401 ) -> LLMClient: - return OpenAIClient(config=config, request_retry=request_retry, **niquests_kwargs) + return OpenAIClient(config=config, request_retry=request_retry, **httpx_kwargs) @get_client.register def _( config: MockLLMConfig, *, request_retry: RequestRetryConfig | None = None, # noqa: ARG001 - **niquests_kwargs: typing.Any, # noqa: ANN401, ARG001 + **httpx_kwargs: typing.Any, # noqa: ANN401, ARG001 ) -> LLMClient: return MockLLMClient(config=config) diff --git a/any_llm_client/retry.py b/any_llm_client/retry.py index 3aded39..d043322 100644 --- a/any_llm_client/retry.py +++ b/any_llm_client/retry.py @@ -4,7 +4,7 @@ @dataclasses.dataclass(frozen=True, kw_only=True, slots=True) class RequestRetryConfig: - """Request retry configuration that is passed to `stamina.retry`. Applies to niquests.HTTPError. + """Request retry configuration that is passed to `stamina.retry`. Applies to httpx.HTTPError. Uses defaults from `stamina.retry` except for attempts: by default 3 instead of 10. See more at https://stamina.hynek.me/en/stable/api.html#stamina.retry diff --git a/any_llm_client/sse.py b/any_llm_client/sse.py deleted file mode 100644 index cdd61a7..0000000 --- a/any_llm_client/sse.py +++ /dev/null @@ -1,11 +0,0 @@ -import typing - -import httpx_sse -from httpx_sse._decoders import SSEDecoder - - -async def parse_sse_events(response: typing.AsyncIterable[bytes]) -> typing.AsyncIterator[httpx_sse.ServerSentEvent]: - sse_decoder: typing.Final = SSEDecoder() - async for one_line in response: - if event := sse_decoder.decode(one_line.decode().rstrip("\n")): - yield event diff --git a/pyproject.toml b/pyproject.toml index f374081..7904e41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,15 +8,16 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries", "Topic :: System :: Networking", "Typing :: Typed", ] authors = [{ name = "Lev Vereshchagin", email = "mail@vrslev.com" }] -requires-python = ">=3.10,<3.13" +requires-python = ">=3.10" dependencies = [ "httpx-sse>=0.4.0", - "niquests>=3.11.1", + "httpx>=0.27.2", "pydantic>=2.9.2", "stamina>=24.3.0", ] @@ -26,7 +27,6 @@ dynamic = ["version"] dev = [ "anyio", "faker", - "litestar[standard]", "polyfactory", "pydantic-settings", "pytest-cov", @@ -82,12 +82,9 @@ lines-after-imports = 2 "examples/*.py" = ["INP001", "T201"] [tool.pytest.ini_options] -addopts = "--cov=." +addopts = "--cov" [tool.coverage.report] skip_covered = true show_missing = true exclude_also = ["if typing.TYPE_CHECKING:"] - -[tool.coverage.run] -omit = ["tests/testing_app.py"] diff --git a/tests/conftest.py b/tests/conftest.py index 125f440..c003365 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ import contextlib import typing -from unittest import mock import pytest import stamina @@ -34,29 +33,3 @@ async def consume_llm_partial_responses( ) -> list[str]: async with request_llm_partial_responses_context_manager as response_iterable: return [one_item async for one_item in response_iterable] - - -def _make_async_stream_iterable(lines: str) -> typing.Any: # noqa: ANN401 - async def iter_lines() -> typing.AsyncIterable[bytes]: - for line in lines.splitlines(): - yield line.encode() - - return iter_lines() - - -def mock_http_client(llm_client: any_llm_client.LLMClient, request_mock: mock.AsyncMock) -> any_llm_client.LLMClient: - assert hasattr(llm_client, "http_client") - llm_client.http_client = mock.Mock( - request=request_mock, - stream=mock.Mock( - return_value=mock.Mock( - __aenter__=( - mock.AsyncMock(return_value=_make_async_stream_iterable(request_mock.return_value)) - if isinstance(request_mock.return_value, str) - else request_mock - ), - __aexit__=mock.AsyncMock(return_value=None), - ) - ), - ) - return llm_client diff --git a/tests/test_http.py b/tests/test_http.py index 35cb0e2..b7e4f0e 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -1,40 +1,27 @@ +import copy import typing -from http import HTTPStatus -import niquests -import pytest +import httpx -from any_llm_client.http import HttpClient, HttpStatusError -from any_llm_client.retry import RequestRetryConfig +from any_llm_client.http import DEFAULT_HTTP_TIMEOUT, get_http_client_from_kwargs -BASE_URL: typing.Final = "http://127.0.0.1:8000" +class TestGetHttpClientFromKwargs: + def test_http_timeout_is_added(self) -> None: + original_kwargs: typing.Final = {"mounts": {"http://": None}} + passed_kwargs: typing.Final = copy.deepcopy(original_kwargs) + client: typing.Final = get_http_client_from_kwargs(passed_kwargs) -async def test_http_client_request_ok() -> None: - client: typing.Final = HttpClient(request_retry=RequestRetryConfig(), niquests_kwargs={}) - result: typing.Final = await client.request(niquests.Request(method="GET", url=f"{BASE_URL}/request-ok")) - assert result == b'{"ok":true}' + assert client.timeout == DEFAULT_HTTP_TIMEOUT + assert original_kwargs == passed_kwargs + def test_http_timeout_is_not_modified_if_set(self) -> None: + timeout: typing.Final = httpx.Timeout(7, connect=5, read=3) + original_kwargs: typing.Final = {"mounts": {"http://": None}, "timeout": timeout} + passed_kwargs: typing.Final = copy.deepcopy(original_kwargs) -async def test_http_client_request_rail() -> None: - client: typing.Final = HttpClient(request_retry=RequestRetryConfig(), niquests_kwargs={}) - with pytest.raises(HttpStatusError) as exc_info: - await client.request(niquests.Request(method="GET", url=f"{BASE_URL}/request-fail")) - assert exc_info.value.status_code == HTTPStatus.IM_A_TEAPOT - assert exc_info.value.content == b'{"ok":false}' + client: typing.Final = get_http_client_from_kwargs(passed_kwargs) - -async def test_http_client_stream_ok() -> None: - client: typing.Final = HttpClient(request_retry=RequestRetryConfig(), niquests_kwargs={}) - async with client.stream(niquests.Request(method="GET", url=f"{BASE_URL}/stream-ok")) as response: - result: typing.Final = [one_chunk async for one_chunk in response] - assert result == [b"ok", b"true"] - - -async def test_http_client_stream_rail() -> None: - client: typing.Final = HttpClient(request_retry=RequestRetryConfig(), niquests_kwargs={}) - with pytest.raises(HttpStatusError) as exc_info: - await client.stream(niquests.Request(method="GET", url=f"{BASE_URL}/stream-fail")).__aenter__() - assert exc_info.value.status_code == HTTPStatus.IM_A_TEAPOT - assert exc_info.value.content == b"ok\nfalse" + assert client.timeout == timeout + assert original_kwargs == passed_kwargs diff --git a/tests/test_openai_client.py b/tests/test_openai_client.py index fb3d71a..c563623 100644 --- a/tests/test_openai_client.py +++ b/tests/test_openai_client.py @@ -1,7 +1,7 @@ import typing -from unittest import mock import faker +import httpx import pydantic import pytest from polyfactory.factories.pydantic_factory import ModelFactory @@ -15,8 +15,7 @@ OneStreamingChoice, OneStreamingChoiceDelta, ) -from any_llm_client.http import HttpStatusError -from tests.conftest import LLMFuncRequestFactory, consume_llm_partial_responses, mock_http_client +from tests.conftest import LLMFuncRequestFactory, consume_llm_partial_responses class OpenAIConfigFactory(ModelFactory[any_llm_client.OpenAIConfig]): ... @@ -25,25 +24,32 @@ class OpenAIConfigFactory(ModelFactory[any_llm_client.OpenAIConfig]): ... class TestOpenAIRequestLLMResponse: async def test_ok(self, faker: faker.Faker) -> None: expected_result: typing.Final = faker.pystr() - response: typing.Final = ChatCompletionsNotStreamingResponse( - choices=[ - OneNotStreamingChoice( - message=ChatCompletionsMessage(role=any_llm_client.MessageRole.assistant, content=expected_result) - ) - ] - ).model_dump_json() - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, + json=ChatCompletionsNotStreamingResponse( + choices=[ + OneNotStreamingChoice( + message=ChatCompletionsMessage( + role=any_llm_client.MessageRole.assistant, content=expected_result + ) + ) + ] + ).model_dump(mode="json"), ) - result: typing.Final = await client.request_llm_message(**LLMFuncRequestFactory.build()) + result: typing.Final = await any_llm_client.get_client( + OpenAIConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) + ).request_llm_message(**LLMFuncRequestFactory.build()) assert result == expected_result async def test_fails_without_alternatives(self) -> None: - response: typing.Final = ChatCompletionsNotStreamingResponse.model_construct(choices=[]).model_dump(mode="json") - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, + json=ChatCompletionsNotStreamingResponse.model_construct(choices=[]).model_dump(mode="json"), + ) + client: typing.Final = any_llm_client.get_client( + OpenAIConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) with pytest.raises(pydantic.ValidationError): @@ -71,7 +77,9 @@ async def test_ok(self, faker: faker.Faker) -> None: "Hi there. How is you", "Hi there. How is your day?", ] - response: typing.Final = ( + config: typing.Final = OpenAIConfigFactory.build() + func_request: typing.Final = LLMFuncRequestFactory.build() + response_content: typing.Final = ( "\n\n".join( "data: " + ChatCompletionsStreamingEvent(choices=[OneStreamingChoice(delta=one_message)]).model_dump_json() @@ -79,22 +87,24 @@ async def test_ok(self, faker: faker.Faker) -> None: ) + f"\n\ndata: [DONE]\n\ndata: {faker.pystr()}\n\n" ) - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, headers={"Content-Type": "text/event-stream"}, content=response_content ) + client: typing.Final = any_llm_client.get_client(config, transport=httpx.MockTransport(lambda _: response)) - result: typing.Final = await consume_llm_partial_responses( - client.stream_llm_partial_messages(**LLMFuncRequestFactory.build()) - ) + result: typing.Final = await consume_llm_partial_responses(client.stream_llm_partial_messages(**func_request)) assert result == expected_result async def test_fails_without_alternatives(self) -> None: - response: typing.Final = ( + response_content: typing.Final = ( f"data: {ChatCompletionsStreamingEvent.model_construct(choices=[]).model_dump_json()}\n\n" ) - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, headers={"Content-Type": "text/event-stream"}, content=response_content + ) + client: typing.Final = any_llm_client.get_client( + OpenAIConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) with pytest.raises(pydantic.ValidationError): @@ -105,9 +115,8 @@ class TestOpenAILLMErrors: @pytest.mark.parametrize("stream", [True, False]) @pytest.mark.parametrize("status_code", [400, 500]) async def test_fails_with_unknown_error(self, stream: bool, status_code: int) -> None: - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), - mock.AsyncMock(side_effect=HttpStatusError(status_code=status_code, content=b"")), + client: typing.Final = any_llm_client.get_client( + OpenAIConfigFactory.build(), transport=httpx.MockTransport(lambda _: httpx.Response(status_code)) ) coroutine: typing.Final = ( @@ -128,10 +137,10 @@ async def test_fails_with_unknown_error(self, stream: bool, status_code: int) -> b'{"object":"error","message":"This model\'s maximum context length is 16384 tokens. However, you requested 100000 tokens in the messages, Please reduce the length of the messages.","type":"BadRequestError","param":null,"code":400}', # noqa: E501 ], ) - async def test_fails_with_out_of_tokens_error(self, stream: bool, content: bytes) -> None: - client: typing.Final = mock_http_client( - any_llm_client.get_client(OpenAIConfigFactory.build()), - mock.AsyncMock(side_effect=HttpStatusError(status_code=400, content=content)), + async def test_fails_with_out_of_tokens_error(self, stream: bool, content: bytes | None) -> None: + response: typing.Final = httpx.Response(400, content=content) + client: typing.Final = any_llm_client.get_client( + OpenAIConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) coroutine: typing.Final = ( diff --git a/tests/test_static.py b/tests/test_static.py index 4045573..5ec106f 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -10,8 +10,6 @@ import any_llm_client from any_llm_client.clients.openai import ChatCompletionsRequest from any_llm_client.clients.yandexgpt import YandexGPTRequest -from any_llm_client.http import HttpClient -from any_llm_client.retry import RequestRetryConfig from tests.conftest import LLMFuncRequest @@ -49,12 +47,6 @@ def test_llm_func_request_has_same_annotations_as_llm_client_methods() -> None: assert all(annotations == all_annotations[0] for annotations in all_annotations) -def test_proxies_are_set_on_http_client(faker: faker.Faker) -> None: - proxies: typing.Final = faker.pydict() - http_client: typing.Final = HttpClient(request_retry=RequestRetryConfig(), niquests_kwargs={"proxies": proxies}) - assert http_client.client.proxies == proxies - - @pytest.mark.parametrize("model_type", [YandexGPTRequest, ChatCompletionsRequest]) def test_dumped_llm_request_payload_dump_has_extra_data(model_type: type[pydantic.BaseModel]) -> None: extra: typing.Final = {"hi": "there", "hi-hi": "there-there"} diff --git a/tests/test_yandexgpt_client.py b/tests/test_yandexgpt_client.py index 8d7c079..8160ac0 100644 --- a/tests/test_yandexgpt_client.py +++ b/tests/test_yandexgpt_client.py @@ -1,15 +1,14 @@ import typing -from unittest import mock import faker +import httpx import pydantic import pytest from polyfactory.factories.pydantic_factory import ModelFactory import any_llm_client from any_llm_client.clients.yandexgpt import YandexGPTAlternative, YandexGPTResponse, YandexGPTResult -from any_llm_client.http import HttpStatusError -from tests.conftest import LLMFuncRequestFactory, consume_llm_partial_responses, mock_http_client +from tests.conftest import LLMFuncRequestFactory, consume_llm_partial_responses class YandexGPTConfigFactory(ModelFactory[any_llm_client.YandexGPTConfig]): ... @@ -18,25 +17,27 @@ class YandexGPTConfigFactory(ModelFactory[any_llm_client.YandexGPTConfig]): ... class TestYandexGPTRequestLLMResponse: async def test_ok(self, faker: faker.Faker) -> None: expected_result: typing.Final = faker.pystr() - response: typing.Final = YandexGPTResponse( - result=YandexGPTResult( - alternatives=[YandexGPTAlternative(message=any_llm_client.AssistantMessage(expected_result))] - ) - ).model_dump_json() - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, + json=YandexGPTResponse( + result=YandexGPTResult( + alternatives=[YandexGPTAlternative(message=any_llm_client.AssistantMessage(expected_result))] + ) + ).model_dump(mode="json"), ) - result: typing.Final = await client.request_llm_message(**LLMFuncRequestFactory.build()) + result: typing.Final = await any_llm_client.get_client( + YandexGPTConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) + ).request_llm_message(**LLMFuncRequestFactory.build()) assert result == expected_result async def test_fails_without_alternatives(self) -> None: - response: typing.Final = YandexGPTResponse( - result=YandexGPTResult.model_construct(alternatives=[]) - ).model_dump_json() - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response( + 200, json=YandexGPTResponse(result=YandexGPTResult.model_construct(alternatives=[])).model_dump(mode="json") + ) + client: typing.Final = any_llm_client.get_client( + YandexGPTConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) with pytest.raises(pydantic.ValidationError): @@ -46,8 +47,9 @@ async def test_fails_without_alternatives(self) -> None: class TestYandexGPTRequestLLMPartialResponses: async def test_ok(self, faker: faker.Faker) -> None: expected_result: typing.Final = faker.pylist(value_types=[str]) + config: typing.Final = YandexGPTConfigFactory.build() func_request: typing.Final = LLMFuncRequestFactory.build() - response: typing.Final = ( + response_content: typing.Final = ( "\n".join( YandexGPTResponse( result=YandexGPTResult( @@ -58,20 +60,24 @@ async def test_ok(self, faker: faker.Faker) -> None: ) + "\n" ) - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), mock.AsyncMock(return_value=response) - ) + response: typing.Final = httpx.Response(200, content=response_content) - result: typing.Final = await consume_llm_partial_responses(client.stream_llm_partial_messages(**func_request)) + result: typing.Final = await consume_llm_partial_responses( + any_llm_client.get_client( + config, transport=httpx.MockTransport(lambda _: response) + ).stream_llm_partial_messages(**func_request) + ) assert result == expected_result async def test_fails_without_alternatives(self) -> None: - response: typing.Final = ( + response_content: typing.Final = ( YandexGPTResponse(result=YandexGPTResult.model_construct(alternatives=[])).model_dump_json() + "\n" ) - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), mock.AsyncMock(return_value=response) + response: typing.Final = httpx.Response(200, content=response_content) + + client: typing.Final = any_llm_client.get_client( + YandexGPTConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) with pytest.raises(pydantic.ValidationError): @@ -81,10 +87,9 @@ async def test_fails_without_alternatives(self) -> None: class TestYandexGPTLLMErrors: @pytest.mark.parametrize("stream", [True, False]) @pytest.mark.parametrize("status_code", [400, 500]) - async def test_fails_with_unknown_error(self, faker: faker.Faker, stream: bool, status_code: int) -> None: - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), - mock.AsyncMock(side_effect=HttpStatusError(status_code=status_code, content=faker.pystr().encode())), + async def test_fails_with_unknown_error(self, stream: bool, status_code: int) -> None: + client: typing.Final = any_llm_client.get_client( + YandexGPTConfigFactory.build(), transport=httpx.MockTransport(lambda _: httpx.Response(status_code)) ) coroutine: typing.Final = ( @@ -99,16 +104,16 @@ async def test_fails_with_unknown_error(self, faker: faker.Faker, stream: bool, @pytest.mark.parametrize("stream", [True, False]) @pytest.mark.parametrize( - "content", + "response_content", [ b"...folder_id=1111: number of input tokens must be no more than 8192, got 28498...", b"...folder_id=1111: text length is 349354, which is outside the range (0, 100000]...", ], ) - async def test_fails_with_out_of_tokens_error(self, stream: bool, content: bytes) -> None: - client: typing.Final = mock_http_client( - any_llm_client.get_client(YandexGPTConfigFactory.build()), - mock.AsyncMock(side_effect=HttpStatusError(status_code=400, content=content)), + async def test_fails_with_out_of_tokens_error(self, stream: bool, response_content: bytes | None) -> None: + response: typing.Final = httpx.Response(400, content=response_content) + client: typing.Final = any_llm_client.get_client( + YandexGPTConfigFactory.build(), transport=httpx.MockTransport(lambda _: response) ) coroutine: typing.Final = ( diff --git a/tests/testing_app.py b/tests/testing_app.py deleted file mode 100644 index 66dc2ff..0000000 --- a/tests/testing_app.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing - -import litestar -import litestar.background_tasks -from litestar.response import Stream - - -@litestar.get("/request-ok") -async def request_ok() -> dict[str, typing.Any]: - return {"ok": True} - - -@litestar.get("/request-fail", status_code=418) -async def request_fail() -> dict[str, typing.Any]: - return {"ok": False} - - -@litestar.get("/stream-ok") -async def stream_ok() -> Stream: - return Stream("ok\ntrue") - - -@litestar.get("/stream-fail") -async def stream_fail() -> Stream: - return Stream("ok\nfalse", status_code=418) - - -app = litestar.Litestar(route_handlers=[request_ok, request_fail, stream_ok, stream_fail]) From 0fd6cce82432bce8eaa16d4415858165fbb9bbbf Mon Sep 17 00:00:00 2001 From: Lev Vereshchagin Date: Thu, 5 Dec 2024 16:04:14 +0300 Subject: [PATCH 2/2] Leave good stuff --- .github/workflows/test.yml | 7 +++---- any_llm_client/core.py | 24 ++++++++++++------------ pyproject.toml | 2 +- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7bc9343..1c8f8c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: extractions/setup-just@v2 - - uses: astral-sh/setup-uv@v3 + - uses: astral-sh/setup-uv@v4 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" @@ -32,13 +32,12 @@ jobs: - "3.10" - "3.11" - "3.12" - - "3.13" steps: - uses: actions/checkout@v4 - uses: extractions/setup-just@v2 - - uses: astral-sh/setup-uv@v3 + - uses: astral-sh/setup-uv@v4 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" - - run: uv python install ${{ matrix.python-version }} + - run: uv venv --python ${{ matrix.python-version }} - run: just test -vv diff --git a/any_llm_client/core.py b/any_llm_client/core.py index e0ced36..6acb9d2 100644 --- a/any_llm_client/core.py +++ b/any_llm_client/core.py @@ -48,18 +48,6 @@ def AssistantMessage(text: str) -> Message: # noqa: N802 return Message(role=MessageRole.assistant, text=text) -@dataclasses.dataclass -class LLMError(Exception): - response_content: bytes - - def __str__(self) -> str: - return self.__repr__().removeprefix(self.__class__.__name__) - - -@dataclasses.dataclass -class OutOfTokensOrSymbolsError(LLMError): ... - - class LLMConfig(pydantic.BaseModel): model_config = pydantic.ConfigDict(protected_namespaces=()) api_type: str @@ -83,3 +71,15 @@ async def __aexit__( exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> None: ... + + +@dataclasses.dataclass +class LLMError(Exception): + response_content: bytes + + def __str__(self) -> str: + return self.__repr__().removeprefix(self.__class__.__name__) + + +@dataclasses.dataclass +class OutOfTokensOrSymbolsError(LLMError): ... diff --git a/pyproject.toml b/pyproject.toml index 7904e41..2eeb88e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ lines-after-imports = 2 "examples/*.py" = ["INP001", "T201"] [tool.pytest.ini_options] -addopts = "--cov" +addopts = "--cov=." [tool.coverage.report] skip_covered = true