From 3696fa580c9454847e3a3557f5810dc5b25badee Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 28 May 2025 09:47:28 -0700 Subject: [PATCH 1/8] Changes for new thinking format and URL format --- infra/main.bicep | 2 +- src/quartapp/chat.py | 31 ++++++++----------------------- src/quartapp/templates/index.html | 2 +- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index cdf701e..7e3eee4 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -167,7 +167,7 @@ output AZURE_LOCATION string = location output AZURE_TENANT_ID string = tenant().tenantId output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName -output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/models' +output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/openai' output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId output SERVICE_ACA_NAME string = aca.outputs.name diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py index 9d9de44..c867262 100644 --- a/src/quartapp/chat.py +++ b/src/quartapp/chat.py @@ -2,6 +2,7 @@ import os import httpx +from azure.core.credentials import AzureKeyCredential from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider from openai import AsyncOpenAI, DefaultAsyncHttpxClient from quart import ( @@ -18,7 +19,11 @@ @bp.before_app_serving async def configure_openai(): - if os.getenv("RUNNING_IN_PRODUCTION"): + if azure_openai_key := os.getenv("AZURE_OPENAI_API_KEY_FOR_APP"): + # use key credential + current_app.logger.info("Using Azure OpenAI with API key") + bp.azure_credential = AzureKeyCredential(azure_openai_key) + elif os.getenv("RUNNING_IN_PRODUCTION"): client_id = os.environ["AZURE_CLIENT_ID"] current_app.logger.info("Using Azure OpenAI with managed identity credential for client ID: %s", client_id) bp.azure_credential = ManagedIdentityCredential(client_id=client_id) @@ -45,7 +50,7 @@ def sync_auth_flow(self, request): bp.openai_client = AsyncOpenAI( base_url=os.environ["AZURE_INFERENCE_ENDPOINT"], api_key="placeholder", - default_query={"api-version": "2024-05-01-preview"}, + default_query={"api-version": "preview"}, http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()), ) @@ -82,29 +87,9 @@ async def response_stream(): ) try: - is_thinking = False async for update in await chat_coroutine: if update.choices: - content = update.choices[0].delta.content - if content == "": - is_thinking = True - update.choices[0].delta.content = None - update.choices[0].delta.reasoning_content = "" - elif content == "": - is_thinking = False - update.choices[0].delta.content = None - update.choices[0].delta.reasoning_content = "" - elif content: - if is_thinking: - yield json.dumps( - {"delta": {"content": None, "reasoning_content": content, "role": "assistant"}}, - ensure_ascii=False, - ) + "\n" - else: - yield json.dumps( - {"delta": {"content": content, "reasoning_content": None, "role": "assistant"}}, - ensure_ascii=False, - ) + "\n" + yield update.choices[0].model_dump_json() + "\n" except Exception as e: current_app.logger.error(e) yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n" diff --git a/src/quartapp/templates/index.html b/src/quartapp/templates/index.html index 7430215..527d3e8 100644 --- a/src/quartapp/templates/index.html +++ b/src/quartapp/templates/index.html @@ -120,7 +120,7 @@ messageDiv.querySelector(".thoughts").style.display = "block"; messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts); } - } else { + } else if (event.delta.content) { messageDiv.querySelector(".loading-bar").style.display = "none"; answer += event.delta.content; messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer); From d27f95e9b4ea76080014e433816449cb7d4336b4 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 28 May 2025 09:49:23 -0700 Subject: [PATCH 2/8] change url to v1 --- infra/main.bicep | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/main.bicep b/infra/main.bicep index 7e3eee4..c962ffd 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -167,7 +167,7 @@ output AZURE_LOCATION string = location output AZURE_TENANT_ID string = tenant().tenantId output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName -output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/openai' +output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/openai/v1' output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId output SERVICE_ACA_NAME string = aca.outputs.name From 9d8da382bb8922444d79279777be411153e188d0 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 28 May 2025 09:50:40 -0700 Subject: [PATCH 3/8] Adjust endpoint for ACA env vars too --- infra/main.bicep | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/main.bicep b/infra/main.bicep index c962ffd..2a01b1e 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -123,7 +123,7 @@ module aca 'aca.bicep' = { containerAppsEnvironmentName: containerApps.outputs.environmentName containerRegistryName: containerApps.outputs.registryName aiServicesDeploymentName: aiServicesDeploymentName - aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/models' + aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/openai/v1' exists: acaExists } } From b950db691f32682ef00855d228c19b80b550de53 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 28 May 2025 20:07:06 -0700 Subject: [PATCH 4/8] Azure OpenAI change --- infra/main.bicep | 4 ++-- src/quartapp/chat.py | 21 +++++---------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index 2a01b1e..24c3b44 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -123,7 +123,7 @@ module aca 'aca.bicep' = { containerAppsEnvironmentName: containerApps.outputs.environmentName containerRegistryName: containerApps.outputs.registryName aiServicesDeploymentName: aiServicesDeploymentName - aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/openai/v1' + aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com' exists: acaExists } } @@ -167,7 +167,7 @@ output AZURE_LOCATION string = location output AZURE_TENANT_ID string = tenant().tenantId output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName -output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/openai/v1' +output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com' output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId output SERVICE_ACA_NAME string = aca.outputs.name diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py index c867262..562e1ef 100644 --- a/src/quartapp/chat.py +++ b/src/quartapp/chat.py @@ -1,10 +1,9 @@ import json import os -import httpx from azure.core.credentials import AzureKeyCredential from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider -from openai import AsyncOpenAI, DefaultAsyncHttpxClient +from openai import AsyncAzureOpenAI from quart import ( Blueprint, Response, @@ -37,21 +36,11 @@ async def configure_openai(): bp.azure_credential, "https://cognitiveservices.azure.com/.default" ) - class TokenBasedAuth(httpx.Auth): - async def async_auth_flow(self, request): - token = await openai_token_provider() - request.headers["Authorization"] = f"Bearer {token}" - yield request - - def sync_auth_flow(self, request): - raise RuntimeError("Cannot use a sync authentication class with httpx.AsyncClient") - # Create the Asynchronous Azure OpenAI client - bp.openai_client = AsyncOpenAI( - base_url=os.environ["AZURE_INFERENCE_ENDPOINT"], - api_key="placeholder", - default_query={"api-version": "preview"}, - http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()), + bp.openai_client = AsyncAzureOpenAI( + azure_endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"], + azure_ad_token_provider=openai_token_provider, + api_version="2025-04-01-preview", # temporary ) # Set the model name to the Azure OpenAI model deployment name From 9b77c538a899e281146b328a13a522a9a9c0a950 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 28 May 2025 20:08:01 -0700 Subject: [PATCH 5/8] Rm key credential support --- src/quartapp/chat.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py index 562e1ef..5643d43 100644 --- a/src/quartapp/chat.py +++ b/src/quartapp/chat.py @@ -1,7 +1,6 @@ import json import os -from azure.core.credentials import AzureKeyCredential from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider from openai import AsyncAzureOpenAI from quart import ( @@ -18,11 +17,7 @@ @bp.before_app_serving async def configure_openai(): - if azure_openai_key := os.getenv("AZURE_OPENAI_API_KEY_FOR_APP"): - # use key credential - current_app.logger.info("Using Azure OpenAI with API key") - bp.azure_credential = AzureKeyCredential(azure_openai_key) - elif os.getenv("RUNNING_IN_PRODUCTION"): + if os.getenv("RUNNING_IN_PRODUCTION"): client_id = os.environ["AZURE_CLIENT_ID"] current_app.logger.info("Using Azure OpenAI with managed identity credential for client ID: %s", client_id) bp.azure_credential = ManagedIdentityCredential(client_id=client_id) From 0b865c9e1fb4303d4fc2dd64f4d41dae97412231 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 29 May 2025 06:25:19 -0700 Subject: [PATCH 6/8] Update test mocks --- tests/conftest.py | 39 +++++++++++++++++-- .../test_chat_stream_text/result.jsonlines | 15 +++---- .../result.jsonlines | 15 +++---- 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c5cbac0..ba516b2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ @pytest.fixture def mock_openai_chatcompletion(monkeypatch): class AsyncChatCompletionIterator: - def __init__(self, answer: str): + def __init__(self, reasoning: str, answer: str): self.chunk_index = 0 self.chunks = [ openai.types.chat.ChatCompletionChunk( @@ -32,10 +32,41 @@ def __init__(self, answer: str): ], ) ] + reasoning_deltas = reasoning.split(" ") + for reasoning_index, reasoning_delta in enumerate(reasoning_deltas): + # Text completion chunks include whitespace, so we need to add it back in + if reasoning_index > 0: + answer_delta = " " + reasoning_delta + self.chunks.append( + openai.types.chat.ChatCompletionChunk( + id="test-123", + object="chat.completion.chunk", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role=None, reasoning_content=reasoning_delta + ), + finish_reason=None, + index=0, + logprobs=None, + # Only Azure includes content_filter_results + content_filter_results={ + "hate": {"filtered": False, "severity": "safe"}, + "self_harm": {"filtered": False, "severity": "safe"}, + "sexual": {"filtered": False, "severity": "safe"}, + "violence": {"filtered": False, "severity": "safe"}, + }, + ) + ], + created=1703462735, + model="DeepSeek-R1", + ) + ) + answer_deltas = answer.split(" ") for answer_index, answer_delta in enumerate(answer_deltas): # Text completion chunks include whitespace, so we need to add it back in - if answer_index > 0 and answer_delta != "": + if answer_index > 0: answer_delta = " " + answer_delta self.chunks.append( openai.types.chat.ChatCompletionChunk( @@ -95,9 +126,9 @@ async def mock_acreate(*args, **kwargs): # Only mock a stream=True completion last_message = kwargs.get("messages")[-1]["content"] if last_message == "What is the capital of France?": - return AsyncChatCompletionIterator(" hmm The capital of France is Paris.") + return AsyncChatCompletionIterator("hmm", "The capital of France is Paris.") elif last_message == "What is the capital of Germany?": - return AsyncChatCompletionIterator(" hmm The capital of Germany is Berlin.") + return AsyncChatCompletionIterator("hmm", "The capital of Germany is Berlin.") else: raise ValueError(f"Unexpected message: {last_message}") diff --git a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines index 50730aa..f81bde8 100644 --- a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines @@ -1,7 +1,8 @@ -{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}} -{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " France", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Paris.", "reasoning_content": null, "role": "assistant"}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" France","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" Paris.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}} diff --git a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines index 42d9a6c..2d41c6e 100644 --- a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines @@ -1,7 +1,8 @@ -{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}} -{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Germany", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Berlin.", "reasoning_content": null, "role": "assistant"}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" Germany","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" Berlin.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}} From c76a850cd74c653c28c95a59c24b8fb5df1126c8 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 29 May 2025 06:28:33 -0700 Subject: [PATCH 7/8] Upgrade package reqs --- src/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/requirements.txt b/src/requirements.txt index 7bf7067..b038813 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -38,7 +38,7 @@ cffi==1.17.1 # via cryptography charset-normalizer==3.4.0 # via requests -click==8.1.7 +click==8.2.1 # via # flask # quart @@ -58,7 +58,7 @@ frozenlist==1.4.1 # aiosignal gunicorn==23.0.0 # via quartapp (pyproject.toml) -h11==0.14.0 +h11==0.16.0 # via # httpcore # hypercorn @@ -68,7 +68,7 @@ h2==4.1.0 # via hypercorn hpack==4.0.0 # via h2 -httpcore==1.0.7 +httpcore==1.0.9 # via httpx httptools==0.6.4 # via quartapp (pyproject.toml) @@ -156,7 +156,7 @@ typing-extensions==4.12.2 # pydantic-core urllib3==2.2.3 # via requests -uvicorn==0.32.0 +uvicorn==0.34.2 # via quartapp (pyproject.toml) uvloop==0.20.0 ; sys_platform != "win32" and (sys_platform != "cygwin" and platform_python_implementation != "PyPy") # via quartapp (pyproject.toml) From 6e52bab200cc11377d2e92736dc44b2ef4897b55 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 29 May 2025 06:30:24 -0700 Subject: [PATCH 8/8] Workflow update --- .github/workflows/python-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml index 5dbea0c..040b097 100644 --- a/.github/workflows/python-check.yaml +++ b/.github/workflows/python-check.yaml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-20.04", "windows-latest"] + os: ["ubuntu-latest", "windows-latest"] python_version: ["3.11"] steps: - uses: actions/checkout@v4