Merged
55 changes: 54 additions & 1 deletion docs/openapi.json
@@ -935,6 +935,27 @@
"title": "DataCollectorConfiguration",
"description": "Data collector configuration for sending data to ingress server."
},
"FeedbackCategory": {
"type": "string",
"enum": [
"incorrect",
"not_relevant",
"incomplete",
"outdated_information",
"unsafe",
"other"
],
"x-enum-varnames": [
"INCORRECT",
"NOT_RELEVANT",
"INCOMPLETE",
"OUTDATED_INFORMATION",
"UNSAFE",
"OTHER"
],
"title": "FeedbackCategory",
"description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality when users provide negative feedback (thumbs down). Multiple categories can be selected to provide comprehensive feedback about response issues."
},
"FeedbackRequest": {
"properties": {
"conversation_id": {
@@ -975,6 +996,27 @@
"examples": [
"I'm not satisfied with the response because it is too vague."
]
},
"categories": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/FeedbackCategory"
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Categories",
"description": "List of feedback categories that apply to the LLM response.",
"examples": [
[
"incorrect",
"incomplete"
]
]
Comment on lines +1000 to +1019
Contributor:
🛠️ Refactor suggestion

Schema does not enforce non-empty & unique categories list

The server-side model requires the list to be non-empty & de-duplicated, but the OpenAPI schema leaves those constraints implicit.
Adding minItems and uniqueItems makes the contract self-describing and prevents clients from sending invalid payloads that will be rejected at runtime.

       "items": {
         "$ref": "#/components/schemas/FeedbackCategory"
       },
       "type": "array",
+      "minItems": 1,
+      "uniqueItems": true

}
},
"type": "object",
@@ -984,14 +1026,25 @@
"llm_response"
],
"title": "FeedbackRequest",
"description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"Great service!\",\n llm_response=\"I don't know\",\n sentiment=-1,\n )\n ```",
"description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"Great service!\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCOMPLETE, FeedbackCategory.UNSAFE]\n )\n ```",
"examples": [
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"llm_response": "bar",
"sentiment": 1,
"user_feedback": "Great service!",
"user_question": "foo"
},
{
"categories": [
"incomplete",
"not_relevant"
],
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"llm_response": "You need to use Docker and Kubernetes for everything.",
"sentiment": -1,
"user_feedback": "This response is too general and doesn't provide specific steps.",
"user_question": "How do I deploy a web app?"
}
]
},
33 changes: 32 additions & 1 deletion docs/openapi.md
@@ -457,6 +457,24 @@ Data collector configuration for sending data to ingress server.
| connection_timeout | integer | |


## FeedbackCategory


Enum representing predefined feedback categories for AI responses.

These categories help provide structured feedback about AI inference quality.
Multiple categories can be selected to provide comprehensive feedback.

| Value | Description |
|-------|-------------|
| incorrect | The answer provided is completely wrong |
| not_relevant | This answer doesn't address my question at all |
| incomplete | The answer only covers part of what I asked about |
| outdated_information | This information is from several years ago and no longer accurate |
| unsafe | This response could be harmful or dangerous if followed |
| other | The response has issues not covered by other categories |

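The category values travel as plain strings on the wire. As a quick sketch (the import path is assumed from `src/models/requests.py`), enum members and their string values are interchangeable because `FeedbackCategory` derives from `str`:

```python
from models.requests import FeedbackCategory, FeedbackRequest

request = FeedbackRequest(
    conversation_id="12345678-abcd-0000-0123-456789abcdef",
    user_question="How do I deploy a web app?",
    llm_response="Use Docker.",
    sentiment=-1,
    categories=["incomplete", FeedbackCategory.NOT_RELEVANT],  # mixed forms coerce
)
# Order may vary: the model validator de-duplicates the list via set().
print(request.model_dump(mode="json")["categories"])
```
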

## FeedbackRequest


@@ -468,15 +486,27 @@
llm_response: The required LLM response.
sentiment: The optional sentiment.
user_feedback: The optional user feedback.
categories: The optional list of feedback categories (multi-select).

Example:
Examples:
```python
# Basic feedback
feedback_request = FeedbackRequest(
conversation_id="12345678-abcd-0000-0123-456789abcdef",
user_question="what are you doing?",
user_feedback="Great service!",
llm_response="I don't know",
sentiment=1
)

# Feedback with categories
feedback_request = FeedbackRequest(
conversation_id="12345678-abcd-0000-0123-456789abcdef",
user_question="How do I deploy a web app?",
llm_response="You need to use Docker and Kubernetes for everything.",
user_feedback="This response is too general and doesn't provide specific steps.",
sentiment=-1,
categories=["incomplete", "not_relevant"]
)
```

@@ -488,6 +518,7 @@ Example:
| llm_response | string | |
| sentiment | | |
| user_feedback | | Feedback on the LLM response. |
| categories | array[FeedbackCategory] | List of feedback categories that apply to the LLM response. |

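For illustration, submitting categorized feedback over HTTP might look like the sketch below (assumptions: the service listens on `localhost:8080` and exposes the feedback endpoint at `POST /v1/feedback`; the `requests` package is used for brevity):

```python
import requests

payload = {
    "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
    "user_question": "How do I deploy a web app?",
    "llm_response": "Use Docker.",
    "sentiment": -1,
    "categories": ["incomplete", "not_relevant"],
}

resp = requests.post("http://localhost:8080/v1/feedback", json=payload, timeout=10)
resp.raise_for_status()
print(resp.json())  # on success the service replies: {"response": "feedback received"}
```
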

## FeedbackResponse
86 changes: 78 additions & 8 deletions src/models/requests.py
@@ -1,6 +1,7 @@
"""Model for service requests."""

from typing import Optional, Self
from enum import Enum

from pydantic import BaseModel, model_validator, field_validator, Field
from llama_stack_client.types.agents.turn_create_params import Document
@@ -146,6 +147,22 @@ def validate_media_type(self) -> Self:
return self


class FeedbackCategory(str, Enum):
"""Enum representing predefined feedback categories for AI responses.

These categories help provide structured feedback about AI inference quality
when users provide negative feedback (thumbs down). Multiple categories can
be selected to provide comprehensive feedback about response issues.
"""

INCORRECT = "incorrect" # "The answer provided is completely wrong"
NOT_RELEVANT = "not_relevant" # "This answer doesn't address my question at all"
INCOMPLETE = "incomplete" # "The answer only covers part of what I asked about"
OUTDATED_INFORMATION = "outdated_information" # "This information is from several years ago and no longer accurate" # pylint: disable=line-too-long
UNSAFE = "unsafe" # "This response could be harmful or dangerous if followed"
OTHER = "other" # "The response has issues not covered by other categories"


class FeedbackRequest(BaseModel):
"""Model representing a feedback request.

@@ -155,15 +172,17 @@ class FeedbackRequest(BaseModel):
llm_response: The required LLM response.
sentiment: The optional sentiment.
user_feedback: The optional user feedback.
categories: The optional list of feedback categories (multi-select for negative feedback).

Example:
```python
feedback_request = FeedbackRequest(
conversation_id="12345678-abcd-0000-0123-456789abcdef",
user_question="what are you doing?",
user_feedback="Great service!",
user_feedback="This response is not helpful",
llm_response="I don't know",
sentiment=-1,
categories=[FeedbackCategory.INCORRECT, FeedbackCategory.INCOMPLETE]
)
```
"""
@@ -179,6 +198,15 @@ class FeedbackRequest(BaseModel):
description="Feedback on the LLM response.",
examples=["I'm not satisfied with the response because it is too vague."],
)
# Optional list of predefined feedback categories for negative feedback
categories: Optional[list[FeedbackCategory]] = Field(
default=None,
description=(
"List of feedback categories that describe issues with the LLM response "
"(for negative feedback)."
),
examples=[["incorrect", "incomplete"]],
)

# provides examples for /docs endpoint
model_config = {
@@ -188,9 +216,26 @@
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "foo",
"llm_response": "bar",
"user_feedback": "Great service!",
"sentiment": 1,
}
"user_feedback": "Not satisfied with the response quality.",
"sentiment": -1,
},
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "What is the capital of France?",
"llm_response": "The capital of France is Berlin.",
"sentiment": -1,
"categories": ["incorrect"],
},
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "How do I deploy a web app?",
"llm_response": "Use Docker.",
"user_feedback": (
"This response is too general and doesn't provide specific steps."
),
"sentiment": -1,
"categories": ["incomplete", "not_relevant"],
},
]
}
}
@@ -213,9 +258,34 @@ def check_sentiment(cls, value: Optional[int]) -> Optional[int]:
)
return value

@field_validator("categories")
@classmethod
def validate_categories(
cls, value: Optional[list[FeedbackCategory]]
) -> Optional[list[FeedbackCategory]]:
"""Validate feedback categories list."""
if value is None:
return value

if not isinstance(value, list):
raise ValueError("Categories must be a list")

if len(value) == 0:
return None # Convert empty list to None for consistency

unique_categories = list(set(value))
return unique_categories
Contributor:
Should this validation take the "sentiment" field into account? The description at L224 says:

"List of feedback categories that describe issues with the LLM response (for negative feedback)."

So I assume it's only required when feedback is negative.

Contributor Author:
Currently, in the assisted chatbot we will use this endpoint that way, as it's more aligned with other implementations: the user either gives a thumbs up, or a thumbs down plus an optional comment. But even so, maybe the API should not have such a constraint; positive categories might be added later.

Contributor:
Gotcha, thanks for the explanation

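If the coupling discussed above were ever wanted, a hypothetical sketch (not part of this PR, and the thread settled on leaving the API unconstrained) could enforce it with a second model validator:

```python
from typing import Optional, Self

from pydantic import BaseModel, model_validator


class StrictFeedbackRequest(BaseModel):
    """Hypothetical variant that only accepts categories with a thumbs down."""

    sentiment: Optional[int] = None
    user_feedback: Optional[str] = None
    categories: Optional[list[str]] = None

    @model_validator(mode="after")
    def categories_require_negative_sentiment(self) -> Self:
        # Reject categories unless the user also gave a thumbs down (-1).
        if self.categories and self.sentiment != -1:
            raise ValueError("'categories' is only accepted when sentiment == -1")
        return self
```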

@model_validator(mode="after")
def check_sentiment_or_user_feedback_set(self) -> Self:
"""Ensure that either 'sentiment' or 'user_feedback' is set."""
if self.sentiment is None and self.user_feedback is None:
raise ValueError("Either 'sentiment' or 'user_feedback' must be set")
def check_feedback_provided(self) -> Self:
"""Ensure that at least one form of feedback is provided."""
if (
self.sentiment is None
and self.user_feedback is None
and self.categories is None
):
raise ValueError(
"At least one form of feedback must be provided: "
"'sentiment', 'user_feedback', or 'categories'"
)
return self
75 changes: 53 additions & 22 deletions tests/unit/app/endpoints/test_feedback.py
@@ -48,15 +48,31 @@ async def test_assert_feedback_enabled(mocker):
await assert_feedback_enabled(mocker.Mock())


def test_feedback_endpoint_handler(mocker):
"""Test that feedback_endpoint_handler processes feedback correctly."""
@pytest.mark.parametrize(
"feedback_request_data",
[
{},
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "What is Kubernetes?",
"llm_response": "It's some computer thing.",
"sentiment": -1,
"categories": ["incorrect", "incomplete"],
},
],
ids=["no_categories", "with_negative_categories"],
)
def test_feedback_endpoint_handler(mocker, feedback_request_data):
"""Test that feedback_endpoint_handler processes feedback for different payloads."""

# Mock the dependencies
mocker.patch("app.endpoints.feedback.assert_feedback_enabled", return_value=None)
mocker.patch("utils.common.retrieve_user_id", return_value="test_user_id")
mocker.patch("app.endpoints.feedback.store_feedback", return_value=None)

# Mock the feedback request
# Prepare the feedback request mock
feedback_request = mocker.Mock()
feedback_request.model_dump.return_value = feedback_request_data

# Call the endpoint handler
result = feedback_endpoint_handler(
Expand All @@ -65,7 +81,7 @@ def test_feedback_endpoint_handler(mocker):
_ensure_feedback_enabled=assert_feedback_enabled,
)

# Assert that the response is as expected
# Assert that the expected response is returned
assert result.response == "feedback received"


@@ -94,35 +110,50 @@ def test_feedback_endpoint_handler_error(mocker):
assert exc_info.value.detail["response"] == "Error storing user feedback"


def test_store_feedback(mocker):
"""Test that store_feedback calls the correct storage function."""
@pytest.mark.parametrize(
"feedback_request_data",
[
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "What is OpenStack?",
"llm_response": "It's some cloud thing.",
"user_feedback": "This response is not helpful!",
"sentiment": -1,
},
{
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "What is Kubernetes?",
"llm_response": "K8s.",
"sentiment": -1,
"categories": ["incorrect", "not_relevant", "incomplete"],
},
],
ids=["negative_text_feedback", "negative_feedback_with_categories"],
)
def test_store_feedback(mocker, feedback_request_data):
"""Test that store_feedback correctly stores various feedback payloads."""

configuration.user_data_collection_configuration.feedback_storage = "fake-path"

# Patch filesystem and helpers
mocker.patch("builtins.open", mocker.mock_open())
mocker.patch("app.endpoints.feedback.Path", return_value=mocker.MagicMock())
mocker.patch("app.endpoints.feedback.get_suid", return_value="fake-uuid")

# Mock the JSON to assert the data is stored correctly
# Patch json to inspect stored data
mock_json = mocker.patch("app.endpoints.feedback.json")

user_id = "test_user_id"
feedback_request = {
"conversation_id": "12345678-abcd-0000-0123-456789abcdef",
"user_question": "What is OpenStack?",
"llm_response": "OpenStack is a cloud computing platform.",
"user_feedback": "Great service!",
"sentiment": 1,

store_feedback(user_id, feedback_request_data)

expected_data = {
"user_id": user_id,
"timestamp": mocker.ANY,
**feedback_request_data,
}

store_feedback(user_id, feedback_request)
mock_json.dump.assert_called_once_with(
{
"user_id": user_id,
"timestamp": mocker.ANY,
**feedback_request,
},
mocker.ANY,
)
mock_json.dump.assert_called_once_with(expected_data, mocker.ANY)


def test_feedback_status():