From e309458d0499faa23134bfdafb0e458457cd59f6 Mon Sep 17 00:00:00 2001
From: Major Hayden <major@redhat.com>
Date: Tue, 16 Dec 2025 10:35:19 -0600
Subject: [PATCH 1/2] feat(rlsapi): register v1 infer endpoint and add
 integration tests

Register the rlsapi v1 router in the application to expose the /v1/infer
endpoint for stateless inference requests from the RHEL Lightspeed
Command Line Assistant (CLA).

Integration tests cover:
- Successful inference with various input combinations
- Request ID generation and uniqueness
- Error handling for Llama Stack connection failures
- Empty response fallback behavior

Signed-off-by: Major Hayden <major@redhat.com>
---
 src/app/routers.py                            |   5 +
 .../endpoints/test_rlsapi_v1_integration.py   | 350 ++++++++++++++++++
 tests/unit/app/test_routers.py                |   7 +-
 3 files changed, 360 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration/endpoints/test_rlsapi_v1_integration.py

diff --git a/src/app/routers.py b/src/app/routers.py
index ae9cf51ce..3abc915ea 100644
--- a/src/app/routers.py
+++ b/src/app/routers.py
@@ -20,6 +20,8 @@
     tools,
     # V2 endpoints for Response API support
     query_v2,
+    # RHEL Lightspeed rlsapi v1 compatibility
+    rlsapi_v1,
 )
 
 
@@ -49,6 +51,9 @@ def include_routers(app: FastAPI) -> None:
     # Note: query_v2, streaming_query_v2, and conversations_v3 are now exposed at /v1 above
     # The old query, streaming_query, and conversations modules are deprecated
 
+    # RHEL Lightspeed rlsapi v1 compatibility - stateless CLA (Command Line Assistant) endpoint
+    app.include_router(rlsapi_v1.router, prefix="/v1")
+
     # road-core does not version these endpoints
     app.include_router(health.router)
     app.include_router(authorized.router)
diff --git a/tests/integration/endpoints/test_rlsapi_v1_integration.py b/tests/integration/endpoints/test_rlsapi_v1_integration.py
new file mode 100644
index 000000000..3dd89d7f2
--- /dev/null
+++ b/tests/integration/endpoints/test_rlsapi_v1_integration.py
@@ -0,0 +1,350 @@
+"""Integration tests for the rlsapi v1 /infer endpoint.
+
+Tests the stateless inference endpoint used by the RHEL Lightspeed Command Line
+Assistant (CLA) for single-turn LLM queries without conversation persistence.
+"""
+
+# pylint: disable=too-many-arguments
+# pylint: disable=too-many-positional-arguments
+# pylint: disable=protected-access
+# pylint: disable=unused-argument
+
+from typing import Any, NamedTuple
+
+import pytest
+from fastapi import HTTPException, status
+from llama_stack_client import APIConnectionError
+from llama_stack_client.types.alpha.agents.turn import Turn
+from pytest_mock import MockerFixture
+
+import constants
+from app.endpoints.rlsapi_v1 import infer_endpoint
+from authentication.interface import AuthTuple
+from configuration import AppConfig
+from models.rlsapi.requests import (
+    RlsapiV1Attachment,
+    RlsapiV1CLA,
+    RlsapiV1Context,
+    RlsapiV1InferRequest,
+    RlsapiV1SystemInfo,
+    RlsapiV1Terminal,
+)
+from models.rlsapi.responses import RlsapiV1InferResponse
+from tests.unit.utils.auth_helpers import mock_authorization_resolvers
+from utils.suid import check_suid
+
+
+class MockAgentFixture(NamedTuple):
+    """Container for mocked Llama Stack agent components."""
+
+    client: Any
+    agent: Any
+    holder_class: Any
+
+
+# ==========================================
+# Shared Fixtures
+# ==========================================
+
+
+@pytest.fixture(name="rlsapi_config")
+def rlsapi_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppConfig:
+    """Extend test_config with inference defaults required by rlsapi v1.
+
+    NOTE(major): The standard test configuration doesn't include inference
+    settings (default_model, default_provider) which rlsapi v1 requires.
+    """
+    test_config.inference.default_model = "test-model"
+    test_config.inference.default_provider = "test-provider"
+    mocker.patch("app.endpoints.rlsapi_v1.configuration", test_config)
+    return test_config
+
+
+@pytest.fixture(name="mock_authorization")
+def mock_authorization_fixture(mocker: MockerFixture) -> None:
+    """Mock authorization resolvers for integration tests."""
+    mock_authorization_resolvers(mocker)
+
+
+def _create_mock_agent(
+    mocker: MockerFixture,
+    response_content: str = "Use the `ls` command to list files in a directory.",
+    output_message: Any = "default",
+) -> MockAgentFixture:
+    """Create a mocked Llama Stack agent with configurable response.
+
+    Args:
+        mocker: pytest-mock fixture
+        response_content: Text content for the LLM response
+        output_message: Custom output_message Mock, or "default" to create one,
+                       or None for no output_message
+
+    Returns:
+        MockAgentFixture with client, agent, and holder_class components
+    """
+    mock_holder_class = mocker.patch(
+        "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder"
+    )
+    mock_client = mocker.AsyncMock()
+
+    # Configure output message
+    if output_message == "default":
+        mock_output_message = mocker.Mock()
+        mock_output_message.content = response_content
+    else:
+        mock_output_message = output_message
+
+    mock_turn = mocker.Mock(spec=Turn)
+    mock_turn.output_message = mock_output_message
+    mock_turn.steps = []
+
+    mock_agent = mocker.AsyncMock()
+    mock_agent.create_turn = mocker.AsyncMock(return_value=mock_turn)
+    mock_agent._agent_id = "test_agent_id"
+
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.get_temp_agent",
+        return_value=(mock_agent, "test_session_id", None),
+    )
+
+    mock_holder_instance = mock_holder_class.return_value
+    mock_holder_instance.get_client.return_value = mock_client
+
+    return MockAgentFixture(mock_client, mock_agent, mock_holder_class)
+
+
+@pytest.fixture(name="mock_llama_stack")
+def mock_llama_stack_fixture(
+    rlsapi_config: AppConfig, mocker: MockerFixture
+) -> MockAgentFixture:
+    """Mock Llama Stack client with successful response."""
+    _ = rlsapi_config
+    return _create_mock_agent(mocker)
+
+
+# ==========================================
+# Basic Response Tests
+# ==========================================
+
+
+@pytest.mark.asyncio
+async def test_rlsapi_v1_infer_minimal_request(
+    mock_llama_stack: MockAgentFixture,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+) -> None:
+    """Test /v1/infer endpoint with minimal request (question only)."""
+    response = await infer_endpoint(
+        infer_request=RlsapiV1InferRequest(question="How do I list files?"),
+        auth=test_auth,
+    )
+
+    assert isinstance(response, RlsapiV1InferResponse)
+    assert response.data.text == "Use the `ls` command to list files in a directory."
+    assert check_suid(response.data.request_id)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("context", "test_id"),
+    [
+        pytest.param(
+            RlsapiV1Context(stdin="Error: Connection refused"),
+            "stdin_only",
+            id="stdin_only",
+        ),
+        pytest.param(
+            RlsapiV1Context(
+                attachments=RlsapiV1Attachment(contents="[mysqld]\nmax=150")
+            ),
+            "attachment_only",
+            id="attachment_only",
+        ),
+        pytest.param(
+            RlsapiV1Context(terminal=RlsapiV1Terminal(output="Permission denied")),
+            "terminal_only",
+            id="terminal_only",
+        ),
+        pytest.param(
+            RlsapiV1Context(
+                stdin="dmesg output",
+                attachments=RlsapiV1Attachment(contents="log content"),
+                terminal=RlsapiV1Terminal(output="command not found"),
+                systeminfo=RlsapiV1SystemInfo(os="RHEL", version="9.3", arch="x86_64"),
+                cla=RlsapiV1CLA(nevra="cla-0.4.0", version="0.4.0"),
+            ),
+            "full_context",
+            id="full_context",
+        ),
+    ],
+)
+async def test_rlsapi_v1_infer_with_context(
+    mock_llama_stack: MockAgentFixture,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+    context: RlsapiV1Context,
+    test_id: str,
+) -> None:
+    """Test /v1/infer endpoint with various context configurations."""
+    response = await infer_endpoint(
+        infer_request=RlsapiV1InferRequest(question="Help me?", context=context),
+        auth=test_auth,
+    )
+
+    assert isinstance(response, RlsapiV1InferResponse)
+    assert response.data.text is not None
+    assert response.data.request_id is not None
+
+
+@pytest.mark.asyncio
+async def test_rlsapi_v1_infer_generates_unique_request_ids(
+    mock_llama_stack: MockAgentFixture,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+) -> None:
+    """Test that each /v1/infer call generates a unique request_id."""
+    request = RlsapiV1InferRequest(question="How do I list files?")
+
+    responses = [
+        await infer_endpoint(infer_request=request, auth=test_auth) for _ in range(3)
+    ]
+    request_ids = {r.data.request_id for r in responses}
+
+    assert len(request_ids) == 3
+    assert all(check_suid(rid) for rid in request_ids)
+
+
+# ==========================================
+# Error Handling Tests
+# ==========================================
+
+
+@pytest.mark.asyncio
+async def test_rlsapi_v1_infer_connection_error_returns_503(
+    rlsapi_config: AppConfig,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+    mocker: MockerFixture,
+) -> None:
+    """Test /v1/infer returns 503 when Llama Stack is unavailable."""
+    _ = rlsapi_config
+
+    # Create agent that raises APIConnectionError
+    mock_holder_class = mocker.patch(
+        "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder"
+    )
+    mock_agent = mocker.AsyncMock()
+    mock_agent.create_turn = mocker.AsyncMock(
+        side_effect=APIConnectionError(request=mocker.Mock())
+    )
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.get_temp_agent",
+        return_value=(mock_agent, "test_session_id", None),
+    )
+    mock_holder_class.return_value.get_client.return_value = mocker.AsyncMock()
+
+    with pytest.raises(HTTPException) as exc_info:
+        await infer_endpoint(
+            infer_request=RlsapiV1InferRequest(question="Test"),
+            auth=test_auth,
+        )
+
+    assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
+    assert isinstance(exc_info.value.detail, dict)
+    assert "Llama Stack" in exc_info.value.detail["response"]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "output_message",
+    [
+        pytest.param(None, id="none_output_message"),
+        pytest.param("empty", id="empty_content"),
+    ],
+)
+async def test_rlsapi_v1_infer_fallback_responses(
+    rlsapi_config: AppConfig,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+    mocker: MockerFixture,
+    output_message: Any,
+) -> None:
+    """Test /v1/infer returns fallback for empty/None responses."""
+    _ = rlsapi_config
+
+    if output_message == "empty":
+        mock_output = mocker.Mock()
+        mock_output.content = ""
+        _create_mock_agent(mocker, output_message=mock_output)
+    else:
+        _create_mock_agent(mocker, output_message=None)
+
+    response = await infer_endpoint(
+        infer_request=RlsapiV1InferRequest(question="Test"),
+        auth=test_auth,
+    )
+
+    assert response.data.text == constants.UNABLE_TO_PROCESS_RESPONSE
+
+
+# ==========================================
+# Input Source Combination Tests
+# ==========================================
+
+
+@pytest.mark.asyncio
+async def test_rlsapi_v1_infer_input_source_combination(
+    rlsapi_config: AppConfig,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+    mocker: MockerFixture,
+) -> None:
+    """Test that input sources are properly combined before sending to LLM."""
+    _ = rlsapi_config
+    mocks = _create_mock_agent(mocker)
+
+    await infer_endpoint(
+        infer_request=RlsapiV1InferRequest(
+            question="My question",
+            context=RlsapiV1Context(
+                stdin="stdin content",
+                attachments=RlsapiV1Attachment(contents="attachment content"),
+                terminal=RlsapiV1Terminal(output="terminal output"),
+            ),
+        ),
+        auth=test_auth,
+    )
+
+    # Verify all parts present in message sent to LLM
+    call_args = mocks.agent.create_turn.call_args
+    message_content = call_args.kwargs["messages"][0]["content"]
+
+    for expected in ["My question", "stdin content", "attachment content", "terminal"]:
+        assert expected in message_content
+
+
+# ==========================================
+# Skip RAG Tests
+# ==========================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "skip_rag",
+    [pytest.param(False, id="default_false"), pytest.param(True, id="explicit_true")],
+)
+async def test_rlsapi_v1_infer_skip_rag(
+    mock_llama_stack: MockAgentFixture,
+    mock_authorization: None,
+    test_auth: AuthTuple,
+    skip_rag: bool,
+) -> None:
+    """Test skip_rag parameter is accepted.
+
+    NOTE(major): RAG is not implemented in lightspeed-stack rlsapi v1.
+    """
+    request = RlsapiV1InferRequest(question="How do I list files?", skip_rag=skip_rag)
+    assert request.skip_rag == skip_rag
+
+    response = await infer_endpoint(infer_request=request, auth=test_auth)
+    assert isinstance(response, RlsapiV1InferResponse)
diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py
index 1245a07ba..5d2b8e6cd 100644
--- a/tests/unit/app/test_routers.py
+++ b/tests/unit/app/test_routers.py
@@ -23,6 +23,7 @@
     authorized,
     metrics,
     tools,
+    rlsapi_v1,
 )  # noqa:E402
 
 
@@ -65,7 +66,7 @@ def test_include_routers() -> None:
     include_routers(app)
 
     # are all routers added?
-    assert len(app.routers) == 16
+    assert len(app.routers) == 17
     assert root.router in app.get_routers()
     assert info.router in app.get_routers()
     assert models.router in app.get_routers()
@@ -84,6 +85,7 @@ def test_include_routers() -> None:
     assert conversations_v2.router in app.get_routers()
     assert conversations_v3.router in app.get_routers()
     assert metrics.router in app.get_routers()
+    assert rlsapi_v1.router in app.get_routers()
 
 
 def test_check_prefixes() -> None:
@@ -92,7 +94,7 @@ def test_check_prefixes() -> None:
     include_routers(app)
 
     # are all routers added?
-    assert len(app.routers) == 16
+    assert len(app.routers) == 17
     assert app.get_router_prefix(root.router) == ""
     assert app.get_router_prefix(info.router) == "/v1"
     assert app.get_router_prefix(models.router) == "/v1"
@@ -112,3 +114,4 @@ def test_check_prefixes() -> None:
     assert app.get_router_prefix(conversations_v2.router) == "/v2"
     assert app.get_router_prefix(conversations_v3.router) == "/v1"
     assert app.get_router_prefix(metrics.router) == ""
+    assert app.get_router_prefix(rlsapi_v1.router) == "/v1"

From 802b169e310253633dada5392ba324382ac7f139 Mon Sep 17 00:00:00 2001
From: Major Hayden <major@redhat.com>
Date: Tue, 16 Dec 2025 10:37:17 -0600
Subject: [PATCH 2/2] docs(rlsapi): add CLA example configuration

Provide an example configuration for RHEL Lightspeed Command Line
Assistant (CLA) deployments using the rlsapi v1 /infer endpoint.

The example includes:
- Service and Llama Stack configuration
- System prompt customization options (inline, file, profile module)
- Red Hat Identity authentication setup
- Authorization rules for rlsapi_v1_infer action

Signed-off-by: Major Hayden <major@redhat.com>
---
 examples/lightspeed-stack-rlsapi-cla.yaml | 37 +++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 examples/lightspeed-stack-rlsapi-cla.yaml

diff --git a/examples/lightspeed-stack-rlsapi-cla.yaml b/examples/lightspeed-stack-rlsapi-cla.yaml
new file mode 100644
index 000000000..8dcedfa2b
--- /dev/null
+++ b/examples/lightspeed-stack-rlsapi-cla.yaml
@@ -0,0 +1,37 @@
+name: RHEL Lightspeed CLA Configuration Example
+# Example configuration for RHEL Lightspeed Command Line Assistant (CLA)
+# deployments using the rlsapi v1 /infer endpoint.
+#
+# The rlsapi v1 endpoint provides stateless inference without conversation
+# history or RAG. It is designed for the RHEL Lightspeed CLI tool.
+service:
+  host: localhost
+  port: 8080
+  auth_enabled: true
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  use_as_library_client: false
+  url: http://localhost:8321
+inference:
+  # Configure the default model for rlsapi v1 inference
+  # Provider ID must match the provider_id in your Llama Stack config
+  default_provider: google-vertex
+  default_model: gemini-2.5-flash
+
+# Red Hat Identity authentication (typical for CLA deployments)
+authentication:
+  module: "rh-identity"
+  rh_identity_config:
+    required_entitlements: ["rhel"]
+
+# Authorization configuration for CLA
+# Grant rlsapi_v1_infer to all authenticated users
+authorization:
+  access_rules:
+    - role: "*"
+      actions: ["info", "rlsapi_v1_infer"]
+    # Optional: Add admin role for full access
+    # - role: "admin"
+    #   actions: ["admin"]