Skip to content

Commit f92755d

Browse files
committed
add e2e tests for health endpoints
1 parent b3351ca commit f92755d

File tree

3 files changed

+141
-56
lines changed

3 files changed

+141
-56
lines changed

tests/e2e/features/environment.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
4. after_scenario
88
"""
99

10+
import subprocess
11+
import time
1012
from behave.model import Scenario
1113
from behave.runner import Context
1214

@@ -32,6 +34,52 @@ def before_scenario(context: Context, scenario: Scenario) -> None:
3234

3335
def after_scenario(context: Context, scenario: Scenario) -> None:
    """Run after each scenario is run.

    If a scenario disrupted the Llama Stack connection (see the
    "The llama-stack connection is disrupted" step, which sets
    ``context.llama_stack_was_running``), restart the ``llama-stack``
    docker container and poll its health endpoint until it responds
    or the retry budget is exhausted.

    Parameters:
        context: behave context; only ``llama_stack_was_running`` is read.
        scenario: the scenario that just finished (unused here).
    """
    # Restore Llama Stack connection if it was disrupted
    if hasattr(context, "llama_stack_was_running") and context.llama_stack_was_running:
        try:
            # Start the llama-stack container again
            subprocess.run(
                ["docker", "start", "llama-stack"], check=True, capture_output=True
            )

            # Wait for the service to be healthy
            print("Restoring Llama Stack connection...")
            time.sleep(5)

            # Poll the in-container health endpoint; each failed probe
            # (timeout OR non-zero curl exit while the service is still
            # booting) must NOT abort the loop — retry up to 6 times.
            for attempt in range(6):  # Try for 30 seconds
                try:
                    subprocess.run(
                        [
                            "docker",
                            "exec",
                            "llama-stack",
                            "curl",
                            "-f",
                            "http://localhost:8321/v1/health",
                        ],
                        capture_output=True,
                        timeout=5,
                        check=True,
                    )
                    # check=True guarantees a zero exit code here.
                    print("✓ Llama Stack connection restored successfully")
                    break
                except subprocess.TimeoutExpired:
                    print(f"⏱Health check timed out on attempt {attempt + 1}/6")
                except subprocess.CalledProcessError:
                    # curl exited non-zero (service not accepting requests
                    # yet) — expected during startup, keep retrying instead
                    # of letting it escape to the outer handler.
                    pass

                if attempt < 5:
                    print(
                        f"Waiting for Llama Stack to be healthy... (attempt {attempt + 1}/6)"
                    )
                    time.sleep(5)
                else:
                    print(
                        "Warning: Llama Stack may not be fully healthy after restoration"
                    )

        except subprocess.CalledProcessError as e:
            # `docker start` itself failed — best-effort restoration, warn only.
            print(f"Warning: Could not restore Llama Stack connection: {e}")
3583

3684

3785
def before_feature(context: Context, feature: Scenario) -> None:

tests/e2e/features/health.feature

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,64 @@
1-
# Feature: Health endpoint API tests
2-
#TODO: fix test
3-
4-
# Background:
5-
# Given The service is started locally
6-
# And REST API service hostname is localhost
7-
# And REST API service port is 8080
8-
# And REST API service prefix is /v1
9-
10-
11-
# Scenario: Check if service report proper readiness state
12-
# Given The system is in default state
13-
# When I access endpoint "readiness" using HTTP GET method
14-
# Then The status code of the response is 200
15-
# And The body of the response has the following schema
16-
# """
17-
# {
18-
# "ready": "bool",
19-
# "reason": "str",
20-
# "providers": "list[str]"
21-
# }
22-
# """
23-
# And The body of the response is the following
24-
# """
25-
# {"ready": true, "reason": "All providers are healthy", "providers": []}
26-
# """
27-
28-
# Scenario: Check if service report proper readiness state when llama stack is not available
29-
# Given The system is in default state
30-
# And The llama-stack connection is disrupted
31-
# When I access endpoint "readiness" using HTTP GET method
32-
# Then The status code of the response is 503
33-
34-
# Scenario: Check if service report proper liveness state
35-
# Given The system is in default state
36-
# When I access endpoint "liveness" using HTTP GET method
37-
# Then The status code of the response is 200
38-
# And The body of the response has the following schema
39-
# """
40-
# {
41-
# "alive": "bool"
42-
# }
43-
# """
44-
# And The body of the response is the following
45-
# """
46-
# {"alive":true}
47-
# """
48-
49-
# Scenario: Check if service report proper liveness state when llama stack is not available
50-
# Given The system is in default state
51-
# And The llama-stack connection is disrupted
52-
# When I access endpoint "liveness" using HTTP GET method
53-
# Then The status code of the response is 503
1+
# e2e tests for the service health endpoints (readiness and liveness),
# including behavior when the llama-stack backend is unavailable.
Feature: REST API tests


  Background:
    Given The service is started locally
    And REST API service hostname is localhost
    And REST API service port is 8080
    And REST API service prefix is /v1


  # Default state: readiness returns 200 with all providers healthy.
  Scenario: Check if service report proper readiness state
    Given The system is in default state
    When I access endpoint "readiness" using HTTP GET method
    Then The status code of the response is 200
    And The body of the response has the following schema
      """
      {
          "ready": "bool",
          "reason": "str",
          "providers": "list[str]"
      }
      """
    And The body of the response is the following
      """
      {"ready": true, "reason": "All providers are healthy", "providers": []}
      """


  # Default state: liveness returns 200 with alive=true.
  Scenario: Check if service report proper liveness state
    Given The system is in default state
    When I access endpoint "liveness" using HTTP GET method
    Then The status code of the response is 200
    And The body of the response has the following schema
      """
      {
          "alive": "bool"
      }
      """
    And The body of the response is the following
      """
      {"alive": true}
      """


  # With llama-stack down, readiness must report 503; the "providers"
  # field is ignored because its exact contents vary.
  Scenario: Check if service report proper readiness state when llama stack is not available
    Given The system is in default state
    And The llama-stack connection is disrupted
    When I access endpoint "readiness" using HTTP GET method
    Then The status code of the response is 503
    And The body of the response, ignoring the "providers" field, is the following
      """
      {"ready": false, "reason": "Providers not healthy: unknown"}
      """


  # Liveness is independent of the backend: still 200 with llama-stack down.
  Scenario: Check if service report proper liveness state even when llama stack is not available
    Given The system is in default state
    And The llama-stack connection is disrupted
    When I access endpoint "liveness" using HTTP GET method
    Then The status code of the response is 200
    And The body of the response is the following
      """
      {"alive": true}
      """

tests/e2e/features/steps/health.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,40 @@
11
"""Implementation of common test steps."""
22

3+
import subprocess
4+
import time
35
from behave import given # pyright: ignore[reportAttributeAccessIssue]
46
from behave.runner import Context
57

68

79
@given("The llama-stack connection is disrupted")
810
def llama_stack_connection_broken(context: Context) -> None:
9-
"""Break llama_stack connection."""
10-
# TODO: add step implementation
11-
assert context is not None
11+
"""Break llama_stack connection by stopping the container."""
12+
# Store original state for restoration
13+
context.llama_stack_was_running = False
14+
15+
try:
16+
result = subprocess.run(
17+
["docker", "inspect", "-f", "{{.State.Running}}", "llama-stack"],
18+
capture_output=True,
19+
text=True,
20+
check=True,
21+
)
22+
23+
if result.stdout.strip():
24+
context.llama_stack_was_running = True
25+
subprocess.run(
26+
["docker", "stop", "llama-stack"], check=True, capture_output=True
27+
)
28+
29+
# Wait a moment for the connection to be fully disrupted
30+
time.sleep(2)
31+
32+
print("Llama Stack connection disrupted successfully")
33+
else:
34+
print("Llama Stack container was not running")
35+
36+
except subprocess.CalledProcessError as e:
37+
print(f"Warning: Could not disrupt Llama Stack connection: {e}")
1238

1339

1440
@given("the service is stopped")

0 commit comments

Comments
 (0)