generated from kubernetes/kubernetes-template-project
-
Notifications
You must be signed in to change notification settings - Fork 176
Closed
Labels
kind/bug: Categorizes issue or PR as related to a bug. needs-triage: Indicates an issue or PR lacks a `triage/foo` label and requires one.
Description
What happened:
Followed the guide docs (https://gateway-api-inference-extension.sigs.k8s.io/guides/#__tabbed_1_3) step by step and encountered a bug, shown by: kubectl get httproute llm-route -o yaml
root@controller-01:~/llm-d# kubectl get httproute llm-route -o yaml
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"gateway.networking.k8s.io/v1","kind":"HTTPRoute","metadata":{"annotations":{},"name":"llm-route","namespace":"default"},"spec":{"parentRefs":[{"group":"gateway.networking.k8s.io","kind":"Gateway","name":"inference-gateway"}],"rules":[{"backendRefs":[{"group":"inference.networking.x-k8s.io","kind":"InferencePool","name":"vllm-llama3-8b-instruct"}],"matches":[{"path":{"type":"PathPrefix","value":"/"}}],"timeouts":{"request":"300s"}}]}}
  creationTimestamp: "2025-09-04T06:14:44Z"
  generation: 1
  name: llm-route
  namespace: default
  resourceVersion: "60397"
  uid: 6b2bade6-ecf7-4dbf-9f1f-d2ba6c836cf1
spec:
  parentRefs:
  - group: gateway.networking.k8s.io
    kind: Gateway
    name: inference-gateway
  rules:
  - backendRefs:
    - group: inference.networking.x-k8s.io
      kind: InferencePool
      name: vllm-llama3-8b-instruct
      weight: 1
    matches:
    - path:
        type: PathPrefix
        value: /
    timeouts:
      request: 300s
status:
  parents:
  - conditions:
    - lastTransitionTime: "2025-09-04T06:14:44Z"
      message: InferencePool "vllm-llama3-8b-instruct" not found
      observedGeneration: 1
      reason: BackendNotFound
      status: "False"
      type: ResolvedRefs
    - lastTransitionTime: "2025-09-04T06:14:44Z"
      message: ""
      observedGeneration: 1
      reason: Accepted
      status: "True"
      type: Accepted
    controllerName: kgateway.dev/kgateway
    parentRef:
      group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
Here are my resources:
root@controller-01:~/llm-d# kubectl get pod
NAME READY STATUS RESTARTS AGE
inference-gateway-9c9675965-r5vsb 1/1 Running 0 18m
vllm-llama3-8b-instruct-6c9757687-4b5jt 1/1 Running 0 3h22m
vllm-llama3-8b-instruct-6c9757687-cw4h4 1/1 Running 0 3h22m
vllm-llama3-8b-instruct-6c9757687-f6zc2 1/1 Running 0 3h22m
vllm-llama3-8b-instruct-epp-7fc7b677f-b4sqj 1/1 Running 0 17m
root@controller-01:~/llm-d# kubectl get InferencePool
NAME AGE
vllm-llama3-8b-instruct 17m
And curl returns a 500 response:
root@controller-01:~/llm-d# curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
"model": "food-review",
"prompt": "Write as if you were a critic: San Francisco",
"max_tokens": 100,
"temperature": 0
}'
HTTP/1.1 500 Internal Server Error
date: Thu, 04 Sep 2025 06:29:54 GMT
server: envoy
connection: close
content-length: 0
Metadata
Metadata
Assignees
Labels
kind/bug: Categorizes issue or PR as related to a bug. needs-triage: Indicates an issue or PR lacks a `triage/foo` label and requires one.