Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add corrected healthcheck for InferenceService #34

Merged
merged 1 commit into from
Nov 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions templates/plumbing/argocd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,70 @@ spec:
hs.message = "Waiting for PVC"
return hs

# FIXME(bandini): To be dropped once 49431b9add9393ef2ece78d4f63968b6c10276e0 in argocd is in a released version
- kind: InferenceService
group: serving.kserve.io
check: |
-- Health check for serving.kserve.io InferenceService resources.
--
-- Reads the global `obj` (the resource under evaluation; presumably injected
-- by the Argo CD health-check runtime -- confirm against the host) and returns
-- a table with two fields:
--   status  : "Healthy", "Progressing" or "Degraded"
--   message : human-readable summary of everything that is not yet ready
--
-- Decision rules, evaluated only once `obj.status.conditions` is present:
--   * Healthy     - no transition pending/failed and every condition is "True"
--   * Degraded    - modelStatus.transitionStatus is neither "UpToDate" nor
--                   "InProgress"
--   * Progressing - everything else (including conditions that are "False" or
--                   "Unknown" while the transition itself has not failed)
-- Until conditions are reported the default "Progressing" result is returned.
local health_status = {}

health_status.status = "Progressing"
health_status.message = "Waiting for InferenceService to report status..."

if obj.status ~= nil then

  local progressing = false
  local degraded = false
  local status_false = 0
  local status_unknown = 0
  local msg = ""

  if obj.status.modelStatus ~= nil then
    local transition = obj.status.modelStatus.transitionStatus
    if transition ~= "UpToDate" then
      if transition == "InProgress" then
        progressing = true
      else
        degraded = true
      end
      -- tostring() keeps an absent transitionStatus from aborting the script
      -- with "attempt to concatenate a nil value".
      msg = msg .. "0: transitionStatus | " .. tostring(transition)
    end
  end

  if obj.status.conditions ~= nil then
    -- conditions is a list: ipairs guarantees in-order traversal, so the
    -- indices embedded in the message are stable.
    for i, condition in ipairs(obj.status.conditions) do

      if condition.status == "Unknown" then
        status_unknown = status_unknown + 1
      elseif condition.status == "False" then
        status_false = status_false + 1
      end

      if condition.status ~= "True" then
        -- type/status are passed through tostring() so a condition that
        -- lacks either field is still reported instead of raising an error.
        msg = msg .. " | " .. i .. ": " .. tostring(condition.type) .. " | " .. tostring(condition.status)
        if condition.reason ~= nil and condition.reason ~= "" then
          msg = msg .. " | " .. condition.reason
        end
        if condition.message ~= nil and condition.message ~= "" then
          msg = msg .. " | " .. condition.message
        end
      end

    end

    if not progressing and not degraded and status_unknown == 0 and status_false == 0 then
      health_status.status = "Healthy"
      msg = "InferenceService is healthy."
    elseif not degraded then
      -- The former extra test `status_unknown >= 0` was always true and has
      -- been dropped; only a failed transition yields Degraded.
      health_status.status = "Progressing"
    else
      health_status.status = "Degraded"
    end

    health_status.message = msg
  end
end

return health_status

resourceTrackingMethod: {{ $.Values.clusterGroup.argoCD.resourceTrackingMethod}}
applicationInstanceLabelKey: argocd.argoproj.io/instance
applicationSet:
Expand Down