Skip to content

Commit

Permalink
Merge pull request #4333 from tejal29/when_to_show_events
Browse files Browse the repository at this point in the history
fetch events correctly by adding v1 group version kind to runtime schema
  • Loading branch information
balopat authored Jun 18, 2020
2 parents 68c2864 + e43a91d commit 0bf498d
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 182 deletions.
24 changes: 18 additions & 6 deletions docs/content/en/api/skaffold.swagger.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docs/content/en/docs/references/api/grpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -741,10 +741,12 @@ BUILD, DEPLOY, STATUSCHECK, DEVINIT
| STATUSCHECK_NODE_UNREACHABLE | 405 | Node unreachable error |
| STATUSCHECK_NODE_NOT_READY | 406 | Node not ready error |
| STATUSCHECK_FAILED_SCHEDULING | 407 | Scheduler failure error |
| STATUSCHECK_UNHEALTHY | 408 | Readiness probe failed |
| UNKNOWN_ERROR | 500 | Could not determine error and phase |
| STATUSCHECK_UNKNOWN | 501 | Status Check error unknown |
| STATUSCHECK_UNKNOWN_UNSCHEDULABLE | 502 | Container is unschedulable due to unknown reasons |
| STATUSCHECK_CONTAINER_WAITING_UNKNOWN | 503 | Container is waiting due to unknown reason |
| STATUSCHECK_UNKNOWN_EVENT | 509 | Container event reason unknown |
| DEPLOY_UNKNOWN | 504 | Deploy failed due to unknown reason |
| SYNC_UNKNOWN | 505 | SYNC failed due to unknown reason |
| BUILD_UNKNOWN | 506 | Build failed due to unknown reason |
Expand Down
41 changes: 22 additions & 19 deletions pkg/diag/validator/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,16 @@ const (
errImagePullBackOff = "ErrImagePullBackOff"
containerCreating = "ContainerCreating"
podKind = "pod"

failedScheduling = "FailedScheduling"
unhealthy = "Unhealthy"
)

var (
runContainerRe = regexp.MustCompile(errorPrefix)
taintsRe = regexp.MustCompile(taintsExp)
// for testing
runCli = executeCLI

unknownFailures = map[proto.StatusCode]struct{}{
proto.StatusCode_STATUSCHECK_UNKNOWN: {},
proto.StatusCode_STATUSCHECK_UNKNOWN_UNSCHEDULABLE: {},
proto.StatusCode_STATUSCHECK_CONTAINER_WAITING_UNKNOWN: {},
}
)

// PodValidator implements the Validator interface for Pods
Expand Down Expand Up @@ -188,34 +185,40 @@ func getUntoleratedTaints(reason string, message string) (proto.StatusCode, erro
}

func processPodEvents(e corev1.EventInterface, pod v1.Pod, ps *podStatus) {
// if failures are known, return
if _, ok := unknownFailures[ps.statusCode]; !ok {
return
}

// Get pod events.
events, err := e.Search(runtime.NewScheme(), &pod)
scheme := runtime.NewScheme()
scheme.AddKnownTypes(v1.SchemeGroupVersion, &pod)
events, err := e.Search(scheme, &pod)
if err != nil {
logrus.Debugf("could not fetch events for resource %s due to %v", pod.Name, err)
return
}
// find the latest failed event.
var recentEvent *v1.Event
for _, event := range events.Items {
for _, e := range events.Items {
if e.Type == v1.EventTypeNormal {
continue
}
event := e.DeepCopy()
if recentEvent == nil || recentEvent.EventTime.Before(&event.EventTime) {
recentEvent = &event
recentEvent = event
}
}
if recentEvent == nil || recentEvent.Type == v1.EventTypeNormal {
if recentEvent == nil {
return
}
if recentEvent.Reason == "FailedScheduling" {
switch recentEvent.Reason {
case failedScheduling:
ps.statusCode = proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING
ps.err = fmt.Errorf(recentEvent.Message)
return
case unhealthy:
ps.statusCode = proto.StatusCode_STATUSCHECK_UNHEALTHY
ps.err = fmt.Errorf(recentEvent.Message)
default:
// TODO: Add unique error codes for reasons
ps.statusCode = proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT
ps.err = fmt.Errorf("%s: %s", recentEvent.Reason, recentEvent.Message)
}
// TODO: Add unique error codes for reasons
ps.err = fmt.Errorf("%s: %s", recentEvent.Reason, recentEvent.Message)
}

type podStatus struct {
Expand Down
51 changes: 45 additions & 6 deletions pkg/diag/validator/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
Expand All @@ -37,6 +38,8 @@ func TestRun(t *testing.T) {
output []byte
err error
}
before := time.Now()
after := before.Add(3 * time.Second)
tests := []struct {
description string
pods []*v1.Pod
Expand Down Expand Up @@ -303,10 +306,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN, nil)},
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
},
{
description: "pod condition a warning event followed up normal",
description: "pod condition a warning event followed up normal event",
pods: []*v1.Pod{{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Expand All @@ -326,17 +329,19 @@ func TestRun(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{Name: "one", Namespace: "test"},
Reason: "eventCode", Type: "Warning", Message: "dummy event",
EventTime: metav1.MicroTime{Time: before},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "two", Namespace: "test"},
Reason: "eventCode", Type: "Normal", Message: "success",
Reason: "Created", Type: "Normal", Message: "Container Created",
EventTime: metav1.MicroTime{Time: after},
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("could not determine"), proto.StatusCode_STATUSCHECK_UNKNOWN, nil)},
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
},
{
description: "pod condition a warning event followed up normal adds last warning seen",
description: "pod condition a normal event followed by a warning event",
pods: []*v1.Pod{{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Expand All @@ -353,13 +358,47 @@ func TestRun(t *testing.T) {
},
}},
events: []v1.Event{
{
ObjectMeta: metav1.ObjectMeta{Name: "two", Namespace: "test"},
Reason: "Created", Type: "Normal", Message: "Container Created",
EventTime: metav1.MicroTime{Time: before},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "one", Namespace: "test"},
Reason: "eventCode", Type: "Warning", Message: "dummy event",
EventTime: metav1.MicroTime{Time: after},
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
},
{
description: "pod condition a warning event followed up by warning adds last warning seen",
pods: []*v1.Pod{{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "test",
},
TypeMeta: metav1.TypeMeta{Kind: "Pod"},
Status: v1.PodStatus{
Phase: v1.PodPending,
Conditions: []v1.PodCondition{{
Type: v1.PodScheduled,
Status: v1.ConditionUnknown,
Message: "could not determine",
}},
},
}},
events: []v1.Event{
{
ObjectMeta: metav1.ObjectMeta{Name: "two", Namespace: "test"}, Reason: "FailedScheduling", Type: "Warning",
Message: "0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate",
Message: "0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate",
EventTime: metav1.MicroTime{Time: after},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "one", Namespace: "test"},
Reason: "eventCode", Type: "Warning", Message: "dummy event",
EventTime: metav1.MicroTime{Time: before},
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
Expand Down
Loading

0 comments on commit 0bf498d

Please sign in to comment.