Skip to content

Commit

Permalink
Chore(checks): Make the default health check tunable and remove AUT and Aux checks from infra experiments (#576)
Browse files Browse the repository at this point in the history

Signed-off-by: uditgaurav <udit@chaosnative.com>

Signed-off-by: uditgaurav <udit@chaosnative.com>
  • Loading branch information
uditgaurav authored Sep 30, 2022
1 parent b69ed69 commit d4e05db
Show file tree
Hide file tree
Showing 42 changed files with 721 additions and 933 deletions.
4 changes: 2 additions & 2 deletions contribute/developer-guide/templates/experiment_k8s.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func Experiment(clients clients.ClientSets){
// POD STATUS CHECKS FOR THE APPLICATION UNDER TEST AND AUXILIARY APPLICATIONS ARE ADDED BY DEFAULT

//PRE-CHAOS APPLICATION STATUS CHECK
if chaosDetails.DefaultAppHealthCheck {
if chaosDetails.DefaultHealthCheck {
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
Expand Down Expand Up @@ -152,7 +152,7 @@ func Experiment(clients clients.ClientSets){
// POD STATUS CHECKS FOR THE APPLICATION UNDER TEST AND AUXILIARY APPLICATIONS ARE ADDED BY DEFAULT

//POST-CHAOS APPLICATION STATUS CHECK
if chaosDetails.DefaultAppHealthCheck {
if chaosDetails.DefaultHealthCheck {
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/probe"
"github.com/litmuschaos/litmus-go/pkg/result"
"github.com/litmuschaos/litmus-go/pkg/status"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -74,26 +73,6 @@ func AWSSSMChaosByID(clients clients.ClientSets) {
"Sequence": experimentsDetails.Sequence,
})

//PRE-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}

//PRE-CHAOS AUXILIARY APPLICATION STATUS CHECK
if experimentsDetails.AuxiliaryAppInfo != "" {
log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)")
if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
log.Errorf("Auxiliary Application status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify that the Auxiliary Applications are in running state, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
}

if experimentsDetails.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := "AUT: Running"
Expand Down Expand Up @@ -125,14 +104,16 @@ func AWSSSMChaosByID(clients clients.ClientSets) {
return
}

//Verify the aws ec2 instance is running (pre chaos)
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[pre-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
if chaosDetails.DefaultHealthCheck {
//Verify the aws ec2 instance is running (pre chaos)
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[pre-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
log.Info("[Status]: EC2 instance is in running state")
}
log.Info("[Status]: EC2 instance is in running state")

// Including the litmus lib for aws-ssm-chaos-by-id
switch experimentsDetails.ChaosLib {
Expand Down Expand Up @@ -160,33 +141,16 @@ func AWSSSMChaosByID(clients clients.ClientSets) {
log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName)
resultDetails.Verdict = v1alpha1.ResultVerdictPassed

//Verify the aws ec2 instance is running (post chaos)
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[post-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
log.Info("[Status]: EC2 instance is in running state (post chaos)")

//POST-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
if chaosDetails.DefaultHealthCheck {

//POST-CHAOS AUXILIARY APPLICATION STATUS CHECK
if experimentsDetails.AuxiliaryAppInfo != "" {
log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)")
if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
log.Errorf("Auxiliary Application status check failed, err: %v", err)
failStep := "[post-chaos]: Failed to verify that the Auxiliary Applications are running, err: " + err.Error()
//Verify the aws ec2 instance is running (post chaos)
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[post-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
log.Info("[Status]: EC2 instance is in running state (post chaos)")
}

if experimentsDetails.EngineName != "" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/probe"
"github.com/litmuschaos/litmus-go/pkg/result"
"github.com/litmuschaos/litmus-go/pkg/status"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -74,34 +73,16 @@ func AWSSSMChaosByTag(clients clients.ClientSets) {
"Sequence": experimentsDetails.Sequence,
})

//PRE-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}

//PRE-CHAOS AUXILIARY APPLICATION STATUS CHECK
if experimentsDetails.AuxiliaryAppInfo != "" {
log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)")
if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
log.Errorf("Auxiliary Application status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify that the Auxiliary Applications are in running state, err: " + err.Error()
if chaosDetails.DefaultHealthCheck {
//Verify that the instance has permission to perform SSM API calls
if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil {
log.Errorf("target instance status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
}

//Verify that the instance should have permission to perform ssm api calls
if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil {
log.Errorf("target instance status check failed, err: %v", err)
failStep := "[pre-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}

if experimentsDetails.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := "AUT: Running"
Expand Down Expand Up @@ -151,33 +132,15 @@ func AWSSSMChaosByTag(clients clients.ClientSets) {
log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName)
resultDetails.Verdict = v1alpha1.ResultVerdictPassed

//Verify the aws ec2 instance is running (post chaos)
if err := ec2.InstanceStatusCheckByTag(experimentsDetails.EC2InstanceTag, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[post-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
log.Info("[Status]: EC2 instance is in running state (post chaos)")

//POST-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}

//POST-CHAOS AUXILIARY APPLICATION STATUS CHECK
if experimentsDetails.AuxiliaryAppInfo != "" {
log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)")
if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
log.Errorf("Auxiliary Application status check failed, err: %v", err)
failStep := "[post-chaos]: Failed to verify that the Auxiliary Applications are running, err: " + err.Error()
if chaosDetails.DefaultHealthCheck {
//Verify the aws ec2 instance is running (post chaos)
if err := ec2.InstanceStatusCheckByTag(experimentsDetails.EC2InstanceTag, experimentsDetails.Region); err != nil {
log.Errorf("failed to get the ec2 instance status, err: %v", err)
failStep := "[post-chaos]: Failed to verify the AWS ec2 instance status, err: " + err.Error()
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
log.Info("[Status]: EC2 instance is in running state (post chaos)")
}

if experimentsDetails.EngineName != "" {
Expand Down
16 changes: 8 additions & 8 deletions experiments/cassandra/pod-delete/experiment/pod-delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func CasssandraPodDelete(clients clients.ClientSets) {
go common.AbortWatcher(experimentsDetails.ChaoslibDetail.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails)

//PRE-CHAOS APPLICATION STATUS CHECK
if chaosDetails.DefaultAppHealthCheck {
if chaosDetails.DefaultHealthCheck {
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
if err = status.AUTStatusCheck(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.TargetContainer, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
Expand All @@ -100,21 +100,21 @@ func CasssandraPodDelete(clients clients.ClientSets) {

if experimentsDetails.ChaoslibDetail.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "")
msg := common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "")

// run the probes in the pre-chaos check
if len(resultDetails.ProbeDetails) != 0 {

if err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil {
log.Errorf("Probes Failed, err: %v", err)
failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error()
msg = common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "Unsuccessful")
msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Unsuccessful")
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
msg = common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "Successful")
msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
}
// generating the events for the pre-chaos check
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails)
Expand Down Expand Up @@ -155,7 +155,7 @@ func CasssandraPodDelete(clients clients.ClientSets) {
resultDetails.Verdict = v1alpha1.ResultVerdictPassed

//POST-CHAOS APPLICATION STATUS CHECK
if chaosDetails.DefaultAppHealthCheck {
if chaosDetails.DefaultHealthCheck {
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
if err = status.AUTStatusCheck(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.TargetContainer, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients, &chaosDetails); err != nil {
log.Errorf("Application status check failed, err: %v", err)
Expand All @@ -178,20 +178,20 @@ func CasssandraPodDelete(clients clients.ClientSets) {

if experimentsDetails.ChaoslibDetail.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "")
msg := common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "")

// run the probes in the post-chaos check
if len(resultDetails.ProbeDetails) != 0 {
if err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil {
log.Errorf("Probes Failed, err: %v", err)
failStep := "[post-chaos]: Failed while running probes, err: " + err.Error()
msg = common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "Unsuccessful")
msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Unsuccessful")
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}
msg = common.GetStatusMessage(chaosDetails.DefaultAppHealthCheck, "AUT: Running", "Successful")
msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
}

// generating post chaos event
Expand Down
Loading

0 comments on commit d4e05db

Please sign in to comment.