Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding template for Metrocluster diagnostics check #2601

Merged
merged 14 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions cmd/collectors/rest/plugins/metroclustercheck/metroclustercheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package metroclustercheck

import (
"github.com/netapp/harvest/v2/cmd/poller/plugin"
"github.com/netapp/harvest/v2/pkg/matrix"
"github.com/netapp/harvest/v2/pkg/tree/node"
"github.com/tidwall/gjson"
)

// MetroclusterCheck is a REST collector plugin that turns the per-object
// check results found in the parent object's labels into status metrics.
type MetroclusterCheck struct {
	// AbstractPlugin is embedded; the plugin uses its Parent, Object and Logger fields.
	*plugin.AbstractPlugin
	// data is the plugin-owned matrix, created in Init and rebuilt and returned on every Run.
	data *matrix.Matrix
}

// New wraps the given abstract plugin in a MetroclusterCheck.
// The plugin's matrix is not allocated here; that happens in Init.
func New(p *plugin.AbstractPlugin) plugin.Plugin {
	check := &MetroclusterCheck{
		AbstractPlugin: p,
	}
	return check
}

// Init initializes the embedded abstract plugin, allocates the
// metrocluster_check matrix, registers the labels used as instance keys on
// export, and creates one float64 metric per checked object type.
// It returns the first error reported by InitAbc or createMetric.
func (m *MetroclusterCheck) Init() error {
	if err := m.InitAbc(); err != nil {
		return err
	}

	m.data = matrix.New(m.Parent+".Metrocluster", "metrocluster_check", "metrocluster_check")

	// Labels that identify an exported instance.
	exportOptions := node.NewS("export_options")
	instanceKeys := exportOptions.NewChildS("instance_keys", "")
	for _, label := range []string{"result", "name", "node", "aggregate", "volume"} {
		instanceKeys.NewChildS("", label)
	}
	m.data.SetExportOptions(exportOptions)

	// One status metric per object type handled by update.
	for _, metric := range []string{"cluster_status", "node_status", "aggr_status", "volume_status"} {
		if err := m.createMetric(metric); err != nil {
			return err
		}
	}

	return nil
}

// Run rebuilds the plugin matrix from the parent object's matrix.
//
// The plugin matrix is purged and reset first, so each call starts from an
// empty state. Every instance of the parent matrix is then marked as
// non-exportable and its "cluster", "node", "aggregate" and "volume" labels
// are passed to update, which creates the per-check instances and metrics.
//
// It returns a single-element slice holding the plugin matrix. When dataMap
// has no matrix for m.Object there is nothing to process and Run returns
// nil, nil instead of dereferencing a nil matrix.
func (m *MetroclusterCheck) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, error) {
	// Purge and reset data
	m.data.PurgeInstances()
	m.data.Reset()

	data := dataMap[m.Object]
	if data == nil {
		// A missing map entry yields a nil *matrix.Matrix; bail out before using it.
		return nil, nil
	}

	// Set all global labels
	m.data.SetGlobalLabels(data.GetGlobalLabels())

	for _, instance := range data.GetInstances() {
		// The parent instances only carry the raw details; they are not exported themselves.
		instance.SetExportable(false)
		m.update(instance.GetLabel("cluster"), "cluster")
		m.update(instance.GetLabel("node"), "node")
		m.update(instance.GetLabel("aggregate"), "aggregate")
		m.update(instance.GetLabel("volume"), "volume")
	}

	return []*matrix.Matrix{m.data}, nil
}

// update creates one instance in the plugin matrix for every check found in
// objectInfo and records its result in the status metric of the given object.
//
// objectInfo is the raw JSON text of one diagnostics section. It is expected
// to hold a "details" array whose elements carry "cluster.name", "node.name",
// "aggregate.name", "volume.name" and a "checks" array of {"name", "result"}
// entries. An empty objectInfo is ignored.
//
// object selects both the metric and the instance-key layout and must be one
// of "cluster", "node", "aggregate" or "volume"; any other value is ignored.
// Checks whose instance cannot be created are logged and skipped.
func (m *MetroclusterCheck) update(objectInfo string, object string) {
	if objectInfo == "" {
		return
	}

	// Resolve the target metric once; it does not change while iterating.
	var metricName string
	switch object {
	case "volume":
		metricName = "volume_status"
	case "aggregate":
		metricName = "aggr_status"
	case "node":
		metricName = "node_status"
	case "cluster":
		metricName = "cluster_status"
	default:
		// Unknown object kind: there is no metric to populate and no key layout to use.
		return
	}

	objectInfoJSON := gjson.Result{Type: gjson.JSON, Raw: objectInfo}
	for _, detail := range objectInfoJSON.Get("details").Array() {
		clusterName := detail.Get("cluster.name").String()
		nodeName := detail.Get("node.name").String()
		aggregateName := detail.Get("aggregate.name").String()
		volumeName := detail.Get("volume.name").String()

		for _, check := range detail.Get("checks").Array() {
			name := check.Get("name").String()
			result := check.Get("result").String()

			// The key is built from the names that identify this object kind plus the check name.
			var key string
			switch object {
			case "volume":
				key = clusterName + nodeName + aggregateName + volumeName + name
			case "aggregate":
				key = clusterName + nodeName + aggregateName + name
			case "node":
				key = clusterName + nodeName + name
			case "cluster":
				key = clusterName + name
			}

			newDetailInstance, err := m.data.NewInstance(key)
			if err != nil {
				m.Logger.Error().Err(err).Str("key", key).Msg("Failed to create metrocluster check instance")
				continue
			}
			newDetailInstance.SetLabel("name", name)
			newDetailInstance.SetLabel("result", result)
			newDetailInstance.SetLabel("volume", volumeName)
			newDetailInstance.SetLabel("aggregate", aggregateName)
			newDetailInstance.SetLabel("node", nodeName)

			m.setValue(metricName, newDetailInstance, result)
		}
	}
}

// createMetric registers a float64 metric in the plugin matrix, using
// metricName as both arguments to NewMetricFloat64. A failure is logged and
// returned to the caller.
func (m *MetroclusterCheck) createMetric(metricName string) error {
	_, err := m.data.NewMetricFloat64(metricName, metricName)
	if err != nil {
		m.Logger.Error().Stack().Err(err).Msg("add metric")
	}
	return err
}
// setValue stores the numeric form of a check result on the named metric for
// the given instance: 1 when result is "ok", 0 for anything else.
// The outcome is only logged; no error is returned.
func (m *MetroclusterCheck) setValue(metricName string, newDetailInstance *matrix.Instance, result string) {
	// Zero value (0) stands for every result other than "ok".
	var value float64
	if result == "ok" {
		value = 1
	}

	err := m.data.GetMetric(metricName).SetValueFloat64(newDetailInstance, value)
	if err != nil {
		m.Logger.Error().Stack().Err(err).Float64("value", value).Msg("Failed to parse value")
		return
	}
	m.Logger.Debug().Float64("value", value).Msg("added value")
}
14 changes: 13 additions & 1 deletion cmd/collectors/rest/rest.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package rest

import (
"errors"
"fmt"
"github.com/netapp/harvest/v2/cmd/collectors"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/certificate"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/disk"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/health"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/metroclustercheck"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/netroute"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/ontaps3service"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/qospolicyadaptive"
Expand Down Expand Up @@ -424,6 +426,8 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin
return qospolicyadaptive.New(abc)
case "OntapS3Service":
return ontaps3service.New(abc)
case "MetroclusterCheck":
return metroclustercheck.New(abc)
default:
r.Logger.Warn().Str("kind", kind).Msg("no rest plugin found ")
}
Expand Down Expand Up @@ -580,12 +584,20 @@ func (r *Rest) GetRestData(href string) ([]gjson.Result, error) {

result, err := rest.Fetch(r.Client, href)
if err != nil {
return nil, fmt.Errorf("failed to fetch data: %w", err)
return r.handleError(err)
}

return result, nil
}

// handleError converts a fetch error into the error returned to the caller.
// The result slice is always nil.
//
// When err is a REST error matching errs.MetroClusterNotConfigured, the
// returned error joins errs.ErrAPIRequestRejected with an
// errs.ErrMetroClusterNotConfigured error carrying the original message.
// Any other error is wrapped with a "failed to fetch data" prefix.
func (r *Rest) handleError(err error) ([]gjson.Result, error) {
	if !errs.IsRestErr(err, errs.MetroClusterNotConfigured) {
		return nil, fmt.Errorf("failed to fetch data: %w", err)
	}

	// MetroCluster is not configured, return ErrMetroClusterNotConfigured
	notConfigured := errs.New(errs.ErrMetroClusterNotConfigured, err.Error())
	return nil, errors.Join(errs.ErrAPIRequestRejected, notConfigured)
}

func (r *Rest) CollectAutoSupport(p *collector.Payload) {
var exporterTypes []string
for _, exporter := range r.Exporters {
Expand Down
9 changes: 7 additions & 2 deletions cmd/poller/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,13 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
} else if errors.Is(err, errs.ErrAPIRequestRejected) {
// API was rejected, this happens when a resource is not available or does not exist
c.Schedule.SetStandByModeMax(task, 1*time.Hour)
// Log as info since some of these aren't errors
c.Logger.Info().Err(err).Str("task", task.Name).Msg("Entering standby mode")
// Log metro cluster at trace level
if errors.Is(err, errs.ErrMetroClusterNotConfigured) {
c.Logger.Trace().Err(err).Str("task", task.Name).Msg("Entering standby mode")
} else {
// Log as info since these are not errors.
c.Logger.Info().Err(err).Str("task", task.Name).Msg("Entering standby mode")
}
} else {
c.Logger.Error().Err(err).Str("task", task.Name).Send()
}
Expand Down
20 changes: 20 additions & 0 deletions conf/rest/9.12.0/metrocluster_check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: MetroclusterCheck
Hardikl marked this conversation as resolved.
Show resolved Hide resolved
# REST endpoint queried by this template.
query: api/cluster/metrocluster/diagnostics
object: metrocluster_check

# Each diagnostics section is collected under a label of the same name;
# the MetroclusterCheck plugin reads these labels and parses their JSON details.
counters:
- ^aggregate => aggregate
- ^cluster => cluster
- ^node => node
- ^volume => volume
# NOTE(review): presumably these fields are only returned when requested explicitly - confirm.
- hidden_fields:
- cluster
- aggregate
- node
- volume
Hardikl marked this conversation as resolved.
Show resolved Hide resolved

plugins:
- MetroclusterCheck # Creates metrics from details collected above

export_options:
include_all_labels: true
1 change: 1 addition & 0 deletions conf/rest/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ objects:
LIF: lif.yaml
Health: health.yaml
Lun: lun.yaml
MetroclusterCheck: metrocluster_check.yaml
# Mediator: mediator.yaml
Namespace: namespace.yaml
# NDMPSession: ndmp_sessions.yaml
Expand Down
Loading
Loading