Skip to content

Commit

Permalink
Adds memory usage to the metadata (#364)
Browse files Browse the repository at this point in the history
  • Loading branch information
0sewa0 authored Mar 23, 2021
1 parent 56d6896 commit 5b8e5dc
Show file tree
Hide file tree
Showing 2 changed files with 297 additions and 27 deletions.
220 changes: 220 additions & 0 deletions docs/insights-archive-sample/insights-operator/gathers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
{
"status_reports": [
{
"name": "clusterconfig.GatherMostRecentMetrics",
"duration_in_ms": 0,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherSAPConfig",
"duration_in_ms": 204,
"records_count": 0,
"errors": [
"datahubs.installers.datahub.sap.com is forbidden: User \"system:serviceaccount:openshift-insights:gather\" cannot list resource \"datahubs\" in API group \"installers.datahub.sap.com\" at the cluster scope"
]
},
{
"name": "clusterconfig.GatherClusterVersion",
"duration_in_ms": 205,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherClusterImagePruner",
"duration_in_ms": 207,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherOLMOperators",
"duration_in_ms": 335,
"records_count": 0,
"errors": [
"operators.operators.coreos.com is forbidden: User \"system:serviceaccount:openshift-insights:gather\" cannot list resource \"operators\" in API group \"operators.coreos.com\" at the cluster scope"
]
},
{
"name": "clusterconfig.GatherClusterNetwork",
"duration_in_ms": 337,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherContainerRuntimeConfig",
"duration_in_ms": 338,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherHostSubnet",
"duration_in_ms": 339,
"records_count": 6,
"errors": null
},
{
"name": "clusterconfig.GatherClusterInfrastructure",
"duration_in_ms": 470,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherClusterAuthentication",
"duration_in_ms": 871,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherClusterIngress",
"duration_in_ms": 879,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherClusterProxy",
"duration_in_ms": 882,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherMachineSet",
"duration_in_ms": 891,
"records_count": 3,
"errors": null
},
{
"name": "clusterconfig.GatherClusterID",
"duration_in_ms": 1002,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherClusterOAuth",
"duration_in_ms": 1152,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherSAPVsystemIptablesLogs",
"duration_in_ms": 1289,
"records_count": 0,
"errors": [
"datahubs.installers.datahub.sap.com is forbidden: User \"system:serviceaccount:openshift-insights:gather\" cannot list resource \"datahubs\" in API group \"installers.datahub.sap.com\" at the cluster scope"
]
},
{
"name": "clusterconfig.GatherClusterImageRegistry",
"duration_in_ms": 1289,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherCertificateSigningRequests",
"duration_in_ms": 1424,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherSAPPods",
"duration_in_ms": 1558,
"records_count": 0,
"errors": [
"datahubs.installers.datahub.sap.com is forbidden: User \"system:serviceaccount:openshift-insights:gather\" cannot list resource \"datahubs\" in API group \"installers.datahub.sap.com\" at the cluster scope"
]
},
{
"name": "clusterconfig.GatherPodDisruptionBudgets",
"duration_in_ms": 1560,
"records_count": 2,
"errors": null
},
{
"name": "clusterconfig.GatherClusterFeatureGates",
"duration_in_ms": 1699,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherNetNamespace",
"duration_in_ms": 2176,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherMachineConfigPool",
"duration_in_ms": 2228,
"records_count": 2,
"errors": null
},
{
"name": "clusterconfig.GatherCRD",
"duration_in_ms": 2396,
"records_count": 2,
"errors": null
},
{
"name": "clusterconfig.GatherNodes",
"duration_in_ms": 2640,
"records_count": 6,
"errors": null
},
{
"name": "clusterconfig.GatherConfigMaps",
"duration_in_ms": 3047,
"records_count": 10,
"errors": [
"configmaps \"cluster-monitoring-config\" not found"
]
},
{
"name": "clusterconfig.GatherOpenShiftAPIServerOperatorLogs",
"duration_in_ms": 3316,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherOpenshiftAuthenticationLogs",
"duration_in_ms": 3578,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherOpenshiftSDNControllerLogs",
"duration_in_ms": 3578,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherContainerImages",
"duration_in_ms": 4162,
"records_count": 19,
"errors": null
},
{
"name": "clusterconfig.GatherOpenshiftSDNLogs",
"duration_in_ms": 5635,
"records_count": 0,
"errors": null
},
{
"name": "clusterconfig.GatherClusterOperators",
"duration_in_ms": 7744,
"records_count": 31,
"errors": null
},
{
"name": "clusterconfig.GatherInstallPlans",
"duration_in_ms": 13408,
"records_count": 1,
"errors": null
},
{
"name": "clusterconfig.GatherServiceAccounts",
"duration_in_ms": 14211,
"records_count": 1,
"errors": null
}
],
"memory_alloc_bytes": 20949008,
"uptime_seconds": 58.282
}
104 changes: 77 additions & 27 deletions pkg/gather/clusterconfig/0_gatherer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ import (
"github.com/openshift/insights-operator/pkg/record"
)

// gatherMetadata is the object recordGatherReport marshals to JSON for the
// "insights-operator/gathers" record. It combines the per-gatherer status
// reports with two process-level measurements taken when the report is written.
type gatherMetadata struct {
// StatusReports holds the status reports collected for the gatherers.
StatusReports []gatherStatusReport `json:"status_reports"`
// MemoryAlloc is filled from runtime.MemStats.HeapAlloc, i.e. bytes of
// allocated heap objects at the time the report is recorded.
MemoryAlloc uint64 `json:"memory_alloc_bytes"`
// Uptime is the time elapsed since the package-level startTime (set by New),
// truncated to whole milliseconds and expressed in seconds.
Uptime float64 `json:"uptime_seconds"`
}

type gatherStatusReport struct {
Name string `json:"name"`
Duration time.Duration `json:"duration_in_ms"`
Expand All @@ -41,6 +47,27 @@ type gathering struct {
canFail bool
}

// GatherInfo bundles everything needed to report on one finished gatherer:
// its registered function, the result it produced, and the data derived from
// them through reflection. Instances are built by NewGatherInfo.
type GatherInfo struct {
// name is the gather function's name as resolved by runtime.FuncForPC.
name string
// result is the gatherResult extracted from the reflect.Value handed to
// NewGatherInfo.
result gatherResult
// function is the gather function registered in gatherFunctions.
function gatherFunction
// canFail mirrors the registry's canFail flag; when it is set,
// createStatusReport only logs the gatherer's errors instead of returning them.
canFail bool
// rvString is the String() form of the reflect.Value handed to
// NewGatherInfo, kept for logging.
rvString string
}

// NewGatherInfo builds the GatherInfo for the gatherer registered in
// gatherFunctions under the key gather, using rv as the reflected value that
// carries the gatherer's result.
//
// The registry is looked up once. If rv is invalid, cannot be converted to an
// interface, or does not hold a gatherResult, the result field is left at its
// zero value instead of panicking, so a misbehaving gatherer cannot take down
// the whole gathering loop.
func NewGatherInfo(gather string, rv reflect.Value) *GatherInfo {
	entry := gatherFunctions[gather]

	// Value.Interface panics on the zero Value, and an unchecked type
	// assertion panics on any other dynamic type; guard against both.
	var result gatherResult
	if rv.IsValid() && rv.CanInterface() {
		result, _ = rv.Interface().(gatherResult)
	}

	return &GatherInfo{
		name:     runtime.FuncForPC(reflect.ValueOf(entry.function).Pointer()).Name(),
		result:   result,
		function: entry.function,
		canFail:  entry.canFail,
		rvString: rv.String(),
	}
}

// important wraps a gather function in a gathering whose canFail flag is
// false.
func important(function gatherFunction) gathering {
	return gathering{
		function: function,
		canFail:  false,
	}
}
Expand Down Expand Up @@ -87,8 +114,11 @@ var gatherFunctions = map[string]gathering{
"olm_operators": failable(GatherOLMOperators),
}

var startTime time.Time

// New creates new Gatherer
func New(gatherKubeConfig *rest.Config, gatherProtoKubeConfig *rest.Config, metricsGatherKubeConfig *rest.Config) *Gatherer {
startTime = time.Now()
return &Gatherer{
gatherKubeConfig: gatherKubeConfig,
gatherProtoKubeConfig: gatherProtoKubeConfig,
Expand Down Expand Up @@ -117,36 +147,19 @@ func (g *Gatherer) Gather(ctx context.Context, gatherList []string, recorder rec
}

// Gets the info from the Go routines
remaining := len(cases)
for remaining > 0 {
for range gatherList {
chosen, value, _ := reflect.Select(cases)
// The chosen channel has been closed, so zero out the channel to disable the case
cases[chosen].Chan = reflect.ValueOf(nil)
remaining -= 1

elapsed := time.Since(starts[chosen]).Truncate(time.Millisecond)

gatherResults, _ := value.Interface().(gatherResult)
gatherFunc := gatherFunctions[gatherList[chosen]].function
gatherCanFail := gatherFunctions[gatherList[chosen]].canFail
gatherName := runtime.FuncForPC(reflect.ValueOf(gatherFunc).Pointer()).Name()
klog.V(4).Infof("Gather %s took %s to process %d records", gatherName, elapsed, len(gatherResults.records))
gatherReport = append(gatherReport, gatherStatusReport{gatherName, time.Duration(elapsed.Milliseconds()), len(gatherResults.records), extractErrors(gatherResults.errors)})

if gatherCanFail {
for _, err := range gatherResults.errors {
klog.V(5).Infof("Couldn't gather %s' received following error: %s\n", gatherName, err.Error())
}
} else {
errors = append(errors, extractErrors(gatherResults.errors)...)
}
for _, record := range gatherResults.records {
if err := recorder.Record(record); err != nil {
errors = append(errors, fmt.Sprintf("unable to record %s: %v", record.Name, err))
continue
}
gather := gatherList[chosen]

gi := NewGatherInfo(gather, value)
statusReport, errorsReport := createStatusReport(gi, recorder, starts[chosen])

if len(errorsReport) > 0 {
errors = append(errors, errorsReport...)
}
klog.V(5).Infof("Read from %s's channel and received %s\n", gatherName, value.String())
gatherReport = append(gatherReport, statusReport)
}

// Creates the gathering performance report
Expand All @@ -160,6 +173,40 @@ func (g *Gatherer) Gather(ctx context.Context, gatherList []string, recorder rec
return nil
}

// createStatusReport turns one finished gatherer into its status report and
// stores the records it produced through recorder.
//
// starts is the time the gatherer was launched; the reported duration is the
// time elapsed since then, truncated to milliseconds. The returned string
// slice contains the gatherer's own errors (only when the gatherer is not
// allowed to fail) followed by any errors hit while recording its records.
func createStatusReport(gather *GatherInfo, recorder record.Interface, starts time.Time) (gatherStatusReport, []string) {
	took := time.Since(starts).Truncate(time.Millisecond)
	recordCount := len(gather.result.records)

	klog.V(4).Infof("Gather %s took %s to process %d records", gather.name, took, recordCount)

	// The report carries the function name without the repository prefix and
	// the duration as a plain millisecond count.
	report := gatherStatusReport{
		strings.Replace(gather.name, "github.com/openshift/insights-operator/pkg/gather/", "", 1),
		time.Duration(took.Milliseconds()),
		recordCount,
		extractErrors(gather.result.errors),
	}

	var failures []string
	if !gather.canFail {
		failures = extractErrors(gather.result.errors)
	} else {
		// Errors of gatherers that may fail are only logged.
		for _, gatherErr := range gather.result.errors {
			klog.V(5).Infof("Couldn't gather %s' received following error: %s\n", gather.name, gatherErr.Error())
		}
	}

	failures = append(failures, recordStatusReport(recorder, gather.result.records)...)
	klog.V(5).Infof("Read from %s's channel and received %s\n", gather.name, gather.rvString)

	return report, failures
}

// recordStatusReport passes every record to recorder.Record and returns one
// formatted message per record that could not be stored. The result is nil
// when every record was stored successfully.
func recordStatusReport(recorder record.Interface, records []record.Record) []string {
	var failed []string
	for i := range records {
		rec := records[i]
		err := recorder.Record(rec)
		if err == nil {
			continue
		}
		failed = append(failed, fmt.Sprintf("unable to record %s: %v", rec.Name, err))
	}
	return failed
}

// Runs each gather function in a goroutine.
// Every gather function is given its own channel to send back the results.
// 1. return value: `cases` list, used for dynamically reading from the channels.
Expand Down Expand Up @@ -191,7 +238,10 @@ func (g *Gatherer) startGathering(gatherList []string, errors *[]string) ([]refl
}

// recordGatherReport stores the gathering metadata under the
// "insights-operator/gathers" record.
//
// The metadata consists of the given status reports, the current heap
// allocation in bytes (runtime.MemStats.HeapAlloc) and the time elapsed since
// the package-level startTime, truncated to milliseconds and expressed in
// seconds. The error returned by recorder.Record is passed through unchanged.
func recordGatherReport(recorder record.Interface, report []gatherStatusReport) error {
	var mem runtime.MemStats
	runtime.ReadMemStats(&mem)

	metadata := gatherMetadata{
		StatusReports: report,
		MemoryAlloc:   mem.HeapAlloc,
		Uptime:        time.Since(startTime).Truncate(time.Millisecond).Seconds(),
	}

	// Only the metadata wrapper is recorded; the bare report slice is already
	// contained in it as StatusReports.
	return recorder.Record(record.Record{
		Name: "insights-operator/gathers",
		Item: record.JSONMarshaller{Object: metadata},
	})
}

Expand Down

0 comments on commit 5b8e5dc

Please sign in to comment.