diff --git a/README.md b/README.md index 710b430..0953ee1 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,8 @@ anka_instance_state_count | Count of Instances in a particular State (labels: ar anka_instance_state_per_template_count | Count of Instances in a particular state, per Template (labels: state, template_uuid, template_name) anka_instance_state_per_group_count | Count of Instances in a particular state, per Group (labels: state, group_name) -- | -- +anka_instance_max_age_per_template_seconds | Age of oldest Instance in a particular state, per Template (labels: state, template_uuid, template_name) +-- | -- anka_node_instance_count | Count of Instances running on the Node (labels: id, name, arch) anka_node_instance_capacity | Total Instance slots (capacity) on the Node (labels: id, name, arch) anka_node_states | Node state (1 = current state) (labels: id, name, state) diff --git a/VERSION b/VERSION index cc868b6..99eba4d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.0.1 \ No newline at end of file +4.1.0 \ No newline at end of file diff --git a/go.mod b/go.mod index 4458d37..cbeef33 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ toolchain go1.22.5 require ( github.com/prometheus/client_golang v1.20.5 - github.com/prometheus/exporter-toolkit v0.13.1 + github.com/prometheus/exporter-toolkit v0.13.2 ) require ( @@ -20,14 +20,14 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.60.1 // indirect + github.com/prometheus/common v0.61.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.29.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/net v0.32.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect + google.golang.org/protobuf v1.35.2 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index bd18e86..74e605b 100644 --- a/go.sum +++ b/go.sum @@ -33,30 +33,30 @@ github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+ github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= -github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= -github.com/prometheus/exporter-toolkit v0.13.1 h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04= -github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0= +github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= +github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= +github.com/prometheus/exporter-toolkit v0.13.2 h1:Z02fYtbqTMy2i/f+xZ+UK5jy/bl1Ex3ndzh06T/Q9DQ= +github.com/prometheus/exporter-toolkit v0.13.2/go.mod h1:tCqnfx21q6qN1KA4U3Bfb8uWzXfijIrJz3/kTIqMV7g= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= -golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/src/client/client.go b/src/client/client.go index 34722b9..b2774ae 100644 --- a/src/client/client.go +++ b/src/client/client.go @@ -2,6 +2,7 @@ package client import ( "fmt" + "io" "reflect" "runtime" "sync" @@ -26,8 +27,9 @@ type Client struct { func NewClient(addr, username, password string, interval int, certs ClientTLSCerts, uak UAK) (*Client, error) { communicator, err := NewCommunicator(addr, username, password, certs, uak) - if err != nil { - return nil, err + if err != nil || communicator == nil { + log.Error("Failed to create communicator") + return nil, fmt.Errorf("failed to create communicator: %v", err) } c := &Client{ events: map[events.Event][]func(interface{}) error{ @@ -42,7 +44,26 @@ func NewClient(addr, username, password string, interval int, certs ClientTLSCer errorTimeoutSeconds: 10, } if err := c.communicator.TestConnection(); err != nil { - log.Fatal(fmt.Sprintf("Error testing connection: %s", err.Error())) + response, err := c.communicator.getResponse("/api/v1/status", "", "") + if err != nil { + log.Error(fmt.Sprintf("Error getting response: %s", err.Error())) + } else { + bodyBytes := make([]byte, 1024) + for { + n, err := response.Body.Read(bodyBytes) + if n > 0 { + log.Error(fmt.Sprintf("call to %s returned %d code and body of '%s'", response.Request.URL, response.StatusCode, string(bodyBytes[:n]))) + return nil, fmt.Errorf("failed to test connection") + } + if err != nil { + if err != io.EOF { + log.Error(fmt.Sprintf("Error reading response body: %s", err.Error())) + return nil, fmt.Errorf("failed to test connection") + } + break + } + } + } return nil, err } return c, nil @@ -51,7 +72,10 @@ func NewClient(addr, username, password string, interval int, certs ClientTLSCer func (client *Client) Init() { // We must first populate the data from the Controller API that is going to be stored in state before we attempt to create metrics from it // Order matters here since GetVmsData for example relies on RegistryTemplatesData - client.communicator.GetRegistryTemplatesData() + _, err := client.communicator.GetRegistryTemplatesData() + if err != nil { + log.Error(fmt.Sprintf("Error getting registry templates data: %v", err)) + } go client.initDataLoop(client.communicator.GetNodesData, events.EVENT_NODE_UPDATED) go client.initDataLoop(client.communicator.GetVmsData, events.EVENT_VM_DATA_UPDATED) go client.initDataLoop(client.communicator.GetRegistryDiskData, events.EVENT_REGISTRY_DISK_DATA_UPDATED) diff --git a/src/client/communicator.go b/src/client/communicator.go index 5b006b5..4fa24db 100644 --- a/src/client/communicator.go +++ b/src/client/communicator.go @@ -153,7 +153,7 @@ func (comm *Communicator) GetRegistryTemplatesData() (interface{}, error) { resp := &types.RegistryTemplateResponse{} templates, err := comm.getData(endpoint, resp) if err != nil { - return nil, fmt.Errorf("getting registry templates error: %s", err) + return nil, fmt.Errorf("getting registry templates error: %s", err.Error()) } templatesArray := templates.([]types.Template) templatesMap := state.GetState().GetTemplatesMap() @@ -163,7 +163,7 @@ func (comm *Communicator) GetRegistryTemplatesData() (interface{}, error) { resp := &types.RegistryTemplateTagsResponse{} tagsData, err := comm.getData(endpoint, resp) if err != nil { - return nil, fmt.Errorf("getting registry template %s/%s tags error: %s", template.UUID, template.Name, err) + return nil, fmt.Errorf("getting registry template %s/%s tags error: %s", template.UUID, template.Name, err.Error()) } tags := tagsData.(types.RegistryTemplateTags) templatesArray[i].Tags = tags.Versions diff --git a/src/metrics/instance_state_per.go b/src/metrics/instance_state_per.go index c82ef6e..8190212 100644 --- a/src/metrics/instance_state_per.go +++ b/src/metrics/instance_state_per.go @@ -1,8 +1,12 @@ package metrics import ( + "fmt" + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/veertuinc/anka-prometheus-exporter/src/events" + "github.com/veertuinc/anka-prometheus-exporter/src/log" "github.com/veertuinc/anka-prometheus-exporter/src/types" ) @@ -150,6 +154,64 @@ var ankaInstanceStatePerMetrics = []InstanceStatePerMetric{ } }, }, + { + BaseAnkaMetric: BaseAnkaMetric{ + metric: CreateGaugeMetricVec("anka_instance_max_age_per_template_seconds", "Age of oldest Instance in a particular state, per Template. Visible only for templates with at least one instance (label: state, template_uuid, template_name)", []string{"state", "template_uuid", "template_name"}), + event: events.EVENT_VM_DATA_UPDATED, + }, + HandleData: func(instances []types.Instance, metric *prometheus.GaugeVec) { + var InstanceAgePerTemplateMaximumMap = map[string]map[string]int{} + var instanceTemplates []string + var instanceTemplatesMap = map[string]string{} + now := time.Now() + for _, instance := range instances { + instanceTemplates = append(instanceTemplates, instance.Vm.TemplateUUID) + instanceTemplatesMap[instance.Vm.TemplateUUID] = instance.Vm.TemplateName + } + instanceTemplates = uniqueThisStringArray(instanceTemplates) + for _, wantedState := range types.InstanceStates { + if _, ok := InstanceAgePerTemplateMaximumMap[wantedState]; !ok { + InstanceAgePerTemplateMaximumMap[wantedState] = make(map[string]int) + } + for _, wantedInstanceTemplate := range instanceTemplates { + age := 0.0 + for _, instance := range instances { + if instance.Vm.State == wantedState { + if instance.Vm.TemplateUUID == wantedInstanceTemplate { + var instanceTime time.Time + var err error + // cr_time only set on an instance creation (in the DB) and never changed + // ts gets updated from time to time due to different events, like save image, termination etc + // both ts and cr_time are members of an Instance object, they do not depend on the vm (regardless if the vm has started or not) + if instance.Vm.State != "Started" && instance.Vm.State != "Scheduling" { // can't use CreationTime because it doesn't change after Scheduling happens + instanceTime, err = time.Parse(time.RFC3339, instance.Vm.LastUpdateTime) + if err != nil { + log.Error(fmt.Sprintf("Error parsing LastUpdateTime %s for template %s: %s", instance.Vm.LastUpdateTime, wantedInstanceTemplate, err.Error())) + } + } else { + instanceTime, err = time.Parse(time.RFC3339, instance.Vm.CreationTime) + if err != nil { + log.Error(fmt.Sprintf("Error parsing CreationTime %s for template %s: %s", instance.Vm.CreationTime, wantedInstanceTemplate, err.Error())) + } + } + thisAge := now.Sub(instanceTime).Seconds() + age = max(age, thisAge) + } + } + } + if _, ok := InstanceAgePerTemplateMaximumMap[wantedState][wantedInstanceTemplate]; !ok { + InstanceAgePerTemplateMaximumMap[wantedState][wantedInstanceTemplate] = int(age) + } + } + } + checkAndHandleResetOfGaugeVecMetric((len(instances) + len(instanceTemplates)), "anka_instance_max_age_per_template_seconds", metric) + for wantedState, wantedStateMap := range InstanceAgePerTemplateMaximumMap { + for wantedTemplateUUID, age := range wantedStateMap { + metric.With(prometheus.Labels{"state": wantedState, "template_uuid": wantedTemplateUUID, "template_name": instanceTemplatesMap[wantedTemplateUUID]}).Set(float64(age)) + } + } + }, + }, } func init() { // runs on exporter init only (updates are made with the above EventHandler; triggered by the Client) diff --git a/src/types/types.go b/src/types/types.go index fea9aa6..d4d3a36 100644 --- a/src/types/types.go +++ b/src/types/types.go @@ -79,12 +79,14 @@ type Instance struct { } type VmData struct { - State string `json:"instance_state"` - TemplateUUID string `json:"vmid"` - TemplateName string - GroupUUID string `json:"group_id"` - NodeUUID string `json:"node_id"` - Arch string `json:"arch"` + State string `json:"instance_state"` + TemplateUUID string `json:"vmid"` + TemplateName string + GroupUUID string `json:"group_id"` + NodeUUID string `json:"node_id"` + Arch string `json:"arch"` + CreationTime string `json:"cr_time"` + LastUpdateTime string `json:"ts"` } type Response interface {