Skip to content

Commit

Permalink
[chore] [receiver/datadog] Add support for Service Checks (open-telem…
Browse files Browse the repository at this point in the history
…etry#34474)

Description:
This PR adds support for Datadog Service Checks.

Follow-up to open-telemetry#33631, open-telemetry#33957, and open-telemetry#34180.

The full version of the code can be found in the
cedwards/datadog-metrics-receiver-full branch, or in Grafana Alloy:
https://github.com/grafana/alloy/tree/main/internal/etc/datadogreceiver

Link to tracking Issue:

open-telemetry#18278

Testing:
Unit tests, as well as an end-to-end test, have been added.

---------

Signed-off-by: alexgreenbank <alex.greenbank@grafana.com>
Co-authored-by: Carrie Edwards <edwrdscarrie@gmail.com>
Co-authored-by: Juraci Paixão Kröhling <juraci@kroehling.de>
  • Loading branch information
3 people authored and jriguera committed Oct 4, 2024
1 parent c5519e5 commit 8f136de
Show file tree
Hide file tree
Showing 4 changed files with 461 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package translator // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver/internal/translator"

import (
"time"

"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics/identity"
)

// ServiceCheck is the JSON shape of a single Datadog service check run as
// decoded by this package. Payload keys without a matching field here (for
// example "message") are ignored when decoding with encoding/json.
type ServiceCheck struct {
// Check is the name of the check, e.g. "app.working".
Check string `json:"check"`
// HostName is the host that reported the check; it may be empty.
HostName string `json:"host_name"`
// Status is the check status as Datadog's numeric status code.
Status datadogV1.ServiceCheckStatus `json:"status"`
// Timestamp is the time of the check in seconds (Datadog's unit); it is
// zero when the payload carries no timestamp.
Timestamp int64 `json:"timestamp,omitempty"`
// Tags holds the Datadog tags attached to the check; it stays nil when
// the payload omits the key or sets it to null.
Tags []string `json:"tags,omitempty"`
}

// TranslateServices converts Datadog service checks into OTel metrics. Every
// check contributes one integer gauge data point whose value is the check's
// numeric status; all checks are currently reported under the fixed metric
// name "service_check".
//
// More information on Datadog service checks: https://docs.datadoghq.com/api/latest/service-checks/
func (mt *MetricsTranslator) TranslateServices(services []ServiceCheck) pmetric.Metrics {
	batch := newBatcher()
	batch.Metrics = pmetric.NewMetrics()

	for i := range services {
		check := &services[i]

		// TODO(alexg): proper name
		props := parseSeriesProperties("service_check", "service_check", check.Tags, check.HostName, mt.buildInfo.Version, mt.stringPool)
		metric, metricID := batch.Lookup(props)

		points := metric.Gauge().DataPoints()
		points.EnsureCapacity(1)
		point := points.AppendEmpty()

		// OTel uses nanoseconds, while Datadog uses seconds
		tsNanos := check.Timestamp * time.Second.Nanoseconds()
		point.SetTimestamp(pcommon.Timestamp(tsNanos))
		props.dpAttrs.CopyTo(point.Attributes())
		point.SetIntValue(int64(check.Status))

		// The stream identity is derived from the finished data point, so it
		// must be computed only after the attributes have been copied in.
		// TODO(alexg): Do this stream thing for service check metrics?
		stream := identity.OfStream(metricID, point)
		if startTs, seen := mt.streamHasTimestamp(stream); seen {
			point.SetStartTimestamp(startTs)
		}
		mt.updateLastTsForStream(stream, point.Timestamp())
	}

	return batch.Metrics
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package translator

import (
"encoding/json"
"testing"

"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
)

// testTimestamp is a fixed timestamp, in seconds, shared by the service check
// test fixtures.
var testTimestamp int64 = 1700000000

// TestHandleStructureParsing checks that Datadog check-run JSON payloads
// decode into the expected []ServiceCheck values, covering optional
// timestamp, tags and host name fields as well as an empty payload.
func TestHandleStructureParsing(t *testing.T) {
	tests := []struct {
		name             string
		checkRunPayload  []byte
		expectedServices []ServiceCheck
	}{
		{
			name: "happy",
			checkRunPayload: []byte(`[
{
"check": "datadog.agent.check_status",
"host_name": "hosta",
"status": 0,
"message": "",
"tags": [
"check:container"
]
},
{
"check": "app.working",
"host_name": "hosta",
"timestamp": 1700000000,
"status": 0,
"message": "",
"tags": null
},
{
"check": "env.test",
"host_name": "hosta",
"status": 0,
"message": "",
"tags": [
"env:argle", "foo:bargle"
]
}
]`),
			expectedServices: []ServiceCheck{
				{
					Check:    "datadog.agent.check_status",
					HostName: "hosta",
					Status:   0,
					Tags:     []string{"check:container"},
				},
				{
					Check:     "app.working",
					HostName:  "hosta",
					Status:    0,
					Timestamp: 1700000000,
				},
				{
					Check:    "env.test",
					HostName: "hosta",
					Status:   0,
					Tags:     []string{"env:argle", "foo:bargle"},
				},
			},
		},
		{
			name: "happy no tags",
			checkRunPayload: []byte(`[
{
"check": "app.working",
"host_name": "hosta",
"timestamp": 1700000000,
"status": 0,
"message": "",
"tags": null
}
]`),
			expectedServices: []ServiceCheck{
				{
					Check:     "app.working",
					HostName:  "hosta",
					Status:    0,
					Timestamp: 1700000000,
				},
			},
		},
		{
			name: "happy no timestamp",
			checkRunPayload: []byte(`[
{
"check": "env.test",
"host_name": "hosta",
"status": 0,
"message": "",
"tags": [
"env:argle", "foo:bargle"
]
}
]`),
			expectedServices: []ServiceCheck{
				{
					Check:    "env.test",
					HostName: "hosta",
					Status:   0,
					Tags:     []string{"env:argle", "foo:bargle"},
				},
			},
		},
		{
			name:             "empty",
			checkRunPayload:  []byte(`[]`),
			expectedServices: []ServiceCheck{},
		},
		{
			name: "happy no hostname",
			checkRunPayload: []byte(`[
{
"check": "env.test",
"status": 0,
"message": "",
"tags": [
"env:argle", "foo:bargle"
]
}
]`),
			expectedServices: []ServiceCheck{
				{
					Check:  "env.test",
					Status: 0,
					Tags:   []string{"env:argle", "foo:bargle"},
				},
			},
		},
		// A second, identical "empty" case used to follow here. It added no
		// coverage and made the subtest show up twice (as "empty" and
		// "empty#01"), so it was dropped.
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var services []ServiceCheck
			err := json.Unmarshal(tt.checkRunPayload, &services)
			require.NoError(t, err, "Failed to unmarshal service payload JSON")
			assert.Equal(t, tt.expectedServices, services, "Parsed service checks do not match expected service checks")
		})
	}
}

// TestTranslateCheckRun verifies that TranslateServices turns a single service
// check into exactly one "service_check" gauge data point, and that the
// resource, scope and data point attributes and the timestamp match what the
// check's tags, host name and timestamp imply.
func TestTranslateCheckRun(t *testing.T) {
	cases := []struct {
		name     string
		services []ServiceCheck
		expect   func(t *testing.T, result pmetric.Metrics)
	}{
		{
			name: "OK status, with TS, no tags, no hostname",
			services: []ServiceCheck{{
				Check:     "app.working",
				Timestamp: 1700000000,
				Status:    datadogV1.SERVICECHECKSTATUS_OK,
				Tags:      []string{},
			}},
			expect: func(t *testing.T, result pmetric.Metrics) {
				want := tagsToAttributes([]string{}, "", newStringPool())

				resources := result.ResourceMetrics()
				require.Equal(t, 1, resources.Len())
				requireResourceAttributes(t, resources.At(0).Resource().Attributes(), want.resource)
				require.Equal(t, 1, result.MetricCount())
				require.Equal(t, 1, result.DataPointCount())

				requireScope(t, result, want.scope, component.NewDefaultBuildInfo().Version)

				gauge := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0)
				requireGauge(t, gauge, "service_check", 1)

				// The data point must carry the check's timestamp.
				point := gauge.Gauge().DataPoints().At(0)
				requireDp(t, point, want.dp, 1700000000, 0)
			},
		},
		{
			name: "OK status, no TS",
			services: []ServiceCheck{{
				Check:    "app.working",
				HostName: "foo",
				Status:   datadogV1.SERVICECHECKSTATUS_OK,
				Tags:     []string{"env:tag1", "version:tag2"},
			}},
			expect: func(t *testing.T, result pmetric.Metrics) {
				want := tagsToAttributes([]string{"env:tag1", "version:tag2"}, "foo", newStringPool())

				resources := result.ResourceMetrics()
				require.Equal(t, 1, resources.Len())
				requireResourceAttributes(t, resources.At(0).Resource().Attributes(), want.resource)
				require.Equal(t, 1, result.MetricCount())
				require.Equal(t, 1, result.DataPointCount())

				requireScope(t, result, want.scope, component.NewDefaultBuildInfo().Version)

				gauge := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0)
				requireGauge(t, gauge, "service_check", 1)

				// Without a timestamp in the check, the data point timestamp is zero.
				point := gauge.Gauge().DataPoints().At(0)
				requireDp(t, point, want.dp, 0, 0)
			},
		},
	}

	// Every case runs against a fresh translator with the same build info.
	info := component.BuildInfo{
		Command:     "otelcol",
		Description: "OpenTelemetry Collector",
		Version:     "latest",
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			translator := createMetricsTranslator()
			translator.buildInfo = info

			translated := translator.TranslateServices(tc.services)
			tc.expect(t, translated)
		})
	}
}

// TestTranslateCheckRunStatuses verifies that each Datadog service check
// status is emitted as the matching integer gauge value: OK=0, Warning=1,
// Critical=2 and Unknown=3.
func TestTranslateCheckRunStatuses(t *testing.T) {
	cases := []struct {
		name           string
		service        ServiceCheck
		expectedStatus int64
	}{
		{
			name: "OK status, no TS",
			service: ServiceCheck{
				Check:    "app.working",
				HostName: "foo",
				Status:   datadogV1.SERVICECHECKSTATUS_OK,
				Tags:     []string{"env:tag1", "version:tag2"},
			},
			expectedStatus: 0,
		},
		{
			name: "Warning status",
			service: ServiceCheck{
				Check:     "app.warning",
				HostName:  "foo",
				Status:    datadogV1.SERVICECHECKSTATUS_WARNING,
				Tags:      []string{"env:tag1", "version:tag2"},
				Timestamp: testTimestamp,
			},
			expectedStatus: 1,
		},
		{
			name: "Critical status",
			service: ServiceCheck{
				Check:     "app.critical",
				HostName:  "foo",
				Status:    datadogV1.SERVICECHECKSTATUS_CRITICAL,
				Tags:      []string{"env:tag1", "version:tag2"},
				Timestamp: testTimestamp,
			},
			expectedStatus: 2,
		},
		{
			name: "Unknown status",
			service: ServiceCheck{
				Check:     "app.unknown",
				HostName:  "foo",
				Status:    datadogV1.SERVICECHECKSTATUS_UNKNOWN,
				Tags:      []string{"env:tag1", "version:tag2"},
				Timestamp: testTimestamp,
			},
			expectedStatus: 3,
		},
	}

	info := component.BuildInfo{
		Command:     "otelcol",
		Description: "OpenTelemetry Collector",
		Version:     "latest",
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			translator := createMetricsTranslator()
			translator.buildInfo = info

			// Each case translates exactly one service check.
			result := translator.TranslateServices([]ServiceCheck{tc.service})

			require.Equal(t, 1, result.MetricCount())
			require.Equal(t, 1, result.DataPointCount())

			requireScopeMetrics(t, result, 1, 1)

			requireScope(t, result, pcommon.NewMap(), component.NewDefaultBuildInfo().Version)

			emitted := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics()
			for idx, total := 0, emitted.Len(); idx < total; idx++ {
				got := emitted.At(idx).Gauge().DataPoints().At(0).IntValue()
				assert.Equal(t, tc.expectedStatus, got)
			}
		})
	}
}
Loading

0 comments on commit 8f136de

Please sign in to comment.