Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push host metadata on startup #65

Merged
merged 6 commits into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 4 additions & 16 deletions exporter/datadogexporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,23 +98,11 @@ type TagsConfig struct {
}

// GetTags gets the default tags extracted from the configuration
func (t *TagsConfig) GetTags(addHost bool) []string {
tags := make([]string, 0, 4)
func (t *TagsConfig) GetTags() []string {
tags := make([]string, 0, len(t.Tags)+1)

vars := map[string]string{
"env": t.Env,
"service": t.Service,
"version": t.Version,
}

if addHost {
vars["host"] = t.Hostname
}

for name, val := range vars {
if val != "" {
tags = append(tags, fmt.Sprintf("%s:%s", name, val))
}
if t.Env != "none" {
tags = append(tags, fmt.Sprintf("env:%s", t.Env))
}

tags = append(tags, t.Tags...)
Expand Down
21 changes: 12 additions & 9 deletions exporter/datadogexporter/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,24 +84,27 @@ func TestLoadConfig(t *testing.T) {

func TestTags(t *testing.T) {
tc := TagsConfig{
Hostname: "customhost",
Env: "customenv",
Service: "customservice",
Version: "customversion",
Tags: []string{"key1:val1", "key2:val2"},
// environment should be picked up if it is not 'none'
Env: "customenv",

// these should be ignored;
// they are used only on trace translation
Service: "customservice",
Version: "customversion",
Tags: []string{"key1:val1", "key2:val2"},
}

assert.ElementsMatch(t,
[]string{
"host:customhost",
"env:customenv",
"service:customservice",
"version:customversion",
"key1:val1",
"key2:val2",
},
tc.GetTags(true), // get host
tc.GetTags(),
)

tc.Env = "none"
assert.ElementsMatch(t, tc.GetTags(), tc.Tags)
}

// TestOverrideMetricsURL tests that the metrics URL is overridden
Expand Down
30 changes: 30 additions & 0 deletions exporter/datadogexporter/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ package datadogexporter

import (
"context"
"time"

"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/config/configmodels"
"go.opentelemetry.io/collector/config/confignet"
"go.opentelemetry.io/collector/exporter/exporterhelper"
"go.uber.org/zap"
)

const (
Expand All @@ -28,6 +30,9 @@ const (

// DefaultSite is the default site of the Datadog intake to send data to
DefaultSite = "datadoghq.com"

// maxRetries is the maximum number of retries for pushing host metadata
maxRetries = 5
)

// NewFactory creates a Datadog exporter factory
Expand Down Expand Up @@ -86,6 +91,31 @@ func createMetricsExporter(
return nil, err
}

go func() {
// Send host metadata
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be done separately/tracked as an improvement, but ideally this should also send a host metadata payload every 30 minutes, so that:

  • Datadog continues to see current host metadata payloads from this collector (even if, based on the current code, it looks like the payload's contents would not change as long as the collector isn't restarted?)
  • if the payload fails to be sent, this is eventually resolved with the next payload 30 minutes later

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do this separately, right now it doesn't change but I want to add support for EC2 tags so this makes sense. Thanks!

var sent bool
wait := 1 * time.Second
metadata := getHostMetadata(cfg)
for i := 0; i < maxRetries; i++ {
err := exp.pushHostMetadata(metadata)
if err != nil {
params.Logger.Warn("Sending host metadata failed", zap.Error(err))
} else {
sent = true
params.Logger.Info("Sent host metadata", zap.Int("numRetries", i))
break
}

time.Sleep(wait)
wait = 2 * wait
}

if !sent {
// log and continue without metadata
params.Logger.Error("Could not send host metadata", zap.Int("numRetries", maxRetries))
}
}()

return exporterhelper.NewMetricsExporter(
cfg,
exp.PushMetricsData,
Expand Down
65 changes: 65 additions & 0 deletions exporter/datadogexporter/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ package datadogexporter

import "os"

const (
opentelemetryFlavor string = "opentelemetry-collector"
opentelemetryVersion string = "alpha"
)

// GetHost gets the hostname according to configuration.
// It gets the configuration hostname and if
// not available it relies on the OS hostname
Expand All @@ -30,3 +35,63 @@ func GetHost(cfg *Config) *string {
}
return &host
}

// hostMetadata includes metadata about the host tags,
// host aliases and identifies the host as an OpenTelemetry host
type hostMetadata struct {
// Meta includes metadata about the host.
Meta *meta `json:"meta"`

// InternalHostname is the canonical hostname
InternalHostname string `json:"internalHostname"`

// Version is the OpenTelemetry Collector version.
// This is used for correctly identifying the Collector in the backend,
// and for telemetry purposes.
Version string `json:"otel_version"`

// Flavor is always set to "opentelemetry-collector".
// It is used for telemetry purposes in the backend.
Flavor string `json:"agent-flavor"`

// Tags includes the host tags
Tags *hostTags `json:"host-tags"`
}

// hostTags are the host tags.
// Currently only system (configuration) tags are considered.
type hostTags struct {
// System are host tags set in the configuration
System []string `json:"system,omitempty"`
}

// meta includes metadata about the host aliases
type meta struct {
// InstanceID is the EC2 instance id the Collector is running on, if available
InstanceID string `json:"instance-id,omitempty"`

// EC2Hostname is the hostname from the EC2 metadata API
EC2Hostname string `json:"ec2-hostname,omitempty"`

// Hostname is the canonical hostname
Hostname string `json:"hostname"`

// SocketHostname is the OS hostname
SocketHostname string `json:"socket-hostname"`

// HostAliases are other available host names
HostAliases []string `json:"host-aliases,omitempty"`
}

func getHostMetadata(cfg *Config) hostMetadata {
host := *GetHost(cfg)
return hostMetadata{
InternalHostname: host,
Flavor: opentelemetryFlavor,
Version: opentelemetryVersion,
Tags: &hostTags{cfg.TagsConfig.GetTags()},
Meta: &meta{
Hostname: host,
},
}
}
44 changes: 34 additions & 10 deletions exporter/datadogexporter/metrics_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
package datadogexporter

import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"time"

"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"
Expand All @@ -26,23 +31,47 @@ type metricsExporter struct {
logger *zap.Logger
cfg *Config
client *datadog.Client
tags []string
}

func newMetricsExporter(logger *zap.Logger, cfg *Config) (*metricsExporter, error) {
client := datadog.NewClient(cfg.API.Key, "")
client.SetBaseUrl(cfg.Metrics.TCPAddr.Endpoint)

// Calculate tags at startup
tags := cfg.TagsConfig.GetTags(false)
return &metricsExporter{logger, cfg, client}, nil
}

// pushHostMetadata sends a host metadata payload to the "/intake" endpoint
func (exp *metricsExporter) pushHostMetadata(metadata hostMetadata) error {
path := exp.cfg.Metrics.TCPAddr.Endpoint + "/intake"
buf, _ := json.Marshal(metadata)
req, _ := http.NewRequest(http.MethodPost, path, bytes.NewBuffer(buf))
req.Header.Set("DD-API-KEY", exp.cfg.API.Key)
req.Header.Set("Content-Type", "application/json")
mx-psi marked this conversation as resolved.
Show resolved Hide resolved

client := &http.Client{Timeout: 10 * time.Second}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker for this PR, but we should investigate if we also need here all the options that the Agent passes as Transport on its http client. Maybe @olivielpeau can give his opinion on this. Links to the relevant parts in the Agent code:

newHTTPClient() which calls CreateHTTPTransport():
https://github.com/DataDog/datadog-agent/blob/012a209d3acecf96fa74236cc5fc5a6d028a4daf/pkg/forwarder/worker.go#L55-L63

CreateHTTPTransport():
https://github.com/DataDog/datadog-agent/blob/012a209d3acecf96fa74236cc5fc5a6d028a4daf/pkg/util/http/transport.go#L21-L55

This git blame might help us track why they were added in the Agent: https://github.com/DataDog/datadog-agent/blame/7305c07eaf917f63274749dfdd96119b0b713883/pkg/util/common.go

Eg: the PR that disables FallbackDelay mentions "Some concerns have been raised about the effect this might have on the intake."

Also, the proxy settings are probably something we also want.

Copy link
Member Author

@mx-psi mx-psi Oct 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should investigate if we also need here all the options that the Agent passes as Transport on its http client

the zero value for the http.Client uses http.DefaultTransport that differs from the Transport the Agent uses in:

  1. the FallBackDelay being disabled in the Agent,
  2. the MaxIdleConnsPerHost set to 5 in the Agent instead of the default of 2,
  3. being able to disable TLS in the Agent and
  4. the Proxy settings being taken by default from the environment here (which is what every other exporter does).

I think we should keep (4) as is and maybe change (1)/(2)/(3) depending on what Agent Core says. If we change (1)/(2)/(3) we should also change it on the metrics exporter client which uses the default client (maybe on zorkian's library?).

Copy link

@albertvaka albertvaka Oct 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Being consistent with how the proxy is configured everywhere else makes sense, I think MaxIdleConnsPerHost shouldn't matter and disabling TLS is not something we need to allow. The only thing then is FallBackDelay, which I don't understand why was disabled in the Agent... but it can't be that bad if it's the default in Go :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. we want to disable Fast Fallback as it can create unnecessary stress on Datadog's intake.
  2. MaxIdleConnsPerHost set to 5 or 2 isn't that important, unless we expect a lot of concurrent connections to Datadog's intake from this client (not the case here), we want to use Go's default here, I think it changed across Go versions and we haven't updated it yet in datadog-agent.
  3. we don't really allow disabling TLS on datadog-agent, but there's an opt-in to disable certificate validation, which can be useful for some specific proxy settings. There's also an option to force TLS 1.2 and up on the client side, which is a client-side enforcement some users require. Such settings may not be required now, but may be asked in the future.
  4. agreed, I'm not aware of the specifics of how exporters should support proxy settings, but consistency with other exporters/the OT collector makes sense. Note that if you don't use the default transport, this means explicitly setting the Proxy field to ProxyFromEnvironments.

Also, HTTP/2 is not enabled on most intake endpoints at the moment, so you can leave ForceAttemptHTTP2 to false.

resp, err := client.Do(req)

if err != nil {
return err
}

defer resp.Body.Close()

return &metricsExporter{logger, cfg, client, tags}, nil
if resp.StatusCode >= 400 {
return fmt.Errorf(
"'%d - %s' error when sending metadata payload to %s",
resp.StatusCode,
resp.Status,
path,
)
}

return nil
}

func (exp *metricsExporter) processMetrics(metrics []datadog.Metric) {
addNamespace := exp.cfg.Metrics.Namespace != ""
overrideHostname := exp.cfg.Hostname != ""
addTags := len(exp.tags) > 0

for i := range metrics {
if addNamespace {
Expand All @@ -53,11 +82,6 @@ func (exp *metricsExporter) processMetrics(metrics []datadog.Metric) {
if overrideHostname || metrics[i].GetHost() == "" {
metrics[i].Host = GetHost(exp.cfg)
}

if addTags {
metrics[i].Tags = append(metrics[i].Tags, exp.tags...)
}

}
}

Expand Down
4 changes: 2 additions & 2 deletions exporter/datadogexporter/metrics_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ func TestProcessMetrics(t *testing.T) {
0,
[]string{"key2:val2"},
),
}
}

exp.processMetrics(metrics)

assert.Equal(t, "test_host", *metrics[0].Host)
assert.Equal(t, "test.metric_name", *metrics[0].Metric)
assert.ElementsMatch(t,
[]string{"key:val", "env:test_env", "key2:val2"},
[]string{"key2:val2"},
metrics[0].Tags,
)

Expand Down