Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move error tracking to context values, add multiple canaries to otel-cli status #227

Merged
merged 21 commits into from
Jun 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions data_for_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ var suites = []FixtureSuite{
Expect: Results{
Config: otlpclient.DefaultConfig(),
Diagnostics: otlpclient.Diagnostics{
IsRecording: false,
NumArgs: 1,
IsRecording: false,
NumArgs: 1,
ParsedTimeoutMs: 1000,
},
},
},
Expand Down
7 changes: 6 additions & 1 deletion otelcli/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,16 @@ func doExec(cmd *cobra.Command, args []string) {
}
span.EndTimeUnixNano = uint64(time.Now().UnixNano())

err := otlpclient.SendSpan(ctx, client, config, span)
ctx, err := otlpclient.SendSpan(ctx, client, config, span)
if err != nil {
config.SoftFail("unable to send span: %s", err)
}

_, err = client.Stop(ctx)
if err != nil {
config.SoftFail("client.Stop() failed: %s", err)
}

// set the global exit code so main() can grab it and os.Exit() properly
otlpclient.Diag.ExecExitCode = child.ProcessState.ExitCode()

Expand Down
4 changes: 3 additions & 1 deletion otelcli/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ func doSpan(cmd *cobra.Command, args []string) {
config := getConfig(ctx)
ctx, client := otlpclient.StartClient(ctx, config)
span := otlpclient.NewProtobufSpanWithConfig(config)
err := otlpclient.SendSpan(ctx, client, config, span)
ctx, err := otlpclient.SendSpan(ctx, client, config, span)
config.SoftFailIfErr(err)
_, err = client.Stop(ctx)
config.SoftFailIfErr(err)
otlpclient.PropagateTraceparent(config, span, os.Stdout)
}
2 changes: 1 addition & 1 deletion otelcli/span_background.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func doSpanBackground(cmd *cobra.Command, args []string) {
bgs.Run()

span.EndTimeUnixNano = uint64(time.Now().UnixNano())
err := otlpclient.SendSpan(ctx, client, config, span)
_, err := otlpclient.SendSpan(ctx, client, config, span)
if err != nil {
config.SoftFail("Sending span failed: %s", err)
}
Expand Down
99 changes: 78 additions & 21 deletions otelcli/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package otelcli
import (
"encoding/hex"
"encoding/json"
"log"
"fmt"
"os"
"strconv"
"strings"
"time"

"github.com/equinix-labs/otel-cli/otlpclient"
"github.com/spf13/cobra"
Expand All @@ -17,22 +18,34 @@ import (
// and is also used in ../main_test.go for automated testing.
type StatusOutput struct {
Config otlpclient.Config `json:"config"`
Spans []map[string]string `json:"spans"`
SpanData map[string]string `json:"span_data"`
Env map[string]string `json:"env"`
Diagnostics otlpclient.Diagnostics `json:"diagnostics"`
Errors otlpclient.ErrorList `json:"errors"`
}

func statusCmd(config *otlpclient.Config) *cobra.Command {
cmd := cobra.Command{
Use: "status",
Short: "start up otel and dump status, optionally sending a canary span",
Short: "send at least one canary and dump status",
Long: `This subcommand is still experimental and the output format is not yet frozen.

By default just one canary is sent. When --canary-count is set, that number of canaries
are sent. If --canary-interval is set, status will sleep the specified duration
between canaries, up to --timeout (default 1s).

Example:
otel-cli status
otel-cli status --canary-count 10 --canary-interval 10 --timeout 10s
`,
Run: doStatus,
}

defaults := otlpclient.DefaultConfig()
cmd.Flags().IntVar(&config.StatusCanaryCount, "canary-count", defaults.StatusCanaryCount, "number of canaries to send")
cmd.Flags().StringVar(&config.StatusCanaryInterval, "canary-interval", defaults.StatusCanaryInterval, "number of milliseconds to wait between canaries")

addCommonParams(&cmd, config)
addClientParams(&cmd, config)
addSpanParams(&cmd, config)
Expand All @@ -41,18 +54,14 @@ Example:
}

func doStatus(cmd *cobra.Command, args []string) {
exitCode := 0
var err error
var exitCode int
allSpans := []map[string]string{}

ctx := cmd.Context()
config := getConfig(ctx)
ctx, client := otlpclient.StartClient(ctx, config)

// TODO: this always canaries as it is, gotta find the right flags
// to try to stall sending at the end so as much as possible of the otel
// code still executes
span := otlpclient.NewProtobufSpanWithConfig(config)
span.Name = "otel-cli status"
span.Kind = tracepb.Span_SPAN_KIND_INTERNAL

env := make(map[string]string)
for _, e := range os.Environ() {
parts := strings.SplitN(e, "=", 2)
Expand All @@ -70,31 +79,79 @@ func doStatus(cmd *cobra.Command, args []string) {
}
}

// send the span out before printing anything
err := otlpclient.SendSpan(ctx, client, config, span)
if err != nil {
if config.Fail {
log.Fatalf("%s", err)
var canaryCount int
var lastSpan *tracepb.Span
deadline := config.StartupTime.Add(config.ParseCliTimeout())
interval := config.ParseStatusCanaryInterval()
for {
// should be rare but a caller could request 0 canaries, in which case the
// client will be started and stopped, but no canaries sent
if config.StatusCanaryCount == 0 {
// TODO: remove this after SpanData is eliminated
lastSpan = otlpclient.NewProtobufSpan()
lastSpan.Name = "unsent canary"
break
}

span := otlpclient.NewProtobufSpanWithConfig(config)
span.Name = "otel-cli status"
if canaryCount > 0 {
span.Name = fmt.Sprintf("otel-cli status canary %d", canaryCount)
}
span.Kind = tracepb.Span_SPAN_KIND_INTERNAL

// when doing multiple canaries, make each new span a child of the previous one
if lastSpan != nil {
span.TraceId = lastSpan.TraceId
span.ParentSpanId = lastSpan.SpanId
}
lastSpan = span
allSpans = append(allSpans, otlpclient.SpanToStringMap(span, nil))

// send it to the server. ignore errors here, they'll happen for sure
// and the base errors will be tunneled up through otlpclient.GetErrorList()
ctx, _ = otlpclient.SendSpan(ctx, client, config, span)
canaryCount++

if canaryCount == config.StatusCanaryCount {
break
} else if time.Now().After(deadline) {
break
} else {
config.SoftLog("%s", err)
time.Sleep(interval)
}
}

ctx, err = client.Stop(ctx)
if err != nil {
config.SoftFail("client.Stop() failed: %s", err)
}

// otlpclient saves all errors to a key in context so they can be used
// to validate assumptions here & in tests
errorList := otlpclient.GetErrorList(ctx)

// TODO: does it make sense to turn SpanData into a list of spans?
outData := StatusOutput{
Config: config,
Env: env,
Spans: allSpans,
// use only the last span's data here, leftover from when status only
// ever sent one canary
// legacy, will be removed once test suite is updated
SpanData: map[string]string{
"trace_id": hex.EncodeToString(span.TraceId),
"span_id": hex.EncodeToString(span.SpanId),
"trace_id": hex.EncodeToString(lastSpan.TraceId),
"span_id": hex.EncodeToString(lastSpan.SpanId),
"is_sampled": strconv.FormatBool(config.IsRecording()),
},
// Diagnostics is deprecated, being replaced by Errors below and eventually
// another stringmap of stuff that was tunneled through context.Context
Diagnostics: otlpclient.Diag,
Errors: errorList,
}

js, err := json.MarshalIndent(outData, "", " ")
if err != nil {
log.Fatal(err)
}
config.SoftFailIfErr(err)

os.Stdout.Write(js)
os.Stdout.WriteString("\n")
Expand Down
50 changes: 40 additions & 10 deletions otlpclient/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func DefaultConfig() Config {
BackgroundSockdir: "",
BackgroundWait: false,
BackgroundSkipParentPidCheck: false,
StatusCanaryCount: 1,
StatusCanaryInterval: "",
SpanStartTime: "now",
SpanEndTime: "now",
EventName: "todo-generate-default-event-names",
Expand Down Expand Up @@ -102,6 +104,9 @@ type Config struct {
BackgroundWait bool `json:"background_wait" env:""`
BackgroundSkipParentPidCheck bool `json:"background_skip_parent_pid_check"`

StatusCanaryCount int `json:"status_canary_count"`
StatusCanaryInterval string `json:"status_canary_interval"`

SpanStartTime string `json:"span_start_time" env:""`
SpanEndTime string `json:"span_end_time" env:""`
EventName string `json:"event_name" env:""`
Expand Down Expand Up @@ -241,24 +246,37 @@ func (c Config) IsRecording() bool {
return true
}

// ParseCliTimeout parses the cliTimeout global string value to a time.Duration.
// ParseCliTimeout converts the --timeout string held in c.Timeout into a
// time.Duration. The parsed value, in milliseconds, is also recorded in
// Diag.ParsedTimeoutMs. Any parse error is handed to c.SoftFailIfErr, and the
// duration produced by parseDuration is returned either way.
func (c Config) ParseCliTimeout() time.Duration {
	timeout, parseErr := parseDuration(c.Timeout)
	// record the diagnostic before error handling so it is set regardless
	Diag.ParsedTimeoutMs = timeout.Milliseconds()
	c.SoftFailIfErr(parseErr)
	return timeout
}

// ParseStatusCanaryInterval converts the --canary-interval string held in
// c.StatusCanaryInterval into a time.Duration. Any parse error is handed to
// c.SoftFailIfErr, and the duration produced by parseDuration is returned.
func (c Config) ParseStatusCanaryInterval() time.Duration {
	interval, parseErr := parseDuration(c.StatusCanaryInterval)
	c.SoftFailIfErr(parseErr)
	return interval
}

// parseDuration parses a string duration into a time.Duration.
//
// Accepted forms:
//   - "" (empty): returns a zero duration and no error.
//   - anything time.ParseDuration understands, e.g. "100ms", "2s", "1m30s".
//   - a bare base-10 integer, e.g. "10": interpreted as a number of seconds.
//
// When the string matches none of these, a zero duration is returned along
// with an error that wraps the underlying time.ParseDuration failure. This
// function does not log; callers decide how to report the error.
func parseDuration(d string) (time.Duration, error) {
	if d == "" {
		return 0, nil
	}

	parsed, err := time.ParseDuration(d)
	if err == nil {
		return parsed, nil
	}

	// no unit suffix was provided: fall back to treating the value as seconds
	if secs, serr := strconv.ParseInt(d, 10, 0); serr == nil {
		return time.Second * time.Duration(secs), nil
	}

	// report the time.ParseDuration error since it describes the expected format
	return 0, fmt.Errorf("unable to parse duration string %q: %w", d, err)
}

// SoftLog only calls through to log if otel-cli was run with the --verbose flag.
Expand Down Expand Up @@ -588,6 +606,18 @@ func (c Config) WithBackgroundSkipParentPidCheck(with bool) Config {
return c
}

// WithStatusCanaryCount returns a copy of the config whose StatusCanaryCount
// field is set to the provided value. The receiver is passed by value, so the
// caller's config is not modified.
func (c Config) WithStatusCanaryCount(with int) Config {
	updated := c
	updated.StatusCanaryCount = with
	return updated
}

// WithStatusCanaryInterval returns a copy of the config whose
// StatusCanaryInterval field is set to the provided value. The receiver is
// passed by value, so the caller's config is not modified.
func (c Config) WithStatusCanaryInterval(with string) Config {
	updated := c
	updated.StatusCanaryInterval = with
	return updated
}

// WithSpanStartTime returns the config with SpanStartTime set to the provided value.
func (c Config) WithSpanStartTime(with string) Config {
c.SpanStartTime = with
Expand Down
10 changes: 10 additions & 0 deletions otlpclient/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,16 @@ func TestWithBackgroundWait(t *testing.T) {
t.Fail()
}
}
// TestWithStatusCanaryCount checks that WithStatusCanaryCount stores the
// provided count on the returned config.
func TestWithStatusCanaryCount(t *testing.T) {
	const want = 1337
	conf := DefaultConfig().WithStatusCanaryCount(want)
	if conf.StatusCanaryCount != want {
		t.Fail()
	}
}
// TestWithStatusCanaryInterval checks that WithStatusCanaryInterval stores the
// provided interval string on the returned config.
func TestWithStatusCanaryInterval(t *testing.T) {
	const want = "1337ms"
	conf := DefaultConfig().WithStatusCanaryInterval(want)
	if conf.StatusCanaryInterval != want {
		t.Fail()
	}
}
func TestWithSpanStartTime(t *testing.T) {
if DefaultConfig().WithSpanStartTime("foobar").SpanStartTime != "foobar" {
t.Fail()
Expand Down
Loading