From 56f8fd4f8622dea64a4863f890c80cadc4668789 Mon Sep 17 00:00:00 2001 From: Aaron Prindle Date: Wed, 12 May 2021 11:32:11 -0700 Subject: [PATCH] skaffold trace wrapping of critical functions and skaffold trace exporters What is the problem being solved? Part of #5756, adding opentelemetry trace information to skaffold commands. Added trace information to specific performance critical skaffold functions (identified in go/cloud-trace-skaffold). Also added 4 trace exporters - gcp-skaffold, gcp-adc, stdout, and jaeger. This PR uses env var based enabling/disabling for the trace for simplicity and to hide it from users directly. Why is this the best approach? Using opentelemetry tracing is the obvious choice as we use open telemetry libs for metrics and it is becoming the metrics/tracing standard. Using an env var in this PR and later integrating the flag setup was considered optimal as currently skaffold tracing will be used for benchmarking/bottleneck-identifying for select use cases while the user facing UX w/ jaeger, etc. is still being worked out. What other approaches did you consider? There was the possibility of building tracing directly into skaffold events but I think with the current wrapper setup in pkg/skaffold/instrumentation/trace.go (w/ the minimal code required) and the fact that many trace locations will not be event locations (eg: how long to hash a file, etc.) it makes sense to not integrate them. What side effects will this approach have? There shouldn't be any side effects w/ this approach as the default "off" for tracing and the minimal user visibility for now should mean that it used only for select use cases experimentally. I have done timing tests with the no-op/empty trace (SKAFFOLD_TRACE unset) and it does not change the performance of skaffold. What future work remains to be done? Future work includes wiring up a --trace flag through dev, build, deploy, etc. and working on how skaffold might be able to do distributed tracing w/ other tools (minikube, buildpacks, etc.). Additionally the ability to allow for more sporadic sampling (vs AlwaysSample) should be added. Some future work mentioned in PR review included: - OTEL_TRACES_EXPORTER=* support (vs SKAFFOLD_TRACE) --- cmd/skaffold/app/cmd/runner.go | 3 +- cmd/skaffold/skaffold.go | 4 +- examples/jaeger-skaffold-trace/README.md | 52 ++++++ go.mod | 4 +- go.sum | 17 +- .../examples/jaeger-skaffold-trace/README.md | 40 ++++ .../jaeger-all-in-one-template.yaml | 168 +++++++++++++++++ .../jaeger-skaffold-trace/skaffold.yaml | 8 + pkg/skaffold/build/cache/hash.go | 8 + pkg/skaffold/build/cache/lookup.go | 9 + pkg/skaffold/build/cache/retrieve.go | 11 ++ pkg/skaffold/build/custom/build.go | 6 + pkg/skaffold/build/docker/docker.go | 7 + pkg/skaffold/build/gcb/cloud_build.go | 8 + pkg/skaffold/build/jib/build.go | 7 + pkg/skaffold/build/scheduler.go | 9 + pkg/skaffold/deploy/deploy_mux.go | 9 + pkg/skaffold/deploy/helm/deploy.go | 13 ++ pkg/skaffold/deploy/kpt/kpt.go | 58 +++++- pkg/skaffold/deploy/kubectl/cli.go | 6 + pkg/skaffold/deploy/kubectl/kubectl.go | 48 ++++- pkg/skaffold/deploy/kustomize/kustomize.go | 39 +++- pkg/skaffold/graph/dependencies.go | 14 ++ pkg/skaffold/instrumentation/export.go | 103 +++++++++++ pkg/skaffold/instrumentation/export_test.go | 2 +- pkg/skaffold/instrumentation/new.go | 40 ++++ pkg/skaffold/instrumentation/trace.go | 123 ++++++++++++ pkg/skaffold/instrumentation/trace_test.go | 175 ++++++++++++++++++ pkg/skaffold/kubernetes/manifest/images.go | 13 +- .../kubernetes/manifest/images_test.go | 9 +- .../portforward/forwarder_manager.go | 5 + pkg/skaffold/render/renderer/renderer.go | 2 +- pkg/skaffold/runner/v1/apply.go | 8 +- pkg/skaffold/runner/v1/deploy.go | 10 + pkg/skaffold/runner/v1/dev.go | 41 +++- pkg/skaffold/runner/v1/new.go | 15 +- 36 files changed, 1052 insertions(+), 42 deletions(-) create mode 100644 examples/jaeger-skaffold-trace/README.md create mode 100644 integration/examples/jaeger-skaffold-trace/README.md create mode 100644 integration/examples/jaeger-skaffold-trace/jaeger-all-in-one-template.yaml create mode 100644 integration/examples/jaeger-skaffold-trace/skaffold.yaml create mode 100644 pkg/skaffold/instrumentation/new.go create mode 100644 pkg/skaffold/instrumentation/trace.go create mode 100644 pkg/skaffold/instrumentation/trace_test.go diff --git a/cmd/skaffold/app/cmd/runner.go b/cmd/skaffold/app/cmd/runner.go index d3cd73c8f65..f442d16e829 100644 --- a/cmd/skaffold/app/cmd/runner.go +++ b/cmd/skaffold/app/cmd/runner.go @@ -64,13 +64,12 @@ func createNewRunner(out io.Writer, opts config.SkaffoldOptions) (runner.Runner, return nil, nil, nil, err } - instrumentation.InitMeterFromConfig(configs, opts.User) + instrumentation.Init(configs, opts.User) runner, err := v1.NewForConfig(runCtx) if err != nil { event.InititializationFailed(err) return nil, nil, nil, fmt.Errorf("creating runner: %w", err) } - return runner, configs, runCtx, nil } diff --git a/cmd/skaffold/skaffold.go b/cmd/skaffold/skaffold.go index 711ca5dba10..d622ac17542 100644 --- a/cmd/skaffold/skaffold.go +++ b/cmd/skaffold/skaffold.go @@ -46,9 +46,7 @@ func main() { code = exitCode(err) } } - if err := instrumentation.ExportMetrics(code); err != nil { - logrus.Debugf("error exporting metrics %v", err) - } + instrumentation.ShutdownAndFlush(context.Background(), code) os.Exit(code) } diff --git a/examples/jaeger-skaffold-trace/README.md b/examples/jaeger-skaffold-trace/README.md new file mode 100644 index 00000000000..03655e28688 --- /dev/null +++ b/examples/jaeger-skaffold-trace/README.md @@ -0,0 +1,52 @@ +### Example: Skaffold Command Tracing with Jaeger + + +_**WARNING: Skaffold's trace functionality is experimental and may change without notice.**_ + +In this example: + +* Use Skaffold to deploy a local/remote Jaeger instance +* Enable Skaffold tracing functionality to get trace information about skaffold `dev`, `build`, `deploy`, etc. timings +* Send Skaffold trace information to Jaeger and have that information be visible in the Jaeger UI, + +In this example, we'll walk through enabling Skaffold trace information that can be used to explore performance bottlenecks and to get a more in depth view of user's local dev loop. + +_**WARNING: If you're running this on a cloud cluster, this example will create a service and expose a webserver. +It's highly recommended that you only run this example on a local, private cluster like minikube or Kubernetes in Docker for Desktop.**_ + +#### Setting up Jaeger locally + +Use docker to start a local jaeger instance using the Jaeger project's [all-in-one docker setup](https://www.jaegertracing.io/docs/getting-started/#all-in-one): +```bash +docker run -d --name jaeger \ + -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ + -p 5775:5775/udp \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + -p 14250:14250 \ + -p 9411:9411 \ + jaegertracing/all-in-one:1.22 +``` + +Now, in a different terminal, go to another Skaffold example (eg: microservices), enable SKAFFOLD_TRACE with Jaeger and start dev session there: +```bash +cd ../microservices +export SKAFFOLD_TRACE=jaeger +skaffold dev +``` + +Now go to the Jaeger UI that Skaffold will port-forward to localhost at http://127.0.0.1:16686/ + +Select service:`skaffold-trace` in the left bar and then click `Find Traces` on the bottom of the left bar. + +From here you should be able to view all of the relevant traces + +#### Cleaning up +To cleanup Jaeger all-in-one setup, run the following: +``` +docker kill jaeger # stops the running jaeger container +docker rm jaeger #removes the container image +``` \ No newline at end of file diff --git a/go.mod b/go.mod index 8bf7430ae74..b0fda16be57 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( cloud.google.com/go/storage v1.10.0 github.com/AlecAivazis/survey/v2 v2.2.7 github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.20.0 + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v0.20.0 github.com/aws/aws-sdk-go v1.36.30 // indirect github.com/blang/semver v3.5.1+incompatible github.com/bmatcuk/doublestar v1.2.4 @@ -24,7 +25,6 @@ require ( github.com/buildpacks/lifecycle v0.10.2 github.com/buildpacks/pack v0.18.1 github.com/cenkalti/backoff/v4 v4.0.2 - github.com/daixiang0/gci v0.2.8 // indirect github.com/docker/cli v20.10.0-beta1.0.20201117192004-5cc239616494+incompatible github.com/docker/distribution v2.7.1+incompatible github.com/docker/docker v20.10.0-beta1.0.20201110211921-af34b94a78a1+incompatible @@ -66,8 +66,10 @@ require ( github.com/spf13/pflag v1.0.5 github.com/tektoncd/pipeline v0.5.1-0.20190731183258-9d7e37e85bf8 github.com/xeipuuv/gojsonschema v1.2.0 + go.opencensus.io v0.22.5 go.opentelemetry.io/otel v0.20.0 go.opentelemetry.io/otel/exporters/stdout v0.20.0 + go.opentelemetry.io/otel/exporters/trace/jaeger v0.20.0 // indirect go.opentelemetry.io/otel/metric v0.20.0 go.opentelemetry.io/otel/sdk v0.20.0 // indirect go.opentelemetry.io/otel/sdk/metric v0.20.0 diff --git a/go.sum b/go.sum index 1adfc79d689..18626783c0c 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,7 @@ cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6 cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= +cloud.google.com/go v0.61.0/go.mod h1:XukKJg4Y7QsUu0Hxg3qQKUWR4VuWivmyMK2+rUyxAqw= cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0 h1:eWRCuwubtDrCJG0oSUMgnsbD4CmPFQF2ei4OFbXvwww= @@ -132,6 +133,8 @@ github.com/GoogleCloudPlatform/k8s-cloud-provider v0.0.0-20190822182118-27a4ced3 github.com/GoogleCloudPlatform/k8s-cloud-provider v0.0.0-20200415212048-7901bc822317/go.mod h1:DF8FZRxMHMGv/vP2lQP6h+dYzzjpuRn24VeRiYn3qjQ= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.20.0 h1:H4Hs0jLf0IcQ96dgx3gPFRke2zyjrzpYMecVsrWKrAc= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.20.0/go.mod h1:3DfYfW/GJ2p+Yd1vGImcFO1jKaqvZmAMMIdWk8BJsjw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v0.20.0 h1:ExGRyJwOUijAPv/RzCJ3p1CNUxBQGzVO238m1lFjLS4= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v0.20.0/go.mod h1:f4BFp2+kV6s/OKj3IP/34keB/OE7tTTaZZQyX/mQ7Ng= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd h1:sjQovDkwrZp8u+gxLtPgKGjk5hCxuy2hrRejBTA9xFU= github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= @@ -355,8 +358,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= -github.com/daixiang0/gci v0.2.8 h1:1mrIGMBQsBu0P7j7m1M8Lb+ZeZxsZL+jyGX4YoMJJpg= -github.com/daixiang0/gci v0.2.8/go.mod h1:+4dZ7TISfSmqfAGv59ePaHfNzgGtIkHAhhdKggP1JAc= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -435,6 +436,8 @@ github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZM github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= +github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= +github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk= @@ -1351,10 +1354,16 @@ go.opencensus.io v0.22.4-0.20200608061201-1901b56b9515/go.mod h1:yxeiOL68Rb0Xd1d go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5 h1:dntmOdLpSpHlVqbW5Eay97DelsZHe+55D+xC6i0dDS0= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= +go.opentelemetry.io/contrib v0.20.0 h1:ubFQUn0VCZ0gPwIoJfBJVpeBlyRMxu8Mm/huKWYd9p0= +go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0 h1:Q3C9yzW6I9jqEc8sawxzxZmY48fs9u220KXq6d5s3XU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= go.opentelemetry.io/otel v0.20.0 h1:eaP0Fqu7SXHwvjiqDq83zImeehOHX8doTvU9AwXON8g= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= go.opentelemetry.io/otel/exporters/stdout v0.20.0 h1:NXKkOWV7Np9myYrQE0wqRS3SbwzbupHu07rDONKubMo= go.opentelemetry.io/otel/exporters/stdout v0.20.0/go.mod h1:t9LUU3JvYlmoPA61abhvsXxKh58xdyi3nMtI6JiR8v0= +go.opentelemetry.io/otel/exporters/trace/jaeger v0.20.0 h1:FoclOadJNul1vUiKnZU0sKFWOZtZQq3jUzSbrX2jwNM= +go.opentelemetry.io/otel/exporters/trace/jaeger v0.20.0/go.mod h1:10qwvAmKpvwRO5lL3KQ8EWznPp89uGfhcbK152LFWsQ= go.opentelemetry.io/otel/metric v0.20.0 h1:4kzhXFP+btKm4jwxpjIqjs41A7MakRFUS86bqLHTIw8= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/oteltest v0.20.0 h1:HiITxCawalo5vQzdHfKeZurV8x7ljcqAgiWzF6Vaeaw= @@ -1732,13 +1741,13 @@ golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200616133436-c1934b75d054/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200701151220-7cb253f4c4f8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200713011307-fd294ab11aed/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= golang.org/x/tools v0.0.0-20200916195026-c9a70fc28ce3/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20201118003311-bd56c0adb394/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0 h1:po9/4sTYwZU9lPhi1tOrb4hCv3qrhiQ77LZfGa2OjwY= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= @@ -1835,6 +1844,8 @@ google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEY google.golang.org/genproto v0.0.0-20200527145253-8367513e4ece/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200605102947-12044bf5ea91/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= +google.golang.org/genproto v0.0.0-20200711021454-869866162049/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200715011427-11fb19a81f2c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= diff --git a/integration/examples/jaeger-skaffold-trace/README.md b/integration/examples/jaeger-skaffold-trace/README.md new file mode 100644 index 00000000000..3f29e5d1bb4 --- /dev/null +++ b/integration/examples/jaeger-skaffold-trace/README.md @@ -0,0 +1,40 @@ +### Example: Skaffold Command Tracing with Jaeger + + +_**WARNING: Skaffold's trace functionality is experimental and may change without notice.**_ + +In this example: + +* Use Skaffold to deploy a local/remote Jaeger instance +* Enable Skaffold tracing functionality to get trace information about skaffold `dev`, `build`, `deploy`, etc. timings +* Send Skaffold trace information to Jaeger and have that information be visible in the Jaeger UI, + +In this example, we'll walk through enabling Skaffold trace information that can be used to explore performance bottlenecks and to get a more in depth view of user's local dev loop. + +_**WARNING: If you're running this on a cloud cluster, this example will create a service and expose a webserver. +It's highly recommended that you only run this example on a local, private cluster like minikube or Kubernetes in Docker for Desktop.**_ + +#### Running the example on minikube + +Start minikube (or verify that minikube is up and running locally) +```bash +minikube start +``` + +From this directory, run +```bash +skaffold dev --port-forward +``` + +Now, in a different terminal, go to another Skaffold example (eg: microservices), enable SKAFFOLD_TRACE with Jaeger and start dev session there: +```bash +cd ../microservices +export SKAFFOLD_TRACE=jaeger +skaffold dev +``` + +Now go to the Jaeger UI that Skaffold will port-forward to localhost at http://127.0.0.1:16686/ + +Select service:`skaffold-trace` in the left bar and then click `Find Traces` on the bottom of the left bar. + +From here you should be able to view all of the relevant traces \ No newline at end of file diff --git a/integration/examples/jaeger-skaffold-trace/jaeger-all-in-one-template.yaml b/integration/examples/jaeger-skaffold-trace/jaeger-all-in-one-template.yaml new file mode 100644 index 00000000000..23e567ef301 --- /dev/null +++ b/integration/examples/jaeger-skaffold-trace/jaeger-all-in-one-template.yaml @@ -0,0 +1,168 @@ +# Copyright 2017-2019 The Jaeger Authors +# Copyright 2021 The Skaffold Authors + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# adapted from https://github.com/jaegertracing/jaeger-kubernetes/blob/master/all-in-one/jaeger-all-in-one-template.yml +# changes made include: +# - updating Deployment to correct apiVersion for newer k8s API - extensions/v1beta1 -> apps/v1 +# - fixing deployment to have required `selector` field in newer k8s Deployment API +# - changed Service - query-http to not use port 80 -> port 16686 which is the export local jaeger port (vs web hosted) +apiVersion: v1 +kind: List +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: jaeger + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + template: + metadata: + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "16686" + spec: + containers: + - env: + - name: COLLECTOR_ZIPKIN_HTTP_PORT + value: "9411" + image: jaegertracing/all-in-one + name: jaeger + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + - containerPort: 16686 + protocol: TCP + - containerPort: 9411 + protocol: TCP + readinessProbe: + httpGet: + path: "/" + port: 14269 + initialDelaySeconds: 5 +- apiVersion: v1 + kind: Service + metadata: + name: jaeger-query + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: query + spec: + ports: + - name: query-http + port: 16686 + protocol: TCP + targetPort: 16686 + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + type: LoadBalancer +- apiVersion: v1 + kind: Service + metadata: + name: jaeger-collector + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: collector + spec: + ports: + - name: jaeger-collector-tchannel + port: 14267 + protocol: TCP + targetPort: 14267 + - name: jaeger-collector-http + port: 14268 + protocol: TCP + targetPort: 14268 + - name: jaeger-collector-zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + type: ClusterIP +- apiVersion: v1 + kind: Service + metadata: + name: jaeger-agent + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + clusterIP: None + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one +- apiVersion: v1 + kind: Service + metadata: + name: zipkin + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: zipkin + spec: + ports: + - name: jaeger-collector-zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + clusterIP: None + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: all-in-one + diff --git a/integration/examples/jaeger-skaffold-trace/skaffold.yaml b/integration/examples/jaeger-skaffold-trace/skaffold.yaml new file mode 100644 index 00000000000..9750c09e106 --- /dev/null +++ b/integration/examples/jaeger-skaffold-trace/skaffold.yaml @@ -0,0 +1,8 @@ +apiVersion: skaffold/v2beta16 +kind: Config +metadata: + name: jaeger-all-in-one +deploy: + kubectl: + manifests: + - jaeger-all-in-one-template.yaml diff --git a/pkg/skaffold/build/cache/hash.go b/pkg/skaffold/build/cache/hash.go index fcb7910f369..3a99b1bd681 100644 --- a/pkg/skaffold/build/cache/hash.go +++ b/pkg/skaffold/build/cache/hash.go @@ -33,6 +33,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/config" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/util" ) @@ -66,14 +67,21 @@ func newArtifactHasher(artifacts graph.ArtifactGraph, lister DependencyLister, m } func (h *artifactHasherImpl) hash(ctx context.Context, a *latestV1.Artifact) (string, error) { + ctx, endTrace := instrumentation.StartTrace(ctx, "hash_GenerateHashOneArtifact", map[string]string{ + "ImageName": instrumentation.PII(a.ImageName), + }) + defer endTrace() + hash, err := h.safeHash(ctx, a) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return "", err } hashes := []string{hash} for _, dep := range sortedDependencies(a, h.artifacts) { depHash, err := h.hash(ctx, dep) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return "", err } hashes = append(hashes, depHash) diff --git a/pkg/skaffold/build/cache/lookup.go b/pkg/skaffold/build/cache/lookup.go index 3650f83211d..b90e6b079ce 100644 --- a/pkg/skaffold/build/cache/lookup.go +++ b/pkg/skaffold/build/cache/lookup.go @@ -25,6 +25,7 @@ import ( "github.com/sirupsen/logrus" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/tag" ) @@ -33,6 +34,9 @@ func (c *cache) lookupArtifacts(ctx context.Context, tags tag.ImageTags, artifac details := make([]cacheDetails, len(artifacts)) // Create a new `artifactHasher` on every new dev loop. // This way every artifact hash is calculated at most once in a single dev loop, and recalculated on every dev loop. + + ctx, endTrace := instrumentation.StartTrace(ctx, "lookupArtifacts_CacheLookupArtifacts") + defer endTrace() h := newArtifactHasherFunc(c.artifactGraph, c.lister, c.cfg.Mode()) var wg sync.WaitGroup for i := range artifacts { @@ -50,6 +54,11 @@ func (c *cache) lookupArtifacts(ctx context.Context, tags tag.ImageTags, artifac } func (c *cache) lookup(ctx context.Context, a *latestV1.Artifact, tag string, h artifactHasher) cacheDetails { + ctx, endTrace := instrumentation.StartTrace(ctx, "lookup_CacheLookupOneArtifact", map[string]string{ + "ImageName": instrumentation.PII(a.ImageName), + }) + defer endTrace() + hash, err := h.hash(ctx, a) if err != nil { return failed{err: fmt.Errorf("getting hash for artifact %q: %s", a.ImageName, err)} diff --git a/pkg/skaffold/build/cache/retrieve.go b/pkg/skaffold/build/cache/retrieve.go index 981a2d96db6..75a8c1970c3 100644 --- a/pkg/skaffold/build/cache/retrieve.go +++ b/pkg/skaffold/build/cache/retrieve.go @@ -29,6 +29,7 @@ import ( sErrors "github.com/GoogleContainerTools/skaffold/pkg/skaffold/errors" eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/tag" @@ -42,6 +43,8 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar start := time.Now() output.Default.Fprintln(out, "Checking cache...") + ctx, endTrace := instrumentation.StartTrace(ctx, "Build_CheckBuildCache") + defer endTrace() lookup := make(chan []cacheDetails) go func() { lookup <- c.lookupArtifacts(ctx, tags, artifacts) }() @@ -64,6 +67,7 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar switch result := result.(type) { case failed: output.Red.Fprintln(out, "Error checking cache.") + endTrace(instrumentation.TraceEndError(result.err)) return nil, result.err case needsBuilding: @@ -77,6 +81,7 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar eventV2.CacheCheckHit(artifact.ImageName) output.Green.Fprintln(out, "Found. Tagging") if err := result.Tag(ctx, c); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("tagging image: %w", err) } @@ -84,6 +89,8 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar eventV2.CacheCheckHit(artifact.ImageName) output.Green.Fprintln(out, "Found. Pushing") if err := result.Push(ctx, out, c); err != nil { + endTrace(instrumentation.TraceEndError(err)) + return nil, fmt.Errorf("%s: %w", sErrors.PushImageErr, err) } @@ -91,6 +98,7 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar eventV2.CacheCheckHit(artifact.ImageName) isLocal, err := c.isLocalImage(artifact.ImageName) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } if isLocal { @@ -109,12 +117,14 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar var uniqueTag string isLocal, err := c.isLocalImage(artifact.ImageName) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } if isLocal { var err error uniqueTag, err = build.TagWithImageID(ctx, tag, entry.ID, c.client) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } } else { @@ -131,6 +141,7 @@ func (c *cache) Build(ctx context.Context, out io.Writer, tags tag.ImageTags, ar bRes, err := buildAndTest(ctx, out, tags, needToBuild) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } diff --git a/pkg/skaffold/build/custom/build.go b/pkg/skaffold/build/custom/build.go index 5ca3d1e932a..ec02063143b 100644 --- a/pkg/skaffold/build/custom/build.go +++ b/pkg/skaffold/build/custom/build.go @@ -22,11 +22,17 @@ import ( "io" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" ) // Build builds an artifact using a custom script func (b *Builder) Build(ctx context.Context, out io.Writer, artifact *latestV1.Artifact, tag string) (string, error) { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "BuildType": "custom", + "Context": instrumentation.PII(artifact.Workspace), + "Destination": tag, + }) if err := b.runBuildScript(ctx, out, artifact, tag); err != nil { return "", fmt.Errorf("building custom artifact: %w", err) } diff --git a/pkg/skaffold/build/docker/docker.go b/pkg/skaffold/build/docker/docker.go index a3022c7b1ab..c848f951ffb 100644 --- a/pkg/skaffold/build/docker/docker.go +++ b/pkg/skaffold/build/docker/docker.go @@ -24,6 +24,7 @@ import ( "os/exec" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/util" @@ -31,6 +32,12 @@ import ( ) func (b *Builder) Build(ctx context.Context, out io.Writer, a *latestV1.Artifact, tag string) (string, error) { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "BuildType": "docker", + "Context": instrumentation.PII(a.Workspace), + "Destination": tag, + }) + // Fail fast if the Dockerfile can't be found. dockerfile, err := docker.NormalizeDockerfilePath(a.Workspace, a.DockerArtifact.DockerfilePath) if err != nil { diff --git a/pkg/skaffold/build/gcb/cloud_build.go b/pkg/skaffold/build/gcb/cloud_build.go index 0d2e61a8827..f4deb268229 100644 --- a/pkg/skaffold/build/gcb/cloud_build.go +++ b/pkg/skaffold/build/gcb/cloud_build.go @@ -38,6 +38,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/constants" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/gcp" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/sources" @@ -45,6 +46,10 @@ import ( // Build builds a list of artifacts with Google Cloud Build. func (b *Builder) Build(ctx context.Context, out io.Writer, artifact *latestV1.Artifact) build.ArtifactBuilder { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "BuildType": "gcb", + "Context": instrumentation.PII(artifact.Workspace), + }) builder := build.WithLogFile(b.buildArtifactWithCloudBuild, b.muted) return builder } @@ -62,6 +67,9 @@ func (b *Builder) Concurrency() int { } func (b *Builder) buildArtifactWithCloudBuild(ctx context.Context, out io.Writer, artifact *latestV1.Artifact, tag string) (string, error) { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "Destination": tag, + }) // TODO: [#4922] Implement required artifact resolution from the `artifactStore` cbclient, err := cloudbuild.NewService(ctx, gcp.ClientOptions()...) if err != nil { diff --git a/pkg/skaffold/build/jib/build.go b/pkg/skaffold/build/jib/build.go index 83b461e205c..fd3a7c4d3e8 100644 --- a/pkg/skaffold/build/jib/build.go +++ b/pkg/skaffold/build/jib/build.go @@ -20,11 +20,18 @@ import ( "context" "io" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" ) // Build builds an artifact with Jib. func (b *Builder) Build(ctx context.Context, out io.Writer, artifact *latestV1.Artifact, tag string) (string, error) { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "BuildType": "jib", + "Context": instrumentation.PII(artifact.Workspace), + "Destination": tag, + }) + t, err := DeterminePluginType(artifact.Workspace, artifact.JibArtifact) if err != nil { return "", err diff --git a/pkg/skaffold/build/scheduler.go b/pkg/skaffold/build/scheduler.go index 3dcbb5a4f58..93d0a8a16bd 100644 --- a/pkg/skaffold/build/scheduler.go +++ b/pkg/skaffold/build/scheduler.go @@ -20,12 +20,14 @@ import ( "context" "fmt" "io" + "strconv" "golang.org/x/sync/errgroup" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/tag" @@ -90,11 +92,17 @@ func (s *scheduler) build(ctx context.Context, tags tag.ImageTags, i int) error event.BuildInProgress(a.ImageName) eventV2.BuildInProgress(a.ImageName) + ctx, endTrace := instrumentation.StartTrace(ctx, "build_BuildInProgress", map[string]string{ + "ArtifactNumber": strconv.Itoa(i), + "ImageName": instrumentation.PII(a.ImageName), + }) + defer endTrace() w, closeFn, err := s.logger.GetWriter() if err != nil { event.BuildFailed(a.ImageName, err) eventV2.BuildFailed(a.ImageName, err) + endTrace(instrumentation.TraceEndError(err)) return err } defer closeFn() @@ -103,6 +111,7 @@ func (s *scheduler) build(ctx context.Context, tags tag.ImageTags, i int) error if err != nil { event.BuildFailed(a.ImageName, err) eventV2.BuildFailed(a.ImageName, err) + endTrace(instrumentation.TraceEndError(err)) return err } diff --git a/pkg/skaffold/deploy/deploy_mux.go b/pkg/skaffold/deploy/deploy_mux.go index 4f4d2c1bccb..d259ce9bdc6 100644 --- a/pkg/skaffold/deploy/deploy_mux.go +++ b/pkg/skaffold/deploy/deploy_mux.go @@ -24,6 +24,7 @@ import ( eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/util" ) @@ -38,15 +39,18 @@ func (m DeployerMux) Deploy(ctx context.Context, w io.Writer, as []graph.Artifac for i, deployer := range m { eventV2.DeployInProgress(i) + ctx, endTrace := instrumentation.StartTrace(ctx, "Deploy") namespaces, err := deployer.Deploy(ctx, w, as) if err != nil { eventV2.DeployFailed(i, err) + endTrace(instrumentation.TraceEndError(err)) return nil, err } seenNamespaces.Insert(namespaces...) eventV2.DeploySucceeded(i) + endTrace() } return seenNamespaces.ToList(), nil @@ -66,9 +70,11 @@ func (m DeployerMux) Dependencies() ([]string, error) { func (m DeployerMux) Cleanup(ctx context.Context, w io.Writer) error { for _, deployer := range m { + ctx, endTrace := instrumentation.StartTrace(ctx, "Cleanup") if err := deployer.Cleanup(ctx, w); err != nil { return err } + endTrace() } return nil } @@ -76,11 +82,14 @@ func (m DeployerMux) Cleanup(ctx context.Context, w io.Writer) error { func (m DeployerMux) Render(ctx context.Context, w io.Writer, as []graph.Artifact, offline bool, filepath string) error { resources, buf := []string{}, &bytes.Buffer{} for _, deployer := range m { + ctx, endTrace := instrumentation.StartTrace(ctx, "Render") buf.Reset() if err := deployer.Render(ctx, buf, as, offline, "" /* never write to files */); err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } resources = append(resources, buf.String()) + endTrace() } allResources := strings.Join(resources, "\n---\n") diff --git a/pkg/skaffold/deploy/helm/deploy.go b/pkg/skaffold/deploy/helm/deploy.go index 3c9fe54ef75..39639e36c12 100644 --- a/pkg/skaffold/deploy/helm/deploy.go +++ b/pkg/skaffold/deploy/helm/deploy.go @@ -42,6 +42,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/label" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/types" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" @@ -122,6 +123,11 @@ func NewDeployer(cfg Config, labels map[string]string, h *latestV1.HelmDeploy) ( // Deploy deploys the build results to the Kubernetes cluster func (h *Deployer) Deploy(ctx context.Context, out io.Writer, builds []graph.Artifact) ([]string, error) { + ctx, endTrace := instrumentation.StartTrace(ctx, "Render", map[string]string{ + "DeployerType": "helm", + }) + defer endTrace() + logrus.Infof("Deploying with helm v%s ...", h.bV) var dRes []types.Artifact @@ -229,6 +235,10 @@ func (h *Deployer) Dependencies() ([]string, error) { // Cleanup deletes what was deployed by calling Deploy. func (h *Deployer) Cleanup(ctx context.Context, out io.Writer) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "helm", + }) + for _, r := range h.Releases { releaseName, err := util.ExpandEnvTemplateOrFail(r.Name, nil) if err != nil { @@ -253,6 +263,9 @@ func (h *Deployer) Cleanup(ctx context.Context, out io.Writer) error { // Render generates the Kubernetes manifests and writes them out func (h *Deployer) Render(ctx context.Context, out io.Writer, builds []graph.Artifact, offline bool, filepath string) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "helm", + }) renderedManifests := new(bytes.Buffer) for _, r := range h.Releases { diff --git a/pkg/skaffold/deploy/kpt/kpt.go b/pkg/skaffold/deploy/kpt/kpt.go index ec617e87583..1eb989873c4 100644 --- a/pkg/skaffold/deploy/kpt/kpt.go +++ b/pkg/skaffold/deploy/kpt/kpt.go @@ -38,6 +38,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/kustomize" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" @@ -142,41 +143,67 @@ func versionCheck(dir string, stdout io.Writer) error { // outputs them to the applyDir, and runs `kpt live apply` against applyDir to create resources in the cluster. // `kpt live apply` supports automated pruning declaratively via resources in the applyDir. func (k *Deployer) Deploy(ctx context.Context, out io.Writer, builds []graph.Artifact) ([]string, error) { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kpt", + }) + + _, endTrace := instrumentation.StartTrace(ctx, "Deploy_sanityCheck") if err := sanityCheck(k.Dir, out); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } + endTrace() + + _, endTrace = instrumentation.StartTrace(ctx, "Deploy_getKptFnRunArgs") flags, err := k.getKptFnRunArgs() if err != nil { + endTrace(instrumentation.TraceEndError(err)) return []string{}, err } - manifests, err := k.renderManifests(ctx, out, builds, flags) + endTrace() + + childCtx, endTrace := instrumentation.StartTrace(ctx, "Deploy_renderManifests") + manifests, err := k.renderManifests(childCtx, out, builds, flags) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } if len(manifests) == 0 { + endTrace() return nil, nil } + endTrace() + _, endTrace = instrumentation.StartTrace(ctx, "Deploy_CollectNamespaces") namespaces, err := manifests.CollectNamespaces() if err != nil { event.DeployInfoEvent(fmt.Errorf("could not fetch deployed resource namespace. "+ "This might cause port-forward and deploy health-check to fail: %w", err)) } + endTrace() - applyDir, err := k.getApplyDir(ctx) + childCtx, endTrace = instrumentation.StartTrace(ctx, "Deploy_getApplyDir") + applyDir, err := k.getApplyDir(childCtx) if err != nil { return nil, fmt.Errorf("getting applyDir: %w", err) } + endTrace() + _, endTrace = instrumentation.StartTrace(ctx, "Deploy_manifest.Write") manifest.Write(manifests.String(), filepath.Join(applyDir, "resources.yaml"), out) - cmd := exec.CommandContext(ctx, "kpt", kptCommandArgs(applyDir, []string{"live", "apply"}, k.getKptLiveApplyArgs(), nil)...) + endTrace() + + childCtx, endTrace = instrumentation.StartTrace(ctx, "Deploy_execKptCommand") + cmd := exec.CommandContext(childCtx, "kpt", kptCommandArgs(applyDir, []string{"live", "apply"}, k.getKptLiveApplyArgs(), nil)...) cmd.Stdout = out cmd.Stderr = out if err := util.RunCmd(cmd); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } + endTrace() return namespaces, nil } @@ -220,6 +247,10 @@ func (k *Deployer) Dependencies() ([]string, error) { // Cleanup deletes what was deployed by calling `kpt live destroy`. func (k *Deployer) Cleanup(ctx context.Context, out io.Writer) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kpt", + }) + applyDir, err := k.getApplyDir(ctx) if err != nil { return fmt.Errorf("getting applyDir: %w", err) @@ -237,19 +268,36 @@ func (k *Deployer) Cleanup(ctx context.Context, out io.Writer) error { // Render hydrates manifests using both kustomization and kpt functions. func (k *Deployer) Render(ctx context.Context, out io.Writer, builds []graph.Artifact, _ bool, filepath string) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kubectl", + }) + + _, endTrace := instrumentation.StartTrace(ctx, "Render_sanityCheck") + if err := sanityCheck(k.Dir, out); err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + endTrace() + + _, endTrace = instrumentation.StartTrace(ctx, "Render_getKptFnRunArgs") flags, err := k.getKptFnRunArgs() if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + endTrace() - manifests, err := k.renderManifests(ctx, out, builds, flags) + childCtx, endTrace := instrumentation.StartTrace(ctx, "Render_renderManifests") + manifests, err := k.renderManifests(childCtx, out, builds, flags) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + endTrace() + _, endTrace = instrumentation.StartTrace(ctx, "Render_manifest.Write") + defer endTrace() return manifest.Write(manifests.String(), filepath, out) } @@ -385,7 +433,7 @@ func (k *Deployer) renderManifests(ctx context.Context, _ io.Writer, builds []gr if err != nil { return nil, fmt.Errorf("excluding kpt functions from manifests: %w", err) } - manifests, err = manifests.ReplaceImages(builds) + manifests, err = manifests.ReplaceImages(ctx, builds) if err != nil { return nil, fmt.Errorf("replacing images in manifests: %w", err) } diff --git a/pkg/skaffold/deploy/kubectl/cli.go b/pkg/skaffold/deploy/kubectl/cli.go index 0a058503fb5..c676cbc4eb1 100644 --- a/pkg/skaffold/deploy/kubectl/cli.go +++ b/pkg/skaffold/deploy/kubectl/cli.go @@ -29,6 +29,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/config" deployerr "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/error" deploy "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/types" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubectl" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" @@ -74,6 +75,10 @@ func (c *CLI) Delete(ctx context.Context, out io.Writer, manifests manifest.Mani // Apply runs `kubectl apply` on a list of manifests. func (c *CLI) Apply(ctx context.Context, out io.Writer, manifests manifest.ManifestList) error { + ctx, endTrace := instrumentation.StartTrace(ctx, "Apply", map[string]string{ + "AppliedBy": "kubectl", + }) + defer endTrace() // Only redeploy modified or new manifests // TODO(dgageot): should we delete a manifest that was deployed and is not anymore? updated := c.previousApply.Diff(manifests) @@ -93,6 +98,7 @@ func (c *CLI) Apply(ctx context.Context, out io.Writer, manifests manifest.Manif } if err := c.Run(ctx, updated.Reader(), out, "apply", c.args(c.Flags.Apply, args...)...); err != nil { + endTrace(instrumentation.TraceEndError(err)) return userErr(fmt.Errorf("kubectl apply: %w", err)) } diff --git a/pkg/skaffold/deploy/kubectl/kubectl.go b/pkg/skaffold/deploy/kubectl/kubectl.go index a051b2864c4..303bb3f95f0 100644 --- a/pkg/skaffold/deploy/kubectl/kubectl.go +++ b/pkg/skaffold/deploy/kubectl/kubectl.go @@ -33,6 +33,7 @@ import ( deployutil "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/util" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" @@ -88,19 +89,30 @@ func (k *Deployer) Deploy(ctx context.Context, out io.Writer, builds []graph.Art manifests manifest.ManifestList err error ) + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kubectl", + }) + // if any hydrated manifests are passed to `skaffold apply`, only deploy these // also, manually set the labels to ensure the runID is added switch { case len(k.hydratedManifests) > 0: + _, endTrace := instrumentation.StartTrace(ctx, "Deploy_createManifestList") manifests, err = createManifestList(k.hydratedManifests) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } manifests, err = manifests.SetLabels(k.labels) + endTrace() case k.skipRender: - manifests, err = k.readManifests(ctx, false) + childCtx, endTrace := instrumentation.StartTrace(ctx, "Deploy_readManifests") + manifests, err = k.readManifests(childCtx, false) + endTrace() default: - manifests, err = k.renderManifests(ctx, out, builds, false) + childCtx, endTrace := instrumentation.StartTrace(ctx, "Deploy_renderManifests") + manifests, err = k.renderManifests(childCtx, out, builds, false) + endTrace() } if err != nil { @@ -110,21 +122,27 @@ func (k *Deployer) Deploy(ctx context.Context, out io.Writer, builds []graph.Art if len(manifests) == 0 { return nil, nil } - + _, endTrace := instrumentation.StartTrace(ctx, "Deploy_CollectNamespaces") namespaces, err := manifests.CollectNamespaces() if err != nil { event.DeployInfoEvent(fmt.Errorf("could not fetch deployed resource namespace. "+ "This might cause port-forward and deploy health-check to fail: %w", err)) } + endTrace() - if err := k.kubectl.WaitForDeletions(ctx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + childCtx, endTrace := instrumentation.StartTrace(ctx, "Deploy_WaitForDeletions") + if err := k.kubectl.WaitForDeletions(childCtx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } + endTrace() - if err := k.kubectl.Apply(ctx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + childCtx, endTrace = instrumentation.StartTrace(ctx, "Deploy_KubectlApply") + if err := k.kubectl.Apply(childCtx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } - + endTrace() return namespaces, nil } @@ -243,11 +261,20 @@ func (k *Deployer) readRemoteManifest(ctx context.Context, name string) ([]byte, } func (k *Deployer) Render(ctx context.Context, out io.Writer, builds []graph.Artifact, offline bool, filepath string) error { - manifests, err := k.renderManifests(ctx, out, builds, offline) + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kubectl", + }) + + childCtx, endTrace := instrumentation.StartTrace(ctx, "Render_renderManifests") + manifests, err := k.renderManifests(childCtx, out, builds, offline) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + endTrace() + _, endTrace = instrumentation.StartTrace(ctx, "Render_manifest.Write") + defer endTrace() return manifest.Write(manifests.String(), filepath, out) } @@ -300,7 +327,7 @@ func (k *Deployer) renderManifests(ctx context.Context, out io.Writer, builds [] } } - manifests, err = manifests.ReplaceImages(builds) + manifests, err = manifests.ReplaceImages(ctx, builds) if err != nil { return nil, err } @@ -314,6 +341,9 @@ func (k *Deployer) renderManifests(ctx context.Context, out io.Writer, builds [] // Cleanup deletes what was deployed by calling Deploy. func (k *Deployer) Cleanup(ctx context.Context, out io.Writer) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kubectl", + }) manifests, err := k.readManifests(ctx, false) if err != nil { return err @@ -332,7 +362,7 @@ func (k *Deployer) Cleanup(ctx context.Context, out io.Writer) error { rm = append(rm, manifest) } - upd, err := rm.ReplaceImages(k.originalImages) + upd, err := rm.ReplaceImages(ctx, k.originalImages) if err != nil { return err } diff --git a/pkg/skaffold/deploy/kustomize/kustomize.go b/pkg/skaffold/deploy/kustomize/kustomize.go index 63b72e948cc..0336031b71b 100644 --- a/pkg/skaffold/deploy/kustomize/kustomize.go +++ b/pkg/skaffold/deploy/kustomize/kustomize.go @@ -32,6 +32,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/kubectl" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/manifest" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" @@ -143,28 +144,44 @@ func kubectlVersionCheck(kubectl kubectl.CLI) bool { // Deploy runs `kubectl apply` on the manifest generated by kustomize. func (k *Deployer) Deploy(ctx context.Context, out io.Writer, builds []graph.Artifact) ([]string, error) { - manifests, err := k.renderManifests(ctx, out, builds) + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kustomize", + }) + + childCtx, endTrace := instrumentation.StartTrace(ctx, "Deploy_renderManifests") + manifests, err := k.renderManifests(childCtx, out, builds) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } if len(manifests) == 0 { + endTrace() return nil, nil } + endTrace() + _, endTrace = instrumentation.StartTrace(ctx, "Deploy_CollectNamespaces") namespaces, err := manifests.CollectNamespaces() if err != nil { event.DeployInfoEvent(fmt.Errorf("could not fetch deployed resource namespace. "+ "This might cause port-forward and deploy health-check to fail: %w", err)) } + endTrace() - if err := k.kubectl.WaitForDeletions(ctx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + childCtx, endTrace = instrumentation.StartTrace(ctx, "Deploy_WaitForDeletions") + if err := k.kubectl.WaitForDeletions(childCtx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } + endTrace() - if err := k.kubectl.Apply(ctx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + childCtx, endTrace = instrumentation.StartTrace(ctx, "Deploy_Apply") + if err := k.kubectl.Apply(childCtx, textio.NewPrefixWriter(out, " - "), manifests); err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } + endTrace() return namespaces, nil } @@ -189,7 +206,7 @@ func (k *Deployer) renderManifests(ctx context.Context, out io.Writer, builds [] return nil, nil } - manifests, err = manifests.ReplaceImages(builds) + manifests, err = manifests.ReplaceImages(ctx, builds) if err != nil { return nil, err } @@ -203,6 +220,9 @@ func (k *Deployer) renderManifests(ctx context.Context, out io.Writer, builds [] // Cleanup deletes what was deployed by calling Deploy. func (k *Deployer) Cleanup(ctx context.Context, out io.Writer) error { + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kustomize", + }) manifests, err := k.readManifests(ctx) if err != nil { return err @@ -229,10 +249,19 @@ func (k *Deployer) Dependencies() ([]string, error) { } func (k *Deployer) Render(ctx context.Context, out io.Writer, builds []graph.Artifact, offline bool, filepath string) error { - manifests, err := k.renderManifests(ctx, out, builds) + instrumentation.AddAttributesToCurrentSpanFromContext(ctx, map[string]string{ + "DeployerType": "kustomize", + }) + + childCtx, endTrace := instrumentation.StartTrace(ctx, "Render_renderManifests") + manifests, err := k.renderManifests(childCtx, out, builds) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + + _, endTrace = instrumentation.StartTrace(ctx, "Render_manifest.Write") + defer endTrace() return manifest.Write(manifests.String(), filepath, out) } diff --git a/pkg/skaffold/graph/dependencies.go b/pkg/skaffold/graph/dependencies.go index cec6592b72b..e218fa63a09 100644 --- a/pkg/skaffold/graph/dependencies.go +++ b/pkg/skaffold/graph/dependencies.go @@ -26,6 +26,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/build/jib" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/build/misc" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/util" ) @@ -57,13 +58,20 @@ type dependencyResolverImpl struct { } func (r *dependencyResolverImpl) TransitiveArtifactDependencies(ctx context.Context, a *latestV1.Artifact) ([]string, error) { + ctx, endTrace := instrumentation.StartTrace(ctx, "TransitiveArtifactDependencies", map[string]string{ + "ArtifactName": instrumentation.PII(a.ImageName), + }) + defer endTrace() + deps, err := r.SingleArtifactDependencies(ctx, a) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } for _, ad := range a.Dependencies { d, err := r.TransitiveArtifactDependencies(ctx, r.artifactGraph[ad.ImageName]) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } deps = append(deps, d...) @@ -72,6 +80,11 @@ func (r *dependencyResolverImpl) TransitiveArtifactDependencies(ctx context.Cont } func (r *dependencyResolverImpl) SingleArtifactDependencies(ctx context.Context, a *latestV1.Artifact) ([]string, error) { + ctx, endTrace := instrumentation.StartTrace(ctx, "SingleArtifactDependencies", map[string]string{ + "ArtifactName": instrumentation.PII(a.ImageName), + }) + defer endTrace() + res := r.cache.Exec(a.ImageName, func() interface{} { d, e := getDependenciesFunc(ctx, a, r.cfg, r.artifactResolver) if e != nil { @@ -80,6 +93,7 @@ func (r *dependencyResolverImpl) SingleArtifactDependencies(ctx context.Context, return d }) if err, ok := res.(error); ok { + endTrace(instrumentation.TraceEndError(err)) return nil, err } return res.([]string), nil diff --git a/pkg/skaffold/instrumentation/export.go b/pkg/skaffold/instrumentation/export.go index 50257d527ae..4f9645d6b6c 100644 --- a/pkg/skaffold/instrumentation/export.go +++ b/pkg/skaffold/instrumentation/export.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "io" "io/ioutil" "math/rand" "os" @@ -28,15 +29,21 @@ import ( "time" mexporter "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric" + texporter "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace" "github.com/mitchellh/go-homedir" "github.com/rakyll/statik/fs" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/stdout" + "go.opentelemetry.io/otel/exporters/trace/jaeger" "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/global" "go.opentelemetry.io/otel/sdk/metric/controller/basic" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/semconv" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/option" "github.com/GoogleContainerTools/skaffold/cmd/skaffold/app/cmd/statik" @@ -283,3 +290,99 @@ func errorMetrics(ctx context.Context, meter skaffoldMeter, m metric.Meter, labe unknownCounter.Record(ctx, 1, labels...) } } + +type TraceExporterConfig struct { + writer io.Writer +} + +type TraceExporterOption func(te *TraceExporterConfig) + +func WithWriter(w io.Writer) TraceExporterOption { + return func(teconf *TraceExporterConfig) { + teconf.writer = w + } +} + +func initTraceExporter(opts ...TraceExporterOption) (trace.TracerProvider, func(context.Context) error, error) { + teconf := TraceExporterConfig{ + writer: os.Stdout, + } + + for _, opt := range opts { + opt(&teconf) + } + + switch os.Getenv("SKAFFOLD_TRACE") { + case "stdout": + logrus.Debugf("using stdout trace exporter") + return initIOWriterTracer(teconf.writer) + case "gcp-adc": + logrus.Debugf("using cloud trace exporter w/ application default creds") + tp, shutdown, err := initCloudTraceExporterApplicationDefaultCreds() + return tp, func(context.Context) error { shutdown(); return nil }, err + case "jaeger": + logrus.Debugf("using jaeger trace exporter") + tp, shutdown, err := initJaegerTraceExporter() + return tp, func(context.Context) error { shutdown(); return nil }, err + } + + if otelTraceExporterVal, ok := os.LookupEnv("OTEL_TRACES_EXPORTER"); ok { + logrus.Debugf("using otel default exporter - OTEL_TRACES_EXPORTER=%s", otelTraceExporterVal) + return nil, func(context.Context) error { return nil }, nil + } + + return nil, func(context.Context) error { return nil }, fmt.Errorf("error initializing trace exporter") +} + +// initIOWriterTracer creates and registers trace provider instance that writes to an io.Writer interface +func initIOWriterTracer(w io.Writer) (*sdktrace.TracerProvider, func(context.Context) error, error) { + exp, err := stdout.NewExporter( + stdout.WithWriter(w), + stdout.WithPrettyPrint(), + ) + if err != nil { + return nil, func(context.Context) error { return nil }, err + } + bsp := sdktrace.NewBatchSpanProcessor(exp) + tp := sdktrace.NewTracerProvider( + sdktrace.WithSampler(sdktrace.AlwaysSample()), + sdktrace.WithSpanProcessor(bsp), + ) + return tp, tp.Shutdown, nil +} + +func initCloudTraceExporterApplicationDefaultCreds() (trace.TracerProvider, func(), error) { + otel.SetErrorHandler(errHandler{}) + return texporter.InstallNewPipeline( + []texporter.Option{ + texporter.WithProjectID(os.Getenv("GOOGLE_CLOUD_PROJECT")), + texporter.WithOnError(func(err error) { + logrus.Debugf("Error with metrics: %v", err) + }), + }, + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) +} + +// initJaegerTraceExporter returns an OpenTelemetry TracerProvider configured to use +// the Jaeger exporter that will send spans to the provided url. The returned +// TracerProvider will also use a Resource configured with all the information +// about the application. +func initJaegerTraceExporter() (trace.TracerProvider, func(), error) { + // Create the Jaeger exporter + exp, err := jaeger.NewRawExporter(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint("http://localhost:14268/api/traces"))) + if err != nil { + return nil, func() {}, err + } + tp := sdktrace.NewTracerProvider( + // Always be sure to batch in production. + sdktrace.WithBatcher(exp), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + // Record information about this application in an Resource. + sdktrace.WithResource(resource.NewWithAttributes( + semconv.ServiceNameKey.String("skaffold-trace"), + attribute.Int64("ID", 1), // TODO(aaron-prindle) verify this value makes sense + )), + ) + return tp, func() {}, nil +} diff --git a/pkg/skaffold/instrumentation/export_test.go b/pkg/skaffold/instrumentation/export_test.go index 91df8f8d530..264b51d04b9 100644 --- a/pkg/skaffold/instrumentation/export_test.go +++ b/pkg/skaffold/instrumentation/export_test.go @@ -209,7 +209,7 @@ func TestInitCloudMonitoring(t *testing.T) { }, }, { - name: "key not present returns nill err", + name: "key not present returns nil err", fileSystem: &testutil.FakeFileSystem{ Files: map[string][]byte{}, }, diff --git a/pkg/skaffold/instrumentation/new.go b/pkg/skaffold/instrumentation/new.go new file mode 100644 index 00000000000..fe96502b48b --- /dev/null +++ b/pkg/skaffold/instrumentation/new.go @@ -0,0 +1,40 @@ +/* +Copyright 2021 The Skaffold Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package instrumentation + +import ( + "context" + + "github.com/sirupsen/logrus" + + latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" +) + +// Init initializes the skaffold metrics and trace tooling built on top of open-telemetry (otel) +func Init(configs []*latestV1.SkaffoldConfig, user string, opts ...TraceExporterOption) { + InitMeterFromConfig(configs, user) + InitTraceFromEnvVar() +} + +func ShutdownAndFlush(ctx context.Context, exitCode int) { + if err := ExportMetrics(exitCode); err != nil { + logrus.Debugf("error exporting metrics %v", err) + } + if err := TracerShutdown(ctx); err != nil { + logrus.Debugf("error shutting down tracer %v", err) + } +} diff --git a/pkg/skaffold/instrumentation/trace.go b/pkg/skaffold/instrumentation/trace.go new file mode 100644 index 00000000000..0144030a789 --- /dev/null +++ b/pkg/skaffold/instrumentation/trace.go @@ -0,0 +1,123 @@ +/* +Copyright 2020 The Skaffold Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package instrumentation + +import ( + "context" + "fmt" + "os" + "runtime" + "sync" + + "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +var traceEnabled bool +var traceInitOnce sync.Once + +var tracerProvider trace.TracerProvider +var tracerShutdown func(context.Context) error = func(context.Context) error { return nil } +var tracerInitErr error + +// InitTraceFromEnvVar initializes the singleton skaffold tracer from the SKAFFOLD_TRACE env variable. +// The code here is a wrapper around the opentelemetry(otel) trace libs for usability +// When SKAFFOLD_TRACE is set, this will setup the proper tracer provider (& exporter), +// configures otel to use this tracer provider and saves the tracer provider shutdown function +// to be used globally so that it can be run before skaffold exits. +func InitTraceFromEnvVar(opts ...TraceExporterOption) (trace.TracerProvider, func(context.Context) error, error) { + traceInitOnce.Do(func() { + _, skaffTraceEnv := os.LookupEnv("SKAFFOLD_TRACE") + _, otelTraceExporterEnv := os.LookupEnv("OTEL_TRACES_EXPORTER") + if skaffTraceEnv || otelTraceExporterEnv { + traceEnabled = true + } + if traceEnabled { + tp, shutdown, err := initTraceExporter(opts...) + tracerInitErr = err + if err == nil && skaffTraceEnv { // if only OTEL_TRACES_EXPORTER, tp set automatically + otel.SetTracerProvider(tp) + tracerProvider = tp + tracerShutdown = shutdown + } + } + }) + if tracerInitErr != nil { + logrus.Debugf("error initializing tracing: %v", tracerInitErr) + } + return tracerProvider, tracerShutdown, tracerInitErr +} + +// TracerShutdown is a function used to flush all current running spans and make sure they are exported. This should be called once +// at the end of a skaffold run to properly shutdown and export all spans for the singleton. +func TracerShutdown(ctx context.Context) error { + traceInitOnce = sync.Once{} + return tracerShutdown(ctx) +} + +// StartTrace uses the otel trace provider to export timing spans (with optional attributes) to the chosen exporter +// via the value set in SKAFFOLD_TRACE. Tracing is done via metadata stored in a context.Context. This means that +// to properly get parent/child traces, callers should use the returned context for subsequent calls in skaffold. +// The returned function should be called to end the trace span, for example this can be done with +// the form: _, endTrace = StartTrace...; defer endTrace() +func StartTrace(ctx context.Context, name string, attributes ...map[string]string) (context.Context, func(options ...trace.SpanOption)) { + if traceEnabled { + _, file, ln, _ := runtime.Caller(1) + tracer := otel.Tracer(file) + ctx, span := tracer.Start(ctx, name) + for _, attrs := range attributes { + for k, v := range attrs { + span.SetAttributes(attribute.Key(k).String(v)) + } + } + // currently Cloud Trace doesn't show the package in the UI, hack to get package information in Cloud Trace + span.SetAttributes(attribute.Key("source_file").String(fmt.Sprintf("%s:%d", file, ln))) + return ctx, span.End + } + return ctx, func(options ...trace.SpanOption) {} +} + +// TraceEndError adds an "error" attribute with value err.Error() to a span during it's end/shutdown callback +// This fnx is intended to used with the StartTrace callback - "endTrace" when an error occurs during the code path +// of trace, ex: endTrace(instrumentation.TraceEndError(err)); return nil, err +func TraceEndError(err error) trace.SpanOption { + if traceEnabled { + return trace.WithAttributes(attribute.KeyValue{Key: "error", Value: attribute.StringValue(PII(err.Error()))}) + } + return nil +} + +// AddAttributesToCurrentSpanFromContext adds the attributes from the input map to the span pulled from the current context. +// This is useful when additional attributes should be added to a parent span but the span object is not directly accessible. +func AddAttributesToCurrentSpanFromContext(ctx context.Context, attrs map[string]string) { + if traceEnabled { + span := trace.SpanFromContext(ctx) + for k, v := range attrs { + span.SetAttributes(attribute.Key(k).String(v)) + } + } +} + +// PII stub function tracking trace attributes that have PII in them. Currently no trace information is uploaded so +// PII values are not an issue but if in the future they are uploaded this will need to properly strip PII +func PII(s string) string { + // TODO(#5885) add functionality + // currently this is a stub for tracking PII attributes + return s +} diff --git a/pkg/skaffold/instrumentation/trace_test.go b/pkg/skaffold/instrumentation/trace_test.go new file mode 100644 index 00000000000..2314137a663 --- /dev/null +++ b/pkg/skaffold/instrumentation/trace_test.go @@ -0,0 +1,175 @@ +/* +Copyright 2021 The Skaffold Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package instrumentation + +import ( + "bytes" + "context" + "encoding/json" + "os" + "strings" + "testing" + "time" + + "github.com/GoogleContainerTools/skaffold/testutil" +) + +func TestInitCloudTrace(t *testing.T) { + tests := []struct { + shouldError bool + traceProviderIsNil bool + isConcurrentTrace bool + name string + traceEnvVar string + parentSpans []string + childSpans []string + }{ + { + name: "SKAFFOLD_TRACE=stdout, verify spans output to stdout and spans are sequential", + traceEnvVar: "stdout", + parentSpans: []string{"SequentialSpanOne", "SequentialSpanTwo"}, + }, + { + name: "SKAFFOLD_TRACE=stdout, verify spans output to stdout and spans are concurrent", + traceEnvVar: "stdout", + parentSpans: []string{"ConcurrentSpanOne", "ConcurrentSpanTwo"}, + isConcurrentTrace: true, + }, + { + name: "SKAFFOLD_TRACE=stdout, verify spans output to stdout and parent/child relationship exists spans", + traceEnvVar: "stdout", + parentSpans: []string{"ParentSpanOne"}, + childSpans: []string{"ChildSpanOne"}, + isConcurrentTrace: true, + }, + } + for _, test := range tests { + testutil.Run(t, test.name, func(t *testutil.T) { + if len(test.traceEnvVar) > 0 { + os.Setenv("SKAFFOLD_TRACE", test.traceEnvVar) + defer os.Unsetenv(("SKAFFOLD_TRACE")) + } + var b bytes.Buffer + func() { + ctx := context.Background() + tp, _, err := InitTraceFromEnvVar(WithWriter(&b)) + t.CheckErrorAndDeepEqual(test.shouldError, err, test.traceProviderIsNil || test.shouldError, tp == nil) + defer func() { _ = TracerShutdown(ctx) }() + + for _, pName := range test.parentSpans { + ctx, endTrace := StartTrace(ctx, pName) + for _, cName := range test.childSpans { + _, endTrace := StartTrace(ctx, cName) + if test.isConcurrentTrace { + defer endTrace() + } else { + endTrace() + } + } + if test.isConcurrentTrace { + defer endTrace() + } else { + endTrace() + } + time.Sleep(1 * time.Millisecond) + } + }() + if len(test.parentSpans) > 0 { + var spans SpanArray + err := json.Unmarshal(b.Bytes(), &spans) + if err != nil { + t.Errorf("unexpected error occurred unmarshalling trace spans %v: %v", b.String(), err) + } + t.CheckTrue(len(spans) == len(test.parentSpans)+len(test.childSpans)) + for i := range spans { + if strings.Contains(spans[i].Name, "Parent") { + t.CheckTrue(spans[i].Childspancount > 0) + } + if strings.Contains(spans[i].Name, "Child") { + // 0000000000000000 value for Spanid means parent does not exist for a span. Should be set to parent Spanid if + // span is a child span + t.CheckTrue(spans[i].Parent.Spanid != "0000000000000000") + } + + if i == 0 { + continue + } // skipping first span for comparing spans for sequential/concurrent tests + lastEndtime, err := time.Parse(time.RFC3339, spans[i-1].Endtime) + if err != nil { + t.Errorf("unexpected error occurred parsing trace span Endtime %v: %v", b.String(), err) + } + startime, err := time.Parse(time.RFC3339, spans[i].Starttime) + if err != nil { + t.Errorf("unexpected error occurred parsing trace span Endtime %v: %v", b.String(), err) + } + if test.isConcurrentTrace { + t.CheckTrue(!lastEndtime.Before(startime)) + } else { + // sequential ordering of traces + t.CheckTrue(lastEndtime.Before(startime)) + } + } + } + }) + } +} + +type SpanArray []struct { + Spancontext Spancontext `json:"SpanContext"` + Parent Parent `json:"Parent"` + Spankind int `json:"SpanKind"` + Name string `json:"Name"` + Starttime string `json:"StartTime"` + Endtime string `json:"EndTime"` + Attributes interface{} `json:"Attributes"` + Messageevents interface{} `json:"MessageEvents"` + Links interface{} `json:"Links"` + Statuscode string `json:"StatusCode"` + Statusmessage string `json:"StatusMessage"` + Droppedattributecount int `json:"DroppedAttributeCount"` + Droppedmessageeventcount int `json:"DroppedMessageEventCount"` + Droppedlinkcount int `json:"DroppedLinkCount"` + Childspancount int `json:"ChildSpanCount"` + Resource []Resource `json:"Resource"` + Instrumentationlibrary Instrumentationlibrary `json:"InstrumentationLibrary"` +} +type Spancontext struct { + Traceid string `json:"TraceID"` + Spanid string `json:"SpanID"` + Traceflags string `json:"TraceFlags"` + Tracestate interface{} `json:"TraceState"` + Remote bool `json:"Remote"` +} +type Parent struct { + Traceid string `json:"TraceID"` + Spanid string `json:"SpanID"` + Traceflags string `json:"TraceFlags"` + Tracestate interface{} `json:"TraceState"` + Remote bool `json:"Remote"` +} +type Value struct { + Type string `json:"Type"` + Value string `json:"Value"` +} +type Resource struct { + Key string `json:"Key"` + Value Value `json:"Value"` +} +type Instrumentationlibrary struct { + Name string `json:"Name"` + Version string `json:"Version"` +} diff --git a/pkg/skaffold/kubernetes/manifest/images.go b/pkg/skaffold/kubernetes/manifest/images.go index b3602b31620..4a36e5acbf7 100644 --- a/pkg/skaffold/kubernetes/manifest/images.go +++ b/pkg/skaffold/kubernetes/manifest/images.go @@ -17,10 +17,14 @@ limitations under the License. package manifest import ( + "context" + "strconv" + "github.com/sirupsen/logrus" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/docker" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/warnings" ) @@ -58,11 +62,18 @@ func (is *imageSaver) Visit(o map[string]interface{}, k string, v interface{}) b } // ReplaceImages replaces image names in a list of manifests. -func (l *ManifestList) ReplaceImages(builds []graph.Artifact) (ManifestList, error) { +func (l *ManifestList) ReplaceImages(ctx context.Context, builds []graph.Artifact) (ManifestList, error) { + _, endTrace := instrumentation.StartTrace(ctx, "ReplaceImages", map[string]string{ + "manifestEntries": strconv.Itoa(len(*l)), + "numImagesReplaced": strconv.Itoa(len(builds)), + }) + defer endTrace() + replacer := newImageReplacer(builds) updated, err := l.Visit(replacer) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, replaceImageErr(err) } diff --git a/pkg/skaffold/kubernetes/manifest/images_test.go b/pkg/skaffold/kubernetes/manifest/images_test.go index 9bc835ac3d3..8b7109ff2ac 100644 --- a/pkg/skaffold/kubernetes/manifest/images_test.go +++ b/pkg/skaffold/kubernetes/manifest/images_test.go @@ -17,6 +17,7 @@ limitations under the License. package manifest import ( + "context" "testing" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" @@ -126,7 +127,7 @@ spec: fakeWarner := &warnings.Collect{} t.Override(&warnings.Printf, fakeWarner.Warnf) - resultManifest, err := manifests.ReplaceImages(builds) + resultManifest, err := manifests.ReplaceImages(context.TODO(), builds) t.CheckNoError(err) t.CheckDeepEqual(expected.String(), resultManifest.String()) @@ -140,7 +141,7 @@ func TestReplaceEmptyManifest(t *testing.T) { manifests := ManifestList{[]byte(""), []byte(" ")} expected := ManifestList{} - resultManifest, err := manifests.ReplaceImages(nil) + resultManifest, err := manifests.ReplaceImages(context.TODO(), nil) testutil.CheckErrorAndDeepEqual(t, false, err, expected.String(), resultManifest.String()) } @@ -148,7 +149,7 @@ func TestReplaceEmptyManifest(t *testing.T) { func TestReplaceInvalidManifest(t *testing.T) { manifests := ManifestList{[]byte("INVALID")} - _, err := manifests.ReplaceImages(nil) + _, err := manifests.ReplaceImages(context.TODO(), nil) testutil.CheckError(t, true, err) } @@ -161,7 +162,7 @@ image: - value2 `)} - output, err := manifests.ReplaceImages(nil) + output, err := manifests.ReplaceImages(context.TODO(), nil) testutil.CheckErrorAndDeepEqual(t, false, err, manifests.String(), output.String()) } diff --git a/pkg/skaffold/kubernetes/portforward/forwarder_manager.go b/pkg/skaffold/kubernetes/portforward/forwarder_manager.go index 21c5763fb71..f22f20860d5 100644 --- a/pkg/skaffold/kubernetes/portforward/forwarder_manager.go +++ b/pkg/skaffold/kubernetes/portforward/forwarder_manager.go @@ -28,6 +28,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/constants" debugging "github.com/GoogleContainerTools/skaffold/pkg/skaffold/debug" eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubectl" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes" latestV1 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/schema/latest/v1" @@ -110,9 +111,13 @@ func (p *ForwarderManager) Start(ctx context.Context, namespaces []string) error } eventV2.TaskInProgress(constants.PortForward) + ctx, endTrace := instrumentation.StartTrace(ctx, "Start") + defer endTrace() + for _, f := range p.forwarders { if err := f.Start(ctx, namespaces); err != nil { eventV2.TaskFailed(constants.PortForward, err) + endTrace(instrumentation.TraceEndError(err)) return err } } diff --git a/pkg/skaffold/render/renderer/renderer.go b/pkg/skaffold/render/renderer/renderer.go index 0092c3d5563..92f9e876191 100644 --- a/pkg/skaffold/render/renderer/renderer.go +++ b/pkg/skaffold/render/renderer/renderer.go @@ -85,7 +85,7 @@ func (r *SkaffoldRenderer) Render(ctx context.Context, out io.Writer, builds []g if err != nil { return err } - manifests, err = manifests.ReplaceImages(builds) + manifests, err = manifests.ReplaceImages(ctx, builds) if err != nil { return err } diff --git a/pkg/skaffold/runner/v1/apply.go b/pkg/skaffold/runner/v1/apply.go index 75059f24bad..ddc2a871e33 100644 --- a/pkg/skaffold/runner/v1/apply.go +++ b/pkg/skaffold/runner/v1/apply.go @@ -25,6 +25,7 @@ import ( deployutil "github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/util" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" ) // Apply sends Kubernetes manifests to the cluster. @@ -50,12 +51,15 @@ func (r *SkaffoldRunner) applyResources(ctx context.Context, out io.Writer, arti return fmt.Errorf("unable to connect to Kubernetes: %w", err) } + ctx, endTrace := instrumentation.StartTrace(ctx, "applyResources_LoadImagesIntoCluster") if len(localImages) > 0 && r.runCtx.Cluster.LoadImages { err := r.loadImagesIntoCluster(ctx, out, localImages) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } } + endTrace() deployOut, postDeployFn, err := deployutil.WithLogFile(time.Now().Format(deployutil.TimeFormat)+".log", out, r.runCtx.Muted()) if err != nil { @@ -63,13 +67,15 @@ func (r *SkaffoldRunner) applyResources(ctx context.Context, out io.Writer, arti } event.DeployInProgress() + ctx, endTrace = instrumentation.StartTrace(ctx, "applyResources_Deploying") + defer endTrace() namespaces, err := r.deployer.Deploy(ctx, deployOut, artifacts) postDeployFn() if err != nil { event.DeployFailed(err) + endTrace(instrumentation.TraceEndError(err)) return err } - r.hasDeployed = true event.DeployComplete() r.runCtx.UpdateNamespaces(namespaces) diff --git a/pkg/skaffold/runner/v1/deploy.go b/pkg/skaffold/runner/v1/deploy.go index 0e8b41e5367..a08f402dda6 100644 --- a/pkg/skaffold/runner/v1/deploy.go +++ b/pkg/skaffold/runner/v1/deploy.go @@ -31,6 +31,7 @@ import ( "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event" eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" kubernetesclient "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/client" kubectx "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes/context" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/output" @@ -126,11 +127,15 @@ See https://skaffold.dev/docs/pipeline-stages/taggers/#how-tagging-works`) event.DeployInProgress() eventV2.TaskInProgress(constants.Deploy) + ctx, endTrace := instrumentation.StartTrace(ctx, "Deploy_Deploying") + defer endTrace() + namespaces, err := r.deployer.Deploy(ctx, deployOut, artifacts) postDeployFn() if err != nil { event.DeployFailed(err) eventV2.TaskFailed(constants.Deploy, err) + endTrace(instrumentation.TraceEndError(err)) return err } @@ -139,8 +144,10 @@ See https://skaffold.dev/docs/pipeline-stages/taggers/#how-tagging-works`) statusCheckOut, postStatusCheckFn, err := deployutil.WithStatusCheckLogFile(time.Now().Format(deployutil.TimeFormat)+".log", out, r.runCtx.Muted()) defer postStatusCheckFn() if err != nil { + endTrace(instrumentation.TraceEndError(err)) return err } + event.DeployComplete() eventV2.TaskSucceeded(constants.Deploy) r.runCtx.UpdateNamespaces(namespaces) @@ -211,6 +218,9 @@ func (r *SkaffoldRunner) performStatusCheck(ctx context.Context, out io.Writer) } eventV2.TaskInProgress(constants.StatusCheck) + ctx, endTrace := instrumentation.StartTrace(ctx, "performStatusCheck_WaitForDeploymentToStabilize") + defer endTrace() + start := time.Now() output.Default.Fprintln(out, "Waiting for deployments to stabilize...") diff --git a/pkg/skaffold/runner/v1/dev.go b/pkg/skaffold/runner/v1/dev.go index f1a31f523f4..d2360121aa6 100644 --- a/pkg/skaffold/runner/v1/dev.go +++ b/pkg/skaffold/runner/v1/dev.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "io" + "strconv" "time" "github.com/sirupsen/logrus" @@ -69,14 +70,19 @@ func (r *SkaffoldRunner) doDev(ctx context.Context, out io.Writer, logger *logge // if any action is going to be performed, reset the monitor's changed component tracker for debouncing defer r.monitor.Reset() defer r.listener.LogWatchToUser(out) + event.DevLoopInProgress(r.devIteration) eventV2.InitializeState(r.runCtx) eventV2.TaskInProgress(constants.DevLoop) defer func() { r.devIteration++ }() eventV2.LogMetaEvent() + ctx, endTrace := instrumentation.StartTrace(ctx, "doDev_DevLoopInProgress", map[string]string{ + "devIteration": strconv.Itoa(r.devIteration), + }) meterUpdated := false if needsSync { + childCtx, endTrace := instrumentation.StartTrace(ctx, "doDev_needsSync") defer func() { r.changeSet.ResetSync() r.intents.ResetSync() @@ -88,20 +94,24 @@ func (r *SkaffoldRunner) doDev(ctx context.Context, out io.Writer, logger *logge output.Default.Fprintf(out, "Syncing %d files for %s\n", fileCount, s.Image) fileSyncInProgress(fileCount, s.Image) - if err := r.syncer.Sync(ctx, s); err != nil { + if err := r.syncer.Sync(childCtx, s); err != nil { logrus.Warnln("Skipping deploy due to sync error:", err) fileSyncFailed(fileCount, s.Image, err) event.DevLoopFailedInPhase(r.devIteration, constants.Sync, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace(instrumentation.TraceEndError(err)) + return nil } fileSyncSucceeded(fileCount, s.Image) } + endTrace() } var bRes []graph.Artifact if needsBuild { + childCtx, endTrace := instrumentation.StartTrace(ctx, "doDev_needsBuild") event.ResetStateOnBuild() defer func() { r.changeSet.ResetBuild() @@ -113,19 +123,22 @@ func (r *SkaffoldRunner) doDev(ctx context.Context, out io.Writer, logger *logge } var err error - bRes, err = r.Build(ctx, out, r.changeSet.NeedsRebuild()) + bRes, err = r.Build(childCtx, out, r.changeSet.NeedsRebuild()) if err != nil { logrus.Warnln("Skipping test and deploy due to build error:", err) event.DevLoopFailedInPhase(r.devIteration, constants.Build, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace(instrumentation.TraceEndError(err)) return nil } r.changeSet.Redeploy() needsDeploy = deployIntent + endTrace() } // Trigger retest when there are newly rebuilt artifacts or untested previous artifacts; and it's not explicitly skipped if (len(bRes) > 0 || needsTest) && !r.runCtx.SkipTests() { + childCtx, endTrace := instrumentation.StartTrace(ctx, "doDev_needsTest") event.ResetStateOnTest() defer func() { r.changeSet.ResetTest() @@ -138,17 +151,20 @@ func (r *SkaffoldRunner) doDev(ctx context.Context, out io.Writer, logger *logge bRes = append(bRes, a) } } - if err := r.Test(ctx, out, bRes); err != nil { + if err := r.Test(childCtx, out, bRes); err != nil { if needsDeploy { logrus.Warnln("Skipping deploy due to test error:", err) } event.DevLoopFailedInPhase(r.devIteration, constants.Test, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace(instrumentation.TraceEndError(err)) return nil } + endTrace() } if needsDeploy { + childCtx, endTrace := instrumentation.StartTrace(ctx, "doDev_needsDeploy") event.ResetStateOnDeploy() defer func() { r.changeSet.ResetDeploy() @@ -159,18 +175,21 @@ func (r *SkaffoldRunner) doDev(ctx context.Context, out io.Writer, logger *logge if !meterUpdated { instrumentation.AddDevIteration("deploy") } - if err := r.Deploy(ctx, out, r.Builds); err != nil { + if err := r.Deploy(childCtx, out, r.Builds); err != nil { logrus.Warnln("Skipping deploy due to error:", err) event.DevLoopFailedInPhase(r.devIteration, constants.Deploy, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace(instrumentation.TraceEndError(err)) return nil } - if err := forwarderManager.Start(ctx, r.runCtx.GetNamespaces()); err != nil { + if err := forwarderManager.Start(childCtx, r.runCtx.GetNamespaces()); err != nil { logrus.Warnln("Port forwarding failed:", err) } + endTrace() } event.DevLoopComplete(r.devIteration) eventV2.TaskSucceeded(constants.DevLoop) + endTrace() logger.Unmute() return nil } @@ -183,6 +202,9 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la eventV2.TaskInProgress(constants.DevLoop) defer func() { r.devIteration++ }() eventV2.LogMetaEvent() + ctx, endTrace := instrumentation.StartTrace(ctx, "Dev", map[string]string{ + "devIteration": strconv.Itoa(r.devIteration), + }) g := getTransposeGraph(artifacts) // Watch artifacts @@ -219,6 +241,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la ); err != nil { event.DevLoopFailedWithErrorCode(r.devIteration, proto.StatusCode_DEVINIT_REGISTER_BUILD_DEPS, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("watching files for artifact %q: %w", artifact.ImageName, err) } } @@ -233,6 +256,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la ); err != nil { event.DevLoopFailedWithErrorCode(r.devIteration, proto.StatusCode_DEVINIT_REGISTER_TEST_DEPS, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("watching test files: %w", err) } } @@ -244,6 +268,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la ); err != nil { event.DevLoopFailedWithErrorCode(r.devIteration, proto.StatusCode_DEVINIT_REGISTER_DEPLOY_DEPS, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("watching files for deployer: %w", err) } @@ -254,6 +279,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la ); err != nil { event.DevLoopFailedWithErrorCode(r.devIteration, proto.StatusCode_DEVINIT_REGISTER_CONFIG_DEP, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("watching skaffold configuration %q: %w", r.runCtx.ConfigurationFile(), err) } @@ -263,6 +289,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la if err := sync.Init(ctx, artifacts); err != nil { event.DevLoopFailedWithErrorCode(r.devIteration, proto.StatusCode_SYNC_INIT_ERROR, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("exiting dev mode because initializing sync state failed: %w", err) } @@ -271,6 +298,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la if err != nil { event.DevLoopFailedInPhase(r.devIteration, constants.Build, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("exiting dev mode because first build failed: %w", err) } // First test @@ -278,6 +306,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la if err = r.Test(ctx, out, bRes); err != nil { event.DevLoopFailedInPhase(r.devIteration, constants.Build, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("exiting dev mode because test failed after first build: %w", err) } } @@ -295,6 +324,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la if err := r.Deploy(ctx, out, r.Builds); err != nil { event.DevLoopFailedInPhase(r.devIteration, constants.Deploy, err) eventV2.TaskFailed(constants.DevLoop, err) + endTrace() return fmt.Errorf("exiting dev mode because first deploy failed: %w", err) } @@ -316,6 +346,7 @@ func (r *SkaffoldRunner) Dev(ctx context.Context, out io.Writer, artifacts []*la event.DevLoopComplete(r.devIteration) eventV2.TaskSucceeded(constants.DevLoop) + endTrace() r.devIteration++ return r.listener.WatchForChanges(ctx, out, func() error { return r.doDev(ctx, out, logger, forwarderManager) diff --git a/pkg/skaffold/runner/v1/new.go b/pkg/skaffold/runner/v1/new.go index 087aca78bbc..0dcd89bc183 100644 --- a/pkg/skaffold/runner/v1/new.go +++ b/pkg/skaffold/runner/v1/new.go @@ -37,6 +37,7 @@ import ( eventV2 "github.com/GoogleContainerTools/skaffold/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/filemon" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/graph" + "github.com/GoogleContainerTools/skaffold/pkg/skaffold/instrumentation" pkgkubectl "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubectl" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes" "github.com/GoogleContainerTools/skaffold/pkg/skaffold/runner" @@ -57,9 +58,12 @@ func NewForConfig(runCtx *runcontext.RunContext) (*SkaffoldRunner, error) { eventV2.InitializeState(runCtx) eventV2.LogMetaEvent() kubectlCLI := pkgkubectl.NewCLI(runCtx, "") + _, endTrace := instrumentation.StartTrace(context.Background(), "NewForConfig") + defer endTrace() tagger, err := tag.NewTaggerMux(runCtx) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("creating tagger: %w", err) } @@ -72,6 +76,7 @@ func NewForConfig(runCtx *runcontext.RunContext) (*SkaffoldRunner, error) { return runner.GetBuilder(runCtx, store, sourceDependencies, p) }) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("creating builder: %w", err) } isLocalImage := func(imageName string) (bool, error) { @@ -80,31 +85,38 @@ func NewForConfig(runCtx *runcontext.RunContext) (*SkaffoldRunner, error) { labeller := label.NewLabeller(runCtx.AddSkaffoldLabels(), runCtx.CustomLabels()) tester, err := getTester(runCtx, isLocalImage) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("creating tester: %w", err) } syncer := getSyncer(runCtx) var deployer deploy.Deployer deployer, err = getDeployer(runCtx, labeller.Labels()) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("creating deployer: %w", err) } depLister := func(ctx context.Context, artifact *latestV1.Artifact) ([]string, error) { + ctx, endTrace := instrumentation.StartTrace(ctx, "NewForConfig_depLister") + defer endTrace() + buildDependencies, err := sourceDependencies.SingleArtifactDependencies(ctx, artifact) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } testDependencies, err := tester.TestDependencies(artifact) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, err } - return append(buildDependencies, testDependencies...), nil } artifactCache, err := cache.NewCache(runCtx, isLocalImage, depLister, g, store) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("initializing cache: %w", err) } @@ -117,6 +129,7 @@ func NewForConfig(runCtx *runcontext.RunContext) (*SkaffoldRunner, error) { intents, intentChan := setupIntents(runCtx) rtrigger, err := trigger.NewTrigger(runCtx, intents.IsAnyAutoEnabled) if err != nil { + endTrace(instrumentation.TraceEndError(err)) return nil, fmt.Errorf("creating watch trigger: %w", err) }