From 064cc685f4c1312d1db22f31e8b823cef0692f18 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Wed, 16 Feb 2022 16:29:53 -0500 Subject: [PATCH] Updated logging and tracing Updating low level spans (right above bridge/guest connection) to use tracing/log-level sampler. Set span kind to client/server, as required, for bridge RPC calls. Added traces and spans to public functions and wait goroutines in containerd-shim-runhcs. The sampler in cmd/containerd-shim-runhcs applies underneath main: `service` calls exposed to containerd use trace samples, and that is inherited by calls beneath them. Exceptions are made for internal functions that are called with context.Background. In places where spans did not seem necessary, added `Trace()` calls. Additionally, if the context was propagated to other calls, or further logging was done, added the entry used for `Trace` to the context for further propagation to callees. Updated internal/cmd to use spans within `.Start()` and `.Wait()`. Reduced the number of `INFO` level logs printed, downgrading to `DEBUG`. Moved traces and spans from inside `Once.Do()`, so that they are always logged, even if the `Once` does not execute. 
Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/events.go | 2 +- cmd/containerd-shim-runhcs-v1/exec_clone.go | 11 +- cmd/containerd-shim-runhcs-v1/exec_hcs.go | 64 ++++++-- .../exec_wcow_podsandbox.go | 29 +++- cmd/containerd-shim-runhcs-v1/main.go | 2 +- cmd/containerd-shim-runhcs-v1/pod.go | 22 ++- cmd/containerd-shim-runhcs-v1/serve.go | 56 +++++-- cmd/containerd-shim-runhcs-v1/service.go | 13 +- cmd/containerd-shim-runhcs-v1/task_hcs.go | 154 ++++++++++++++---- .../task_wcow_podsandbox.go | 61 +++++-- cmd/ncproxy/ncproxy.go | 3 +- cmd/ncproxy/run.go | 2 +- cmd/ncproxy/server.go | 6 +- computestorage/attach.go | 2 +- computestorage/destroy.go | 2 +- computestorage/detach.go | 2 +- computestorage/export.go | 2 +- computestorage/format.go | 2 +- computestorage/import.go | 2 +- computestorage/initialize.go | 2 +- computestorage/mount.go | 2 +- computestorage/setup.go | 4 +- container.go | 2 +- hcn/hcnsupport.go | 2 +- internal/cmd/cmd.go | 73 ++++++--- internal/cmd/diag.go | 7 + internal/cmd/io.go | 19 ++- internal/cmd/io_binary.go | 26 ++- internal/cmd/io_npipe.go | 16 +- internal/devices/assigned_devices.go | 2 +- internal/gcs/bridge.go | 7 +- internal/gcs/container.go | 31 ++-- internal/gcs/guestconnection.go | 12 +- internal/gcs/iochannel.go | 4 + internal/gcs/process.go | 8 +- internal/gcs/protocol.go | 73 +++++---- internal/guest/bridge/bridge.go | 21 ++- internal/guest/kmsg/kmsg.go | 2 +- internal/guest/runtime/hcsv2/container.go | 3 +- internal/guest/runtime/hcsv2/process.go | 9 +- internal/guest/transport/vsock.go | 16 +- internal/hcs/process.go | 48 ++++-- internal/hcs/system.go | 54 +++--- internal/hcsoci/create.go | 32 +++- internal/hcsoci/devices.go | 2 +- internal/hcsoci/hcsdoc_lcow.go | 3 +- internal/hcsoci/hcsdoc_wcow.go | 12 +- internal/hcsoci/network.go | 18 +- internal/hcsoci/resources.go | 2 +- internal/layers/layers.go | 42 +++-- internal/resources/resources.go | 12 +- internal/uvm/computeagent.go | 21 ++- internal/uvm/create.go 
| 2 +- internal/uvm/create_lcow.go | 5 +- internal/uvm/create_wcow.go | 14 +- internal/uvm/network.go | 14 +- internal/uvm/plan9.go | 26 ++- internal/uvm/start.go | 2 +- internal/uvm/vsmb.go | 34 +++- .../hcsshim/computestorage/attach.go | 2 +- .../hcsshim/computestorage/destroy.go | 2 +- .../hcsshim/computestorage/detach.go | 2 +- .../hcsshim/computestorage/export.go | 2 +- .../hcsshim/computestorage/format.go | 2 +- .../hcsshim/computestorage/import.go | 2 +- .../hcsshim/computestorage/initialize.go | 2 +- .../Microsoft/hcsshim/computestorage/mount.go | 2 +- .../Microsoft/hcsshim/computestorage/setup.go | 4 +- .../github.com/Microsoft/hcsshim/container.go | 2 +- .../Microsoft/hcsshim/hcn/hcnsupport.go | 2 +- .../Microsoft/hcsshim/internal/cmd/cmd.go | 73 ++++++--- .../Microsoft/hcsshim/internal/cmd/diag.go | 7 + .../Microsoft/hcsshim/internal/cmd/io.go | 19 ++- .../hcsshim/internal/cmd/io_binary.go | 26 ++- .../hcsshim/internal/cmd/io_npipe.go | 16 +- .../internal/devices/assigned_devices.go | 2 +- .../Microsoft/hcsshim/internal/gcs/bridge.go | 7 +- .../hcsshim/internal/gcs/container.go | 31 ++-- .../hcsshim/internal/gcs/guestconnection.go | 12 +- .../hcsshim/internal/gcs/iochannel.go | 4 + .../Microsoft/hcsshim/internal/gcs/process.go | 8 +- .../hcsshim/internal/gcs/protocol.go | 73 +++++---- .../Microsoft/hcsshim/internal/hcs/process.go | 48 ++++-- .../Microsoft/hcsshim/internal/hcs/system.go | 54 +++--- .../hcsshim/internal/hcsoci/create.go | 32 +++- .../hcsshim/internal/hcsoci/devices.go | 2 +- .../hcsshim/internal/hcsoci/hcsdoc_lcow.go | 3 +- .../hcsshim/internal/hcsoci/hcsdoc_wcow.go | 12 +- .../hcsshim/internal/hcsoci/network.go | 18 +- .../hcsshim/internal/hcsoci/resources.go | 2 +- .../hcsshim/internal/layers/layers.go | 42 +++-- .../Microsoft/hcsshim/internal/log/context.go | 99 +++++++++++ .../Microsoft/hcsshim/internal/log/format.go | 47 ++++++ .../Microsoft/hcsshim/internal/log/g.go | 23 --- .../hcsshim/internal/logfields/fields.go | 33 +++- 
.../Microsoft/hcsshim/internal/oc/exporter.go | 61 +++++-- .../Microsoft/hcsshim/internal/oc/span.go | 65 ++++++++ .../hcsshim/internal/resources/resources.go | 12 +- .../hcsshim/internal/uvm/computeagent.go | 21 ++- .../Microsoft/hcsshim/internal/uvm/create.go | 2 +- .../hcsshim/internal/uvm/create_lcow.go | 5 +- .../hcsshim/internal/uvm/create_wcow.go | 14 +- .../Microsoft/hcsshim/internal/uvm/network.go | 14 +- .../Microsoft/hcsshim/internal/uvm/plan9.go | 26 ++- .../Microsoft/hcsshim/internal/uvm/start.go | 2 +- .../Microsoft/hcsshim/internal/uvm/vsmb.go | 34 +++- 106 files changed, 1515 insertions(+), 579 deletions(-) create mode 100644 test/vendor/github.com/Microsoft/hcsshim/internal/log/context.go create mode 100644 test/vendor/github.com/Microsoft/hcsshim/internal/log/format.go delete mode 100644 test/vendor/github.com/Microsoft/hcsshim/internal/log/g.go diff --git a/cmd/containerd-shim-runhcs-v1/events.go b/cmd/containerd-shim-runhcs-v1/events.go index 267b315d40..c6a71c4717 100644 --- a/cmd/containerd-shim-runhcs-v1/events.go +++ b/cmd/containerd-shim-runhcs-v1/events.go @@ -35,7 +35,7 @@ func (e *eventPublisher) close() error { } func (e *eventPublisher) publishEvent(ctx context.Context, topic string, event interface{}) (err error) { - ctx, span := trace.StartSpan(ctx, "publishEvent") + ctx, span := oc.StartTraceSpan(ctx, "publishEvent") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( diff --git a/cmd/containerd-shim-runhcs-v1/exec_clone.go b/cmd/containerd-shim-runhcs-v1/exec_clone.go index 078149f140..74be06206f 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_clone.go +++ b/cmd/containerd-shim-runhcs-v1/exec_clone.go @@ -6,9 +6,12 @@ import ( "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/uvm" 
"github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) func newClonedExec( @@ -25,7 +28,7 @@ func newClonedExec( "tid": tid, "eid": id, // Init exec ID is always same as Task ID "bundle": bundle, - }).Debug("newClonedExec") + }).Trace("newClonedExec") he := &hcsExec{ events: events, @@ -61,6 +64,12 @@ type clonedExec struct { } func (ce *clonedExec) Start(ctx context.Context) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "clonedExec::Start") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ce.tid), + trace.StringAttribute(logfields.ExecID, ce.id)) + // A cloned exec should never initialize the container as it should // already be running. return ce.startInternal(ctx, false) diff --git a/cmd/containerd-shim-runhcs-v1/exec_hcs.go b/cmd/containerd-shim-runhcs-v1/exec_hcs.go index b3b8e84ee9..9b96458e37 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/exec_hcs.go @@ -17,6 +17,8 @@ import ( "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/signals" @@ -43,7 +45,7 @@ func newHcsExec( "eid": id, // Init exec ID is always same as Task ID "bundle": bundle, "wcow": isWCOW, - }).Debug("newHcsExec") + }).Trace("newHcsExec") he := &hcsExec{ events: events, @@ -249,12 +251,24 @@ func (he *hcsExec) startInternal(ctx context.Context, initializeContainer bool) } func (he *hcsExec) Start(ctx context.Context) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsExec::Start") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, he.tid), + 
trace.StringAttribute(logfields.ExecID, he.id)) + // If he.id == he.tid then this is the init exec. // We need to initialize the container itself before starting this exec. return he.startInternal(ctx, he.id == he.tid) } -func (he *hcsExec) Kill(ctx context.Context, signal uint32) error { +func (he *hcsExec) Kill(ctx context.Context, signal uint32) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsExec::Kill") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, he.tid), + trace.StringAttribute(logfields.ExecID, he.id)) + he.sl.Lock() defer he.sl.Unlock() switch he.state { @@ -267,7 +281,6 @@ func (he *hcsExec) Kill(ctx context.Context, signal uint32) error { supported = he.host == nil || he.host.SignalProcessSupported() } var options interface{} - var err error if he.isWCOW { var opt *guestresource.SignalProcessOptionsWCOW opt, err = signals.ValidateWCOW(int(signal), supported) @@ -307,6 +320,14 @@ func (he *hcsExec) Kill(ctx context.Context, signal uint32) error { } func (he *hcsExec) ResizePty(ctx context.Context, width, height uint32) error { + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.TaskID: he.tid, + logfields.ExecID: he.id, + "width": width, + "height": height, + }) + etr.Trace("hcsExec::ResizePty") + he.sl.Lock() defer he.sl.Unlock() if !he.io.Terminal() { @@ -320,6 +341,11 @@ func (he *hcsExec) ResizePty(ctx context.Context, width, height uint32) error { } func (he *hcsExec) CloseIO(ctx context.Context, stdin bool) error { + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.TaskID: he.tid, + logfields.ExecID: he.id, + }) + etr.Trace("hcsExec::CloseIO") // If we have any upstream IO we close the upstream connection. This will // unblock the `io.Copy` in the `Start()` call which will signal // `he.p.CloseStdin()`. 
If `he.io.Stdin()` is already closed this is safe to @@ -334,6 +360,13 @@ func (he *hcsExec) Wait() *task.StateResponse { } func (he *hcsExec) ForceExit(ctx context.Context, status int) { + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.TaskID: he.tid, + logfields.ExecID: he.id, + "status": status, + }) + etr.Trace("hcsExec::ForceExit") + he.sl.Lock() defer he.sl.Unlock() if he.state != shimExecStateExited { @@ -369,10 +402,15 @@ func (he *hcsExec) ForceExit(ctx context.Context, status int) { // We DO NOT send the async `TaskExit` event because we never would have sent // the `TaskStart`/`TaskExecStarted` event. func (he *hcsExec) exitFromCreatedL(ctx context.Context, status int) { - if he.state != shimExecStateExited { - // Avoid logging the force if we already exited gracefully - log.G(ctx).WithField("status", status).Debug("hcsExec::exitFromCreatedL") + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.TaskID: he.tid, + logfields.ExecID: he.id, + "status": status, + "state": he.state, + }) + etr.Trace("hcsExec::exitFromCreatedL") + if he.state != shimExecStateExited { // Unblock the container exit goroutine he.processDoneOnce.Do(func() { close(he.processDone) }) // Transition this exec @@ -414,13 +452,15 @@ func (he *hcsExec) exitFromCreatedL(ctx context.Context, status int) { // // 7. Finally, save the UVM and this container as a template if specified. 
func (he *hcsExec) waitForExit() { - ctx, span := trace.StartSpan(context.Background(), "hcsExec::waitForExit") + ctx, span := oc.StartTraceSpan(context.Background(), "hcsExec::waitForExit") + var err error // this will only save the last error, since we dont return early on error defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( trace.StringAttribute("tid", he.tid), trace.StringAttribute("eid", he.id)) - err := he.p.Process.Wait() + err = he.p.Process.Wait() if err != nil { log.G(ctx).WithError(err).Error("failed process Wait") } @@ -431,9 +471,11 @@ func (he *hcsExec) waitForExit() { code, err := he.p.Process.ExitCode() if err != nil { - log.G(ctx).WithError(err).Error("failed to get ExitCode") + log.G(ctx).WithError(err).WithField(logfields.ExecID, he.id).Error("failed to get hcsExec ExitCode") } else { - log.G(ctx).WithField("exitCode", code).Debug("exited") + log.G(ctx).WithFields(logrus.Fields{ + "exitCode": code, + logfields.ExecID: he.id}).Debug("hcsExec exited") } he.sl.Lock() @@ -476,7 +518,7 @@ func (he *hcsExec) waitForExit() { // // This MUST be called via a goroutine at exec create. 
func (he *hcsExec) waitForContainerExit() { - ctx, span := trace.StartSpan(context.Background(), "hcsExec::waitForContainerExit") + ctx, span := oc.StartTraceSpan(context.Background(), "hcsExec::waitForContainerExit") defer span.End() span.AddAttributes( trace.StringAttribute("tid", he.tid), diff --git a/cmd/containerd-shim-runhcs-v1/exec_wcow_podsandbox.go b/cmd/containerd-shim-runhcs-v1/exec_wcow_podsandbox.go index 0a4b4e7eb7..68647c1d95 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_wcow_podsandbox.go +++ b/cmd/containerd-shim-runhcs-v1/exec_wcow_podsandbox.go @@ -6,6 +6,7 @@ import ( "time" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" eventstypes "github.com/containerd/containerd/api/events" containerd_v1_types "github.com/containerd/containerd/api/types/task" "github.com/containerd/containerd/errdefs" @@ -20,7 +21,7 @@ func newWcowPodSandboxExec(ctx context.Context, events publisher, tid, bundle st "tid": tid, "eid": tid, // Init exec ID is always same as Task ID "bundle": bundle, - }).Debug("newWcowPodSandboxExec") + }).Trace("newWcowPodSandboxExec") wpse := &wcowPodSandboxExec{ events: events, @@ -121,6 +122,11 @@ func (wpse *wcowPodSandboxExec) Status() *task.StateResponse { } func (wpse *wcowPodSandboxExec) Start(ctx context.Context) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.TaskID: wpse.tid, + logfields.ExecID: wpse.tid, + }).Trace("wcowPodSandboxExec::Start") + wpse.sl.Lock() defer wpse.sl.Unlock() if wpse.state != shimExecStateCreated { @@ -142,6 +148,12 @@ func (wpse *wcowPodSandboxExec) Start(ctx context.Context) error { } func (wpse *wcowPodSandboxExec) Kill(ctx context.Context, signal uint32) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.TaskID: wpse.tid, + logfields.ExecID: wpse.tid, + "signal": signal, + }).Trace("wcowPodSandboxExec::Kill") + wpse.sl.Lock() defer wpse.sl.Unlock() switch wpse.state { @@ -170,6 +182,12 @@ func (wpse *wcowPodSandboxExec) Kill(ctx 
context.Context, signal uint32) error { } func (wpse *wcowPodSandboxExec) ResizePty(ctx context.Context, width, height uint32) error { + // useless function, but trace could help track down who is calling it + log.G(ctx).WithFields(logrus.Fields{ + logfields.TaskID: wpse.tid, + logfields.ExecID: wpse.tid, + }).Trace("wcowPodSandboxExec::ResizePty") + wpse.sl.Lock() defer wpse.sl.Unlock() // We will never have IO for a sandbox container so we wont have a tty @@ -187,11 +205,18 @@ func (wpse *wcowPodSandboxExec) Wait() *task.StateResponse { } func (wpse *wcowPodSandboxExec) ForceExit(ctx context.Context, status int) { + ctx, etr := log.S(ctx, logrus.Fields{ //nolint:ineffassign,staticcheck + logfields.TaskID: wpse.tid, + logfields.ExecID: wpse.tid, + "status": status, + }) + etr.Trace("wcowPodSandboxExec::ForceExit") + wpse.sl.Lock() defer wpse.sl.Unlock() if wpse.state != shimExecStateExited { // Avoid logging the force if we already exited gracefully - log.G(ctx).WithField("status", status).Debug("wcowPodSandboxExec::ForceExit") + etr.WithField("status", status).Debug("forcing exit") wpse.state = shimExecStateExited wpse.exitStatus = 1 diff --git a/cmd/containerd-shim-runhcs-v1/main.go b/cmd/containerd-shim-runhcs-v1/main.go index 375b2dcb46..6b17142bc1 100644 --- a/cmd/containerd-shim-runhcs-v1/main.go +++ b/cmd/containerd-shim-runhcs-v1/main.go @@ -84,7 +84,7 @@ func main() { ) // Register our OpenCensus logrus exporter - trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) + trace.ApplyConfig(trace.Config{DefaultSampler: oc.DefaultSampler}) trace.RegisterExporter(&oc.LogrusExporter{}) app := cli.NewApp() diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index b241d3439c..20c34eb38a 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -8,6 +8,8 @@ import ( "sync" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + 
"github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/osversion" @@ -18,6 +20,7 @@ import ( "github.com/containerd/containerd/runtime/v2/task" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "go.opencensus.io/trace" "golang.org/x/sync/errgroup" ) @@ -62,7 +65,10 @@ type shimPod interface { } func createPod(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (_ shimPod, err error) { - log.G(ctx).WithField("tid", req.ID).Debug("createPod") + ctx, span := oc.StartTraceSpan(ctx, "createPod") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("tid", req.ID)) if osversion.Build() < osversion.RS5 { return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "pod support is not available on Windows versions previous to RS5 (%d)", osversion.RS5) @@ -286,6 +292,8 @@ func (p *pod) ID() string { } func (p *pod) GetCloneAnnotations(ctx context.Context, s *specs.Spec) (bool, string, error) { + log.G(ctx).WithField(logfields.TaskID, p.id).Trace("pod::GetCloneAnnotations") + isTemplate, templateID, err := oci.ParseCloneAnnotations(ctx, s) if err != nil { return false, "", err @@ -296,6 +304,11 @@ func (p *pod) GetCloneAnnotations(ctx context.Context, s *specs.Spec) (bool, str } func (p *pod) CreateTask(ctx context.Context, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { + ctx, span := oc.StartTraceSpan(ctx, "pod::CreateTask") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, p.id)) + if req.ID == p.id { return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "task with id: '%s' already exists", req.ID) } @@ -371,7 +384,12 @@ func (p *pod) GetTask(tid string) (shimTask, error) { return raw.(shimTask), nil } -func (p *pod) KillTask(ctx context.Context, tid, eid 
string, signal uint32, all bool) error { +func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all bool) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "pod::KillTask") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, p.id)) + t, err := p.GetTask(tid) if err != nil { return err diff --git a/cmd/containerd-shim-runhcs-v1/serve.go b/cmd/containerd-shim-runhcs-v1/serve.go index 6a973f1c3c..110242c1a6 100644 --- a/cmd/containerd-shim-runhcs-v1/serve.go +++ b/cmd/containerd-shim-runhcs-v1/serve.go @@ -12,10 +12,6 @@ import ( "unsafe" "github.com/Microsoft/go-winio" - runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - "github.com/Microsoft/hcsshim/internal/extendedtask" - "github.com/Microsoft/hcsshim/internal/shimdiag" - "github.com/Microsoft/hcsshim/pkg/octtrpc" "github.com/containerd/containerd/log" "github.com/containerd/containerd/runtime/v2/task" "github.com/containerd/ttrpc" @@ -25,7 +21,15 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/urfave/cli" + "go.opencensus.io/trace" "golang.org/x/sys/windows" + + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/extendedtask" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/pkg/octtrpc" ) var svc *service @@ -44,7 +48,7 @@ var serveCommand = cli.Command{ Usage: "is the task id a Kubernetes sandbox id", }, }, - Action: func(ctx *cli.Context) error { + Action: func(ctx *cli.Context) (err error) { // On Windows the serve command is internally used to actually create // the process that hosts the containerd/ttrpc entrypoint to the Runtime // V2 API's. 
The model requires this 2nd invocation of the shim process @@ -67,6 +71,20 @@ var serveCommand = cli.Command{ // the events. var lerrs chan error + // TODO: use a context WithCancel() context and defer the cancellation + // after verying that canceling context/service won't prematurely bring + // down service and cleanup in background go-routines + cctx, span := oc.StartSpan(context.Background(), ctx.App.Name+"::serve") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.BoolAttribute("isSandbox", ctx.Bool("is-sandbox")), + trace.StringAttribute("namespace", namespaceFlag), + trace.StringAttribute("address", addressFlag), + trace.StringAttribute("binary", containerdBinaryFlag), + trace.StringAttribute("socket", ctx.String("socket")), + trace.StringAttribute("id", idFlag), + ) // Default values for shim options. shimOpts := &runhcsopts.Options{ @@ -75,6 +93,7 @@ var serveCommand = cli.Command{ } // containerd passes the shim options protobuf via stdin. + // todo: merge defaults, above, with passed in opts newShimOpts, err := readOptions(os.Stdin) if err != nil { return errors.Wrap(err, "failed to read shim options from stdin") @@ -84,24 +103,24 @@ var serveCommand = cli.Command{ } if shimOpts.Debug && shimOpts.LogLevel != "" { - logrus.Warning("Both Debug and LogLevel specified, Debug will be overriden") + log.G(cctx).Warning("Both Debug and LogLevel specified, Debug will be overridden") } + lvl := logrus.GetLevel() // For now keep supporting the debug option, this used to be the only way to specify a different logging // level for the shim. if shimOpts.Debug { - logrus.SetLevel(logrus.DebugLevel) + lvl = logrus.DebugLevel } - // If log level is specified, set the corresponding logrus logging level. This overrides the debug option // (unless the level being asked for IS debug also, then this doesn't do much). 
if shimOpts.LogLevel != "" { - lvl, err := logrus.ParseLevel(shimOpts.LogLevel) + lvl, err = logrus.ParseLevel(shimOpts.LogLevel) if err != nil { return errors.Wrapf(err, "failed to parse shim log level %q", shimOpts.LogLevel) } - logrus.SetLevel(lvl) } + logrus.SetLevel(lvl) switch shimOpts.DebugType { case runhcsopts.Options_NPIPE: @@ -158,6 +177,10 @@ var serveCommand = cli.Command{ os.Stdin.Close() + log.G(cctx). + WithField(logfields.Options, shimOpts.String()). + Debug("shim options") + // Force the cli.ErrWriter to be os.Stdout for this. We use stderr for // the panic.log attached via start. cli.ErrWriter = os.Stdout @@ -207,7 +230,7 @@ var serveCommand = cli.Command{ // Serve loops infinitely unless s.Shutdown or s.Close are called. // Passed in context is used as parent context for handling requests, // but canceliing does not bring down ttrpc service. - if err := trapClosedConnErr(s.Serve(context.Background(), sl)); err != nil { + if err := trapClosedConnErr(s.Serve(cctx, sl)); err != nil { logrus.WithError(err).Fatal("containerd-shim: ttrpc server failure") serrs <- err return @@ -222,12 +245,11 @@ var serveCommand = cli.Command{ return err case <-time.After(2 * time.Millisecond): // TODO: Contribute a change to ttrpc so that you can: - // - // go func () { errs <- s.Serve() } - // select { - // case <-errs: - // case <-s.Ready(): - // } + // go func () { errs <- s.Serve() } + // select { + // case <-errs: + // case <-s.Ready(): + // } // This is our best indication that we have not errored on creation // and are successfully serving the API. 
diff --git a/cmd/containerd-shim-runhcs-v1/service.go b/cmd/containerd-shim-runhcs-v1/service.go index c1a080eff5..4bc82b3316 100644 --- a/cmd/containerd-shim-runhcs-v1/service.go +++ b/cmd/containerd-shim-runhcs-v1/service.go @@ -2,6 +2,7 @@ package main import ( "context" + "fmt" "os" "runtime" "strings" @@ -103,7 +104,7 @@ func (s *service) State(ctx context.Context, req *task.StateRequest) (resp *task span.AddAttributes( trace.StringAttribute("status", resp.Status.String()), trace.Int64Attribute("exitStatus", int64(resp.ExitStatus)), - trace.StringAttribute("exitedAt", resp.ExitedAt.String())) + trace.StringAttribute("exitedAt", resp.ExitedAt.Format(time.RFC3339Nano))) } oc.SetSpanStatus(span, err) }() @@ -133,8 +134,7 @@ func (s *service) Create(ctx context.Context, req *task.CreateTaskRequest) (resp span.AddAttributes( trace.StringAttribute("tid", req.ID), trace.StringAttribute("bundle", req.Bundle), - // trace.StringAttribute("rootfs", req.Rootfs), TODO: JTERRY75 - - // OpenCensus doesnt support slice like our logrus hook + trace.StringAttribute("rootfs", fmt.Sprintf("%+v", req.Rootfs)), trace.BoolAttribute("terminal", req.Terminal), trace.StringAttribute("stdin", req.Stdin), trace.StringAttribute("stdout", req.Stdout), @@ -180,7 +180,7 @@ func (s *service) Delete(ctx context.Context, req *task.DeleteRequest) (resp *ta span.AddAttributes( trace.Int64Attribute("pid", int64(resp.Pid)), trace.Int64Attribute("exitStatus", int64(resp.ExitStatus)), - trace.StringAttribute("exitedAt", resp.ExitedAt.String())) + trace.StringAttribute("exitedAt", resp.ExitedAt.Format(time.RFC3339Nano))) } oc.SetSpanStatus(span, err) }() @@ -397,7 +397,7 @@ func (s *service) Wait(ctx context.Context, req *task.WaitRequest) (resp *task.W if resp != nil { span.AddAttributes( trace.Int64Attribute("exitStatus", int64(resp.ExitStatus)), - trace.StringAttribute("exitedAt", resp.ExitedAt.String())) + trace.StringAttribute("exitedAt", resp.ExitedAt.Format(time.RFC3339Nano))) } 
oc.SetSpanStatus(span, err) }() @@ -457,7 +457,8 @@ func (s *service) Shutdown(ctx context.Context, req *task.ShutdownRequest) (_ *g defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("tid", req.ID)) + span.AddAttributes(trace.StringAttribute("tid", req.ID), + trace.BoolAttribute("now", req.Now)) if s.isSandbox { span.AddAttributes(trace.StringAttribute("pod-id", s.tid)) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 9076e2b08f..944f192cee 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -29,6 +29,8 @@ import ( "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/jobcontainers" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/processorinfo" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" @@ -42,8 +44,11 @@ import ( const bytesPerMB = 1024 * 1024 -func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimTask, error) { - log.G(ctx).WithField("tid", req.ID).Debug("newHcsStandaloneTask") +func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { + ctx, span := oc.StartTraceSpan(ctx, "newHcsStandaloneTask") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, req.ID)) ct, _, err := oci.GetSandboxTypeAndID(s.Annotations) if err != nil { @@ -116,12 +121,27 @@ func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.Creat // createContainer is a generic call to return either a process/hypervisor isolated container, or a job container // based on what is set in the 
OCI spec. -func createContainer(ctx context.Context, id, owner, netNS string, s *specs.Spec, parent *uvm.UtilityVM, shimOpts *runhcsopts.Options) (cow.Container, *resources.Resources, error) { - var ( - err error - container cow.Container - resources *resources.Resources - ) +func createContainer( + ctx context.Context, + id, owner, netNS string, + s *specs.Spec, + parent *uvm.UtilityVM, + shimOpts *runhcsopts.Options) (container cow.Container, resources *resources.Resources, err error) { + ctx, span := oc.StartTraceSpan(ctx, "createContainer") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.ID, id), + trace.StringAttribute("owner", owner), + trace.StringAttribute("netNS", netNS)) + if parent != nil { + span.AddAttributes(trace.StringAttribute("parent", parent.ID())) + } + + log.G(ctx).WithFields(logrus.Fields{ + "spec": fmt.Sprintf("%+v", s), + "shimOptions": shimOpts.String(), + }).Debug("createContainer options") if oci.IsJobContainer(s) { container, resources, err = jobcontainers.Create(ctx, id, s) @@ -158,10 +178,14 @@ func newHcsTask( ownsParent bool, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { - log.G(ctx).WithFields(logrus.Fields{ - "tid": req.ID, - "ownsParent": ownsParent, - }).Debug("newHcsTask") + ctx, span := oc.StartTraceSpan(ctx, "newHcsTask") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, req.ID), + trace.BoolAttribute("ownParent", ownsParent)) + if parent != nil { + span.AddAttributes(trace.StringAttribute("parent", parent.ID())) + } owner := filepath.Base(os.Args[0]) isTemplate := oci.ParseAnnotationsSaveAsTemplate(ctx, s) @@ -273,11 +297,15 @@ func newClonedHcsTask( req *task.CreateTaskRequest, s *specs.Spec, templateID string) (_ shimTask, err error) { - log.G(ctx).WithFields(logrus.Fields{ - "tid": req.ID, - "ownsParent": ownsParent, - "templateid": templateID, - 
}).Debug("newClonedHcsTask") + ctx, span := oc.StartTraceSpan(ctx, "newClonedHcsTask") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, req.ID), + trace.BoolAttribute("ownParent", ownsParent), + trace.StringAttribute("templateID", templateID)) + if parent != nil { + span.AddAttributes(trace.StringAttribute("parent", parent.ID())) + } owner := filepath.Base(os.Args[0]) @@ -310,7 +338,14 @@ func newClonedHcsTask( netNS = s.Windows.Network.NetworkNamespace } - // This is a cloned task. Use the templateid as the ID of the container here + // cloneHcsTask doesnt rely on createContainer, so it doesnt log spec and shim options + log.G(ctx).WithFields(logrus.Fields{ + "netNS": netNS, + "spec": fmt.Sprintf("%+v", s), + "shimOptions": shimOpts.String(), + }).Debug("newClonedHcsTask options") + + // This is a cloned task. Use the templateID as the ID of the container here // because that's the ID of this container inside the UVM. 
opts := hcsoci.CreateOptions{ ID: templateID, @@ -462,7 +497,13 @@ func (ht *hcsTask) ID() string { return ht.id } -func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest, spec *specs.Process) error { +func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest, spec *specs.Process) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::CreateExec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id), + trace.StringAttribute(logfields.ExecID, req.ExecID)) + ht.ecl.Lock() defer ht.ecl.Unlock() @@ -517,7 +558,15 @@ func (ht *hcsTask) GetExec(eid string) (shimExec, error) { return raw.(shimExec), nil } -func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) error { +func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::KillExec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id), + trace.StringAttribute(logfields.ExecID, eid), + trace.Int64Attribute("signal", int64(signal)), + trace.BoolAttribute("all", all)) + e, err := ht.GetExec(eid) if err != nil { return err @@ -566,7 +615,13 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all return e.Kill(ctx, signal) } -func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, time.Time, error) { +func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (_ int, _ uint32, _ time.Time, err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::DeleteExec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id), + trace.StringAttribute(logfields.ExecID, eid)) + e, err := ht.GetExec(eid) if err != nil { return 0, 0, time.Time{}, err @@ -648,6 +703,9 @@ 
func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim } func (ht *hcsTask) Pids(ctx context.Context) ([]runhcsopts.ProcessDetails, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: ht.id}) + etr.Trace("hcsTask::Pids") + // Map all user created exec's to pid/exec-id pidMap := make(map[int]string) ht.execs.Range(func(key, value interface{}) bool { @@ -691,9 +749,10 @@ func (ht *hcsTask) Wait() *task.StateResponse { } func (ht *hcsTask) waitInitExit(destroyContainer bool) { - ctx, span := trace.StartSpan(context.Background(), "hcsTask::waitInitExit") + ctx, span := oc.StartTraceSpan(context.Background(), "hcsTask::waitInitExit") defer span.End() - span.AddAttributes(trace.StringAttribute("tid", ht.id)) + span.AddAttributes(trace.StringAttribute("tid", ht.id), + trace.BoolAttribute("destroyContainer", destroyContainer)) // Wait for it to exit on its own ht.init.Wait() @@ -722,7 +781,7 @@ func (ht *hcsTask) waitInitExit(destroyContainer bool) { // Note: For Windows process isolated containers there is no host virtual // machine so this should not be called. func (ht *hcsTask) waitForHostExit() { - ctx, span := trace.StartSpan(context.Background(), "hcsTask::waitForHostExit") + ctx, span := oc.StartTraceSpan(context.Background(), "hcsTask::waitForHostExit") defer span.End() span.AddAttributes(trace.StringAttribute("tid", ht.id)) @@ -751,9 +810,13 @@ func (ht *hcsTask) waitForHostExit() { // NOTE: For Windows process isolated containers `ht.ownsHost==true && ht.host // == nil`. 
func (ht *hcsTask) close(ctx context.Context) { - ht.closeOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeOnce") + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::closeOnce") + var err error // this will only save the last error, since we don't return early on error + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id)) + ht.closeOnce.Do(func() { // ht.c should never be nil for a real task but in testing we stub // this to avoid a nil dereference. We really should introduce a // method or interface for ht.c operations that we can stub for @@ -766,7 +829,7 @@ func (ht *hcsTask) close(ctx context.Context) { werr = ht.c.Wait() close(ch) }() - err := ht.c.Shutdown(ctx) + err = ht.c.Shutdown(ctx) if err != nil { log.G(ctx).WithError(err).Error("failed to shutdown container") } else { @@ -803,12 +866,12 @@ func (ht *hcsTask) close(ctx context.Context) { } // Release any resources associated with the container. - if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { + if err = resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { log.G(ctx).WithError(err).Error("failed to release container resources") } // Close the container handle invalidating all future access. - if err := ht.c.Close(); err != nil { + if err = ht.c.Close(); err != nil { log.G(ctx).WithError(err).Error("failed to close container") } } @@ -824,18 +887,22 @@ func (ht *hcsTask) close(ctx context.Context) { // // This call is idempotent and safe to call multiple times. 
func (ht *hcsTask) closeHost(ctx context.Context) { - ht.closeHostOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeHostOnce") + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::closeHostOnce") + var err error // this will only save the last error, since we don't return early on error + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id)) + ht.closeHostOnce.Do(func() { if ht.ownsHost && ht.host != nil { - if err := ht.host.Close(); err != nil { + if err = ht.host.Close(); err != nil { log.G(ctx).WithError(err).Error("failed host vm shutdown") } } // Send the `init` exec exit notification always. exit := ht.init.Status() - if err := ht.events.publishEvent( + if err = ht.events.publishEvent( ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{ @@ -852,6 +919,9 @@ func (ht *hcsTask) closeHost(ctx context.Context) { } func (ht *hcsTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequest) (int, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: ht.id}) + etr.Trace("hcsTask::ExecInHost") + cmdReq := &cmd.CmdProcessRequest{ Args: req.Args, Workdir: req.Workdir, @@ -868,10 +938,13 @@ func (ht *hcsTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequ } func (ht *hcsTask) DumpGuestStacks(ctx context.Context) string { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: ht.id}) + etr.Trace("hcsTask::DumpGuestStacks") + if ht.host != nil { stacks, err := ht.host.DumpStacks(ctx) if err != nil { - log.G(ctx).WithError(err).Warn("failed to capture guest stacks") + etr.WithError(err).Warn("failed to capture guest stacks") } else { return stacks } @@ -880,6 +953,9 @@ func (ht *hcsTask) DumpGuestStacks(ctx context.Context) string { } func (ht *hcsTask) Share(ctx context.Context, req *shimdiag.ShareRequest) error { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: ht.id}) + etr.Trace("hcsTask::Share") + if ht.host == nil { return errTaskNotIsolated } 
@@ -919,6 +995,9 @@ func hcsPropertiesToWindowsStats(props *hcsschema.Properties) *stats.Statistics_ } func (ht *hcsTask) Stats(ctx context.Context) (*stats.Statistics, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: ht.id}) + etr.Trace("hcsTask::Stats") + s := &stats.Statistics{} props, err := ht.c.PropertiesV2(ctx, hcsschema.PTStatistics) if err != nil { @@ -945,7 +1024,12 @@ func (ht *hcsTask) Stats(ctx context.Context) (*stats.Statistics, error) { return s, nil } -func (ht *hcsTask) Update(ctx context.Context, req *task.UpdateTaskRequest) error { +func (ht *hcsTask) Update(ctx context.Context, req *task.UpdateTaskRequest) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "hcsTask::Update") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, ht.id)) + resources, err := typeurl.UnmarshalAny(req.Resources) if err != nil { return errors.Wrapf(err, "failed to unmarshal resources for container %s update request", req.ID) diff --git a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go index 576573466c..8fd7ec3f97 100644 --- a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go +++ b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go @@ -10,6 +10,8 @@ import ( "github.com/Microsoft/hcsshim/internal/clone" "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shimdiag" "github.com/Microsoft/hcsshim/internal/uvm" eventstypes "github.com/containerd/containerd/api/events" @@ -19,6 +21,7 @@ import ( "github.com/containerd/typeurl" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -31,7 +34,8 @@ import ( // `parent`. 
When the fake WCOW `init` process exits via `Signal` `parent` will // be forcibly closed by this task. func newWcowPodSandboxTask(ctx context.Context, events publisher, id, bundle string, parent *uvm.UtilityVM, nsid string) shimTask { - log.G(ctx).WithField("tid", id).Debug("newWcowPodSandboxTask") + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: id}) + etr.Trace("newWcowPodSandboxTask") wpst := &wcowPodSandboxTask{ events: events, @@ -108,7 +112,12 @@ func (wpst *wcowPodSandboxTask) GetExec(eid string) (shimExec, error) { return nil, errors.Wrapf(errdefs.ErrNotFound, "exec: '%s' in task: '%s' not found", eid, wpst.id) } -func (wpst *wcowPodSandboxTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) error { +func (wpst *wcowPodSandboxTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) (err error) { + ctx, span := oc.StartTraceSpan(ctx, "wcowPodSandboxTask::KillExec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, wpst.id)) + e, err := wpst.GetExec(eid) if err != nil { return err @@ -123,7 +132,12 @@ func (wpst *wcowPodSandboxTask) KillExec(ctx context.Context, eid string, signal return nil } -func (wpst *wcowPodSandboxTask) DeleteExec(ctx context.Context, eid string) (int, uint32, time.Time, error) { +func (wpst *wcowPodSandboxTask) DeleteExec(ctx context.Context, eid string) (_ int, _ uint32, _ time.Time, err error) { + ctx, span := oc.StartTraceSpan(ctx, "wcowPodSandboxTask::DeleteExec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, wpst.id)) + e, err := wpst.GetExec(eid) if err != nil { return 0, 0, time.Time{}, err @@ -154,6 +168,8 @@ func (wpst *wcowPodSandboxTask) DeleteExec(ctx context.Context, eid string) (int } func (wpst *wcowPodSandboxTask) Pids(ctx context.Context) ([]options.ProcessDetails, error) { + log.G(ctx).WithField(logfields.TaskID, 
wpst.id).Trace("wcowPodSandboxTask::Pids") + return []options.ProcessDetails{ { ProcessID: uint32(wpst.init.Pid()), @@ -174,26 +190,30 @@ func (wpst *wcowPodSandboxTask) Wait() *task.StateResponse { // // This call is idempotent and safe to call multiple times. func (wpst *wcowPodSandboxTask) close(ctx context.Context) { - wpst.closeOnce.Do(func() { - log.G(ctx).Debug("wcowPodSandboxTask::closeOnce") + ctx, span := oc.StartTraceSpan(ctx, "wcowPodSandboxTask::closeOnce") + var err error // this will only save the last error, since we don't return early on error + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, wpst.id)) + wpst.closeOnce.Do(func() { if wpst.host != nil { - if err := wpst.host.TearDownNetworking(ctx, wpst.nsid); err != nil { + if err = wpst.host.TearDownNetworking(ctx, wpst.nsid); err != nil { log.G(ctx).WithError(err).Error("failed to cleanup networking for utility VM") } - if err := wpst.host.Close(); err != nil { + if err = wpst.host.Close(); err != nil { log.G(ctx).WithError(err).Error("failed host vm shutdown") } // cleanup template state if any exists - if err := clone.RemoveSavedTemplateConfig(wpst.host.ID()); err != nil { + if err = clone.RemoveSavedTemplateConfig(wpst.host.ID()); err != nil { log.G(ctx).WithError(err).Error("failed to cleanup template config state for vm") } } // Send the `init` exec exit notification always. 
exit := wpst.init.Status() - if err := wpst.events.publishEvent( + if err = wpst.events.publishEvent( ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{ @@ -210,7 +230,7 @@ func (wpst *wcowPodSandboxTask) close(ctx context.Context) { } func (wpst *wcowPodSandboxTask) waitInitExit() { - ctx, span := trace.StartSpan(context.Background(), "wcowPodSandboxTask::waitInitExit") + ctx, span := oc.StartTraceSpan(context.Background(), "wcowPodSandboxTask::waitInitExit") defer span.End() span.AddAttributes(trace.StringAttribute("tid", wpst.id)) @@ -222,7 +242,7 @@ func (wpst *wcowPodSandboxTask) waitInitExit() { } func (wpst *wcowPodSandboxTask) waitParentExit() { - ctx, span := trace.StartSpan(context.Background(), "wcowPodSandboxTask::waitParentExit") + ctx, span := oc.StartTraceSpan(context.Background(), "wcowPodSandboxTask::waitParentExit") defer span.End() span.AddAttributes(trace.StringAttribute("tid", wpst.id)) @@ -240,6 +260,9 @@ func (wpst *wcowPodSandboxTask) waitParentExit() { } func (wpst *wcowPodSandboxTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequest) (int, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: wpst.id}) + etr.Trace("wcowPodSandboxTask::ExecInHost") + cmdReq := &cmd.CmdProcessRequest{ Args: req.Args, Workdir: req.Workdir, @@ -255,6 +278,9 @@ func (wpst *wcowPodSandboxTask) ExecInHost(ctx context.Context, req *shimdiag.Ex } func (wpst *wcowPodSandboxTask) DumpGuestStacks(ctx context.Context) string { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: wpst.id}) + etr.Trace("wcowPodSandboxTask::DumpGuestStacks") + if wpst.host != nil { stacks, err := wpst.host.DumpStacks(ctx) if err != nil { @@ -266,7 +292,12 @@ func (wpst *wcowPodSandboxTask) DumpGuestStacks(ctx context.Context) string { return "" } -func (wpst *wcowPodSandboxTask) Update(ctx context.Context, req *task.UpdateTaskRequest) error { +func (wpst *wcowPodSandboxTask) Update(ctx context.Context, req *task.UpdateTaskRequest) (err error) { + ctx, 
span := oc.StartTraceSpan(ctx, "wcowPodSandboxTask::Update") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.TaskID, wpst.id)) + if wpst.host == nil { return errTaskNotIsolated } @@ -284,6 +315,9 @@ func (wpst *wcowPodSandboxTask) Update(ctx context.Context, req *task.UpdateTask } func (wpst *wcowPodSandboxTask) Share(ctx context.Context, req *shimdiag.ShareRequest) error { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: wpst.id}) + etr.Trace("wcowPodSandboxTask::Share") + if wpst.host == nil { return errTaskNotIsolated } @@ -291,6 +325,9 @@ func (wpst *wcowPodSandboxTask) Share(ctx context.Context, req *shimdiag.ShareRe } func (wpst *wcowPodSandboxTask) Stats(ctx context.Context) (*stats.Statistics, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.TaskID: wpst.id}) + etr.Trace("wcowPodSandboxTask::Stats") + stats := &stats.Statistics{} if wpst.host == nil { return stats, nil diff --git a/cmd/ncproxy/ncproxy.go b/cmd/ncproxy/ncproxy.go index 922c615741..69f1279913 100644 --- a/cmd/ncproxy/ncproxy.go +++ b/cmd/ncproxy/ncproxy.go @@ -195,7 +195,8 @@ func (s *grpcService) ModifyNIC(ctx context.Context, req *ncproxygrpc.ModifyNICR if settings.Policies == nil || settings.Policies.IovPolicySettings == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") } - log.G(ctx).WithField("iov settings", settings.Policies.IovPolicySettings).Info("ModifyNIC iov settings") + + log.G(ctx).WithField("iov settings", settings.Policies.IovPolicySettings).Debug("ModifyNIC iov settings") iovReqSettings := settings.Policies.IovPolicySettings caReq := &computeagent.ModifyNICInternalRequest{ diff --git a/cmd/ncproxy/run.go b/cmd/ncproxy/run.go index 4e49de4aa5..9fdd0d0795 100644 --- a/cmd/ncproxy/run.go +++ b/cmd/ncproxy/run.go @@ -203,7 +203,7 @@ func run(clicontext *cli.Context) error { return fmt.Errorf("failed to connect to NodeNetworkService at address %s", 
conf.NodeNetSvcAddr) } - log.G(ctx).Infof("Successfully connected to NodeNetworkService at address %s", conf.NodeNetSvcAddr) + log.G(ctx).Debugf("Successfully connected to NodeNetworkService at address %s", conf.NodeNetSvcAddr) netSvcClient := nodenetsvc.NewNodeNetworkServiceClient(client) nodeNetSvcClient = &nodeNetSvcConn{ diff --git a/cmd/ncproxy/server.go b/cmd/ncproxy/server.go index 975650618c..a7cb623b04 100644 --- a/cmd/ncproxy/server.go +++ b/cmd/ncproxy/server.go @@ -113,7 +113,7 @@ func (s *server) serve(ctx context.Context, ttrpcListener net.Listener, grpcList go func() { log.G(ctx).WithFields(logrus.Fields{ "address": s.conf.TTRPCAddr, - }).Info("Serving ncproxy TTRPC service") + }).Debug("Serving ncproxy TTRPC service") // No need to defer close the listener as ttrpc.Serve does this internally. serveErr <- trapClosedConnErr(s.ttrpc.Serve(ctx, ttrpcListener)) @@ -122,7 +122,7 @@ func (s *server) serve(ctx context.Context, ttrpcListener net.Listener, grpcList go func() { log.G(ctx).WithFields(logrus.Fields{ "address": s.conf.GRPCAddr, - }).Info("Serving ncproxy GRPC service") + }).Debug("Serving ncproxy GRPC service") defer grpcListener.Close() serveErr <- trapClosedConnErr(s.grpc.Serve(grpcListener)) @@ -180,7 +180,7 @@ func reconnectComputeAgents(ctx context.Context, agentStore *ncproxystore.Comput } return } - log.G(ctx).WithField("containerID", containerID).Info("reconnected to container's compute agent") + log.G(ctx).WithField("containerID", containerID).Debug("reconnected to container's compute agent") // connection succeeded, add entry in cache map for later // since the servers have not started running, we know that the cache cannot be empty diff --git a/computestorage/attach.go b/computestorage/attach.go index 7f1f2823dd..7f7cbd6e3b 100644 --- a/computestorage/attach.go +++ b/computestorage/attach.go @@ -17,7 +17,7 @@ import ( // // `layerData` is the parent read-only layer data. 
func AttachLayerStorageFilter(ctx context.Context, layerPath string, layerData LayerData) (err error) { - title := "hcsshim.AttachLayerStorageFilter" + title := "hcsshim::AttachLayerStorageFilter" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/destroy.go b/computestorage/destroy.go index 8e28e6c504..596a5635e5 100644 --- a/computestorage/destroy.go +++ b/computestorage/destroy.go @@ -12,7 +12,7 @@ import ( // // `layerPath` is a path to a directory containing the layer to export. func DestroyLayer(ctx context.Context, layerPath string) (err error) { - title := "hcsshim.DestroyLayer" + title := "hcsshim::DestroyLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/detach.go b/computestorage/detach.go index 435473257e..0752c09115 100644 --- a/computestorage/detach.go +++ b/computestorage/detach.go @@ -12,7 +12,7 @@ import ( // // `layerPath` is a path to a directory containing the layer to export. func DetachLayerStorageFilter(ctx context.Context, layerPath string) (err error) { - title := "hcsshim.DetachLayerStorageFilter" + title := "hcsshim::DetachLayerStorageFilter" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/export.go b/computestorage/export.go index 2db6d40399..0b353bc7e2 100644 --- a/computestorage/export.go +++ b/computestorage/export.go @@ -19,7 +19,7 @@ import ( // // `options` are the export options applied to the exported layer. 
func ExportLayer(ctx context.Context, layerPath, exportFolderPath string, layerData LayerData, options ExportLayerOptions) (err error) { - title := "hcsshim.ExportLayer" + title := "hcsshim::ExportLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/format.go b/computestorage/format.go index 61d8d5a634..07c29dd14c 100644 --- a/computestorage/format.go +++ b/computestorage/format.go @@ -41,7 +41,7 @@ func openDisk(path string) (windows.Handle, error) { // // If the VHD is not mounted it will be temporarily mounted. func FormatWritableLayerVhd(ctx context.Context, vhdHandle windows.Handle) (err error) { - title := "hcsshim.FormatWritableLayerVhd" + title := "hcsshim::FormatWritableLayerVhd" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/import.go b/computestorage/import.go index 0c61dab329..1f9b167fa3 100644 --- a/computestorage/import.go +++ b/computestorage/import.go @@ -19,7 +19,7 @@ import ( // // `layerData` is the parent layer data. func ImportLayer(ctx context.Context, layerPath, sourceFolderPath string, layerData LayerData) (err error) { - title := "hcsshim.ImportLayer" + title := "hcsshim::ImportLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/initialize.go b/computestorage/initialize.go index 53ed8ea6ed..0358370f1f 100644 --- a/computestorage/initialize.go +++ b/computestorage/initialize.go @@ -16,7 +16,7 @@ import ( // // `layerData` is the parent read-only layer data. 
func InitializeWritableLayer(ctx context.Context, layerPath string, layerData LayerData) (err error) { - title := "hcsshim.InitializeWritableLayer" + title := "hcsshim::InitializeWritableLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/mount.go b/computestorage/mount.go index fcdbbef814..d50c68e91f 100644 --- a/computestorage/mount.go +++ b/computestorage/mount.go @@ -12,7 +12,7 @@ import ( // GetLayerVhdMountPath returns the volume path for a virtual disk of a writable container layer. func GetLayerVhdMountPath(ctx context.Context, vhdHandle windows.Handle) (path string, err error) { - title := "hcsshim.GetLayerVhdMountPath" + title := "hcsshim::GetLayerVhdMountPath" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/computestorage/setup.go b/computestorage/setup.go index 06aaf841e8..68d353e733 100644 --- a/computestorage/setup.go +++ b/computestorage/setup.go @@ -21,7 +21,7 @@ import ( // // `options` are the options applied while processing the layer. 
func SetupBaseOSLayer(ctx context.Context, layerPath string, vhdHandle windows.Handle, options OsLayerOptions) (err error) { - title := "hcsshim.SetupBaseOSLayer" + title := "hcsshim::SetupBaseOSLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -52,7 +52,7 @@ func SetupBaseOSVolume(ctx context.Context, layerPath, volumePath string, option if osversion.Build() < 19645 { return errors.New("SetupBaseOSVolume is not present on builds older than 19645") } - title := "hcsshim.SetupBaseOSVolume" + title := "hcsshim::SetupBaseOSVolume" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/container.go b/container.go index bfd722898e..1de9625b91 100644 --- a/container.go +++ b/container.go @@ -60,7 +60,7 @@ type container struct { waitCh chan struct{} } -// createComputeSystemAdditionalJSON is read from the environment at initialisation +// createContainerAdditionalJSON is read from the environment at initialization // time. It allows an environment variable to define additional JSON which // is merged in the CreateComputeSystem call to HCS. 
var createContainerAdditionalJSON []byte diff --git a/hcn/hcnsupport.go b/hcn/hcnsupport.go index bacb91feda..0624406ade 100644 --- a/hcn/hcnsupport.go +++ b/hcn/hcnsupport.go @@ -115,7 +115,7 @@ func getSupportedFeatures() (SupportedFeatures, error) { logrus.WithFields(logrus.Fields{ "version": fmt.Sprintf("%+v", globals.Version), "supportedFeatures": fmt.Sprintf("%+v", features), - }).Info("HCN feature check") + }).Debug("HCN feature check") return features, nil } diff --git a/internal/cmd/cmd.go b/internal/cmd/cmd.go index 91a7da0277..5ad3ae22fe 100644 --- a/internal/cmd/cmd.go +++ b/internal/cmd/cmd.go @@ -11,12 +11,16 @@ import ( "sync/atomic" "time" - "github.com/Microsoft/hcsshim/internal/cow" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sync/errgroup" "golang.org/x/sys/windows" + + "github.com/Microsoft/hcsshim/internal/cow" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" + specs "github.com/opencontainers/runtime-spec/specs-go" ) // CmdProcessRequest stores information on command requests made through this package. @@ -29,6 +33,8 @@ type CmdProcessRequest struct { Stderr string } +// todo (helsaawy): replace use of Cmd.Log and switch to log.WithContext + // Cmd represents a command being prepared or run in a process host. type Cmd struct { // Host is the process host in which to launch the process. 
@@ -111,7 +117,7 @@ func Command(host cow.ProcessHost, name string, arg ...string) *Cmd { Spec: &specs.Process{ Args: append([]string{name}, arg...), }, - Log: logrus.NewEntry(logrus.StandardLogger()), + Log: log.G(context.Background()), ExitState: &ExitState{}, } if host.OS() == "windows" { @@ -128,12 +134,30 @@ func Command(host cow.ProcessHost, name string, arg ...string) *Cmd { func CommandContext(ctx context.Context, host cow.ProcessHost, name string, arg ...string) *Cmd { cmd := Command(host, name, arg...) cmd.Context = ctx + cmd.Log = log.G(ctx) return cmd } // Start starts a command. The caller must ensure that if Start succeeds, // Wait is eventually called to clean up resources. -func (c *Cmd) Start() error { +func (c *Cmd) Start() (err error) { + pctx := context.Background() + if c.Context != nil { + pctx = c.Context + } + ctx, span := oc.StartTraceSpan(pctx, "cmd::Start") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cmd", fmt.Sprintf("%q", c.Spec.Args)), + trace.BoolAttribute("isOCI", c.Host.IsOCI()), + trace.StringAttribute("os", c.Host.OS()), + trace.StringAttribute("cwd", c.Spec.Cwd), + trace.StringAttribute("env", fmt.Sprintf("%q", c.Spec.Env))) + + if c.Log == nil { + c.Log = log.G(ctx) + } + c.allDoneCh = make(chan struct{}) var x interface{} if !c.Host.IsOCI() { @@ -182,17 +206,15 @@ func (c *Cmd) Start() error { } x = lpp } - if c.Context != nil && c.Context.Err() != nil { - return c.Context.Err() + if err = ctx.Err(); err != nil { + return err } - p, err := c.Host.CreateProcess(context.TODO(), x) + p, err := c.Host.CreateProcess(ctx, x) if err != nil { return err } c.Process = p - if c.Log != nil { - c.Log = c.Log.WithField("pid", p.Pid()) - } + c.Log = c.Log.WithField("pid", p.Pid()) // Start relaying process IO. stdin, stdout, stderr := p.Stdio() @@ -209,7 +231,7 @@ func (c *Cmd) Start() error { c.stdinErr.Store(err) } // Notify the process that there is no more input. 
- if err := p.CloseStdin(context.TODO()); err != nil && c.Log != nil { + if err := p.CloseStdin(ctx); err != nil && c.Log != nil { c.Log.WithError(err).Warn("failed to close Cmd stdin") } }() @@ -218,7 +240,7 @@ func (c *Cmd) Start() error { if c.Stdout != nil { c.iogrp.Go(func() error { _, err := relayIO(c.Stdout, stdout, c.Log, "stdout") - if err := p.CloseStdout(context.TODO()); err != nil { + if err := p.CloseStdout(ctx); err != nil { c.Log.WithError(err).Warn("failed to close Cmd stdout") } return err @@ -228,7 +250,7 @@ func (c *Cmd) Start() error { if c.Stderr != nil { c.iogrp.Go(func() error { _, err := relayIO(c.Stderr, stderr, c.Log, "stderr") - if err := p.CloseStderr(context.TODO()); err != nil { + if err := p.CloseStderr(ctx); err != nil { c.Log.WithError(err).Warn("failed to close Cmd stderr") } return err @@ -239,20 +261,33 @@ func (c *Cmd) Start() error { go func() { select { case <-c.Context.Done(): - _, _ = c.Process.Kill(context.TODO()) + // Process.Kill (via Process.Signal) will not send an RPC if the + // provided context in is cancelled (bridge.AsyncRPC will end early) + kctx := log.Copy(context.Background(), ctx) + _, _ = c.Process.Kill(kctx) case <-c.allDoneCh: } }() } + return nil } // Wait waits for a command and its IO to complete and closes the underlying // process. It can only be called once. It returns an ExitError if the command // runs and returns a non-zero exit code. 
-func (c *Cmd) Wait() error { +func (c *Cmd) Wait() (err error) { + pctx := context.Background() + if c.Context != nil { + pctx = c.Context + } + _, span := oc.StartTraceSpan(pctx, "cmd::Wait") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cmd", fmt.Sprintf("%q", c.Spec.Args))) + waitErr := c.Process.Wait() - if waitErr != nil && c.Log != nil { + if waitErr != nil { c.Log.WithError(waitErr).Warn("process wait failed") } state := &ExitState{} @@ -271,9 +306,7 @@ func (c *Cmd) Wait() error { case <-t.C: // Close the process to cancel any reads to stdout or stderr. c.Process.Close() - if c.Log != nil { - c.Log.Warn("timed out waiting for stdio relay") - } + c.Log.Warn("timed out waiting for stdio relay") } }() } diff --git a/internal/cmd/diag.go b/internal/cmd/diag.go index f4caff3251..54d320235e 100644 --- a/internal/cmd/diag.go +++ b/internal/cmd/diag.go @@ -5,6 +5,8 @@ import ( "errors" "os/exec" + "github.com/sirupsen/logrus" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/uvm" @@ -12,6 +14,9 @@ import ( // ExecInUvm is a helper function used to execute commands specified in `req` inside the given UVM. func ExecInUvm(ctx context.Context, vm *uvm.UtilityVM, req *CmdProcessRequest) (int, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.UVMID: vm.ID()}) + etr.Trace("cmd::ExecInUVM") + if len(req.Args) == 0 { return 0, errors.New("missing command") } @@ -39,6 +44,8 @@ func ExecInUvm(ctx context.Context, vm *uvm.UtilityVM, req *CmdProcessRequest) ( // ExecInShimHost is a helper function used to execute commands specified in `req` in the shim's // hosting system. 
func ExecInShimHost(ctx context.Context, req *CmdProcessRequest) (int, error) { + log.G(ctx).Trace("cmd::ExecInShimHost") + if len(req.Args) == 0 { return 0, errors.New("missing command") } diff --git a/internal/cmd/io.go b/internal/cmd/io.go index 0912af160c..b7741f8144 100644 --- a/internal/cmd/io.go +++ b/internal/cmd/io.go @@ -8,6 +8,9 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) // UpstreamIO is an interface describing the IO to connect to above the shim. @@ -62,19 +65,21 @@ func NewUpstreamIO(ctx context.Context, id, stdout, stderr, stdin string, termin } // relayIO is a glorified io.Copy that also logs when the copy has completed. -func relayIO(w io.Writer, r io.Reader, log *logrus.Entry, name string) (int64, error) { +func relayIO(w io.Writer, r io.Reader, entry *logrus.Entry, name string) (int64, error) { n, err := io.Copy(w, r) - if log != nil { + if entry != nil { lvl := logrus.DebugLevel - log = log.WithFields(logrus.Fields{ - "file": name, - "bytes": n, + entry = entry.WithFields(logrus.Fields{ + logfields.File: name, + "bytes": n, + "reader": log.FormatIO(r), + "writer": log.FormatIO(w), }) if err != nil { lvl = logrus.ErrorLevel - log = log.WithError(err) + entry = entry.WithError(err) } - log.Log(lvl, "Cmd IO relay complete") + entry.Log(lvl, "Cmd IO relay complete") } return n, err } diff --git a/internal/cmd/io_binary.go b/internal/cmd/io_binary.go index ecfd1fa5ef..16e3dfd0b2 100644 --- a/internal/cmd/io_binary.go +++ b/internal/cmd/io_binary.go @@ -18,6 +18,7 @@ import ( "github.com/sirupsen/logrus" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) const ( @@ -167,17 +168,29 @@ type binaryIO struct { // Close named pipes for container stdout and stderr and wait for the binary process to finish. 
func (b *binaryIO) Close(ctx context.Context) { + ctx, etr := log.S(ctx, logrus.Fields{ //nolint:ineffassign,staticcheck + "binaryCmd": b.cmd.String(), + "binaryProcessID": b.cmd.Process.Pid, + }) + etr.Trace("binaryIO::Close") + b.soutCloser.Do(func() { if b.sout != nil { + etr := etr.WithField(logfields.Pipe, b.stdout) err := b.sout.Close() if err != nil { - log.G(ctx).WithError(err).Errorf("error while closing stdout npipe") + etr.WithError(err).Errorf("error while closing stdout npipe") + } else { + etr.Debug("binaryIO::soutCloser - stdout") } } if b.serr != nil { + etr := etr.WithField(logfields.Pipe, b.stderr) err := b.serr.Close() if err != nil { - log.G(ctx).WithError(err).Errorf("error while closing stderr npipe") + etr.WithError(err).Errorf("error while closing stderr npipe") + } else { + etr.Debug("binaryIO::soutCloser - stderr") } } }) @@ -190,13 +203,14 @@ func (b *binaryIO) Close(ctx context.Context) { select { case err := <-done: if err != nil { - log.G(ctx).WithError(err).Errorf("error while waiting for binary cmd to finish") + etr.WithError(err).Errorf("error while waiting for binary cmd to finish") } case <-time.After(binaryCmdWaitTimeout): - log.G(ctx).Errorf("timeout while waiting for binaryIO process to finish. Killing") + etr := etr.WithField(logfields.Timeout, binaryCmdWaitTimeout.String()) + etr.Errorf("timeout while waiting for binaryIO process to finish. 
Killing") err := b.cmd.Process.Kill() if err != nil { - log.G(ctx).WithError(err).Errorf("error while killing binaryIO process") + etr.WithError(err).Errorf("error while killing binaryIO process") } } }) @@ -278,7 +292,7 @@ func (p *pipe) Read(b []byte) (int, error) { func (p *pipe) Close() error { if err := p.l.Close(); err != nil { - log.G(context.TODO()).WithError(err).Debug("error closing pipe listener") + log.G(context.Background()).WithError(err).Debug("error closing pipe listener") } p.conWg.Wait() if p.con != nil { diff --git a/internal/cmd/io_npipe.go b/internal/cmd/io_npipe.go index 63b9f9b732..c91bdd3dd9 100644 --- a/internal/cmd/io_npipe.go +++ b/internal/cmd/io_npipe.go @@ -13,6 +13,7 @@ import ( winio "github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/cenkalti/backoff/v4" "github.com/sirupsen/logrus" "golang.org/x/sys/windows" @@ -32,7 +33,7 @@ func NewNpipeIO(ctx context.Context, stdin, stdout, stderr string, terminal bool "stdout": stdout, "stderr": stderr, "terminal": terminal, - }).Debug("NewNpipeIO") + }).Trace("NewNpipeIO") nio := &npipeio{ stdin: stdin, @@ -121,7 +122,7 @@ func (nprw *nPipeRetryWriter) Write(p []byte) (n int, err error) { nprw.Conn.Close() newConn, retryErr := nprw.retryDialPipe() if retryErr == nil { - log.G(nprw.ctx).WithField("address", nprw.pipePath).Info("Succeeded in reconnecting to named pipe") + log.G(nprw.ctx).WithField("address", nprw.pipePath).Debug("Succeeded in reconnecting to named pipe") nprw.Conn = newConn continue @@ -196,28 +197,31 @@ type npipeio struct { } func (nio *npipeio) Close(ctx context.Context) { + log.G(ctx).Trace("npipeio::Close") + // winio.win32Pipe.Close currently doesn't return any errors nio.sinCloser.Do(func() { if nio.sin != nil { - log.G(ctx).Debug("npipeio::sinCloser") + log.G(ctx).WithField(logfields.Pipe, nio.stdin).Debug("npipeio::sinCloser") nio.sin.Close() 
} }) nio.outErrCloser.Do(func() { if nio.sout != nil { - log.G(ctx).Debug("npipeio::outErrCloser - stdout") + log.G(ctx).WithField(logfields.Pipe, nio.stdout).Debug("npipeio::outErrCloser - stdout") nio.sout.Close() } if nio.serr != nil { - log.G(ctx).Debug("npipeio::outErrCloser - stderr") + log.G(ctx).WithField(logfields.Pipe, nio.stderr).Debug("npipeio::outErrCloser - stderr") nio.serr.Close() } }) } func (nio *npipeio) CloseStdin(ctx context.Context) { + log.G(ctx).Trace("npipeio::CloseStdin") nio.sinCloser.Do(func() { if nio.sin != nil { - log.G(ctx).Debug("npipeio::sinCloser") + log.G(ctx).WithField(logfields.Pipe, nio.stdin).Debug("npipeio::sinCloser") nio.sin.Close() } }) diff --git a/internal/devices/assigned_devices.go b/internal/devices/assigned_devices.go index 4750776403..a1ae4e0f73 100644 --- a/internal/devices/assigned_devices.go +++ b/internal/devices/assigned_devices.go @@ -44,7 +44,7 @@ func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID string, return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) } vmBusInstanceID := vm.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) - log.G(ctx).WithField("vmbus id", vmBusInstanceID).Info("vmbus instance ID") + log.G(ctx).WithField("vmbus id", vmBusInstanceID).Debug("vmbus instance ID") locationPaths, err = getChildrenDeviceLocationPaths(ctx, vm, vmBusInstanceID, deviceUtilPath) return vpci, locationPaths, err diff --git a/internal/gcs/bridge.go b/internal/gcs/bridge.go index d1650ecb4a..aebf7e4996 100644 --- a/internal/gcs/bridge.go +++ b/internal/gcs/bridge.go @@ -49,6 +49,9 @@ type rpc struct { ch chan struct{} } +// todo(helsaawy): remove bridge.log entry and log based on per-request context, not +// the context that started the bridge + // bridge represents a communcations bridge with the guest. It handles the // transport layer but (mostly) does not parse or construct the message payload. 
type bridge struct { @@ -307,7 +310,7 @@ func (brdg *bridge) recvLoop() error { } brdg.log.WithFields(logrus.Fields{ "payload": string(b), - "type": typ, + "type": typ.String(), "message-id": id}).Debug("bridge receive") switch typ & msgTypeMask { case msgTypeResponse: @@ -395,7 +398,7 @@ func (brdg *bridge) writeMessage(buf *bytes.Buffer, enc *json.Encoder, typ msgTy // Write the message. brdg.log.WithFields(logrus.Fields{ "payload": string(buf.Bytes()[hdrSize:]), - "type": typ, + "type": typ.String(), "message-id": id}).Debug("bridge send") _, err = buf.WriteTo(brdg.conn) if err != nil { diff --git a/internal/gcs/container.go b/internal/gcs/container.go index 6ec5b8b84f..8a21ef1cfa 100644 --- a/internal/gcs/container.go +++ b/internal/gcs/container.go @@ -10,6 +10,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "go.opencensus.io/trace" ) @@ -31,7 +32,7 @@ var _ cow.Container = &Container{} // CreateContainer creates a container using ID `cid` and `cfg`. The request // will likely not be cancellable even if `ctx` becomes done. func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, config interface{}) (_ *Container, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateContainer") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateContainer", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", cid)) @@ -89,17 +90,17 @@ func (c *Container) IsOCI() bool { // Close releases associated with the container. 
func (c *Container) Close() error { - c.closeOnce.Do(func() { - _, span := trace.StartSpan(context.Background(), "gcs::Container::Close") - defer span.End() - span.AddAttributes(trace.StringAttribute("cid", c.id)) - }) + _, span := oc.StartTraceSpan(context.Background(), "gcs::Container::Close") + defer span.End() + span.AddAttributes(trace.StringAttribute("cid", c.id)) + + c.closeOnce.Do(func() {}) return nil } // CreateProcess creates a process in the container. func (c *Container) CreateProcess(ctx context.Context, config interface{}) (_ cow.Process, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::CreateProcess") + ctx, span := oc.StartTraceSpan(ctx, "gcs::Container::CreateProcess", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -114,7 +115,7 @@ func (c *Container) ID() string { // Modify sends a modify request to the container. func (c *Container) Modify(ctx context.Context, config interface{}) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Modify") + ctx, span := oc.StartSpan(ctx, "gcs::Container::Modify", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -129,7 +130,7 @@ func (c *Container) Modify(ctx context.Context, config interface{}) (err error) // Properties returns the requested container properties targeting a V1 schema container. 
func (c *Container) Properties(ctx context.Context, types ...schema1.PropertyType) (_ *schema1.ContainerProperties, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Properties") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Properties", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -148,7 +149,7 @@ func (c *Container) Properties(ctx context.Context, types ...schema1.PropertyTyp // PropertiesV2 returns the requested container properties targeting a V2 schema container. func (c *Container) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::PropertiesV2") + ctx, span := trace.StartSpan(ctx, "gcs::Container::PropertiesV2", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -167,7 +168,7 @@ func (c *Container) PropertiesV2(ctx context.Context, types ...hcsschema.Propert // Start starts the container. func (c *Container) Start(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Start") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Start", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -198,7 +199,7 @@ func (c *Container) shutdown(ctx context.Context, proc rpcProc) error { // might not be terminated by the time the request completes (and might never // terminate). 
func (c *Container) Shutdown(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Shutdown") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Shutdown", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -212,7 +213,7 @@ func (c *Container) Shutdown(ctx context.Context) (err error) { // might not be terminated by the time the request completes (and might never // terminate). func (c *Container) Terminate(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Terminate") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Terminate", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -234,11 +235,11 @@ func (c *Container) Wait() error { } func (c *Container) waitBackground() { - ctx, span := trace.StartSpan(context.Background(), "gcs::Container::waitBackground") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Container::waitBackground") defer span.End() span.AddAttributes(trace.StringAttribute("cid", c.id)) err := c.Wait() - log.G(ctx).Debug("container exited") + log.G(ctx).WithField(logfields.ContainerID, c.id).Debug("container exited") oc.SetSpanStatus(span, err) } diff --git a/internal/gcs/guestconnection.go b/internal/gcs/guestconnection.go index bdf796010c..90d40a4ee3 100644 --- a/internal/gcs/guestconnection.go +++ b/internal/gcs/guestconnection.go @@ -66,7 +66,7 @@ type GuestConnectionConfig struct { // Connect establishes a GCS connection. `gcc.Conn` will be closed by this function. 
func (gcc *GuestConnectionConfig) Connect(ctx context.Context, isColdStart bool) (_ *GuestConnection, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnectionConfig::Connect") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnectionConfig::Connect", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -165,7 +165,7 @@ func (gc *GuestConnection) connect(ctx context.Context, isColdStart bool, initGu // Modify sends a modify settings request to the null container. This is // generally used to prepare virtual hardware that has been added to the guest. func (gc *GuestConnection) Modify(ctx context.Context, settings interface{}) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::Modify") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::Modify", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -178,7 +178,7 @@ func (gc *GuestConnection) Modify(ctx context.Context, settings interface{}) (er } func (gc *GuestConnection) DumpStacks(ctx context.Context) (response string, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DumpStacks") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DumpStacks", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -191,7 +191,7 @@ func (gc *GuestConnection) DumpStacks(ctx context.Context) (response string, err } func (gc *GuestConnection) DeleteContainerState(ctx context.Context, cid string) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DeleteContainerState") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DeleteContainerState", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", cid)) @@ -214,7 +214,7 @@ func (gc *GuestConnection) Close() error { // CreateProcess creates a process in the container host. 
func (gc *GuestConnection) CreateProcess(ctx context.Context, settings interface{}) (_ cow.Process, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateProcess") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateProcess", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -266,7 +266,7 @@ func (gc *GuestConnection) notify(ntf *containerNotification) error { if ch == nil { return fmt.Errorf("container %s not found", cid) } - logrus.WithField(logfields.ContainerID, cid).Info("container terminated in guest") + logrus.WithField(logfields.ContainerID, cid).Debug("container terminated in guest") close(ch) return nil } diff --git a/internal/gcs/iochannel.go b/internal/gcs/iochannel.go index 5af6b81aaf..a2a032a12d 100644 --- a/internal/gcs/iochannel.go +++ b/internal/gcs/iochannel.go @@ -65,3 +65,7 @@ func (c *ioChannel) Write(b []byte) (int, error) { } return c.c.Write(b) } + +func (c *ioChannel) Addr() net.Addr { + return c.l.Addr() +} diff --git a/internal/gcs/process.go b/internal/gcs/process.go index 628cb8b0d7..dec513e9a2 100644 --- a/internal/gcs/process.go +++ b/internal/gcs/process.go @@ -122,7 +122,7 @@ func (gc *GuestConnection) exec(ctx context.Context, cid string, params interfac // Close releases resources associated with the process and closes the // associated standard IO streams. func (p *Process) Close() error { - ctx, span := trace.StartSpan(context.Background(), "gcs::Process::Close") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Process::Close") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), @@ -211,7 +211,7 @@ func (p *Process) Pid() int { // ResizeConsole requests that the pty associated with the process resize its // window. 
func (p *Process) ResizeConsole(ctx context.Context, width, height uint16) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Process::ResizeConsole") + ctx, span := trace.StartSpan(ctx, "gcs::Process::ResizeConsole", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -230,7 +230,7 @@ func (p *Process) ResizeConsole(ctx context.Context, width, height uint16) (err // Signal sends a signal to the process, returning whether it was delivered. func (p *Process) Signal(ctx context.Context, options interface{}) (_ bool, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Process::Signal") + ctx, span := trace.StartSpan(ctx, "gcs::Process::Signal", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -275,7 +275,7 @@ func (p *Process) Wait() error { } func (p *Process) waitBackground() { - ctx, span := trace.StartSpan(context.Background(), "gcs::Process::waitBackground") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Process::waitBackground") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), diff --git a/internal/gcs/protocol.go b/internal/gcs/protocol.go index 840dcb2392..c07adced6b 100644 --- a/internal/gcs/protocol.go +++ b/internal/gcs/protocol.go @@ -3,6 +3,7 @@ package gcs import ( "encoding/json" "fmt" + "strconv" "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/hcs/schema1" @@ -62,6 +63,43 @@ const ( rpcLifecycleNotification ) +func (rpc rpcProc) String() string { + switch rpc { + case rpcCreate: + return "Create" + case rpcStart: + return "Start" + case rpcShutdownGraceful: + return "ShutdownGraceful" + case rpcShutdownForced: + return "ShutdownForced" + case rpcExecuteProcess: + return "ExecuteProcess" + case rpcWaitForProcess: + return "WaitForProcess" + case rpcSignalProcess: + return "SignalProcess" + case rpcResizeConsole: + return "ResizeConsole" + case 
rpcGetProperties: + return "GetProperties" + case rpcModifySettings: + return "ModifySettings" + case rpcNegotiateProtocol: + return "NegotiateProtocol" + case rpcDumpStacks: + return "DumpStacks" + case rpcDeleteContainerState: + return "DeleteContainerState" + case rpcUpdateContainer: + return "UpdateContainer" + case rpcLifecycleNotification: + return "LifecycleNotification" + default: + return "0x" + strconv.FormatUint(uint64(rpc), 16) + } +} + type msgType uint32 const ( @@ -92,40 +130,7 @@ func (typ msgType) String() string { default: return fmt.Sprintf("%#x", uint32(typ)) } - switch rpcProc(typ &^ msgTypeMask) { - case rpcCreate: - s += "Create" - case rpcStart: - s += "Start" - case rpcShutdownGraceful: - s += "ShutdownGraceful" - case rpcShutdownForced: - s += "ShutdownForced" - case rpcExecuteProcess: - s += "ExecuteProcess" - case rpcWaitForProcess: - s += "WaitForProcess" - case rpcSignalProcess: - s += "SignalProcess" - case rpcResizeConsole: - s += "ResizeConsole" - case rpcGetProperties: - s += "GetProperties" - case rpcModifySettings: - s += "ModifySettings" - case rpcNegotiateProtocol: - s += "NegotiateProtocol" - case rpcDumpStacks: - s += "DumpStacks" - case rpcDeleteContainerState: - s += "DeleteContainerState" - case rpcUpdateContainer: - s += "UpdateContainer" - case rpcLifecycleNotification: - s += "LifecycleNotification" - default: - s += fmt.Sprintf("%#x", uint32(typ)) - } + s += rpcProc(typ &^ msgTypeMask).String() return s + ")" } diff --git a/internal/guest/bridge/bridge.go b/internal/guest/bridge/bridge.go index 2d2e56c768..cbc9f613c8 100644 --- a/internal/guest/bridge/bridge.go +++ b/internal/guest/bridge/bridge.go @@ -129,11 +129,14 @@ func (mux *Mux) ServeMsg(r *Request) (RequestResponse, error) { return h.ServeMsg(r) } +// todo (helsaawy): remove context as a field and pass it explicitly +// https://go.dev/blog/context-and-structs#storing-context-in-structs-leads-to-confusion + // Request is the bridge request that has been sent. 
type Request struct { // Context is the request context received from the bridge. Context context.Context - // Header is the wire format message header that preceeded the message for + // Header is the wire format message header that preceded the message for // this request. Header *prot.MessageHeader // ContainerID is the id of the container that this message corresponds to. @@ -166,7 +169,7 @@ type bridgeResponse struct { // It has two fundamentally different dispatch options: // // 1. Request/Response where using the `Handler` a request -// of a given type will be dispatched to the apprpriate handler +// of a given type will be dispatched to the appropriate handler // and an appropriate response will respond to exactly that request that // caused the dispatch. // @@ -290,9 +293,15 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser } } } - ctx, span = trace.StartSpanWithRemoteParent(context.Background(), "opengcs::bridge::request", sc) + ctx, span = trace.StartSpanWithRemoteParent(context.Background(), + "opengcs::bridge::request", + sc, + oc.WithTraceLevelSampler, + oc.WithServerSpanKind) } else { - ctx, span = trace.StartSpan(context.Background(), "opengcs::bridge::request") + ctx, span = oc.StartTraceSpan(context.Background(), + "opengcs::bridge::request", + oc.WithServerSpanKind) } span.AddAttributes( @@ -401,7 +410,9 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser // PublishNotification writes a specific notification to the bridge. func (b *Bridge) PublishNotification(n *prot.ContainerNotification) { - ctx, span := trace.StartSpan(context.Background(), "opengcs::bridge::PublishNotification") + ctx, span := oc.StartTraceSpan(context.Background(), + "opengcs::bridge::PublishNotification", + oc.WithClientSpanKind) span.AddAttributes(trace.StringAttribute("notification", fmt.Sprintf("%+v", n))) // DONT defer span.End() here. 
Publish is odd because bridgeResponse calls
	// `End` on the `ctx` after the response is sent.
diff --git a/internal/guest/kmsg/kmsg.go b/internal/guest/kmsg/kmsg.go
index 429235bb16..f15cc3cf04 100644
--- a/internal/guest/kmsg/kmsg.go
+++ b/internal/guest/kmsg/kmsg.go
@@ -142,7 +142,7 @@ func ReadForever(logLevel LogLevel) {
 				}).Error("failed to parse kmsg entry")
 			} else {
 				if entry.Priority <= logLevel {
-					logrus.WithFields(entry.logFormat()).Info("kmsg read")
+					logrus.WithFields(entry.logFormat()).Debug("kmsg read")
 				}
 			}
 		}
diff --git a/internal/guest/runtime/hcsv2/container.go b/internal/guest/runtime/hcsv2/container.go
index d222842e6b..956de8de35 100644
--- a/internal/guest/runtime/hcsv2/container.go
+++ b/internal/guest/runtime/hcsv2/container.go
@@ -25,6 +25,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/logfields"
+	"github.com/Microsoft/hcsshim/internal/oc"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
 )
 
 type Container struct {
@@ -178,7 +179,7 @@ func (c *Container) Update(ctx context.Context, resources interface{}) error {
 
 // Wait waits for the container's init process to exit.
func (c *Container) Wait() prot.NotificationType { - _, span := trace.StartSpan(context.Background(), "opengcs::Container::Wait") + _, span := oc.StartTraceSpan(context.Background(), "opengcs::Container::Wait") defer span.End() span.AddAttributes(trace.StringAttribute(logfields.ContainerID, c.id)) diff --git a/internal/guest/runtime/hcsv2/process.go b/internal/guest/runtime/hcsv2/process.go index 0a7dee8b30..d2f77a1102 100644 --- a/internal/guest/runtime/hcsv2/process.go +++ b/internal/guest/runtime/hcsv2/process.go @@ -14,6 +14,7 @@ import ( "github.com/Microsoft/hcsshim/internal/guest/runtime" "github.com/Microsoft/hcsshim/internal/guest/stdio" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" oci "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -83,7 +84,7 @@ func newProcess(c *Container, spec *oci.Process, process runtime.Process, pid ui p.exitWg.Add(1) p.writersWg.Add(1) go func() { - ctx, span := trace.StartSpan(context.Background(), "newProcess::waitBackground") + ctx, span := oc.StartTraceSpan(context.Background(), "newProcess::waitBackground") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), @@ -113,7 +114,7 @@ func newProcess(c *Container, spec *oci.Process, process runtime.Process, pid ui } c.processesMutex.Lock() - _, span := trace.StartSpan(context.Background(), "newProcess::waitBackground::waitAllWaiters") + _, span := oc.StartTraceSpan(context.Background(), "newProcess::waitBackground::waitAllWaiters") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), @@ -161,7 +162,7 @@ func (p *containerProcess) ResizeConsole(ctx context.Context, height, width uint // gather the exit code. The second channel must be signaled from the caller // when the caller has completed its use of this call to Wait. 
func (p *containerProcess) Wait() (<-chan int, chan<- bool) { - ctx, span := trace.StartSpan(context.Background(), "opengcs::containerProcess::Wait") + ctx, span := oc.StartTraceSpan(context.Background(), "opengcs::containerProcess::Wait") span.AddAttributes( trace.StringAttribute("cid", p.cid), trace.Int64Attribute("pid", int64(p.pid))) @@ -284,7 +285,7 @@ func (ep *externalProcess) ResizeConsole(ctx context.Context, height, width uint } func (ep *externalProcess) Wait() (<-chan int, chan<- bool) { - _, span := trace.StartSpan(context.Background(), "opengcs::externalProcess::Wait") + _, span := oc.StartTraceSpan(context.Background(), "opengcs::externalProcess::Wait") span.AddAttributes(trace.Int64Attribute("pid", int64(ep.cmd.Process.Pid))) exitCodeChan := make(chan int, 1) diff --git a/internal/guest/transport/vsock.go b/internal/guest/transport/vsock.go index c731b984aa..1410d901c7 100644 --- a/internal/guest/transport/vsock.go +++ b/internal/guest/transport/vsock.go @@ -1,10 +1,13 @@ package transport import ( + "context" "fmt" "syscall" "time" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/linuxkit/virtsock/pkg/vsock" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -22,12 +25,15 @@ type VsockTransport struct{} var _ Transport = &VsockTransport{} +const _dialRetrySleepTime = 100 * time.Millisecond + // Dial accepts a vsock socket port number as configuration, and // returns an unconnected VsockConnection struct. func (t *VsockTransport) Dial(port uint32) (Connection, error) { - logrus.WithFields(logrus.Fields{ + _, etr := log.S(context.Background(), logrus.Fields{ "port": port, - }).Info("opengcs::VsockTransport::Dial - vsock dial port") + }) + etr.Trace("opengcs::VsockTransport::Dial") // HACK: Remove loop when vsock bugs are fixed! 
// Retry 10 times because vsock.Dial can return connection time out @@ -40,7 +46,11 @@ func (t *VsockTransport) Dial(port uint32) (Connection, error) { // If the error was ETIMEDOUT retry, otherwise fail. cause := errors.Cause(err) if errno, ok := cause.(syscall.Errno); ok && errno == syscall.ETIMEDOUT { - time.Sleep(100 * time.Millisecond) + etr.WithFields(logrus.Fields{ + logfields.Attempt: i + 1, + logfields.Duration: _dialRetrySleepTime.String(), + }).Debug("vsock dial timed out; sleeping before retrying") + time.Sleep(_dialRetrySleepTime) continue } else { return nil, errors.Wrapf(err, "vsock Dial port (%d) failed", port) diff --git a/internal/hcs/process.go b/internal/hcs/process.go index 605856f2a3..ef44d30cf9 100644 --- a/internal/hcs/process.go +++ b/internal/hcs/process.go @@ -11,8 +11,10 @@ import ( "time" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/vmcompute" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -118,11 +120,15 @@ func (process *Process) processSignalResult(ctx context.Context, err error) (boo // // For WCOW `guestresource.SignalProcessOptionsWCOW`. func (process *Process) Signal(ctx context.Context, options interface{}) (bool, error) { + operation := "hcs::Process::Signal" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::Signal" - if process.handle == 0 { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -143,11 +149,15 @@ func (process *Process) Signal(ctx context.Context, options interface{}) (bool, // Kill signals the process to terminate but does not wait for it to finish terminating. 
func (process *Process) Kill(ctx context.Context) (bool, error) { + operation := "hcs::Process::Kill" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::Kill" - if process.handle == 0 { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -201,7 +211,7 @@ func (process *Process) Kill(ctx context.Context) (bool, error) { // call multiple times. func (process *Process) waitBackground() { operation := "hcs::Process::waitBackground" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() span.AddAttributes( trace.StringAttribute("cid", process.SystemID()), @@ -243,7 +253,7 @@ func (process *Process) waitBackground() { } } } - log.G(ctx).WithField("exitCode", exitCode).Debug("process exited") + log.G(ctx).WithField("exitCode", exitCode).Debug("hcs::Process process exited") process.closedWaitOnce.Do(func() { process.exitCode = exitCode @@ -254,7 +264,7 @@ func (process *Process) waitBackground() { } // Wait waits for the process to exit. If the process has already exited returns -// the pervious error (if any). +// the previous error (if any). func (process *Process) Wait() error { <-process.waitBlock return process.waitError @@ -262,11 +272,15 @@ func (process *Process) Wait() error { // ResizeConsole resizes the console of the process. 
func (process *Process) ResizeConsole(ctx context.Context, width, height uint16) error { + operation := "hcs::Process::ResizeConsole" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::ResizeConsole" - if process.handle == 0 { return makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -312,7 +326,7 @@ func (process *Process) ExitCode() (int, error) { // are the responsibility of the caller to close. func (process *Process) StdioLegacy() (_ io.WriteCloser, _ io.ReadCloser, _ io.ReadCloser, err error) { operation := "hcs::Process::StdioLegacy" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -359,12 +373,18 @@ func (process *Process) Stdio() (stdin io.Writer, stdout, stderr io.Reader) { // CloseStdin closes the write side of the stdin pipe so that the process is // notified on the read side that there is no more data in stdin. -func (process *Process) CloseStdin(ctx context.Context) error { +func (process *Process) CloseStdin(ctx context.Context) (err error) { + operation := "hcs::Process::CloseStdin" + ctx, span := trace.StartSpan(ctx, operation) //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::CloseStdin" - if process.handle == 0 { return makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -450,7 +470,7 @@ func (process *Process) CloseStderr(ctx context.Context) (err error) { // or wait on it. 
func (process *Process) Close() (err error) { operation := "hcs::Process::Close" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 75499c967f..adf1c1e235 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "strings" "sync" "syscall" @@ -12,9 +13,11 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/vmcompute" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -44,7 +47,7 @@ func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface in // hcsCreateComputeSystemContext is an async operation. Start the outer span // here to measure the full create time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", id)) @@ -97,6 +100,7 @@ func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface in // OpenComputeSystem opens an existing compute system by ID. 
func OpenComputeSystem(ctx context.Context, id string) (*System, error) { operation := "hcs::OpenComputeSystem" + log.G(ctx).WithField(logfields.ContainerID, id).Trace(operation) computeSystem := newSystem(id) handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id) @@ -149,6 +153,7 @@ func (computeSystem *System) IsOCI() bool { // GetComputeSystems gets a list of the compute systems on the system that match the query func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) { operation := "hcs::GetComputeSystems" + log.G(ctx).WithField("query", fmt.Sprintf("%+v", q)).Trace(operation) queryb, err := json.Marshal(q) if err != nil { @@ -178,7 +183,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { // hcsStartComputeSystemContext is an async operation. Start the outer span // here to measure the full start time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -206,11 +211,12 @@ func (computeSystem *System) ID() string { // Shutdown requests a compute system shutdown. func (computeSystem *System) Shutdown(ctx context.Context) error { + operation := "hcs::System::Shutdown" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Shutdown" - if computeSystem.handle == 0 { return nil } @@ -227,11 +233,12 @@ func (computeSystem *System) Shutdown(ctx context.Context) error { // Terminate requests a compute system terminate. 
func (computeSystem *System) Terminate(ctx context.Context) error { + operation := "hcs::System::Terminate" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Terminate" - if computeSystem.handle == 0 { return nil } @@ -253,7 +260,7 @@ func (computeSystem *System) Terminate(ctx context.Context) error { // safe to call multiple times. func (computeSystem *System) waitBackground() { operation := "hcs::System::waitBackground" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -297,11 +304,12 @@ func (computeSystem *System) ExitError() error { // Properties returns the requested container properties targeting a V1 schema container. func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) { + operation := "hcs::System::Properties" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Properties" - queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types}) if err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) @@ -326,11 +334,12 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr // PropertiesV2 returns the requested container properties targeting a V2 schema container. 
func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { + operation := "hcs::System::PropertiesV2" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::PropertiesV2" - queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types}) if err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) @@ -359,7 +368,7 @@ func (computeSystem *System) Pause(ctx context.Context) (err error) { // hcsPauseComputeSystemContext is an async peration. Start the outer span // here to measure the full pause time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -386,7 +395,7 @@ func (computeSystem *System) Resume(ctx context.Context) (err error) { // hcsResumeComputeSystemContext is an async operation. Start the outer span // here to measure the full restore time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -411,9 +420,9 @@ func (computeSystem *System) Resume(ctx context.Context) (err error) { func (computeSystem *System) Save(ctx context.Context, options interface{}) (err error) { operation := "hcs::System::Save" - // hcsSaveComputeSystemContext is an async peration. Start the outer span + // hcsSaveComputeSystemContext is an async operation. Start the outer span // here to measure the full save time. 
- ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -466,6 +475,9 @@ func (computeSystem *System) createProcess(ctx context.Context, operation string // CreateProcess launches a new process within the computeSystem. func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) { operation := "hcs::System::CreateProcess" + ctx, etr := log.S(ctx, logrus.Fields{logfields.ContainerID: computeSystem.id}) + etr.Trace(operation) + process, processInfo, err := computeSystem.createProcess(ctx, operation, c) if err != nil { return nil, err @@ -495,11 +507,12 @@ func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) ( // OpenProcess gets an interface to an existing process within the computeSystem. func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) { + operation := "hcs::System::OpenProcess" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::OpenProcess" - if computeSystem.handle == 0 { return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) } @@ -522,7 +535,7 @@ func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process // Close cleans up any state associated with the compute system but does not terminate or wait for it. 
func (computeSystem *System) Close() (err error) { operation := "hcs::System::Close" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -612,11 +625,12 @@ func (computeSystem *System) unregisterCallback(ctx context.Context) error { // Modify the System by sending a request to HCS func (computeSystem *System) Modify(ctx context.Context, config interface{}) error { + operation := "hcs::System::Modify" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Modify" - if computeSystem.handle == 0 { return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) } diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index 6c9640f878..c73252eb69 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -17,6 +17,8 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schemaversion" @@ -24,6 +26,7 @@ import ( "github.com/Microsoft/hcsshim/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) var ( @@ -208,8 +211,8 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) log.G(ctx).WithFields(logrus.Fields{ "options": fmt.Sprintf("%+v", createOptions), - "schema": coi.actualSchemaVersion, - }).Debug("hcsshim::initializeCreateOptions") + "schema": fmt.Sprintf("%+v", 
coi.actualSchemaVersion), + }).Debug("hcsshim::initializeCreateOptions options") return coi, nil } @@ -260,6 +263,11 @@ func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r // release the resources on failure, so that the client can make the necessary // call to release resources that have been allocated as part of calling this function. func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::CreateContainer") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ID, createOptions.ID)) + coi, err := initializeCreateOptions(ctx, createOptions) if err != nil { return nil, nil, err @@ -312,12 +320,13 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } var hcsDocument, gcsDocument interface{} - log.G(ctx).Debug("hcsshim::CreateContainer allocating resources") + etr := log.G(ctx) + etr.Debug("hcsshim::CreateContainer allocating resources") if coi.Spec.Linux != nil { if schemaversion.IsV10(coi.actualSchemaVersion) { return nil, r, errors.New("LCOW v1 not supported") } - log.G(ctx).Debug("hcsshim::CreateContainer allocateLinuxResources") + etr.Debug("hcsshim::CreateContainer allocateLinuxResources") err = allocateLinuxResources(ctx, coi, r, isSandbox) if err != nil { log.G(ctx).WithError(err).Debug("failed to allocateLinuxResources") @@ -325,19 +334,19 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } gcsDocument, err = createLinuxContainerDocument(ctx, coi, r.ContainerRootInUVM()) if err != nil { - log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") + log.G(ctx).WithError(err).Error("failed createHCSContainerDocument") return nil, r, err } } else { err = allocateWindowsResources(ctx, coi, r, isSandbox) if err != nil { - log.G(ctx).WithError(err).Debug("failed to 
allocateWindowsResources") + etr.WithError(err).Error("failed to allocateWindowsResources") return nil, r, err } - log.G(ctx).Debug("hcsshim::CreateContainer creating container document") + etr.Debug("hcsshim::CreateContainer creating container document") v1, v2, err := createWindowsContainerDocument(ctx, coi) if err != nil { - log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") + etr.WithError(err).Error("failed createHCSContainerDocument") return nil, r, err } @@ -361,7 +370,7 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } } - log.G(ctx).Debug("hcsshim::CreateContainer creating compute system") + etr.Debug("hcsshim::CreateContainer creating compute system") if gcsDocument != nil { c, err := coi.HostingSystem.CreateContainer(ctx, coi.actualID, gcsDocument) if err != nil { @@ -381,6 +390,11 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C // CreateContainer does. Also, instead of sending create container request it sends a modify // request to an existing container. CloneContainer only works for WCOW. 
func CloneContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::CloneContainer") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ID, createOptions.ID)) + coi, err := initializeCreateOptions(ctx, createOptions) if err != nil { return nil, nil, err diff --git a/internal/hcsoci/devices.go b/internal/hcsoci/devices.go index 42d11aac52..c5b55f27db 100644 --- a/internal/hcsoci/devices.go +++ b/internal/hcsoci/devices.go @@ -155,7 +155,7 @@ func handleAssignedDevicesWindows( ID: value, IDType: uvm.VPCILocationPathIDType, } - log.G(ctx).WithField("parsed devices", specDev).Info("added windows device to spec") + log.G(ctx).WithField("parsed devices", specDev).Debug("added windows device to spec") resultDevs = append(resultDevs, specDev) } } diff --git a/internal/hcsoci/hcsdoc_lcow.go b/internal/hcsoci/hcsdoc_lcow.go index 5612cd18f0..26308a7a47 100644 --- a/internal/hcsoci/hcsdoc_lcow.go +++ b/internal/hcsoci/hcsdoc_lcow.go @@ -78,12 +78,13 @@ type linuxHostedSystem struct { } func createLinuxContainerDocument(ctx context.Context, coi *createOptionsInternal, guestRoot string) (*linuxHostedSystem, error) { + log.G(ctx).WithField("guestRoot", guestRoot).Trace("hcsshim::createLinuxContainerDoc") + spec, err := createLCOWSpec(coi) if err != nil { return nil, err } - log.G(ctx).WithField("guestRoot", guestRoot).Debug("hcsshim::createLinuxContainerDoc") return &linuxHostedSystem{ SchemaVersion: schemaversion.SchemaV21(), OciBundlePath: guestRoot, diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 688d901609..e503dee68e 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -15,6 +15,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/layers" 
"github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/processorinfo" "github.com/Microsoft/hcsshim/internal/uvm" @@ -142,7 +143,12 @@ func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUC // a container, both hosted and process isolated. It creates both v1 and v2 // container objects, WCOW only. The containers storage should have been mounted already. func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInternal) (*schema1.ContainerConfig, *hcsschema.Container, error) { - log.G(ctx).Debug("hcsshim: CreateHCSContainerDocument") + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.Name: coi.actualID, + "owner": coi.actualOwner, + }) + etr.Trace("hcsshim: CreateHCSContainerDocument") + // TODO: Make this safe if exported so no null pointer dereferences. if coi.Spec == nil { @@ -226,12 +232,12 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } else if newCPULimit > 10000 { newCPULimit = 10000 } - log.G(ctx).WithFields(logrus.Fields{ + etr.WithFields(logrus.Fields{ "hostCPUCount": hostCPUCount, "uvmCPUCount": uvmCPUCount, "oldCPULimit": cpuLimit, "newCPULimit": newCPULimit, - }).Info("rescaling CPU limit for UVM sandbox") + }).Debug("rescaling CPU limit for UVM sandbox") cpuLimit = newCPULimit } diff --git a/internal/hcsoci/network.go b/internal/hcsoci/network.go index daa4e46d00..f1ed1776d4 100644 --- a/internal/hcsoci/network.go +++ b/internal/hcsoci/network.go @@ -6,18 +6,18 @@ import ( "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) -func createNetworkNamespace(ctx context.Context, coi 
*createOptionsInternal, r *resources.Resources) error { - op := "hcsoci::createNetworkNamespace" - l := log.G(ctx).WithField(logfields.ContainerID, coi.ID) - l.Debug(op + " - Begin") - defer func() { - l.Debug(op + " - End") - }() +func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) (err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::createNetworkNamespace") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ContainerID, coi.ID)) ns, err := hcn.NewNamespace("").Create() if err != nil { @@ -27,7 +27,7 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r * log.G(ctx).WithFields(logrus.Fields{ "netID": ns.Id, logfields.ContainerID: coi.ID, - }).Info("created network namespace for container") + }).Debug("created network namespace for container") r.SetNetNS(ns.Id) r.SetCreatedNetNS(true) @@ -41,7 +41,7 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r * log.G(ctx).WithFields(logrus.Fields{ "netID": ns.Id, "endpointID": endpointID, - }).Info("added network endpoint to namespace") + }).Debug("added network endpoint to namespace") endpoints = append(endpoints, endpointID) } r.Add(&uvm.NetworkEndpoints{EndpointIDs: endpoints, Namespace: ns.Id}) diff --git a/internal/hcsoci/resources.go b/internal/hcsoci/resources.go index 46ad55660f..d42fe79f0f 100644 --- a/internal/hcsoci/resources.go +++ b/internal/hcsoci/resources.go @@ -14,7 +14,7 @@ func NormalizeProcessorCount(ctx context.Context, cid string, requestedCount, ho "id": cid, "requested count": requestedCount, "assigned count": hostCount, - }).Warn("Changing user requested cpu count to current number of processors on the host") + }).Warn("Changing user requested cpu count to current number of processors on the host") return hostCount } else { return requestedCount diff --git a/internal/layers/layers.go b/internal/layers/layers.go index 
573ad72aa7..d127dde8a6 100644 --- a/internal/layers/layers.go +++ b/internal/layers/layers.go @@ -14,11 +14,14 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/ospath" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/wclayer" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sys/windows" ) @@ -45,7 +48,12 @@ func NewImageLayers(vm *uvm.UtilityVM, containerRootInUVM string, layers []strin } // Release unmounts all of the layers located in the layers array. -func (layers *ImageLayers) Release(ctx context.Context, all bool) error { +func (layers *ImageLayers) Release(ctx context.Context, all bool) (err error) { + ctx, span := trace.StartSpan(ctx, "layers::Release") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("ImageLayers", fmt.Sprintf("%+v", layers))) + if layers.skipCleanup && layers.vm != nil { return nil } @@ -57,7 +65,7 @@ func (layers *ImageLayers) Release(ctx context.Context, all bool) error { if layers.vm != nil { crp = containerRootfsPath(layers.vm, layers.containerRootInUVM) } - err := UnmountContainerLayers(ctx, layers.layers, crp, layers.volumeMountPath, layers.vm, op) + err = UnmountContainerLayers(ctx, layers.layers, crp, layers.volumeMountPath, layers.vm, op) if err != nil { return err } @@ -78,7 +86,12 @@ func (layers *ImageLayers) Release(ctx context.Context, all bool) error { // // TODO dcantah: Keep better track of the layers that are added, don't simply discard the SCSI, VSMB, etc. resource types gotten inside. 
func MountContainerLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, err error) { - log.G(ctx).WithField("layerFolders", layerFolders).Debug("hcsshim::mountContainerLayers") + ctx, span := trace.StartSpan(ctx, "layers::MountContainerLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("layerFolders", fmt.Sprintf("%q", layerFolders)), + trace.StringAttribute("guestRoot", guestRoot), + trace.StringAttribute("volumeMountPath", volumeMountPath)) if vm == nil { if len(layerFolders) < 2 { @@ -160,7 +173,7 @@ func MountContainerLayers(ctx context.Context, containerID string, layerFolders } // V2 UVM - log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::mountContainerLayers V2 UVM") + log.G(ctx).WithField("os", vm.OS()).Debug("layers::MountContainerLayers V2 UVM") var ( layersAdded []string @@ -267,7 +280,6 @@ func MountContainerLayers(ctx context.Context, containerID string, layerFolders if err != nil { return "", err } - log.G(ctx).Debug("hcsshim::mountContainerLayers Succeeded") return rootfs, nil } @@ -340,8 +352,12 @@ const ( ) // UnmountContainerLayers is a helper for clients to hide all the complexity of layer unmounting -func UnmountContainerLayers(ctx context.Context, layerFolders []string, containerRootPath, volumeMountPath string, vm *uvm.UtilityVM, op UnmountOperation) error { - log.G(ctx).WithField("layerFolders", layerFolders).Debug("hcsshim::unmountContainerLayers") +func UnmountContainerLayers(ctx context.Context, layerFolders []string, containerRootPath, volumeMountPath string, vm *uvm.UtilityVM, op UnmountOperation) (err error) { + ctx, span := trace.StartSpan(ctx, "layers::UnmountContainerLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("layerFolders", fmt.Sprint(layerFolders))) + if vm == nil { // Must be an argon - folders 
are mounted on the host if op != UnmountOperationAll { @@ -442,6 +458,12 @@ func UnmountContainerLayers(ctx context.Context, layerFolders []string, containe // GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers func GetHCSLayers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) { + ctx, span := trace.StartSpan(ctx, "layers::GetHCSLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.UVMID, vm.ID()), + trace.StringAttribute("paths", fmt.Sprint(paths))) + for _, path := range paths { uvmPath, err := vm.GetVSMBUvmPath(ctx, path, true) if err != nil { @@ -483,7 +505,7 @@ func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err e log.G(ctx).WithFields(logrus.Fields{ "hostpath": hostPath, "volumeName": volumeName, - }).Debug("mounting volume for container") + }).Trace("layers::mountSandboxVolume") if _, err := os.Stat(hostPath); os.IsNotExist(err) { if err := os.MkdirAll(hostPath, 0777); err != nil { @@ -509,10 +531,10 @@ func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err e } // Remove volume mount point. And remove folder afterwards. 
-func removeSandboxMountPoint(ctx context.Context, hostPath string) error { +func removeSandboxMountPoint(ctx context.Context, hostPath string) (err error) { log.G(ctx).WithFields(logrus.Fields{ "hostpath": hostPath, - }).Debug("removing volume mount point for container") + }).Trace("layers::removeSandboxMountPoint") if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil { return errors.Wrap(err, "failed to delete sandbox volume mount point") diff --git a/internal/resources/resources.go b/internal/resources/resources.go index 89851ae7c3..195c4f660d 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -9,7 +9,10 @@ import ( "github.com/Microsoft/hcsshim/internal/credentials" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/uvm" + "go.opencensus.io/trace" ) // NetNS returns the network namespace for the container @@ -95,8 +98,15 @@ func NewContainerResources(id string) *Resources { // ReleaseResources releases/frees all of the resources associated with a container. This includes // Plan9 shares, vsmb mounts, pipe mounts, network endpoints, scsi mounts, vpci devices and layers. // TODO: make method on Resources struct. 
-func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all bool) error { +// TODO: group together all the different errors into one +func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all bool) (err error) { + ctx, span := trace.StartSpan(ctx, "resources::ReleaseResources") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + if vm != nil { + span.AddAttributes(trace.StringAttribute(logfields.ID, vm.ID())) + if r.addedNetNSToVM { if err := vm.TearDownNetworking(ctx, r.netNS); err != nil { log.G(ctx).Warn(err) diff --git a/internal/uvm/computeagent.go b/internal/uvm/computeagent.go index b87edf2796..f18442aeb3 100644 --- a/internal/uvm/computeagent.go +++ b/internal/uvm/computeagent.go @@ -63,7 +63,7 @@ func (ca *computeAgent) AssignPCI(ctx context.Context, req *computeagent.AssignP "containerID": req.ContainerID, "deviceID": req.DeviceID, "virtualFunctionIndex": req.VirtualFunctionIndex, - }).Info("AssignPCI request") + }).Trace("computeAgent::AssignPCI") if req.DeviceID == "" { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -80,7 +80,7 @@ func (ca *computeAgent) RemovePCI(ctx context.Context, req *computeagent.RemoveP log.G(ctx).WithFields(logrus.Fields{ "containerID": req.ContainerID, "deviceID": req.DeviceID, - }).Info("RemovePCI request") + }).Trace("computeAgent::RemovePCI") if req.DeviceID == "" { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -95,9 +95,9 @@ func (ca *computeAgent) RemovePCI(ctx context.Context, req *computeagent.RemoveP func (ca *computeAgent) AddNIC(ctx context.Context, req *computeagent.AddNICInternalRequest) (*computeagent.AddNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ "containerID": req.ContainerID, - "endpoint": req.Endpoint, + "endpoint": req.Endpoint.String(), "nicID": req.NicID, - }).Info("AddNIC request") + }).Trace("computeAgent::AddNIC") if req.NicID == "" || 
req.Endpoint == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -139,9 +139,9 @@ func (ca *computeAgent) AddNIC(ctx context.Context, req *computeagent.AddNICInte // ModifyNIC will modify a NIC from the computeagent services hosting UVM. func (ca *computeAgent) ModifyNIC(ctx context.Context, req *computeagent.ModifyNICInternalRequest) (*computeagent.ModifyNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ + "endpoint": req.Endpoint.String(), "nicID": req.NicID, - "endpoint": req.Endpoint, - }).Info("ModifyNIC request") + }).Trace("computeAgent::ModifyNIC") if req.NicID == "" || req.Endpoint == nil || req.IovPolicySettings == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -189,10 +189,9 @@ func (ca *computeAgent) ModifyNIC(ctx context.Context, req *computeagent.ModifyN // DeleteNIC will delete a NIC from the computeagent services hosting UVM. func (ca *computeAgent) DeleteNIC(ctx context.Context, req *computeagent.DeleteNICInternalRequest) (*computeagent.DeleteNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ - "containerID": req.ContainerID, - "nicID": req.NicID, - "endpoint": req.Endpoint, - }).Info("DeleteNIC request") + "endpoint": req.Endpoint.String(), + "nicID": req.NicID, + }).Trace("computeAgent::DeleteNIC") if req.NicID == "" || req.Endpoint == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -238,7 +237,7 @@ func setupAndServe(ctx context.Context, caAddr string, vm *UtilityVM) error { } computeagent.RegisterComputeAgentService(s, &computeAgent{vm}) - log.G(ctx).WithField("address", l.Addr().String()).Info("serving compute agent") + log.G(ctx).WithField("address", l.Addr().String()).Debug("serving compute agent") go func() { defer l.Close() if err := trapClosedConnErr(s.Serve(ctx, l)); err != nil { diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 8f293f67fe..ee3034cdc8 100644 
--- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -234,7 +234,7 @@ func (uvm *UtilityVM) create(ctx context.Context, doc interface{}) error { // Close terminates and releases resources associated with the utility VM. func (uvm *UtilityVM) Close() (err error) { - ctx, span := trace.StartSpan(context.Background(), "uvm::Close") + ctx, span := oc.StartTraceSpan(context.Background(), "uvm::Close") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute(logfields.UVMID, uvm.id)) diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index 5a5628230d..a40ede257e 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -709,7 +709,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error } span.AddAttributes(trace.StringAttribute(logfields.UVMID, opts.ID)) - log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateLCOW options") + log.G(ctx).WithFields(logrus.Fields{ + logfields.Options: fmt.Sprintf("%+v", opts.Options), + "options-lcow": fmt.Sprintf("%+v", opts), + }).Debug("uvm::CreateLCOW options") // We dont serialize OutputHandler so if it is missing we need to put it back to the default. 
if opts.OutputHandler == nil { diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index c09b49d9f0..799a291f0b 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -6,11 +6,12 @@ import ( "os" "path/filepath" + "github.com/Microsoft/go-winio" + "github.com/Microsoft/go-winio/pkg/guid" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" - "github.com/Microsoft/go-winio" - "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/gcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" @@ -86,7 +87,7 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv return nil, fmt.Errorf("failed to get host processor information: %s", err) } - // To maintain compatability with Docker we need to automatically downgrade + // To maintain compatibility with Docker we need to automatically downgrade // a user CPU count if the setting is not possible. uvm.processorCount = uvm.normalizeProcessorCount(ctx, opts.ProcessorCount, processorTopology) @@ -243,7 +244,10 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } span.AddAttributes(trace.StringAttribute(logfields.UVMID, opts.ID)) - log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateWCOW options") + log.G(ctx).WithFields(logrus.Fields{ + logfields.Options: fmt.Sprintf("%+v", opts.Options), + "options-wcow": fmt.Sprintf("%+v", opts), + }).Debug("uvm::CreateWCOW options") uvm := &UtilityVM{ id: opts.ID, @@ -276,7 +280,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } // TODO: BUGBUG Remove this. @jhowardmsft - // It should be the responsiblity of the caller to do the creation and population. + // It should be the responsibility of the caller to do the creation and population. // - Update runhcs too (vm.go). 
// - Remove comment in function header // - Update tests that rely on this current behaviour. diff --git a/internal/uvm/network.go b/internal/uvm/network.go index dca3ad5ab9..00a000cdd8 100644 --- a/internal/uvm/network.go +++ b/internal/uvm/network.go @@ -10,6 +10,7 @@ import ( "github.com/containerd/ttrpc" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" @@ -17,6 +18,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/ncproxyttrpc" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/osversion" @@ -96,13 +98,11 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er } // GetNamespaceEndpoints gets all endpoints in `netNS` -func GetNamespaceEndpoints(ctx context.Context, netNS string) ([]*hns.HNSEndpoint, error) { - op := "uvm::GetNamespaceEndpoints" - l := log.G(ctx).WithField("netns-id", netNS) - l.Debug(op + " - Begin") - defer func() { - l.Debug(op + " - End") - }() +func GetNamespaceEndpoints(ctx context.Context, netNS string) (_ []*hns.HNSEndpoint, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::GetNamespaceEndpoints") //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("netns-id", netNS)) ids, err := hns.GetNamespaceEndpoints(netNS) if err != nil { diff --git a/internal/uvm/plan9.go b/internal/uvm/plan9.go index 3b86b392ae..ad34fdcfd6 100644 --- a/internal/uvm/plan9.go +++ b/internal/uvm/plan9.go @@ -10,7 +10,10 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" 
"github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/osversion" + "go.opencensus.io/trace" ) // Plan9Share is a struct containing host paths for the UVM @@ -31,7 +34,18 @@ func (p9 *Plan9Share) Release(ctx context.Context) error { const plan9Port = 564 // AddPlan9 adds a Plan9 share to a utility VM. -func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath string, readOnly bool, restrict bool, allowedNames []string) (*Plan9Share, error) { +func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath string, readOnly bool, restrict bool, allowedNames []string) (_ *Plan9Share, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::AddPlan9") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("hostPath", hostPath), + trace.StringAttribute("uvmPath", uvmPath), + trace.BoolAttribute("readOnly", readOnly), + trace.BoolAttribute("restrict", restrict), + trace.StringAttribute("allowedNames", fmt.Sprintf("%v", allowedNames))) + if uvm.operatingSystem != "linux" { return nil, errNotSupported } @@ -104,7 +118,15 @@ func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath str // RemovePlan9 removes a Plan9 share from a utility VM. Each Plan9 share is ref-counted // and only actually removed when the ref-count drops to zero.
-func (uvm *UtilityVM) RemovePlan9(ctx context.Context, share *Plan9Share) error { +func (uvm *UtilityVM) RemovePlan9(ctx context.Context, share *Plan9Share) (err error) { + ctx, span := trace.StartSpan(ctx, "uvm::RemovePlan9") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("name", share.name), + trace.StringAttribute("uvmPath", share.uvmPath)) + if uvm.operatingSystem != "linux" { return errNotSupported } diff --git a/internal/uvm/start.go b/internal/uvm/start.go index 702815ed6a..dc337eba78 100644 --- a/internal/uvm/start.go +++ b/internal/uvm/start.go @@ -113,7 +113,7 @@ func parseLogrus(vmid string) func(r io.Reader) { break } fields[logfields.UVMID] = vmid - fields["vm.time"] = gcsEntry.Time + fields["vm.time"] = log.FormatTime(gcsEntry.Time) e.Log(gcsEntry.Level, gcsEntry.Message) } } diff --git a/internal/uvm/vsmb.go b/internal/uvm/vsmb.go index b1e5f24434..a64ece9ef2 100644 --- a/internal/uvm/vsmb.go +++ b/internal/uvm/vsmb.go @@ -11,11 +11,14 @@ import ( "unsafe" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sys/windows" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/winapi" "github.com/Microsoft/hcsshim/osversion" @@ -158,7 +161,16 @@ func forceNoDirectMap(path string) (bool, error) { // AddVSMB adds a VSMB share to a Windows utility VM. Each VSMB share is ref-counted and // only added if it isn't already. This is used for read-only layers, mapped directories // to a container, and for mapped pipes.
-func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcsschema.VirtualSmbShareOptions) (*VSMBShare, error) { +func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcsschema.VirtualSmbShareOptions) (_ *VSMBShare, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::AddVSMB") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("hostPath", hostPath), + trace.BoolAttribute("readOnly", options.ReadOnly), + trace.BoolAttribute("restrict", options.RestrictFileAccess)) + if uvm.operatingSystem != "windows" { return nil, errNotSupported } @@ -190,7 +202,7 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs if force, err := forceNoDirectMap(hostPath); err != nil { return nil, err } else if force { - log.G(ctx).WithField("path", hostPath).Info("Forcing NoDirectmap for VSMB mount") + log.G(ctx).WithField("hostPath", hostPath).Debug("Forcing NoDirectmap for VSMB mount") options.NoDirectmap = true } @@ -225,7 +237,7 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs "path": hostPath, "options": fmt.Sprintf("%+#v", options), "operation": requestType, - }).Info("Modifying VSMB share") + }).Debug("Modifying VSMB share") modification := &hcsschema.ModifySettingRequest{ RequestType: requestType, Settings: hcsschema.VirtualSmbShare{ @@ -250,7 +262,15 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs // RemoveVSMB removes a VSMB share from a utility VM. Each VSMB share is ref-counted // and only actually removed when the ref-count drops to zero. 
-func (uvm *UtilityVM) RemoveVSMB(ctx context.Context, hostPath string, readOnly bool) error { +func (uvm *UtilityVM) RemoveVSMB(ctx context.Context, hostPath string, readOnly bool) (err error) { + ctx, span := trace.StartSpan(ctx, "uvm::RemoveVSMB") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.BoolAttribute("readOnly", readOnly), + trace.StringAttribute("hostPath", hostPath)) + if uvm.operatingSystem != "windows" { return errNotSupported } @@ -387,6 +407,12 @@ func (vsmb *VSMBShare) GobDecode(data []byte) error { // clone VSMB share we just need to add it into the config doc of that VM and increase the // vsmb counter. func (vsmb *VSMBShare) Clone(ctx context.Context, vm *UtilityVM, cd *cloneData) error { + log.G(ctx).WithFields(logrus.Fields{ + "hostPath": vsmb.HostPath, + "guestPath": vsmb.guestPath, + "name": vsmb.name, + }).Trace("vsmb::Clone") + cd.doc.VirtualMachine.Devices.VirtualSmb.Shares = append(cd.doc.VirtualMachine.Devices.VirtualSmb.Shares, hcsschema.VirtualSmbShare{ Name: vsmb.name, Path: vsmb.HostPath, diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/attach.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/attach.go index 7f1f2823dd..7f7cbd6e3b 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/attach.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/attach.go @@ -17,7 +17,7 @@ import ( // // `layerData` is the parent read-only layer data. 
func AttachLayerStorageFilter(ctx context.Context, layerPath string, layerData LayerData) (err error) { - title := "hcsshim.AttachLayerStorageFilter" + title := "hcsshim::AttachLayerStorageFilter" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/destroy.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/destroy.go index 8e28e6c504..596a5635e5 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/destroy.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/destroy.go @@ -12,7 +12,7 @@ import ( // // `layerPath` is a path to a directory containing the layer to export. func DestroyLayer(ctx context.Context, layerPath string) (err error) { - title := "hcsshim.DestroyLayer" + title := "hcsshim::DestroyLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/detach.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/detach.go index 435473257e..0752c09115 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/detach.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/detach.go @@ -12,7 +12,7 @@ import ( // // `layerPath` is a path to a directory containing the layer to export. 
func DetachLayerStorageFilter(ctx context.Context, layerPath string) (err error) { - title := "hcsshim.DetachLayerStorageFilter" + title := "hcsshim::DetachLayerStorageFilter" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/export.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/export.go index 2db6d40399..0b353bc7e2 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/export.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/export.go @@ -19,7 +19,7 @@ import ( // // `options` are the export options applied to the exported layer. func ExportLayer(ctx context.Context, layerPath, exportFolderPath string, layerData LayerData, options ExportLayerOptions) (err error) { - title := "hcsshim.ExportLayer" + title := "hcsshim::ExportLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/format.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/format.go index 61d8d5a634..07c29dd14c 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/format.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/format.go @@ -41,7 +41,7 @@ func openDisk(path string) (windows.Handle, error) { // // If the VHD is not mounted it will be temporarily mounted. 
func FormatWritableLayerVhd(ctx context.Context, vhdHandle windows.Handle) (err error) { - title := "hcsshim.FormatWritableLayerVhd" + title := "hcsshim::FormatWritableLayerVhd" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/import.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/import.go index 0c61dab329..1f9b167fa3 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/import.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/import.go @@ -19,7 +19,7 @@ import ( // // `layerData` is the parent layer data. func ImportLayer(ctx context.Context, layerPath, sourceFolderPath string, layerData LayerData) (err error) { - title := "hcsshim.ImportLayer" + title := "hcsshim::ImportLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/initialize.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/initialize.go index 53ed8ea6ed..0358370f1f 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/initialize.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/initialize.go @@ -16,7 +16,7 @@ import ( // // `layerData` is the parent read-only layer data. 
func InitializeWritableLayer(ctx context.Context, layerPath string, layerData LayerData) (err error) { - title := "hcsshim.InitializeWritableLayer" + title := "hcsshim::InitializeWritableLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/mount.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/mount.go index fcdbbef814..d50c68e91f 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/mount.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/mount.go @@ -12,7 +12,7 @@ import ( // GetLayerVhdMountPath returns the volume path for a virtual disk of a writable container layer. func GetLayerVhdMountPath(ctx context.Context, vhdHandle windows.Handle) (path string, err error) { - title := "hcsshim.GetLayerVhdMountPath" + title := "hcsshim::GetLayerVhdMountPath" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/computestorage/setup.go b/test/vendor/github.com/Microsoft/hcsshim/computestorage/setup.go index 06aaf841e8..68d353e733 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/computestorage/setup.go +++ b/test/vendor/github.com/Microsoft/hcsshim/computestorage/setup.go @@ -21,7 +21,7 @@ import ( // // `options` are the options applied while processing the layer. 
func SetupBaseOSLayer(ctx context.Context, layerPath string, vhdHandle windows.Handle, options OsLayerOptions) (err error) { - title := "hcsshim.SetupBaseOSLayer" + title := "hcsshim::SetupBaseOSLayer" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -52,7 +52,7 @@ func SetupBaseOSVolume(ctx context.Context, layerPath, volumePath string, option if osversion.Build() < 19645 { return errors.New("SetupBaseOSVolume is not present on builds older than 19645") } - title := "hcsshim.SetupBaseOSVolume" + title := "hcsshim::SetupBaseOSVolume" ctx, span := trace.StartSpan(ctx, title) //nolint:ineffassign,staticcheck defer span.End() defer func() { oc.SetSpanStatus(span, err) }() diff --git a/test/vendor/github.com/Microsoft/hcsshim/container.go b/test/vendor/github.com/Microsoft/hcsshim/container.go index bfd722898e..1de9625b91 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/container.go +++ b/test/vendor/github.com/Microsoft/hcsshim/container.go @@ -60,7 +60,7 @@ type container struct { waitCh chan struct{} } -// createComputeSystemAdditionalJSON is read from the environment at initialisation +// createContainerAdditionalJSON is read from the environment at initialization // time. It allows an environment variable to define additional JSON which // is merged in the CreateComputeSystem call to HCS. 
var createContainerAdditionalJSON []byte diff --git a/test/vendor/github.com/Microsoft/hcsshim/hcn/hcnsupport.go b/test/vendor/github.com/Microsoft/hcsshim/hcn/hcnsupport.go index bacb91feda..0624406ade 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/hcn/hcnsupport.go +++ b/test/vendor/github.com/Microsoft/hcsshim/hcn/hcnsupport.go @@ -115,7 +115,7 @@ func getSupportedFeatures() (SupportedFeatures, error) { logrus.WithFields(logrus.Fields{ "version": fmt.Sprintf("%+v", globals.Version), "supportedFeatures": fmt.Sprintf("%+v", features), - }).Info("HCN feature check") + }).Debug("HCN feature check") return features, nil } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/cmd.go b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/cmd.go index 91a7da0277..5ad3ae22fe 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/cmd.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/cmd.go @@ -11,12 +11,16 @@ import ( "sync/atomic" "time" - "github.com/Microsoft/hcsshim/internal/cow" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sync/errgroup" "golang.org/x/sys/windows" + + "github.com/Microsoft/hcsshim/internal/cow" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" + specs "github.com/opencontainers/runtime-spec/specs-go" ) // CmdProcessRequest stores information on command requests made through this package. @@ -29,6 +33,8 @@ type CmdProcessRequest struct { Stderr string } +// todo (helsaawy): replace use of Cmd.Log and switch to log.WithContext + // Cmd represents a command being prepared or run in a process host. type Cmd struct { // Host is the process host in which to launch the process. 
@@ -111,7 +117,7 @@ func Command(host cow.ProcessHost, name string, arg ...string) *Cmd { Spec: &specs.Process{ Args: append([]string{name}, arg...), }, - Log: logrus.NewEntry(logrus.StandardLogger()), + Log: log.G(context.Background()), ExitState: &ExitState{}, } if host.OS() == "windows" { @@ -128,12 +134,30 @@ func Command(host cow.ProcessHost, name string, arg ...string) *Cmd { func CommandContext(ctx context.Context, host cow.ProcessHost, name string, arg ...string) *Cmd { cmd := Command(host, name, arg...) cmd.Context = ctx + cmd.Log = log.G(ctx) return cmd } // Start starts a command. The caller must ensure that if Start succeeds, // Wait is eventually called to clean up resources. -func (c *Cmd) Start() error { +func (c *Cmd) Start() (err error) { + pctx := context.Background() + if c.Context != nil { + pctx = c.Context + } + ctx, span := oc.StartTraceSpan(pctx, "cmd::Start") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cmd", fmt.Sprintf("%q", c.Spec.Args)), + trace.BoolAttribute("isOCI", c.Host.IsOCI()), + trace.StringAttribute("os", c.Host.OS()), + trace.StringAttribute("cwd", c.Spec.Cwd), + trace.StringAttribute("env", fmt.Sprintf("%q", c.Spec.Env))) + + if c.Log == nil { + c.Log = log.G(ctx) + } + c.allDoneCh = make(chan struct{}) var x interface{} if !c.Host.IsOCI() { @@ -182,17 +206,15 @@ func (c *Cmd) Start() error { } x = lpp } - if c.Context != nil && c.Context.Err() != nil { - return c.Context.Err() + if err = ctx.Err(); err != nil { + return err } - p, err := c.Host.CreateProcess(context.TODO(), x) + p, err := c.Host.CreateProcess(ctx, x) if err != nil { return err } c.Process = p - if c.Log != nil { - c.Log = c.Log.WithField("pid", p.Pid()) - } + c.Log = c.Log.WithField("pid", p.Pid()) // Start relaying process IO. stdin, stdout, stderr := p.Stdio() @@ -209,7 +231,7 @@ func (c *Cmd) Start() error { c.stdinErr.Store(err) } // Notify the process that there is no more input. 
- if err := p.CloseStdin(context.TODO()); err != nil && c.Log != nil { + if err := p.CloseStdin(ctx); err != nil && c.Log != nil { c.Log.WithError(err).Warn("failed to close Cmd stdin") } }() @@ -218,7 +240,7 @@ func (c *Cmd) Start() error { if c.Stdout != nil { c.iogrp.Go(func() error { _, err := relayIO(c.Stdout, stdout, c.Log, "stdout") - if err := p.CloseStdout(context.TODO()); err != nil { + if err := p.CloseStdout(ctx); err != nil { c.Log.WithError(err).Warn("failed to close Cmd stdout") } return err @@ -228,7 +250,7 @@ func (c *Cmd) Start() error { if c.Stderr != nil { c.iogrp.Go(func() error { _, err := relayIO(c.Stderr, stderr, c.Log, "stderr") - if err := p.CloseStderr(context.TODO()); err != nil { + if err := p.CloseStderr(ctx); err != nil { c.Log.WithError(err).Warn("failed to close Cmd stderr") } return err @@ -239,20 +261,33 @@ func (c *Cmd) Start() error { go func() { select { case <-c.Context.Done(): - _, _ = c.Process.Kill(context.TODO()) + // Process.Kill (via Process.Signal) will not send an RPC if the + // provided context in is cancelled (bridge.AsyncRPC will end early) + kctx := log.Copy(context.Background(), ctx) + _, _ = c.Process.Kill(kctx) case <-c.allDoneCh: } }() } + return nil } // Wait waits for a command and its IO to complete and closes the underlying // process. It can only be called once. It returns an ExitError if the command // runs and returns a non-zero exit code. 
-func (c *Cmd) Wait() error { +func (c *Cmd) Wait() (err error) { + pctx := context.Background() + if c.Context != nil { + pctx = c.Context + } + _, span := oc.StartTraceSpan(pctx, "cmd::Wait") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cmd", fmt.Sprintf("%q", c.Spec.Args))) + waitErr := c.Process.Wait() - if waitErr != nil && c.Log != nil { + if waitErr != nil { c.Log.WithError(waitErr).Warn("process wait failed") } state := &ExitState{} @@ -271,9 +306,7 @@ func (c *Cmd) Wait() error { case <-t.C: // Close the process to cancel any reads to stdout or stderr. c.Process.Close() - if c.Log != nil { - c.Log.Warn("timed out waiting for stdio relay") - } + c.Log.Warn("timed out waiting for stdio relay") } }() } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/diag.go b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/diag.go index f4caff3251..54d320235e 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/diag.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/diag.go @@ -5,6 +5,8 @@ import ( "errors" "os/exec" + "github.com/sirupsen/logrus" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/uvm" @@ -12,6 +14,9 @@ import ( // ExecInUvm is a helper function used to execute commands specified in `req` inside the given UVM. func ExecInUvm(ctx context.Context, vm *uvm.UtilityVM, req *CmdProcessRequest) (int, error) { + ctx, etr := log.S(ctx, logrus.Fields{logfields.UVMID: vm.ID()}) + etr.Trace("cmd::ExecInUVM") + if len(req.Args) == 0 { return 0, errors.New("missing command") } @@ -39,6 +44,8 @@ func ExecInUvm(ctx context.Context, vm *uvm.UtilityVM, req *CmdProcessRequest) ( // ExecInShimHost is a helper function used to execute commands specified in `req` in the shim's // hosting system. 
func ExecInShimHost(ctx context.Context, req *CmdProcessRequest) (int, error) { + log.G(ctx).Trace("cmd::ExecInShimHost") + if len(req.Args) == 0 { return 0, errors.New("missing command") } } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io.go b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io.go index 0912af160c..b7741f8144 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io.go @@ -8,6 +8,9 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) // UpstreamIO is an interface describing the IO to connect to above the shim. @@ -62,19 +65,21 @@ func NewUpstreamIO(ctx context.Context, id, stdout, stderr, stdin string, termin } // relayIO is a glorified io.Copy that also logs when the copy has completed. -func relayIO(w io.Writer, r io.Reader, log *logrus.Entry, name string) (int64, error) { +func relayIO(w io.Writer, r io.Reader, entry *logrus.Entry, name string) (int64, error) { n, err := io.Copy(w, r) - if log != nil { + if entry != nil { lvl := logrus.DebugLevel - log = log.WithFields(logrus.Fields{ - "file": name, - "bytes": n, + entry = entry.WithFields(logrus.Fields{ + logfields.File: name, + "bytes": n, + "reader": log.FormatIO(r), + "writer": log.FormatIO(w), }) if err != nil { lvl = logrus.ErrorLevel - log = log.WithError(err) + entry = entry.WithError(err) } - log.Log(lvl, "Cmd IO relay complete") + entry.Log(lvl, "Cmd IO relay complete") } return n, err } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_binary.go b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_binary.go index ecfd1fa5ef..16e3dfd0b2 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_binary.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_binary.go @@ -18,6 +18,7 @@ import ( "github.com/sirupsen/logrus"
"github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) const ( @@ -167,17 +168,29 @@ type binaryIO struct { // Close named pipes for container stdout and stderr and wait for the binary process to finish. func (b *binaryIO) Close(ctx context.Context) { + ctx, etr := log.S(ctx, logrus.Fields{ //nolint:ineffassign,staticcheck + "binaryCmd": b.cmd.String(), + "binaryProcessID": b.cmd.Process.Pid, + }) + etr.Trace("binaryIO::Close") + b.soutCloser.Do(func() { if b.sout != nil { + etr := etr.WithField(logfields.Pipe, b.stdout) err := b.sout.Close() if err != nil { - log.G(ctx).WithError(err).Errorf("error while closing stdout npipe") + etr.WithError(err).Errorf("error while closing stdout npipe") + } else { + etr.Debug("binaryIO::soutCloser - stdout") } } if b.serr != nil { + etr := etr.WithField(logfields.Pipe, b.stderr) err := b.serr.Close() if err != nil { - log.G(ctx).WithError(err).Errorf("error while closing stderr npipe") + etr.WithError(err).Errorf("error while closing stderr npipe") + } else { + etr.Debug("binaryIO::soutCloser - stderr") } } }) @@ -190,13 +203,14 @@ func (b *binaryIO) Close(ctx context.Context) { select { case err := <-done: if err != nil { - log.G(ctx).WithError(err).Errorf("error while waiting for binary cmd to finish") + etr.WithError(err).Errorf("error while waiting for binary cmd to finish") } case <-time.After(binaryCmdWaitTimeout): - log.G(ctx).Errorf("timeout while waiting for binaryIO process to finish. Killing") + etr := etr.WithField(logfields.Timeout, binaryCmdWaitTimeout.String()) + etr.Errorf("timeout while waiting for binaryIO process to finish. 
Killing") err := b.cmd.Process.Kill() if err != nil { - log.G(ctx).WithError(err).Errorf("error while killing binaryIO process") + etr.WithError(err).Errorf("error while killing binaryIO process") } } }) @@ -278,7 +292,7 @@ func (p *pipe) Read(b []byte) (int, error) { func (p *pipe) Close() error { if err := p.l.Close(); err != nil { - log.G(context.TODO()).WithError(err).Debug("error closing pipe listener") + log.G(context.Background()).WithError(err).Debug("error closing pipe listener") } p.conWg.Wait() if p.con != nil { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_npipe.go b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_npipe.go index 63b9f9b732..c91bdd3dd9 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_npipe.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/cmd/io_npipe.go @@ -13,6 +13,7 @@ import ( winio "github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/cenkalti/backoff/v4" "github.com/sirupsen/logrus" "golang.org/x/sys/windows" @@ -32,7 +33,7 @@ func NewNpipeIO(ctx context.Context, stdin, stdout, stderr string, terminal bool "stdout": stdout, "stderr": stderr, "terminal": terminal, - }).Debug("NewNpipeIO") + }).Trace("NewNpipeIO") nio := &npipeio{ stdin: stdin, @@ -121,7 +122,7 @@ func (nprw *nPipeRetryWriter) Write(p []byte) (n int, err error) { nprw.Conn.Close() newConn, retryErr := nprw.retryDialPipe() if retryErr == nil { - log.G(nprw.ctx).WithField("address", nprw.pipePath).Info("Succeeded in reconnecting to named pipe") + log.G(nprw.ctx).WithField("address", nprw.pipePath).Debug("Succeeded in reconnecting to named pipe") nprw.Conn = newConn continue @@ -196,28 +197,31 @@ type npipeio struct { } func (nio *npipeio) Close(ctx context.Context) { + log.G(ctx).Trace("npipeio::Close") + // winio.win32Pipe.Close currently doesn't return any errors 
nio.sinCloser.Do(func() { if nio.sin != nil { - log.G(ctx).Debug("npipeio::sinCloser") + log.G(ctx).WithField(logfields.Pipe, nio.stdin).Debug("npipeio::sinCloser") nio.sin.Close() } }) nio.outErrCloser.Do(func() { if nio.sout != nil { - log.G(ctx).Debug("npipeio::outErrCloser - stdout") + log.G(ctx).WithField(logfields.Pipe, nio.stdout).Debug("npipeio::outErrCloser - stdout") nio.sout.Close() } if nio.serr != nil { - log.G(ctx).Debug("npipeio::outErrCloser - stderr") + log.G(ctx).WithField(logfields.Pipe, nio.stderr).Debug("npipeio::outErrCloser - stderr") nio.serr.Close() } }) } func (nio *npipeio) CloseStdin(ctx context.Context) { + log.G(ctx).Trace("npipeio::CloseStdin") nio.sinCloser.Do(func() { if nio.sin != nil { - log.G(ctx).Debug("npipeio::sinCloser") + log.G(ctx).WithField(logfields.Pipe, nio.stdin).Debug("npipeio::sinCloser") nio.sin.Close() } }) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/devices/assigned_devices.go b/test/vendor/github.com/Microsoft/hcsshim/internal/devices/assigned_devices.go index 4750776403..a1ae4e0f73 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/devices/assigned_devices.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/devices/assigned_devices.go @@ -44,7 +44,7 @@ func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID string, return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) } vmBusInstanceID := vm.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) - log.G(ctx).WithField("vmbus id", vmBusInstanceID).Info("vmbus instance ID") + log.G(ctx).WithField("vmbus id", vmBusInstanceID).Debug("vmbus instance ID") locationPaths, err = getChildrenDeviceLocationPaths(ctx, vm, vmBusInstanceID, deviceUtilPath) return vpci, locationPaths, err diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/bridge.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/bridge.go index d1650ecb4a..aebf7e4996 100644 --- 
a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/bridge.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/bridge.go @@ -49,6 +49,9 @@ type rpc struct { ch chan struct{} } +// todo(helsaawy): remove bridge.log entry and log based on per-request context, not +// the context that started the bridge + // bridge represents a communcations bridge with the guest. It handles the // transport layer but (mostly) does not parse or construct the message payload. type bridge struct { @@ -307,7 +310,7 @@ func (brdg *bridge) recvLoop() error { } brdg.log.WithFields(logrus.Fields{ "payload": string(b), - "type": typ, + "type": typ.String(), "message-id": id}).Debug("bridge receive") switch typ & msgTypeMask { case msgTypeResponse: @@ -395,7 +398,7 @@ func (brdg *bridge) writeMessage(buf *bytes.Buffer, enc *json.Encoder, typ msgTy // Write the message. brdg.log.WithFields(logrus.Fields{ "payload": string(buf.Bytes()[hdrSize:]), - "type": typ, + "type": typ.String(), "message-id": id}).Debug("bridge send") _, err = buf.WriteTo(brdg.conn) if err != nil { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go index 6ec5b8b84f..8a21ef1cfa 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go @@ -10,6 +10,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "go.opencensus.io/trace" ) @@ -31,7 +32,7 @@ var _ cow.Container = &Container{} // CreateContainer creates a container using ID `cid` and `cfg`. The request // will likely not be cancellable even if `ctx` becomes done. 
func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, config interface{}) (_ *Container, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateContainer") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateContainer", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", cid)) @@ -89,17 +90,17 @@ func (c *Container) IsOCI() bool { // Close releases associated with the container. func (c *Container) Close() error { - c.closeOnce.Do(func() { - _, span := trace.StartSpan(context.Background(), "gcs::Container::Close") - defer span.End() - span.AddAttributes(trace.StringAttribute("cid", c.id)) - }) + _, span := oc.StartTraceSpan(context.Background(), "gcs::Container::Close") + defer span.End() + span.AddAttributes(trace.StringAttribute("cid", c.id)) + + c.closeOnce.Do(func() {}) return nil } // CreateProcess creates a process in the container. func (c *Container) CreateProcess(ctx context.Context, config interface{}) (_ cow.Process, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::CreateProcess") + ctx, span := oc.StartTraceSpan(ctx, "gcs::Container::CreateProcess", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -114,7 +115,7 @@ func (c *Container) ID() string { // Modify sends a modify request to the container. 
func (c *Container) Modify(ctx context.Context, config interface{}) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Modify") + ctx, span := oc.StartSpan(ctx, "gcs::Container::Modify", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -129,7 +130,7 @@ func (c *Container) Modify(ctx context.Context, config interface{}) (err error) // Properties returns the requested container properties targeting a V1 schema container. func (c *Container) Properties(ctx context.Context, types ...schema1.PropertyType) (_ *schema1.ContainerProperties, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Properties") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Properties", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -148,7 +149,7 @@ func (c *Container) Properties(ctx context.Context, types ...schema1.PropertyTyp // PropertiesV2 returns the requested container properties targeting a V2 schema container. func (c *Container) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::PropertiesV2") + ctx, span := trace.StartSpan(ctx, "gcs::Container::PropertiesV2", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -167,7 +168,7 @@ func (c *Container) PropertiesV2(ctx context.Context, types ...hcsschema.Propert // Start starts the container. 
func (c *Container) Start(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Start") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Start", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -198,7 +199,7 @@ func (c *Container) shutdown(ctx context.Context, proc rpcProc) error { // might not be terminated by the time the request completes (and might never // terminate). func (c *Container) Shutdown(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Shutdown") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Shutdown", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -212,7 +213,7 @@ func (c *Container) Shutdown(ctx context.Context) (err error) { // might not be terminated by the time the request completes (and might never // terminate). 
func (c *Container) Terminate(ctx context.Context) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Container::Terminate") + ctx, span := trace.StartSpan(ctx, "gcs::Container::Terminate", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", c.id)) @@ -234,11 +235,11 @@ func (c *Container) Wait() error { } func (c *Container) waitBackground() { - ctx, span := trace.StartSpan(context.Background(), "gcs::Container::waitBackground") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Container::waitBackground") defer span.End() span.AddAttributes(trace.StringAttribute("cid", c.id)) err := c.Wait() - log.G(ctx).Debug("container exited") + log.G(ctx).WithField(logfields.ContainerID, c.id).Debug("container exited") oc.SetSpanStatus(span, err) } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/guestconnection.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/guestconnection.go index bdf796010c..90d40a4ee3 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/guestconnection.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/guestconnection.go @@ -66,7 +66,7 @@ type GuestConnectionConfig struct { // Connect establishes a GCS connection. `gcc.Conn` will be closed by this function. func (gcc *GuestConnectionConfig) Connect(ctx context.Context, isColdStart bool) (_ *GuestConnection, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnectionConfig::Connect") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnectionConfig::Connect", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -165,7 +165,7 @@ func (gc *GuestConnection) connect(ctx context.Context, isColdStart bool, initGu // Modify sends a modify settings request to the null container. This is // generally used to prepare virtual hardware that has been added to the guest. 
func (gc *GuestConnection) Modify(ctx context.Context, settings interface{}) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::Modify") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::Modify", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -178,7 +178,7 @@ func (gc *GuestConnection) Modify(ctx context.Context, settings interface{}) (er } func (gc *GuestConnection) DumpStacks(ctx context.Context) (response string, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DumpStacks") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DumpStacks", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -191,7 +191,7 @@ func (gc *GuestConnection) DumpStacks(ctx context.Context) (response string, err } func (gc *GuestConnection) DeleteContainerState(ctx context.Context, cid string) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DeleteContainerState") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::DeleteContainerState", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", cid)) @@ -214,7 +214,7 @@ func (gc *GuestConnection) Close() error { // CreateProcess creates a process in the container host. 
func (gc *GuestConnection) CreateProcess(ctx context.Context, settings interface{}) (_ cow.Process, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateProcess") + ctx, span := trace.StartSpan(ctx, "gcs::GuestConnection::CreateProcess", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -266,7 +266,7 @@ func (gc *GuestConnection) notify(ntf *containerNotification) error { if ch == nil { return fmt.Errorf("container %s not found", cid) } - logrus.WithField(logfields.ContainerID, cid).Info("container terminated in guest") + logrus.WithField(logfields.ContainerID, cid).Debug("container terminated in guest") close(ch) return nil } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/iochannel.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/iochannel.go index 5af6b81aaf..a2a032a12d 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/iochannel.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/iochannel.go @@ -65,3 +65,7 @@ func (c *ioChannel) Write(b []byte) (int, error) { } return c.c.Write(b) } + +func (c *ioChannel) Addr() net.Addr { + return c.l.Addr() +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/process.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/process.go index 628cb8b0d7..dec513e9a2 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/process.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/process.go @@ -122,7 +122,7 @@ func (gc *GuestConnection) exec(ctx context.Context, cid string, params interfac // Close releases resources associated with the process and closes the // associated standard IO streams. 
func (p *Process) Close() error { - ctx, span := trace.StartSpan(context.Background(), "gcs::Process::Close") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Process::Close") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), @@ -211,7 +211,7 @@ func (p *Process) Pid() int { // ResizeConsole requests that the pty associated with the process resize its // window. func (p *Process) ResizeConsole(ctx context.Context, width, height uint16) (err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Process::ResizeConsole") + ctx, span := trace.StartSpan(ctx, "gcs::Process::ResizeConsole", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -230,7 +230,7 @@ func (p *Process) ResizeConsole(ctx context.Context, width, height uint16) (err // Signal sends a signal to the process, returning whether it was delivered. func (p *Process) Signal(ctx context.Context, options interface{}) (_ bool, err error) { - ctx, span := trace.StartSpan(ctx, "gcs::Process::Signal") + ctx, span := trace.StartSpan(ctx, "gcs::Process::Signal", oc.WithClientSpanKind) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -275,7 +275,7 @@ func (p *Process) Wait() error { } func (p *Process) waitBackground() { - ctx, span := trace.StartSpan(context.Background(), "gcs::Process::waitBackground") + ctx, span := oc.StartTraceSpan(context.Background(), "gcs::Process::waitBackground") defer span.End() span.AddAttributes( trace.StringAttribute("cid", p.cid), diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/protocol.go b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/protocol.go index 840dcb2392..c07adced6b 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/protocol.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/gcs/protocol.go @@ -3,6 +3,7 @@ package gcs import ( "encoding/json" "fmt" + "strconv" 
"github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/hcs/schema1" @@ -62,6 +63,43 @@ const ( rpcLifecycleNotification ) +func (rpc rpcProc) String() string { + switch rpc { + case rpcCreate: + return "Create" + case rpcStart: + return "Start" + case rpcShutdownGraceful: + return "ShutdownGraceful" + case rpcShutdownForced: + return "ShutdownForced" + case rpcExecuteProcess: + return "ExecuteProcess" + case rpcWaitForProcess: + return "WaitForProcess" + case rpcSignalProcess: + return "SignalProcess" + case rpcResizeConsole: + return "ResizeConsole" + case rpcGetProperties: + return "GetProperties" + case rpcModifySettings: + return "ModifySettings" + case rpcNegotiateProtocol: + return "NegotiateProtocol" + case rpcDumpStacks: + return "DumpStacks" + case rpcDeleteContainerState: + return "DeleteContainerState" + case rpcUpdateContainer: + return "UpdateContainer" + case rpcLifecycleNotification: + return "LifecycleNotification" + default: + return "0x" + strconv.FormatUint(uint64(rpc), 16) + } +} + type msgType uint32 const ( @@ -92,40 +130,7 @@ func (typ msgType) String() string { default: return fmt.Sprintf("%#x", uint32(typ)) } - switch rpcProc(typ &^ msgTypeMask) { - case rpcCreate: - s += "Create" - case rpcStart: - s += "Start" - case rpcShutdownGraceful: - s += "ShutdownGraceful" - case rpcShutdownForced: - s += "ShutdownForced" - case rpcExecuteProcess: - s += "ExecuteProcess" - case rpcWaitForProcess: - s += "WaitForProcess" - case rpcSignalProcess: - s += "SignalProcess" - case rpcResizeConsole: - s += "ResizeConsole" - case rpcGetProperties: - s += "GetProperties" - case rpcModifySettings: - s += "ModifySettings" - case rpcNegotiateProtocol: - s += "NegotiateProtocol" - case rpcDumpStacks: - s += "DumpStacks" - case rpcDeleteContainerState: - s += "DeleteContainerState" - case rpcUpdateContainer: - s += "UpdateContainer" - case rpcLifecycleNotification: - s += "LifecycleNotification" - default: - s += fmt.Sprintf("%#x", 
uint32(typ)) - } + s += rpcProc(typ &^ msgTypeMask).String() return s + ")" } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go index 605856f2a3..ef44d30cf9 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go @@ -11,8 +11,10 @@ import ( "time" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/vmcompute" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -118,11 +120,15 @@ func (process *Process) processSignalResult(ctx context.Context, err error) (boo // // For WCOW `guestresource.SignalProcessOptionsWCOW`. func (process *Process) Signal(ctx context.Context, options interface{}) (bool, error) { + operation := "hcs::Process::Signal" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::Signal" - if process.handle == 0 { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -143,11 +149,15 @@ func (process *Process) Signal(ctx context.Context, options interface{}) (bool, // Kill signals the process to terminate but does not wait for it to finish terminating. 
func (process *Process) Kill(ctx context.Context) (bool, error) { + operation := "hcs::Process::Kill" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::Kill" - if process.handle == 0 { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -201,7 +211,7 @@ func (process *Process) Kill(ctx context.Context) (bool, error) { // call multiple times. func (process *Process) waitBackground() { operation := "hcs::Process::waitBackground" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() span.AddAttributes( trace.StringAttribute("cid", process.SystemID()), @@ -243,7 +253,7 @@ func (process *Process) waitBackground() { } } } - log.G(ctx).WithField("exitCode", exitCode).Debug("process exited") + log.G(ctx).WithField("exitCode", exitCode).Debug("hcs::Process process exited") process.closedWaitOnce.Do(func() { process.exitCode = exitCode @@ -254,7 +264,7 @@ func (process *Process) waitBackground() { } // Wait waits for the process to exit. If the process has already exited returns -// the pervious error (if any). +// the previous error (if any). func (process *Process) Wait() error { <-process.waitBlock return process.waitError @@ -262,11 +272,15 @@ func (process *Process) Wait() error { // ResizeConsole resizes the console of the process. 
func (process *Process) ResizeConsole(ctx context.Context, width, height uint16) error { + operation := "hcs::Process::ResizeConsole" + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.ProcessID: process.processID, + logfields.ContainerID: process.SystemID()}) + etr.Trace(operation) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::ResizeConsole" - if process.handle == 0 { return makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -312,7 +326,7 @@ func (process *Process) ExitCode() (int, error) { // are the responsibility of the caller to close. func (process *Process) StdioLegacy() (_ io.WriteCloser, _ io.ReadCloser, _ io.ReadCloser, err error) { operation := "hcs::Process::StdioLegacy" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( @@ -359,12 +373,18 @@ func (process *Process) Stdio() (stdin io.Writer, stdout, stderr io.Reader) { // CloseStdin closes the write side of the stdin pipe so that the process is // notified on the read side that there is no more data in stdin. -func (process *Process) CloseStdin(ctx context.Context) error { +func (process *Process) CloseStdin(ctx context.Context) (err error) { + operation := "hcs::Process::CloseStdin" + ctx, span := trace.StartSpan(ctx, operation) //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute("cid", process.SystemID()), + trace.Int64Attribute("pid", int64(process.processID))) + process.handleLock.RLock() defer process.handleLock.RUnlock() - operation := "hcs::Process::CloseStdin" - if process.handle == 0 { return makeProcessError(process, operation, ErrAlreadyClosed, nil) } @@ -450,7 +470,7 @@ func (process *Process) CloseStderr(ctx context.Context) (err error) { // or wait on it. 
func (process *Process) Close() (err error) { operation := "hcs::Process::Close" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go index 75499c967f..adf1c1e235 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "strings" "sync" "syscall" @@ -12,9 +13,11 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/vmcompute" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -44,7 +47,7 @@ func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface in // hcsCreateComputeSystemContext is an async operation. Start the outer span // here to measure the full create time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", id)) @@ -97,6 +100,7 @@ func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface in // OpenComputeSystem opens an existing compute system by ID. 
func OpenComputeSystem(ctx context.Context, id string) (*System, error) { operation := "hcs::OpenComputeSystem" + log.G(ctx).WithField(logfields.ContainerID, id).Trace(operation) computeSystem := newSystem(id) handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id) @@ -149,6 +153,7 @@ func (computeSystem *System) IsOCI() bool { // GetComputeSystems gets a list of the compute systems on the system that match the query func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) { operation := "hcs::GetComputeSystems" + log.G(ctx).WithField("query", fmt.Sprintf("%+v", q)).Trace(operation) queryb, err := json.Marshal(q) if err != nil { @@ -178,7 +183,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { // hcsStartComputeSystemContext is an async operation. Start the outer span // here to measure the full start time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -206,11 +211,12 @@ func (computeSystem *System) ID() string { // Shutdown requests a compute system shutdown. func (computeSystem *System) Shutdown(ctx context.Context) error { + operation := "hcs::System::Shutdown" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Shutdown" - if computeSystem.handle == 0 { return nil } @@ -227,11 +233,12 @@ func (computeSystem *System) Shutdown(ctx context.Context) error { // Terminate requests a compute system terminate. 
func (computeSystem *System) Terminate(ctx context.Context) error { + operation := "hcs::System::Terminate" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Terminate" - if computeSystem.handle == 0 { return nil } @@ -253,7 +260,7 @@ func (computeSystem *System) Terminate(ctx context.Context) error { // safe to call multiple times. func (computeSystem *System) waitBackground() { operation := "hcs::System::waitBackground" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -297,11 +304,12 @@ func (computeSystem *System) ExitError() error { // Properties returns the requested container properties targeting a V1 schema container. func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) { + operation := "hcs::System::Properties" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Properties" - queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types}) if err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) @@ -326,11 +334,12 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr // PropertiesV2 returns the requested container properties targeting a V2 schema container. 
func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { + operation := "hcs::System::PropertiesV2" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::PropertiesV2" - queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types}) if err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) @@ -359,7 +368,7 @@ func (computeSystem *System) Pause(ctx context.Context) (err error) { // hcsPauseComputeSystemContext is an async peration. Start the outer span // here to measure the full pause time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -386,7 +395,7 @@ func (computeSystem *System) Resume(ctx context.Context) (err error) { // hcsResumeComputeSystemContext is an async operation. Start the outer span // here to measure the full restore time. - ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -411,9 +420,9 @@ func (computeSystem *System) Resume(ctx context.Context) (err error) { func (computeSystem *System) Save(ctx context.Context, options interface{}) (err error) { operation := "hcs::System::Save" - // hcsSaveComputeSystemContext is an async peration. Start the outer span + // hcsSaveComputeSystemContext is an async operation. Start the outer span // here to measure the full save time. 
- ctx, span := trace.StartSpan(ctx, operation) + ctx, span := oc.StartTraceSpan(ctx, operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -466,6 +475,9 @@ func (computeSystem *System) createProcess(ctx context.Context, operation string // CreateProcess launches a new process within the computeSystem. func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) { operation := "hcs::System::CreateProcess" + ctx, etr := log.S(ctx, logrus.Fields{logfields.ContainerID: computeSystem.id}) + etr.Trace(operation) + process, processInfo, err := computeSystem.createProcess(ctx, operation, c) if err != nil { return nil, err @@ -495,11 +507,12 @@ func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) ( // OpenProcess gets an interface to an existing process within the computeSystem. func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) { + operation := "hcs::System::OpenProcess" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::OpenProcess" - if computeSystem.handle == 0 { return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) } @@ -522,7 +535,7 @@ func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process // Close cleans up any state associated with the compute system but does not terminate or wait for it. 
func (computeSystem *System) Close() (err error) { operation := "hcs::System::Close" - ctx, span := trace.StartSpan(context.Background(), operation) + ctx, span := oc.StartTraceSpan(context.Background(), operation) defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) @@ -612,11 +625,12 @@ func (computeSystem *System) unregisterCallback(ctx context.Context) error { // Modify the System by sending a request to HCS func (computeSystem *System) Modify(ctx context.Context, config interface{}) error { + operation := "hcs::System::Modify" + log.G(ctx).WithField(logfields.ContainerID, computeSystem.id).Trace(operation) + computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() - operation := "hcs::System::Modify" - if computeSystem.handle == 0 { return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/create.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/create.go index 6c9640f878..c73252eb69 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/create.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/create.go @@ -17,6 +17,8 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schemaversion" @@ -24,6 +26,7 @@ import ( "github.com/Microsoft/hcsshim/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) var ( @@ -208,8 +211,8 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) 
log.G(ctx).WithFields(logrus.Fields{ "options": fmt.Sprintf("%+v", createOptions), - "schema": coi.actualSchemaVersion, - }).Debug("hcsshim::initializeCreateOptions") + "schema": fmt.Sprintf("%+v", coi.actualSchemaVersion), + }).Debug("hcsshim::initializeCreateOptions options") return coi, nil } @@ -260,6 +263,11 @@ func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r // release the resources on failure, so that the client can make the necessary // call to release resources that have been allocated as part of calling this function. func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::CreateContainer") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ID, createOptions.ID)) + coi, err := initializeCreateOptions(ctx, createOptions) if err != nil { return nil, nil, err @@ -312,12 +320,13 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } var hcsDocument, gcsDocument interface{} - log.G(ctx).Debug("hcsshim::CreateContainer allocating resources") + etr := log.G(ctx) + etr.Debug("hcsshim::CreateContainer allocating resources") if coi.Spec.Linux != nil { if schemaversion.IsV10(coi.actualSchemaVersion) { return nil, r, errors.New("LCOW v1 not supported") } - log.G(ctx).Debug("hcsshim::CreateContainer allocateLinuxResources") + etr.Debug("hcsshim::CreateContainer allocateLinuxResources") err = allocateLinuxResources(ctx, coi, r, isSandbox) if err != nil { log.G(ctx).WithError(err).Debug("failed to allocateLinuxResources") @@ -325,19 +334,19 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } gcsDocument, err = createLinuxContainerDocument(ctx, coi, r.ContainerRootInUVM()) if err != nil { - log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") + 
log.G(ctx).WithError(err).Error("failed createHCSContainerDocument") return nil, r, err } } else { err = allocateWindowsResources(ctx, coi, r, isSandbox) if err != nil { - log.G(ctx).WithError(err).Debug("failed to allocateWindowsResources") + etr.WithError(err).Error("failed to allocateWindowsResources") return nil, r, err } - log.G(ctx).Debug("hcsshim::CreateContainer creating container document") + etr.Debug("hcsshim::CreateContainer creating container document") v1, v2, err := createWindowsContainerDocument(ctx, coi) if err != nil { - log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") + etr.WithError(err).Error("failed createHCSContainerDocument") return nil, r, err } @@ -361,7 +370,7 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C } } - log.G(ctx).Debug("hcsshim::CreateContainer creating compute system") + etr.Debug("hcsshim::CreateContainer creating compute system") if gcsDocument != nil { c, err := coi.HostingSystem.CreateContainer(ctx, coi.actualID, gcsDocument) if err != nil { @@ -381,6 +390,11 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C // CreateContainer does. Also, instead of sending create container request it sends a modify // request to an existing container. CloneContainer only works for WCOW. 
func CloneContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::CloneContainer") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ID, createOptions.ID)) + coi, err := initializeCreateOptions(ctx, createOptions) if err != nil { return nil, nil, err diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/devices.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/devices.go index 42d11aac52..c5b55f27db 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/devices.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/devices.go @@ -155,7 +155,7 @@ func handleAssignedDevicesWindows( ID: value, IDType: uvm.VPCILocationPathIDType, } - log.G(ctx).WithField("parsed devices", specDev).Info("added windows device to spec") + log.G(ctx).WithField("parsed devices", specDev).Debug("added windows device to spec") resultDevs = append(resultDevs, specDev) } } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_lcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_lcow.go index 5612cd18f0..26308a7a47 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_lcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_lcow.go @@ -78,12 +78,13 @@ type linuxHostedSystem struct { } func createLinuxContainerDocument(ctx context.Context, coi *createOptionsInternal, guestRoot string) (*linuxHostedSystem, error) { + log.G(ctx).WithField("guestRoot", guestRoot).Trace("hcsshim::createLinuxContainerDoc") + spec, err := createLCOWSpec(coi) if err != nil { return nil, err } - log.G(ctx).WithField("guestRoot", guestRoot).Debug("hcsshim::createLinuxContainerDoc") return &linuxHostedSystem{ SchemaVersion: schemaversion.SchemaV21(), OciBundlePath: guestRoot, diff --git 
a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go index 688d901609..e503dee68e 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go @@ -15,6 +15,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/processorinfo" "github.com/Microsoft/hcsshim/internal/uvm" @@ -142,7 +143,12 @@ func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUC // a container, both hosted and process isolated. It creates both v1 and v2 // container objects, WCOW only. The containers storage should have been mounted already. func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInternal) (*schema1.ContainerConfig, *hcsschema.Container, error) { - log.G(ctx).Debug("hcsshim: CreateHCSContainerDocument") + ctx, etr := log.S(ctx, logrus.Fields{ + logfields.Name: coi.actualID, + "owner": coi.actualOwner, + }) + etr.Trace("hcsshim: CreateHCSContainerDocument") + // TODO: Make this safe if exported so no null pointer dereferences. 
if coi.Spec == nil { @@ -226,12 +232,12 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } else if newCPULimit > 10000 { newCPULimit = 10000 } - log.G(ctx).WithFields(logrus.Fields{ + etr.WithFields(logrus.Fields{ "hostCPUCount": hostCPUCount, "uvmCPUCount": uvmCPUCount, "oldCPULimit": cpuLimit, "newCPULimit": newCPULimit, - }).Info("rescaling CPU limit for UVM sandbox") + }).Debug("rescaling CPU limit for UVM sandbox") cpuLimit = newCPULimit } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/network.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/network.go index daa4e46d00..f1ed1776d4 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/network.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/network.go @@ -6,18 +6,18 @@ import ( "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) -func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { - op := "hcsoci::createNetworkNamespace" - l := log.G(ctx).WithField(logfields.ContainerID, coi.ID) - l.Debug(op + " - Begin") - defer func() { - l.Debug(op + " - End") - }() +func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) (err error) { + ctx, span := trace.StartSpan(ctx, "hcsoci::createNetworkNamespace") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.ContainerID, coi.ID)) ns, err := hcn.NewNamespace("").Create() if err != nil { @@ -27,7 +27,7 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r * log.G(ctx).WithFields(logrus.Fields{ "netID": ns.Id, 
logfields.ContainerID: coi.ID, - }).Info("created network namespace for container") + }).Debug("created network namespace for container") r.SetNetNS(ns.Id) r.SetCreatedNetNS(true) @@ -41,7 +41,7 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r * log.G(ctx).WithFields(logrus.Fields{ "netID": ns.Id, "endpointID": endpointID, - }).Info("added network endpoint to namespace") + }).Debug("added network endpoint to namespace") endpoints = append(endpoints, endpointID) } r.Add(&uvm.NetworkEndpoints{EndpointIDs: endpoints, Namespace: ns.Id}) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/resources.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/resources.go index 46ad55660f..d42fe79f0f 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/resources.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/resources.go @@ -14,7 +14,7 @@ func NormalizeProcessorCount(ctx context.Context, cid string, requestedCount, ho "id": cid, "requested count": requestedCount, "assigned count": hostCount, - }).Warn("Changing user requested cpu count to current number of processors on the host") + }).Warn("Changing user requested cpu count to current number of processors on the host") return hostCount } else { return requestedCount diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/layers/layers.go b/test/vendor/github.com/Microsoft/hcsshim/internal/layers/layers.go index 573ad72aa7..d127dde8a6 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/layers/layers.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/layers/layers.go @@ -14,11 +14,14 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/ospath" 
"github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/wclayer" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sys/windows" ) @@ -45,7 +48,12 @@ func NewImageLayers(vm *uvm.UtilityVM, containerRootInUVM string, layers []strin } // Release unmounts all of the layers located in the layers array. -func (layers *ImageLayers) Release(ctx context.Context, all bool) error { +func (layers *ImageLayers) Release(ctx context.Context, all bool) (err error) { + ctx, span := trace.StartSpan(ctx, "layers::Release") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("ImageLayers", fmt.Sprintf("%+v", layers))) + if layers.skipCleanup && layers.vm != nil { return nil } @@ -57,7 +65,7 @@ func (layers *ImageLayers) Release(ctx context.Context, all bool) error { if layers.vm != nil { crp = containerRootfsPath(layers.vm, layers.containerRootInUVM) } - err := UnmountContainerLayers(ctx, layers.layers, crp, layers.volumeMountPath, layers.vm, op) + err = UnmountContainerLayers(ctx, layers.layers, crp, layers.volumeMountPath, layers.vm, op) if err != nil { return err } @@ -78,7 +86,12 @@ func (layers *ImageLayers) Release(ctx context.Context, all bool) error { // // TODO dcantah: Keep better track of the layers that are added, don't simply discard the SCSI, VSMB, etc. resource types gotten inside. 
func MountContainerLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, err error) { - log.G(ctx).WithField("layerFolders", layerFolders).Debug("hcsshim::mountContainerLayers") + ctx, span := trace.StartSpan(ctx, "layers::MountContainerLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("layerFolders", fmt.Sprintf("%q", layerFolders)), + trace.StringAttribute("guestRoot", guestRoot), + trace.StringAttribute("volumeMountPath", volumeMountPath)) if vm == nil { if len(layerFolders) < 2 { @@ -160,7 +173,7 @@ func MountContainerLayers(ctx context.Context, containerID string, layerFolders } // V2 UVM - log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::mountContainerLayers V2 UVM") + log.G(ctx).WithField("os", vm.OS()).Debug("layers::MountContainerLayers V2 UVM") var ( layersAdded []string @@ -267,7 +280,6 @@ func MountContainerLayers(ctx context.Context, containerID string, layerFolders if err != nil { return "", err } - log.G(ctx).Debug("hcsshim::mountContainerLayers Succeeded") return rootfs, nil } @@ -340,8 +352,12 @@ const ( ) // UnmountContainerLayers is a helper for clients to hide all the complexity of layer unmounting -func UnmountContainerLayers(ctx context.Context, layerFolders []string, containerRootPath, volumeMountPath string, vm *uvm.UtilityVM, op UnmountOperation) error { - log.G(ctx).WithField("layerFolders", layerFolders).Debug("hcsshim::unmountContainerLayers") +func UnmountContainerLayers(ctx context.Context, layerFolders []string, containerRootPath, volumeMountPath string, vm *uvm.UtilityVM, op UnmountOperation) (err error) { + ctx, span := trace.StartSpan(ctx, "layers::UnmountContainerLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("layerFolders", fmt.Sprint(layerFolders))) + if vm == nil { // Must be an argon - folders 
are mounted on the host if op != UnmountOperationAll { @@ -442,6 +458,12 @@ func UnmountContainerLayers(ctx context.Context, layerFolders []string, containe // GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers func GetHCSLayers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) { + ctx, span := trace.StartSpan(ctx, "layers::GetHCSLayers") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute(logfields.UVMID, vm.ID()), + trace.StringAttribute("paths", fmt.Sprint(paths))) + for _, path := range paths { uvmPath, err := vm.GetVSMBUvmPath(ctx, path, true) if err != nil { @@ -483,7 +505,7 @@ func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err e log.G(ctx).WithFields(logrus.Fields{ "hostpath": hostPath, "volumeName": volumeName, - }).Debug("mounting volume for container") + }).Trace("layers::mountSandboxVolume") if _, err := os.Stat(hostPath); os.IsNotExist(err) { if err := os.MkdirAll(hostPath, 0777); err != nil { @@ -509,10 +531,10 @@ func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err e } // Remove volume mount point. And remove folder afterwards. 
-func removeSandboxMountPoint(ctx context.Context, hostPath string) error { +func removeSandboxMountPoint(ctx context.Context, hostPath string) (err error) { log.G(ctx).WithFields(logrus.Fields{ "hostpath": hostPath, - }).Debug("removing volume mount point for container") + }).Trace("layers::removeSandboxMountPoint") if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil { return errors.Wrap(err, "failed to delete sandbox volume mount point") diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/log/context.go b/test/vendor/github.com/Microsoft/hcsshim/internal/log/context.go new file mode 100644 index 0000000000..446f014ec1 --- /dev/null +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/log/context.go @@ -0,0 +1,99 @@ +package log + +import ( + "context" + + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" + + "github.com/Microsoft/hcsshim/internal/logfields" +) + +type entryContextKeyType int + +const _entryContextKey entryContextKeyType = iota + +// L is the default, blank logging entry. WithField and co. all return a copy +// of the original entry, so this will not leak fields between calls. +// +// Do NOT modify fields directly, as that will corrupt state for all users and +// is not thread safe. +var L = logrus.NewEntry(logrus.StandardLogger()) + +// G returns a `logrus.Entry` stored in context (if any), appended with the +// `TraceID, SpanID` from `ctx` if `ctx` contains an OpenCensus `trace.Span`. +// +// Unlike FromContext, this will overwrite any tracing information already +// present in the entry. 
+func G(ctx context.Context) *logrus.Entry { + entry := FromContext(ctx) + + //TODO: remove this and modify the etw hooks to extract span info: + // `trace.FromContext(entry.Context)` + if span := trace.FromContext(ctx); span != nil { + sctx := span.SpanContext() + entry = entry.WithFields(logrus.Fields{ + logfields.TraceID: sctx.TraceID.String(), + logfields.SpanID: sctx.SpanID.String(), + }) + } + return entry +} + +// S updates the log entry in the context with the provided fields, and returns +// both. It is equivalent to: +// entry := G(ctx).WithFields(fields) +// ctx = WithContext(ctx, entry) +func S(ctx context.Context, fields logrus.Fields) (context.Context, *logrus.Entry) { + e := G(ctx) + if len(fields) > 0 { + e = e.WithFields(fields) + } + ctx = WithContext(ctx, e) + return ctx, e +} + +// WithContext returns a context that contains the provided log entry. +// The entry can be extracted with `G` (preferred) or `FromContext`. +// +// The entry in the context is a copy of `entry` (generated by `entry.WithContext`) +func WithContext(ctx context.Context, entry *logrus.Entry) context.Context { + entry = entry.WithContext(ctx) + return context.WithValue(ctx, _entryContextKey, entry) +} + +// FromContext returns the log entry stored in the context, if one exists, or +// the default logging entry otherwise. +func FromContext(ctx context.Context) *logrus.Entry { + entry := fromContext(ctx) + + if entry == nil { + return L.WithContext(ctx) + } + + return entry +} + +// Copy extracts the tracing Span and logging entry from the src Context, if they +// exist, and adds them to the dst Context. +// +// This is useful to share tracing and logging between contexts, but not the +// cancellation. 
For example, if the src Context has been cancelled but cleanup +// operations triggered by the cancellation require a non-cancelled context to +// execute +func Copy(dst context.Context, src context.Context) context.Context { + if s := trace.FromContext(src); s != nil { + dst = trace.NewContext(dst, s) + } + + if e := fromContext(src); e != nil { + dst = WithContext(dst, e) + } + + return dst +} + +func fromContext(ctx context.Context) *logrus.Entry { + e, _ := ctx.Value(_entryContextKey).(*logrus.Entry) + return e +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/log/format.go b/test/vendor/github.com/Microsoft/hcsshim/internal/log/format.go new file mode 100644 index 0000000000..deb5c09300 --- /dev/null +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/log/format.go @@ -0,0 +1,47 @@ +package log + +import ( + "fmt" + "net" + "reflect" + "time" +) + +const TimeFormat = time.RFC3339Nano + +func FormatTime(t time.Time) string { + return t.Format(TimeFormat) +} + +// FormatIO formats net.Conn and other types that have an `Addr()` or `Name()` +func FormatIO(v interface{}) string { + s := reflect.TypeOf(v).String() + + switch t := v.(type) { + case net.Conn: + s += "(" + FormatAddr(t.LocalAddr()) + ")" + case interface{ Addr() net.Addr }: + s += "(" + FormatAddr(t.Addr()) + ")" + case interface{ Name() string }: + s += "(" + t.Name() + ")" + default: + return FormatAny(t) + } + + return s +} + +func FormatAddr(a net.Addr) string { + return a.Network() + "://" + a.String() +} + +func FormatAny(v interface{}) string { + switch t := v.(type) { + case fmt.GoStringer: + return t.GoString() + case fmt.Stringer: + return t.String() + default: + return fmt.Sprintf("%+v", v) + } +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/log/g.go b/test/vendor/github.com/Microsoft/hcsshim/internal/log/g.go deleted file mode 100644 index ba6b1a4a53..0000000000 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/log/g.go +++ /dev/null @@ -1,23 +0,0 
@@ -package log - -import ( - "context" - - "github.com/sirupsen/logrus" - "go.opencensus.io/trace" -) - -// G returns a `logrus.Entry` with the `TraceID, SpanID` from `ctx` if `ctx` -// contains an OpenCensus `trace.Span`. -func G(ctx context.Context) *logrus.Entry { - span := trace.FromContext(ctx) - if span != nil { - sctx := span.SpanContext() - return logrus.WithFields(logrus.Fields{ - "traceID": sctx.TraceID.String(), - "spanID": sctx.SpanID.String(), - // "parentSpanID": TODO: JTERRY75 - Try to convince OC to export this? - }) - } - return logrus.NewEntry(logrus.StandardLogger()) -} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/logfields/fields.go b/test/vendor/github.com/Microsoft/hcsshim/internal/logfields/fields.go index cf2c166d9b..7f2dfe2626 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/logfields/fields.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/logfields/fields.go @@ -3,21 +3,42 @@ package logfields const ( // Identifiers + Name = "name" + Operation = "operation" + + ID = "id" ContainerID = "cid" - UVMID = "uvm-id" + ExecID = "eid" ProcessID = "pid" + TaskID = "tid" + UVMID = "uvm-id" + + // networking and IO + + File = "file" + Path = "path" + Bytes = "bytes" + Pipe = "pipe" // Common Misc - // Timeout represents an operation timeout. 
- Timeout = "timeout" + Attempt = "attemptNo" JSON = "json" + // Time + + StartTime = "startTime" + EndTime = "endTime" + Duration = "duration" + Timeout = "timeout" + // Keys/values Field = "field" + Key = "key" OCIAnnotation = "oci-annotation" Value = "value" + Options = "options" // Golang type's @@ -29,4 +50,10 @@ const ( // runhcs VMShimOperation = "vmshim-op" + + // logging and tracing + + TraceID = "traceID" + SpanID = "spanID" + ParentSpanID = "parentSpanID" ) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oc/exporter.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oc/exporter.go index f428bdaf72..62f6fc1c75 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oc/exporter.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oc/exporter.go @@ -3,19 +3,21 @@ package oc import ( "github.com/sirupsen/logrus" "go.opencensus.io/trace" -) -var _ = (trace.Exporter)(&LogrusExporter{}) + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" +) // LogrusExporter is an OpenCensus `trace.Exporter` that exports // `trace.SpanData` to logrus output. -type LogrusExporter struct { -} +type LogrusExporter struct{} + +var _ trace.Exporter = &LogrusExporter{} // ExportSpan exports `s` based on the the following rules: // -// 1. All output will contain `s.Attributes`, `s.TraceID`, `s.SpanID`, -// `s.ParentSpanID` for correlation +// 1. All output will contain `s.Attributes`, `s.SpanKind`, `s.TraceID`, +// `s.SpanID`, and `s.ParentSpanID` for correlation // // 2. Any calls to .Annotate will not be supported. // @@ -23,21 +25,44 @@ type LogrusExporter struct { // `s.Status.Code != 0` in which case it will be written at `logrus.ErrorLevel` // providing `s.Status.Message` as the error value. 
func (le *LogrusExporter) ExportSpan(s *trace.SpanData) { - // Combine all span annotations with traceID, spanID, parentSpanID - baseEntry := logrus.WithFields(logrus.Fields(s.Attributes)) - baseEntry.Data["traceID"] = s.TraceID.String() - baseEntry.Data["spanID"] = s.SpanID.String() - baseEntry.Data["parentSpanID"] = s.ParentSpanID.String() - baseEntry.Data["startTime"] = s.StartTime - baseEntry.Data["endTime"] = s.EndTime - baseEntry.Data["duration"] = s.EndTime.Sub(s.StartTime).String() - baseEntry.Data["name"] = s.Name - baseEntry.Time = s.StartTime + if s.DroppedAttributeCount > 0 { + logrus.WithFields(logrus.Fields{ + "name": s.Name, + logfields.TraceID: s.TraceID.String(), + logfields.SpanID: s.SpanID.String(), + "dropped": s.DroppedAttributeCount, + "maxAttributes": len(s.Attributes), + }).Warning("span had dropped attributes") + } + + // Combine all span annotations with traceID, spanID, parentSpanID, and error + entry := logrus.WithFields(logrus.Fields(s.Attributes)) + // All span fields are string, so we can safely skip overhead in entry.WithFields + // and add them directly to entry.Data. + // Avoid growing the entry.Data map and reallocating buckets by creating new + // `logrus.Fields` and copying data over. 
+ data := make(logrus.Fields, len(entry.Data)+9) // should only add 9 new entries + for k, v := range entry.Data { //copy old data + data[k] = v + } + data[logfields.Name] = s.Name + data[logfields.TraceID] = s.TraceID.String() + data[logfields.SpanID] = s.SpanID.String() + data[logfields.ParentSpanID] = s.ParentSpanID.String() + data[logfields.StartTime] = log.FormatTime(s.StartTime) + data[logfields.EndTime] = log.FormatTime(s.EndTime) + data[logfields.Duration] = s.EndTime.Sub(s.StartTime).String() + if sk := spanKindToString(s.SpanKind); sk != "" { + data["spanKind"] = sk + } level := logrus.InfoLevel if s.Status.Code != 0 { level = logrus.ErrorLevel - baseEntry.Data[logrus.ErrorKey] = s.Status.Message + data[logrus.ErrorKey] = s.Status.Message } - baseEntry.Log(level, "Span") + + entry.Data = data + entry.Time = s.StartTime + entry.Log(level, "Span") } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oc/span.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oc/span.go index fee4765cbc..0cd7d74608 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oc/span.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oc/span.go @@ -1,9 +1,30 @@ package oc import ( + "context" + + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) +var ( + DefaultSampler = trace.AlwaysSample() + + // TraceLevelSampler samples if the logging level is "Trace" or higher + TraceLevelSampler = LoggingLevelSampler(logrus.TraceLevel) +) + +func WithTraceLevelSampler(o *trace.StartOptions) { // trace.StartOption + o.Sampler = TraceLevelSampler +} + +func LoggingLevelSampler(lvl logrus.Level) trace.Sampler { + return func(_ trace.SamplingParameters) trace.SamplingDecision { + b := logrus.GetLevel() >= lvl + return trace.SamplingDecision{Sample: b} + } +} + // SetSpanStatus sets `span.SetStatus` to the proper status depending on `err`. If // `err` is `nil` assumes `trace.StatusCodeOk`. 
func SetSpanStatus(span *trace.Span, err error) { @@ -15,3 +36,47 @@ } span.SetStatus(status) } + +/* + * convenience wrappers + */ + +// StartSpan wraps go.opencensus.io/trace.StartSpan, but explicitly sets the +// sampler to DefaultSampler. This will override other trace.StartOptions +// and the sampling of the parent span. +// +// If the parent span may have been started with StartTraceSpan, use this to +// force sampling. +func StartSpan(ctx context.Context, name string, o ...trace.StartOption) (context.Context, *trace.Span) { + o = append(o, trace.WithSampler(DefaultSampler)) + ctx, s := trace.StartSpan(ctx, name, o...) + + return ctx, s +} + +// StartTraceSpan is similar to StartSpan, but uses TraceLevelSampler, which +// disables sampling on this span and its children if the logging +// level is less than logrus.Trace. +// +// The returned span will still propagate a SpanContext, and its children can be +// sampled and exported if started by StartSpan +func StartTraceSpan(ctx context.Context, name string, o ...trace.StartOption) (context.Context, *trace.Span) { + o = append(o, WithTraceLevelSampler) + ctx, s := trace.StartSpan(ctx, name, o...) 
+ + return ctx, s +} + +var WithServerSpanKind = trace.WithSpanKind(trace.SpanKindServer) +var WithClientSpanKind = trace.WithSpanKind(trace.SpanKindClient) + +func spanKindToString(sk int) string { + switch sk { + case trace.SpanKindClient: + return "client" + case trace.SpanKindServer: + return "server" + default: + return "" + } +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/resources/resources.go b/test/vendor/github.com/Microsoft/hcsshim/internal/resources/resources.go index 89851ae7c3..195c4f660d 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/resources/resources.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/resources/resources.go @@ -9,7 +9,10 @@ import ( "github.com/Microsoft/hcsshim/internal/credentials" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/uvm" + "go.opencensus.io/trace" ) // NetNS returns the network namespace for the container @@ -95,8 +98,15 @@ // ReleaseResources releases/frees all of the resources associated with a container. This includes // Plan9 shares, vsmb mounts, pipe mounts, network endpoints, scsi mounts, vpci devices and layers. // TODO: make method on Resources struct. 
-func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all bool) error { +// TODO: group together all the different errors into one +func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all bool) (err error) { + ctx, span := trace.StartSpan(ctx, "resources::ReleaseResources") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + if vm != nil { + span.AddAttributes(trace.StringAttribute(logfields.ID, vm.ID())) + if r.addedNetNSToVM { if err := vm.TearDownNetworking(ctx, r.netNS); err != nil { log.G(ctx).Warn(err) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/computeagent.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/computeagent.go index b87edf2796..f18442aeb3 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/computeagent.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/computeagent.go @@ -63,7 +63,7 @@ func (ca *computeAgent) AssignPCI(ctx context.Context, req *computeagent.AssignP "containerID": req.ContainerID, "deviceID": req.DeviceID, "virtualFunctionIndex": req.VirtualFunctionIndex, - }).Info("AssignPCI request") + }).Trace("computeAgent::AssignPCI") if req.DeviceID == "" { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -80,7 +80,7 @@ func (ca *computeAgent) RemovePCI(ctx context.Context, req *computeagent.RemoveP log.G(ctx).WithFields(logrus.Fields{ "containerID": req.ContainerID, "deviceID": req.DeviceID, - }).Info("RemovePCI request") + }).Trace("computeAgent::RemovePCI") if req.DeviceID == "" { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -95,9 +95,9 @@ func (ca *computeAgent) RemovePCI(ctx context.Context, req *computeagent.RemoveP func (ca *computeAgent) AddNIC(ctx context.Context, req *computeagent.AddNICInternalRequest) (*computeagent.AddNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ "containerID": req.ContainerID, - "endpoint": 
req.Endpoint, + "endpoint": req.Endpoint.String(), "nicID": req.NicID, - }).Info("AddNIC request") + }).Trace("computeAgent::AddNIC") if req.NicID == "" || req.Endpoint == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -139,9 +139,9 @@ func (ca *computeAgent) AddNIC(ctx context.Context, req *computeagent.AddNICInte // ModifyNIC will modify a NIC from the computeagent services hosting UVM. func (ca *computeAgent) ModifyNIC(ctx context.Context, req *computeagent.ModifyNICInternalRequest) (*computeagent.ModifyNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ + "endpoint": req.Endpoint.String(), "nicID": req.NicID, - "endpoint": req.Endpoint, - }).Info("ModifyNIC request") + }).Trace("computeAgent::ModifyNIC") if req.NicID == "" || req.Endpoint == nil || req.IovPolicySettings == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -189,10 +189,9 @@ func (ca *computeAgent) ModifyNIC(ctx context.Context, req *computeagent.ModifyN // DeleteNIC will delete a NIC from the computeagent services hosting UVM. 
func (ca *computeAgent) DeleteNIC(ctx context.Context, req *computeagent.DeleteNICInternalRequest) (*computeagent.DeleteNICInternalResponse, error) { log.G(ctx).WithFields(logrus.Fields{ - "containerID": req.ContainerID, - "nicID": req.NicID, - "endpoint": req.Endpoint, - }).Info("DeleteNIC request") + "endpoint": req.Endpoint.String(), + "nicID": req.NicID, + }).Trace("computeAgent::DeleteNIC") if req.NicID == "" || req.Endpoint == nil { return nil, status.Error(codes.InvalidArgument, "received empty field in request") @@ -238,7 +237,7 @@ func setupAndServe(ctx context.Context, caAddr string, vm *UtilityVM) error { } computeagent.RegisterComputeAgentService(s, &computeAgent{vm}) - log.G(ctx).WithField("address", l.Addr().String()).Info("serving compute agent") + log.G(ctx).WithField("address", l.Addr().String()).Debug("serving compute agent") go func() { defer l.Close() if err := trapClosedConnErr(s.Serve(ctx, l)); err != nil { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go index 8f293f67fe..ee3034cdc8 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go @@ -234,7 +234,7 @@ func (uvm *UtilityVM) create(ctx context.Context, doc interface{}) error { // Close terminates and releases resources associated with the utility VM. 
func (uvm *UtilityVM) Close() (err error) { - ctx, span := trace.StartSpan(context.Background(), "uvm::Close") + ctx, span := oc.StartTraceSpan(context.Background(), "uvm::Close") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute(logfields.UVMID, uvm.id)) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go index 5a5628230d..a40ede257e 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go @@ -709,7 +709,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error } span.AddAttributes(trace.StringAttribute(logfields.UVMID, opts.ID)) - log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateLCOW options") + log.G(ctx).WithFields(logrus.Fields{ + logfields.Options: fmt.Sprintf("%+v", opts.Options), + "options-lcow": fmt.Sprintf("%+v", opts), + }).Debug("uvm::CreateLCOW options") // We dont serialize OutputHandler so if it is missing we need to put it back to the default. 
if opts.OutputHandler == nil { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go index c09b49d9f0..799a291f0b 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go @@ -6,11 +6,12 @@ import ( "os" "path/filepath" + "github.com/Microsoft/go-winio" + "github.com/Microsoft/go-winio/pkg/guid" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" - "github.com/Microsoft/go-winio" - "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/gcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" @@ -86,7 +87,7 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv return nil, fmt.Errorf("failed to get host processor information: %s", err) } - // To maintain compatability with Docker we need to automatically downgrade + // To maintain compatibility with Docker we need to automatically downgrade // a user CPU count if the setting is not possible. uvm.processorCount = uvm.normalizeProcessorCount(ctx, opts.ProcessorCount, processorTopology) @@ -243,7 +244,10 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } span.AddAttributes(trace.StringAttribute(logfields.UVMID, opts.ID)) - log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateWCOW options") + log.G(ctx).WithFields(logrus.Fields{ + logfields.Options: fmt.Sprintf("%+v", opts.Options), + "options-wcow": fmt.Sprintf("%+v", opts), + }).Debug("uvm::CreateWCOW options") uvm := &UtilityVM{ id: opts.ID, @@ -276,7 +280,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } // TODO: BUGBUG Remove this. @jhowardmsft - // It should be the responsiblity of the caller to do the creation and population. 
+ // It should be the responsibility of the caller to do the creation and population. // - Update runhcs too (vm.go). // - Remove comment in function header // - Update tests that rely on this current behaviour. diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/network.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/network.go index dca3ad5ab9..00a000cdd8 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/network.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/network.go @@ -10,6 +10,7 @@ import ( "github.com/containerd/ttrpc" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" @@ -17,6 +18,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/ncproxyttrpc" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/osversion" @@ -96,13 +98,11 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er } // GetNamespaceEndpoints gets all endpoints in `netNS` -func GetNamespaceEndpoints(ctx context.Context, netNS string) ([]*hns.HNSEndpoint, error) { - op := "uvm::GetNamespaceEndpoints" - l := log.G(ctx).WithField("netns-id", netNS) - l.Debug(op + " - Begin") - defer func() { - l.Debug(op + " - End") - }() +func GetNamespaceEndpoints(ctx context.Context, netNS string) (_ []*hns.HNSEndpoint, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::GetNamespaceEndpoints") //nolint:ineffassign,staticcheck + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("netns-id", netNS)) ids, err := hns.GetNamespaceEndpoints(netNS) if err != nil { diff --git 
a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/plan9.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/plan9.go index 3b86b392ae..ad34fdcfd6 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/plan9.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/plan9.go @@ -10,7 +10,10 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/osversion" + "go.opencensus.io/trace" ) // Plan9Share is a struct containing host paths for the UVM @@ -31,7 +34,18 @@ func (p9 *Plan9Share) Release(ctx context.Context) error { const plan9Port = 564 // AddPlan9 adds a Plan9 share to a utility VM. -func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath string, readOnly bool, restrict bool, allowedNames []string) (*Plan9Share, error) { +func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath string, readOnly bool, restrict bool, allowedNames []string) (_ *Plan9Share, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::AddPlan9") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("hostPath", hostPath), + trace.StringAttribute("uvmPath", uvmPath), + trace.BoolAttribute("readOnly", readOnly), + trace.BoolAttribute("restrict", restrict), + trace.StringAttribute("allowedNames", fmt.Sprintf("%v", allowedNames))) + if uvm.operatingSystem != "linux" { return nil, errNotSupported } @@ -104,7 +118,15 @@ func (uvm *UtilityVM) AddPlan9(ctx context.Context, hostPath string, uvmPath str // RemovePlan9 removes a Plan9 share from a utility VM. Each Plan9 share is ref-counted // and only actually removed when the ref-count drops to zero.
-func (uvm *UtilityVM) RemovePlan9(ctx context.Context, share *Plan9Share) error { +func (uvm *UtilityVM) RemovePlan9(ctx context.Context, share *Plan9Share) (err error) { + ctx, span := trace.StartSpan(ctx, "uvm::RemovePlan9") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("name", share.name), + trace.StringAttribute("uvmPath", share.uvmPath)) + if uvm.operatingSystem != "linux" { return errNotSupported } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/start.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/start.go index 702815ed6a..dc337eba78 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/start.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/start.go @@ -113,7 +113,7 @@ func parseLogrus(vmid string) func(r io.Reader) { break } fields[logfields.UVMID] = vmid - fields["vm.time"] = gcsEntry.Time + fields["vm.time"] = log.FormatTime(gcsEntry.Time) e.Log(gcsEntry.Level, gcsEntry.Message) } } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/vsmb.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/vsmb.go index b1e5f24434..a64ece9ef2 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/vsmb.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/vsmb.go @@ -11,11 +11,14 @@ import ( "unsafe" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" + "golang.org/x/sys/windows" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/winapi" "github.com/Microsoft/hcsshim/osversion" @@ -158,7 +161,16 @@ func forceNoDirectMap(path string) (bool, error)
{ // AddVSMB adds a VSMB share to a Windows utility VM. Each VSMB share is ref-counted and // only added if it isn't already. This is used for read-only layers, mapped directories // to a container, and for mapped pipes. -func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcsschema.VirtualSmbShareOptions) (*VSMBShare, error) { +func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcsschema.VirtualSmbShareOptions) (_ *VSMBShare, err error) { + ctx, span := trace.StartSpan(ctx, "uvm::AddVSMB") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.StringAttribute("hostPath", hostPath), + trace.BoolAttribute("readOnly", options.ReadOnly), + trace.BoolAttribute("restrict", options.RestrictFileAccess)) + if uvm.operatingSystem != "windows" { return nil, errNotSupported } @@ -190,7 +202,7 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs if force, err := forceNoDirectMap(hostPath); err != nil { return nil, err } else if force { - log.G(ctx).WithField("path", hostPath).Info("Forcing NoDirectmap for VSMB mount") + log.G(ctx).WithField("hostPath", hostPath).Debug("Forcing NoDirectmap for VSMB mount") options.NoDirectmap = true } @@ -225,7 +237,7 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs "path": hostPath, "options": fmt.Sprintf("%+#v", options), "operation": requestType, - }).Info("Modifying VSMB share") + }).Debug("Modifying VSMB share") modification := &hcsschema.ModifySettingRequest{ RequestType: requestType, Settings: hcsschema.VirtualSmbShare{ @@ -250,7 +262,15 @@ func (uvm *UtilityVM) AddVSMB(ctx context.Context, hostPath string, options *hcs // RemoveVSMB removes a VSMB share from a utility VM. Each VSMB share is ref-counted // and only actually removed when the ref-count drops to zero. 
-func (uvm *UtilityVM) RemoveVSMB(ctx context.Context, hostPath string, readOnly bool) error { +func (uvm *UtilityVM) RemoveVSMB(ctx context.Context, hostPath string, readOnly bool) (err error) { + ctx, span := trace.StartSpan(ctx, "uvm::RemoveVSMB") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes( + trace.StringAttribute(logfields.UVMID, uvm.id), + trace.BoolAttribute("readOnly", readOnly), + trace.StringAttribute("hostPath", hostPath)) + if uvm.operatingSystem != "windows" { return errNotSupported } @@ -387,6 +407,12 @@ func (vsmb *VSMBShare) GobDecode(data []byte) error { // clone VSMB share we just need to add it into the config doc of that VM and increase the // vsmb counter. func (vsmb *VSMBShare) Clone(ctx context.Context, vm *UtilityVM, cd *cloneData) error { + log.G(ctx).WithFields(logrus.Fields{ + "hostPath": vsmb.HostPath, + "guestPath": vsmb.guestPath, + "name": vsmb.name, + }).Trace("vsmb::Clone") + cd.doc.VirtualMachine.Devices.VirtualSmb.Shares = append(cd.doc.VirtualMachine.Devices.VirtualSmb.Shares, hcsschema.VirtualSmbShare{ Name: vsmb.name, Path: vsmb.HostPath,