diff --git a/internal/guest/gcserr/errors.go b/internal/guest/gcserr/errors.go index d0dcd2bac5..0e13e21ef1 100644 --- a/internal/guest/gcserr/errors.go +++ b/internal/guest/gcserr/errors.go @@ -10,13 +10,18 @@ import ( // Hresult is a type corresponding to the HRESULT error type used on Windows. type Hresult int32 +// from +// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/705fb797-2175-4a90-b5a3-3918024b10b8 +// - https://docs.microsoft.com/en-us/virtualization/api/hcs/reference/hcshresult const ( // HrNotImpl is the HRESULT for a not implemented function. HrNotImpl = Hresult(-2147467263) // 0x80004001 - // HrFail is the HRESULT for an invocation failure. + // HrFail is the HRESULT for an invocation or unspecified failure. HrFail = Hresult(-2147467259) // 0x80004005 // HrErrNotFound is the HRESULT for an invalid process id. HrErrNotFound = Hresult(-2147023728) // 0x80070490 + // HrErrInvalidArg is the HRESULT for One or more arguments are invalid. + HrErrInvalidArg = Hresult(-2147024809) // 0x80070057 // HvVmcomputeTimeout is the HRESULT for operations that timed out. HvVmcomputeTimeout = Hresult(-1070137079) // 0xC0370109 // HrVmcomputeInvalidJSON is the HRESULT for failing to unmarshal a json @@ -37,6 +42,16 @@ const ( // HrVmcomputeUnknownMessage is the HRESULT for unknown message types sent // from the HCS. HrVmcomputeUnknownMessage = Hresult(-1070137077) // 0xC037010B + // HrVmcomputeInvalidState is the HRESULT for: + // + // The requested virtual machine or container operation is not valid in the + // current state + HrVmcomputeInvalidState = Hresult(-2143878907) // 0x80370105 + // HrVmcomputeSystemAlreadyStopped is the HRESULT for: + // + // The virtual machine or container with the specified identifier is not + // running + HrVmcomputeSystemAlreadyStopped = Hresult(-2143878896) // 0x80370110 ) // StackTracer is an interface originating (but not exported) from the @@ -136,7 +151,7 @@ func WrapHresult(e error, hresult Hresult) error { } } -// GetHresult interates through the error's cause stack (similiarly to how the +// GetHresult iterates through the error's cause stack (similar to how the // Cause function in github.com/pkg/errors operates). At the first error it // encounters which implements the Hresult() method, it return's that error's // HRESULT. This allows errors higher up in the cause stack to shadow the diff --git a/internal/guest/runtime/hcsv2/container.go b/internal/guest/runtime/hcsv2/container.go index 9dcd080cb3..c507be19a8 100644 --- a/internal/guest/runtime/hcsv2/container.go +++ b/internal/guest/runtime/hcsv2/container.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package hcsv2 @@ -14,10 +15,12 @@ import ( "github.com/Microsoft/hcsshim/internal/guest/storage" "github.com/Microsoft/hcsshim/internal/guest/transport" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/containerd/cgroups" v1 "github.com/containerd/cgroups/stats/v1" oci "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -39,6 +42,7 @@ type Container struct { } func (c *Container) Start(ctx context.Context, conSettings stdio.ConnectionSettings) (int, error) { + log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Start") stdioSet, err := stdio.Connect(c.vsock, conSettings) if err != nil { return -1, err @@ -61,6 +65,7 @@ func (c *Container) Start(ctx context.Context, conSettings stdio.ConnectionSetti } func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSettings stdio.ConnectionSettings) (int, error) { + log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::ExecProcess") stdioSet, err := stdio.Connect(c.vsock, conSettings) if err != nil { return -1, err @@ -101,6 +106,11 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe // GetProcess returns the Process with the matching 'pid'. If the 'pid' does // not exit returns error. func (c *Container) GetProcess(pid uint32) (Process, error) { + //todo: thread a context to this function call + logrus.WithFields(logrus.Fields{ + logfields.ContainerID: c.id, + logfields.ProcessID: pid, + }).Info("opengcs::Container::GetAllProcessPids") if c.initProcess.pid == pid { return c.initProcess, nil } @@ -117,6 +127,7 @@ func (c *Container) GetProcess(pid uint32) (Process, error) { // GetAllProcessPids returns all process pids in the container namespace. func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) { + log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::GetAllProcessPids") state, err := c.container.GetAllProcesses() if err != nil { return nil, err @@ -130,6 +141,7 @@ func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) { // Kill sends 'signal' to the container process. func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error { + log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Kill") err := c.container.Kill(signal) if err != nil { return err @@ -139,21 +151,25 @@ func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error { } func (c *Container) Delete(ctx context.Context) error { + entity := log.G(ctx).WithField(logfields.ContainerID, c.id) + entity.Info("opengcs::Container::Delete") if c.isSandbox { // remove user mounts in sandbox container if err := storage.UnmountAllInPath(ctx, getSandboxMountsDir(c.id), true); err != nil { - log.G(ctx).WithError(err).Error("failed to unmount sandbox mounts") + entity.WithError(err).Error("failed to unmount sandbox mounts") } // remove hugepages mounts in sandbox container if err := storage.UnmountAllInPath(ctx, getSandboxHugePageMountsDir(c.id), true); err != nil { - log.G(ctx).WithError(err).Error("failed to unmount hugepages mounts") + entity.WithError(err).Error("failed to unmount hugepages mounts") } } + return c.container.Delete() } func (c *Container) Update(ctx context.Context, resources interface{}) error { + log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Update") return c.container.Update(resources) } @@ -161,7 +177,7 @@ func (c *Container) Update(ctx context.Context, resources interface{}) error { func (c *Container) Wait() prot.NotificationType { _, span := trace.StartSpan(context.Background(), "opengcs::Container::Wait") defer span.End() - span.AddAttributes(trace.StringAttribute("cid", c.id)) + span.AddAttributes(trace.StringAttribute(logfields.ContainerID, c.id)) c.initProcess.writersWg.Wait() c.etL.Lock() diff --git a/internal/guest/runtime/hcsv2/uvm.go b/internal/guest/runtime/hcsv2/uvm.go index b1ec2c1001..faf78eb7f7 100644 --- a/internal/guest/runtime/hcsv2/uvm.go +++ b/internal/guest/runtime/hcsv2/uvm.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package hcsv2 @@ -244,6 +245,10 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM if err != nil { return nil, errors.Wrapf(err, "failed to create container") } + init, err := con.GetInitProcess() + if err != nil { + return nil, errors.Wrapf(err, "failed to get container init process") + } c := &Container{ id: id, @@ -254,7 +259,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM exitType: prot.NtUnexpectedExit, processes: make(map[uint32]*containerProcess), } - c.initProcess = newProcess(c, settings.OCISpecification.Process, con.(runtime.Process), uint32(c.container.Pid()), true) + c.initProcess = newProcess(c, settings.OCISpecification.Process, init, uint32(c.container.Pid()), true) // Sandbox or standalone, move the networks to the container namespace if criType == "sandbox" || !isCRI { diff --git a/internal/guest/runtime/runc/runc.go b/internal/guest/runtime/runc/runc.go index e6d929f5d1..24fb83d5d4 100644 --- a/internal/guest/runtime/runc/runc.go +++ b/internal/guest/runtime/runc/runc.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Package runc defines an implementation of the Runtime interface which uses @@ -16,9 +17,9 @@ import ( "syscall" "github.com/Microsoft/hcsshim/internal/guest/commonutils" - "github.com/Microsoft/hcsshim/internal/guest/gcserr" "github.com/Microsoft/hcsshim/internal/guest/runtime" "github.com/Microsoft/hcsshim/internal/guest/stdio" + "github.com/Microsoft/hcsshim/internal/logfields" oci "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -51,6 +52,8 @@ type container struct { ownsPidNamespace bool } +var _ runtime.Container = &container{} + func (c *container) ID() string { return c.id } @@ -76,6 +79,8 @@ type process struct { pipeRelay *stdio.PipeRelay } +var _ runtime.Process = &process{} + func (p *process) Pid() int { return p.pid } @@ -139,7 +144,7 @@ func (c *container) Start() error { if err != nil { runcErr := getRuncLogError(logPath) c.r.cleanupContainer(c.id) - return errors.Wrapf(err, "runc start failed with %v: %s", runcErr, string(out)) + return errors.Wrapf(runcErr, "runc start failed with %v: %s", err, string(out)) } return nil } @@ -156,6 +161,7 @@ func (c *container) ExecProcess(process *oci.Process, stdioSet *stdio.Connection // Kill sends the specified signal to the container's init process. func (c *container) Kill(signal syscall.Signal) error { + logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::Kill") logPath := c.r.getLogPath(c.id) args := []string{"kill"} if signal == syscall.SIGTERM || signal == syscall.SIGKILL { @@ -165,14 +171,8 @@ func (c *container) Kill(signal syscall.Signal) error { cmd := createRuncCommand(logPath, args...) out, err := cmd.CombinedOutput() if err != nil { - if strings.Contains(err.Error(), "os: process already finished") || - strings.Contains(err.Error(), "container not running") || - err == syscall.ESRCH { - return gcserr.NewHresultError(gcserr.HrVmcomputeSystemNotFound) - } - runcErr := getRuncLogError(logPath) - return errors.Wrapf(err, "unknown runc error after kill %v: %s", runcErr, string(out)) + return errors.Wrapf(runcErr, "unknown runc error after kill %v: %s", err, string(out)) } return nil } @@ -180,13 +180,14 @@ func (c *container) Kill(signal syscall.Signal) error { // Delete deletes any state created for the container by either this wrapper or // runC itself. func (c *container) Delete() error { + logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::Delete") logPath := c.r.getLogPath(c.id) args := []string{"delete", c.id} cmd := createRuncCommand(logPath, args...) out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return errors.Wrapf(err, "runc delete failed with %v: %s", runcErr, string(out)) + return errors.Wrapf(runcErr, "runc delete failed with %v: %s", err, string(out)) } if err := c.r.cleanupContainer(c.id); err != nil { return err @@ -211,7 +212,7 @@ func (c *container) Pause() error { out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return errors.Wrapf(err, "runc pause failed with %v: %s", runcErr, string(out)) + return errors.Wrapf(runcErr, "runc pause failed with %v: %s", err, string(out)) } return nil } @@ -224,7 +225,7 @@ func (c *container) Resume() error { out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return errors.Wrapf(err, "runc resume failed with %v: %s", runcErr, string(out)) + return errors.Wrapf(runcErr, "runc resume failed with %v: %s", err, string(out)) } return nil } @@ -237,7 +238,7 @@ func (c *container) GetState() (*runtime.ContainerState, error) { out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return nil, errors.Wrapf(err, "runc state failed with %v: %s", runcErr, string(out)) + return nil, errors.Wrapf(runcErr, "runc state failed with %v: %s", err, string(out)) } var state runtime.ContainerState if err := json.Unmarshal(out, &state); err != nil { @@ -251,30 +252,31 @@ func (c *container) GetState() (*runtime.ContainerState, error) { // It should be noted that containers that have stopped but have not been // deleted are still considered to exist. func (c *container) Exists() (bool, error) { - states, err := c.r.ListContainerStates() + // use global path because container may not exist + logPath := c.r.getGlobalLogPath() + args := []string{"state", c.id} + cmd := createRuncCommand(logPath, args...) + out, err := cmd.CombinedOutput() if err != nil { - return false, err - } - // TODO: This is definitely not the most efficient way of doing this. See - // about improving it in the future. - for _, state := range states { - if state.ID == c.id { - return true, nil + runcErr := getRuncLogError(logPath) + if errors.Is(runcErr, runtime.ContainerDoesNotExistErr) { + return false, nil } + return false, errors.Wrapf(runcErr, "runc state failed with %v: %s", err, string(out)) } - return false, nil + return true, nil } // ListContainerStates returns ContainerState structs for all existing // containers, whether they're running or not. func (r *runcRuntime) ListContainerStates() ([]runtime.ContainerState, error) { - logPath := filepath.Join(r.runcLogBasePath, "global-runc.log") + logPath := r.getGlobalLogPath() args := []string{"list", "-f", "json"} cmd := createRuncCommand(logPath, args...) out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return nil, errors.Wrapf(err, "runc list failed with %v: %s", runcErr, string(out)) + return nil, errors.Wrapf(runcErr, "runc list failed with %v: %s", err, string(out)) } var states []runtime.ContainerState if err := json.Unmarshal(out, &states); err != nil { @@ -393,6 +395,15 @@ func (c *container) GetAllProcesses() ([]runtime.ContainerProcessState, error) { return c.r.pidMapToProcessStates(pidMap), nil } +// GetInitProcess gets the init processes associated with the given container, +// including both running and zombie processes. +func (c *container) GetInitProcess() (runtime.Process, error) { + if c.init == nil { + return nil, errors.New("container has no init process") + } + return c.init, nil +} + // getRunningPids gets the pids of all processes which runC recognizes as // running. func (r *runcRuntime) getRunningPids(id string) ([]int, error) { @@ -402,7 +413,7 @@ func (r *runcRuntime) getRunningPids(id string) ([]int, error) { out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return nil, errors.Wrapf(err, "runc ps failed with %v: %s", runcErr, string(out)) + return nil, errors.Wrapf(runcErr, "runc ps failed with %v: %s", err, string(out)) } var pids []int if err := json.Unmarshal(out, &pids); err != nil { @@ -458,8 +469,8 @@ func (r *runcRuntime) waitOnProcess(pid int) (int, error) { func (p *process) Wait() (int, error) { exitCode, err := p.c.r.waitOnProcess(p.pid) - l := logrus.WithField("cid", p.c.id) - l.WithField("pid", p.pid).Debug("process wait completed") + l := logrus.WithField(logfields.ContainerID, p.c.id) + l.WithField(logfields.ContainerID, p.pid).Debug("process wait completed") // If the init process for the container has exited, kill everything else in // the container. Runc uses the devices cgroup of the container ot determine @@ -499,7 +510,7 @@ func (p *process) Wait() (int, error) { p.pipeRelay.Wait() } - l.WithField("pid", p.pid).Debug("relay wait completed") + l.WithField(logfields.ProcessID, p.pid).Debug("relay wait completed") return exitCode, err } @@ -508,6 +519,7 @@ func (p *process) Wait() (int, error) { // final wait on the init process. The exit code returned is the exit code // acquired from waiting on the init process. func (c *container) Wait() (int, error) { + entity := logrus.WithField(logfields.ContainerID, c.id) processes, err := c.GetAllProcesses() if err != nil { return -1, err @@ -519,15 +531,12 @@ func (c *container) Wait() (int, error) { // well (as in p.Wait()). This may not matter as long as the relays // finish "soon" after Wait() returns since HCS expects the stdio // connections to close before container shutdown can complete. - logrus.WithFields(logrus.Fields{ - "cid": c.id, - "pid": process.Pid, - }).Debug("waiting on container exec process") + entity.WithField(logfields.ProcessID, process.Pid).Debug("waiting on container exec process") c.r.waitOnProcess(process.Pid) } } exitCode, err := c.init.Wait() - logrus.WithField("cid", c.id).Debug("runc.container::init process wait completed") + entity.Debug("runc::container::init process wait completed") if err != nil { return -1, err } @@ -683,7 +692,7 @@ func (c *container) startProcess(tempProcessDir string, hasTerminal bool, stdioS if err := cmd.Run(); err != nil { runcErr := getRuncLogError(logPath) - return nil, errors.Wrapf(err, "failed to run runc create/exec call for container %s with %v", c.id, runcErr) + return nil, errors.Wrapf(runcErr, "failed to run runc create/exec call for container %s with %v", c.id, err) } var ttyRelay *stdio.TtyRelay @@ -733,7 +742,7 @@ func (c *container) Update(resources interface{}) error { out, err := cmd.CombinedOutput() if err != nil { runcErr := getRuncLogError(logPath) - return errors.Wrapf(err, "runc update request %s failed with %v: %s", string(jsonResources), runcErr, string(out)) + return errors.Wrapf(runcErr, "runc update request %s failed with %v: %s", string(jsonResources), err, string(out)) } return nil } diff --git a/internal/guest/runtime/runc/utils.go b/internal/guest/runtime/runc/utils.go index 4d09da84b3..94da1deec3 100644 --- a/internal/guest/runtime/runc/utils.go +++ b/internal/guest/runtime/runc/utils.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package runc @@ -9,8 +10,10 @@ import ( "os/exec" "path/filepath" "strconv" + "strings" "syscall" + "github.com/Microsoft/hcsshim/internal/guest/runtime" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -81,11 +84,17 @@ func (r *runcRuntime) makeLogDir(id string) error { return nil } -// getLogPath returns the path to the log file used by the runC wrapper. +// getLogPath returns the path to the log file used by the runC wrapper for a particular container func (r *runcRuntime) getLogPath(id string) string { return filepath.Join(r.getLogDir(id), "runc.log") } +// getLogPath returns the path to the log file used by the runC wrapper. +func (r *runcRuntime) getGlobalLogPath() string { + // runcLogBasePath should be created by r.initialize + return filepath.Join(r.runcLogBasePath, "global-runc.log") +} + // processExists returns true if the given process exists in /proc, false if // not. // It should be noted that processes which have exited, but have not yet been @@ -101,6 +110,34 @@ type standardLogEntry struct { Err error `json:"error,omitempty"` } +func (l *standardLogEntry) asError() (err error) { + // TODO (helsaawy): match with errors from + // https://github.com/opencontainers/runc/blob/master/libcontainer/error.go + msg := l.Message + + if strings.HasPrefix(msg, "container") && strings.HasSuffix(msg, "does not exist") { + // currently: "container does not exist" + err = runtime.ContainerDoesNotExistErr + } else if strings.Contains(msg, "container with id exists") || + strings.Contains(msg, "container with given ID already exists") { + err = runtime.ContainerAlreadyExistsErr + } else if strings.Contains(msg, "invalid id format") || + strings.Contains(msg, "invalid container ID format") { + err = runtime.InvalidContainerIDErr + } else if strings.Contains(msg, "container") && + strings.Contains(msg, "that is not stopped") { + err = runtime.ContainerNotStoppedErr + } else { + err = errors.New(msg) + } + + if l.Err != nil { + err = errors.Wrapf(err, l.Err.Error()) + } + + return +} + func getRuncLogError(logPath string) error { reader, err := os.OpenFile(logPath, syscall.O_RDONLY, 0644) if err != nil { @@ -116,10 +153,7 @@ func getRuncLogError(logPath string) error { break } if entry.Level <= logrus.ErrorLevel { - lastErr = errors.New(entry.Message) - if entry.Err != nil { - lastErr = errors.Wrapf(lastErr, entry.Err.Error()) - } + lastErr = entry.asError() } } return lastErr diff --git a/internal/guest/runtime/runtime.go b/internal/guest/runtime/runtime.go index 7f4854f906..079027d6d0 100644 --- a/internal/guest/runtime/runtime.go +++ b/internal/guest/runtime/runtime.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Package runtime defines the interface between the GCS and an OCI container @@ -5,13 +6,24 @@ package runtime import ( + "errors" "io" "syscall" + "github.com/Microsoft/hcsshim/internal/guest/gcserr" "github.com/Microsoft/hcsshim/internal/guest/stdio" oci "github.com/opencontainers/runtime-spec/specs-go" ) +var ( + ContainerAlreadyExistsErr = gcserr.WrapHresult(errors.New("container already exist"), gcserr.HrVmcomputeSystemAlreadyExists) + ContainerDoesNotExistErr = gcserr.WrapHresult(errors.New("container does not exist"), gcserr.HrVmcomputeSystemNotFound) + ContainerStillRunningErr = gcserr.WrapHresult(errors.New("container still running"), gcserr.HrVmcomputeInvalidState) + ContainerNotRunningErr = gcserr.WrapHresult(errors.New("container not running"), gcserr.HrVmcomputeSystemAlreadyStopped) + ContainerNotStoppedErr = gcserr.WrapHresult(errors.New("container not stopped"), gcserr.HrVmcomputeInvalidState) + InvalidContainerIDErr = gcserr.WrapHresult(errors.New("invalid container ID"), gcserr.HrErrInvalidArg) +) + // ContainerState gives information about a container created by a Runtime. type ContainerState struct { OCIVersion string @@ -62,6 +74,7 @@ type Container interface { GetState() (*ContainerState, error) GetRunningProcesses() ([]ContainerProcessState, error) GetAllProcesses() ([]ContainerProcessState, error) + GetInitProcess() (Process, error) Update(resources interface{}) error }