Skip to content

Commit

Permalink
Bug fix with runc container lifetime management
Browse files Browse the repository at this point in the history
Fixed bug where container is cast as a process, which then causes the
container to be deleted prematurely before when the container finishes
executing.

Added conversion of runc log file error strings into `error` types that
wrap HResult error types.
Wrapped runc errors from log file, which is more informative that error
returned from cmd execution.

Added traces to guest container operations, to trace low level container
operations.

Signed-off-by: Hamza El-Saawy <hamzaelsaawy@microsoft.com>
  • Loading branch information
helsaawy committed Jan 10, 2022
1 parent 77c0270 commit b2e849f
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 46 deletions.
19 changes: 17 additions & 2 deletions internal/guest/gcserr/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@ import (
// Hresult is a type corresponding to the HRESULT error type used on Windows.
type Hresult int32

// from
// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/705fb797-2175-4a90-b5a3-3918024b10b8
// - https://docs.microsoft.com/en-us/virtualization/api/hcs/reference/hcshresult
const (
// HrNotImpl is the HRESULT for a not implemented function.
HrNotImpl = Hresult(-2147467263) // 0x80004001
// HrFail is the HRESULT for an invocation failure.
// HrFail is the HRESULT for an invocation or unspecified failure.
HrFail = Hresult(-2147467259) // 0x80004005
// HrErrNotFound is the HRESULT for an invalid process id.
HrErrNotFound = Hresult(-2147023728) // 0x80070490
// HrErrInvalidArg is the HRESULT for One or more arguments are invalid.
HrErrInvalidArg = Hresult(-2147024809) // 0x80070057
// HvVmcomputeTimeout is the HRESULT for operations that timed out.
HvVmcomputeTimeout = Hresult(-1070137079) // 0xC0370109
// HrVmcomputeInvalidJSON is the HRESULT for failing to unmarshal a json
Expand All @@ -37,6 +42,16 @@ const (
// HrVmcomputeUnknownMessage is the HRESULT for unknown message types sent
// from the HCS.
HrVmcomputeUnknownMessage = Hresult(-1070137077) // 0xC037010B
// HrVmcomputeInvalidState is the HRESULT for:
//
// The requested virtual machine or container operation is not valid in the
// current state
HrVmcomputeInvalidState = Hresult(-2143878907) // 0x80370105
// HrVmcomputeSystemAlreadyStopped is the HRESULT for:
//
// The virtual machine or container with the specified identifier is not
// running
HrVmcomputeSystemAlreadyStopped = Hresult(-2143878896) // 0x80370110
)

// StackTracer is an interface originating (but not exported) from the
Expand Down Expand Up @@ -136,7 +151,7 @@ func WrapHresult(e error, hresult Hresult) error {
}
}

// GetHresult interates through the error's cause stack (similiarly to how the
// GetHresult iterates through the error's cause stack (similar to how the
// Cause function in github.com/pkg/errors operates). At the first error it
// encounters which implements the Hresult() method, it return's that error's
// HRESULT. This allows errors higher up in the cause stack to shadow the
Expand Down
22 changes: 19 additions & 3 deletions internal/guest/runtime/hcsv2/container.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build linux
// +build linux

package hcsv2
Expand All @@ -14,10 +15,12 @@ import (
"github.com/Microsoft/hcsshim/internal/guest/storage"
"github.com/Microsoft/hcsshim/internal/guest/transport"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/containerd/cgroups"
v1 "github.com/containerd/cgroups/stats/v1"
oci "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opencensus.io/trace"
)

Expand All @@ -39,6 +42,7 @@ type Container struct {
}

func (c *Container) Start(ctx context.Context, conSettings stdio.ConnectionSettings) (int, error) {
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Start")
stdioSet, err := stdio.Connect(c.vsock, conSettings)
if err != nil {
return -1, err
Expand All @@ -61,6 +65,7 @@ func (c *Container) Start(ctx context.Context, conSettings stdio.ConnectionSetti
}

func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSettings stdio.ConnectionSettings) (int, error) {
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::ExecProcess")
stdioSet, err := stdio.Connect(c.vsock, conSettings)
if err != nil {
return -1, err
Expand Down Expand Up @@ -101,6 +106,11 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe
// GetProcess returns the Process with the matching 'pid'. If the 'pid' does
// not exit returns error.
func (c *Container) GetProcess(pid uint32) (Process, error) {
//todo: thread a context to this function call
logrus.WithFields(logrus.Fields{
logfields.ContainerID: c.id,
logfields.ProcessID: pid,
}).Info("opengcs::Container::GetAllProcessPids")
if c.initProcess.pid == pid {
return c.initProcess, nil
}
Expand All @@ -117,6 +127,7 @@ func (c *Container) GetProcess(pid uint32) (Process, error) {

// GetAllProcessPids returns all process pids in the container namespace.
func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) {
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::GetAllProcessPids")
state, err := c.container.GetAllProcesses()
if err != nil {
return nil, err
Expand All @@ -130,6 +141,7 @@ func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) {

// Kill sends 'signal' to the container process.
func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error {
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Kill")
err := c.container.Kill(signal)
if err != nil {
return err
Expand All @@ -139,29 +151,33 @@ func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error {
}

func (c *Container) Delete(ctx context.Context) error {
entity := log.G(ctx).WithField(logfields.ContainerID, c.id)
entity.Info("opengcs::Container::Delete")
if c.isSandbox {
// remove user mounts in sandbox container
if err := storage.UnmountAllInPath(ctx, getSandboxMountsDir(c.id), true); err != nil {
log.G(ctx).WithError(err).Error("failed to unmount sandbox mounts")
entity.WithError(err).Error("failed to unmount sandbox mounts")
}

// remove hugepages mounts in sandbox container
if err := storage.UnmountAllInPath(ctx, getSandboxHugePageMountsDir(c.id), true); err != nil {
log.G(ctx).WithError(err).Error("failed to unmount hugepages mounts")
entity.WithError(err).Error("failed to unmount hugepages mounts")
}
}

return c.container.Delete()
}

func (c *Container) Update(ctx context.Context, resources interface{}) error {
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Update")
return c.container.Update(resources)
}

// Wait waits for the container's init process to exit.
func (c *Container) Wait() prot.NotificationType {
_, span := trace.StartSpan(context.Background(), "opengcs::Container::Wait")
defer span.End()
span.AddAttributes(trace.StringAttribute("cid", c.id))
span.AddAttributes(trace.StringAttribute(logfields.ContainerID, c.id))

c.initProcess.writersWg.Wait()
c.etL.Lock()
Expand Down
7 changes: 6 additions & 1 deletion internal/guest/runtime/hcsv2/uvm.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build linux
// +build linux

package hcsv2
Expand Down Expand Up @@ -244,6 +245,10 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
if err != nil {
return nil, errors.Wrapf(err, "failed to create container")
}
init, err := con.GetInitProcess()
if err != nil {
return nil, errors.Wrapf(err, "failed to get container init process")
}

c := &Container{
id: id,
Expand All @@ -254,7 +259,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
exitType: prot.NtUnexpectedExit,
processes: make(map[uint32]*containerProcess),
}
c.initProcess = newProcess(c, settings.OCISpecification.Process, con.(runtime.Process), uint32(c.container.Pid()), true)
c.initProcess = newProcess(c, settings.OCISpecification.Process, init, uint32(c.container.Pid()), true)

// Sandbox or standalone, move the networks to the container namespace
if criType == "sandbox" || !isCRI {
Expand Down
Loading

0 comments on commit b2e849f

Please sign in to comment.