Skip to content

Commit

Permalink
Merge pull request #1214 from dcantah/cp-retrystdio-0.8
Browse files Browse the repository at this point in the history
[release/0.8] Add reconnect logic for stdio pipes
  • Loading branch information
dcantah authored Nov 2, 2021
2 parents 6fcadb8 + 48bc738 commit b3b9c7a
Show file tree
Hide file tree
Showing 52 changed files with 2,142 additions and 890 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
with:
version: v1.38.0
args: --timeout=5m
only-new-issues: true

test:
runs-on: 'windows-2019'
Expand Down
156 changes: 96 additions & 60 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ message Options {
// logrus log levels: "trace", "debug", "info", "warn", "error", "fatal", "panic". This setting will override
// the `debug` field if both are specified, unless the level specified is also "debug", as these are equivalent.
string log_level = 16;

// io_retry_timeout_in_sec is how long, in seconds, to keep trying to reconnect to an upstream IO provider after a connection is lost.
// The typical example is containerd having restarted while being expected to come back online. A value of 0 for this field is
// interpreted as an infinite timeout.
int32 io_retry_timeout_in_sec = 17;
}

// ProcessDetails contains additional information about a process. This is the additional
Expand Down
56 changes: 38 additions & 18 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,6 @@ func newHcsTask(
owner := filepath.Base(os.Args[0])
isTemplate := oci.ParseAnnotationsSaveAsTemplate(ctx, s)

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
if err != nil {
return nil, err
}

var netNS string
if s.Windows != nil &&
s.Windows.Network != nil {
Expand All @@ -153,9 +148,16 @@ func newHcsTask(
HostingSystem: parent,
NetworkNamespace: netNS,
}

// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
ioRetryTimeout = time.Duration(shimOpts.IoRetryTimeoutInSec) * time.Second
}

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ioRetryTimeout)
if err != nil {
return nil, err
}

system, resources, err := hcsoci.CreateContainer(ctx, &opts)
Expand All @@ -164,16 +166,17 @@ func newHcsTask(
}

ht := &hcsTask{
events: events,
id: req.ID,
isWCOW: oci.IsWCOW(s),
c: system,
cr: resources,
ownsHost: ownsParent,
host: parent,
closed: make(chan struct{}),
taskSpec: s,
isTemplate: isTemplate,
events: events,
id: req.ID,
isWCOW: oci.IsWCOW(s),
c: system,
cr: resources,
ownsHost: ownsParent,
host: parent,
closed: make(chan struct{}),
taskSpec: s,
isTemplate: isTemplate,
ioRetryTimeout: ioRetryTimeout,
}
ht.init = newHcsExec(
ctx,
Expand Down Expand Up @@ -251,7 +254,21 @@ func newClonedHcsTask(
return nil, fmt.Errorf("cloned task can only be created inside a windows host")
}

io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal)
var shimOpts *runhcsopts.Options
if req.Options != nil {
v, err := typeurl.UnmarshalAny(req.Options)
if err != nil {
return nil, err
}
shimOpts = v.(*runhcsopts.Options)
}

// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
ioRetryTimeout = time.Duration(shimOpts.IoRetryTimeoutInSec) * time.Second
}
io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal, ioRetryTimeout)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -405,6 +422,9 @@ type hcsTask struct {

// taskSpec represents the spec/configuration for this task.
taskSpec *specs.Spec

// ioRetryTimeout is how long to keep trying to reconnect to the stdio pipes from containerd. The zero value means an infinite timeout.
ioRetryTimeout time.Duration
}

func (ht *hcsTask) ID() string {
Expand All @@ -425,7 +445,7 @@ func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest,
return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id)
}

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ht.ioRetryTimeout)
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.13

require (
github.com/Microsoft/go-winio v0.4.17
github.com/cenkalti/backoff/v4 v4.1.1
github.com/containerd/cgroups v1.0.1
github.com/containerd/console v1.0.2
github.com/containerd/containerd v1.4.9
Expand Down
Loading

0 comments on commit b3b9c7a

Please sign in to comment.