Skip to content

Commit

Permalink
Merge pull request #431 from greenhouse-org/fix-syscall-race
Browse files Browse the repository at this point in the history
Fix race in syscallWatcher
  • Loading branch information
jterry75 authored Dec 19, 2018
2 parents 9586870 + 6a8ab83 commit 218ab38
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 74 deletions.
21 changes: 9 additions & 12 deletions internal/hcs/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,9 @@ func (process *Process) Signal(options guestrequest.SignalProcessOptions) (err e
optionsStr := string(optionsb)

var resultp *uint16
completed := false
go syscallWatcher(process.logctx, &completed)
err = hcsSignalProcess(process.handle, optionsStr, &resultp)
completed = true
syscallWatcher(process.logctx, func() {
err = hcsSignalProcess(process.handle, optionsStr, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return makeProcessError(process, operation, err, events)
Expand All @@ -156,10 +155,9 @@ func (process *Process) Kill() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(process.logctx, &completed)
err = hcsTerminateProcess(process.handle, &resultp)
completed = true
syscallWatcher(process.logctx, func() {
err = hcsTerminateProcess(process.handle, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return makeProcessError(process, operation, err, events)
Expand Down Expand Up @@ -251,10 +249,9 @@ func (process *Process) Properties() (_ *ProcessStatus, err error) {
resultp *uint16
propertiesp *uint16
)
completed := false
go syscallWatcher(process.logctx, &completed)
err = hcsGetProcessProperties(process.handle, &propertiesp, &resultp)
completed = true
syscallWatcher(process.logctx, func() {
err = hcsGetProcessProperties(process.handle, &propertiesp, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return nil, makeProcessError(process, operation, err, events)
Expand Down
90 changes: 40 additions & 50 deletions internal/hcs/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,13 @@ func CreateComputeSystem(id string, hcsDocumentInterface interface{}) (_ *System
Debug("HCS ComputeSystem Document")

var (
resultp *uint16
identity syscall.Handle
resultp *uint16
identity syscall.Handle
createError error
)
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
createError := hcsCreateComputeSystem(id, hcsDocument, identity, &computeSystem.handle, &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
createError = hcsCreateComputeSystem(id, hcsDocument, identity, &computeSystem.handle, &resultp)
})

if createError == nil || IsPending(createError) {
if err = computeSystem.registerCallback(); err != nil {
Expand Down Expand Up @@ -193,10 +193,10 @@ func GetComputeSystems(q schema1.ComputeSystemQuery) (_ []schema1.ContainerPrope
resultp *uint16
computeSystemsp *uint16
)
completed := false
go syscallWatcher(fields, &completed)
err = hcsEnumerateComputeSystems(query, &computeSystemsp, &resultp)
completed = true

syscallWatcher(fields, func() {
err = hcsEnumerateComputeSystems(query, &computeSystemsp, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return nil, &HcsError{Op: operation, Err: err, Events: events}
Expand Down Expand Up @@ -254,10 +254,9 @@ func (computeSystem *System) Start() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsStartComputeSystem(computeSystem.handle, "", &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsStartComputeSystem(computeSystem.handle, "", &resultp)
})
events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart)
if err != nil {
return makeSystemError(computeSystem, "Start", "", err, events)
Expand Down Expand Up @@ -286,10 +285,9 @@ func (computeSystem *System) Shutdown() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsShutdownComputeSystem(computeSystem.handle, "", &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsShutdownComputeSystem(computeSystem.handle, "", &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return makeSystemError(computeSystem, "Shutdown", "", err, events)
Expand All @@ -313,10 +311,9 @@ func (computeSystem *System) Terminate() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsTerminateComputeSystem(computeSystem.handle, "", &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsTerminateComputeSystem(computeSystem.handle, "", &resultp)
})
events := processHcsResult(resultp)
if err != nil && err != ErrVmcomputeAlreadyStopped {
return makeSystemError(computeSystem, "Terminate", "", err, events)
Expand Down Expand Up @@ -387,10 +384,9 @@ func (computeSystem *System) Properties(types ...schema1.PropertyType) (_ *schem
Debug("HCS ComputeSystem Properties Query")

var resultp, propertiesp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsGetComputeSystemProperties(computeSystem.handle, string(queryj), &propertiesp, &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsGetComputeSystemProperties(computeSystem.handle, string(queryj), &propertiesp, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return nil, makeSystemError(computeSystem, "Properties", "", err, events)
Expand Down Expand Up @@ -422,10 +418,9 @@ func (computeSystem *System) Pause() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsPauseComputeSystem(computeSystem.handle, "", &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsPauseComputeSystem(computeSystem.handle, "", &resultp)
})
events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause)
if err != nil {
return makeSystemError(computeSystem, "Pause", "", err, events)
Expand All @@ -448,10 +443,9 @@ func (computeSystem *System) Resume() (err error) {
}

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsResumeComputeSystem(computeSystem.handle, "", &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsResumeComputeSystem(computeSystem.handle, "", &resultp)
})
events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume)
if err != nil {
return makeSystemError(computeSystem, "Resume", "", err, events)
Expand Down Expand Up @@ -490,10 +484,9 @@ func (computeSystem *System) CreateProcess(c interface{}) (_ *Process, err error
WithField(logfields.JSON, configuration).
Debug("HCS ComputeSystem Process Document")

completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsCreateProcess(computeSystem.handle, configuration, &processInfo, &processHandle, &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsCreateProcess(computeSystem.handle, configuration, &processInfo, &processHandle, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return nil, makeSystemError(computeSystem, "CreateProcess", configuration, err, events)
Expand Down Expand Up @@ -539,10 +532,9 @@ func (computeSystem *System) OpenProcess(pid int) (_ *Process, err error) {
return nil, makeSystemError(computeSystem, "OpenProcess", "", ErrAlreadyClosed, nil)
}

completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsOpenProcess(computeSystem.handle, uint32(pid), &processHandle, &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsOpenProcess(computeSystem.handle, uint32(pid), &processHandle, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return nil, makeSystemError(computeSystem, "OpenProcess", "", err, events)
Expand Down Expand Up @@ -574,10 +566,9 @@ func (computeSystem *System) Close() (err error) {
return makeSystemError(computeSystem, "Close", "", err, nil)
}

completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsCloseComputeSystem(computeSystem.handle)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsCloseComputeSystem(computeSystem.handle)
})
if err != nil {
return makeSystemError(computeSystem, "Close", "", err, nil)
}
Expand Down Expand Up @@ -669,10 +660,9 @@ func (computeSystem *System) Modify(config interface{}) (err error) {
Debug("HCS ComputeSystem Modify Document")

var resultp *uint16
completed := false
go syscallWatcher(computeSystem.logctx, &completed)
err = hcsModifyComputeSystem(computeSystem.handle, requestString, &resultp)
completed = true
syscallWatcher(computeSystem.logctx, func() {
err = hcsModifyComputeSystem(computeSystem.handle, requestString, &resultp)
})
events := processHcsResult(resultp)
if err != nil {
return makeSystemError(computeSystem, "Modify", requestString, err, events)
Expand Down
32 changes: 20 additions & 12 deletions internal/hcs/watcher.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package hcs

import (
"time"
"context"

"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/timeout"
Expand All @@ -17,17 +17,25 @@ import (
//
// Usage is:
//
// completed := false
// go syscallWatcher(context, &completed)
// <syscall>
// completed = true
// syscallWatcher(logContext, func() {
// err = <syscall>(args...)
// })
//
func syscallWatcher(context logrus.Fields, syscallCompleted *bool) {
time.Sleep(timeout.SyscallWatcher)
if *syscallCompleted {
return

func syscallWatcher(logContext logrus.Fields, syscallLambda func()) {
ctx, cancel := context.WithTimeout(context.Background(), timeout.SyscallWatcher)
defer cancel()
go watchFunc(ctx, logContext)
syscallLambda()
}

func watchFunc(ctx context.Context, logContext logrus.Fields) {
select {
case <-ctx.Done():
if ctx.Err() != context.Canceled {
logrus.WithFields(logContext).
WithField(logfields.Timeout, timeout.SyscallWatcher).
Warning("Syscall did not complete within operation timeout. This may indicate a platform issue. If it appears to be making no forward progress, obtain the stacks and see if there is a syscall stuck in the platform API for a significant length of time.")
}
}
logrus.WithFields(context).
WithField(logfields.Timeout, timeout.SyscallWatcher).
Warning("Syscall did not complete within operation timeout. This may indicate a platform issue. If it appears to be making no forward progress, obtain the stacks and see if there is a syscall stuck in the platform API for a significant length of time.")
}

0 comments on commit 218ab38

Please sign in to comment.