Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests calling terminating signals to func-e #372

Merged
merged 6 commits into from
Sep 14, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 109 additions & 59 deletions internal/moreos/moreos_func-e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"path/filepath"
"runtime"
"strings"
"syscall"
"testing"
"time"

Expand All @@ -45,68 +46,106 @@ var (
moreosSrcDir embed.FS
)

// TestProcessGroupAttr_Kill tests sending SIGKILL to fake func-e.
// On linux, we propagate SIGKILL to the child process as the configured SysProcAttr.Pdeathsig
// in proc_linux.go.
func TestProcessGroupAttr_Kill(t *testing.T) {
// This works only for linux, sending kill -9 on darwin will not kill the process, we need to kill
// via pgid or kill the child first.
if runtime.GOOS != "linux" {
t.Skip()
// Test_CallSignal tests sending signals to fake func-e.
func Test_CallSignal(t *testing.T) {
type testCase struct {
name string
signal func(*os.Process) error
skip bool
waitForExiting bool
}
tempDir := t.TempDir()

// Build a fake envoy and pass the ENV hint so that fake func-e uses it
fakeEnvoy := filepath.Join(tempDir, "envoy"+Exe)
fakebinary.RequireFakeEnvoy(t, fakeEnvoy)
t.Setenv("ENVOY_PATH", fakeEnvoy)

fakeFuncE := filepath.Join(tempDir, "func-e"+Exe)
requireFakeFuncE(t, fakeFuncE)

stderr := new(bytes.Buffer)
stdout := new(bytes.Buffer)

// With an arg so fakeFuncE runs fakeEnvoy as its child and doesn't exit.
arg := string(version.LastKnownEnvoy)
cmd := exec.Command(fakeFuncE, "run", arg, "-c")
cmd.Stdout = stdout
cmd.Stderr = stderr

require.NoError(t, cmd.Start())

// Block until we reach an expected line or timeout.
reader := bufio.NewReader(stderr)
waitFor := "initializing epoch 0"
require.Eventually(t, func() bool {
b, _ := reader.Peek(512) // the error value is always EOF.
return strings.HasPrefix(string(b), waitFor)
}, 5*time.Second, 100*time.Millisecond, "timeout waiting for stderr to contain %q", waitFor)

require.Equal(t, Sprintf("starting: %s %s -c\n", fakeEnvoy, arg), stdout.String())

fakeFuncEProcess, err := process.NewProcess(int32(cmd.Process.Pid))
require.NoError(t, err)

// Get all fake func-e children processes.
children, err := fakeFuncEProcess.Children()
require.NoError(t, err)
require.Equal(t, len(children), 1) // Should have only one child process i.e. the fake envoy process.
fakeEnvoyProcess := &os.Process{Pid: int(children[0].Pid)}

// Kill the fake func-e process.
require.NoError(t, cmd.Process.Kill())

// Wait for the process to die; this could error due to the kill signal.
cmd.Wait() //nolint

// Wait and check if fake func-e and envoy processes are killed.
requireFindProcessError(t, cmd.Process, process.ErrorProcessNotRunning)
requireFindProcessError(t, fakeEnvoyProcess, process.ErrorProcessNotRunning)
tests := []testCase{
{
name: "interrupt",
signal: Interrupt,
waitForExiting: true,
skip: runtime.GOOS == OSWindows,
dio marked this conversation as resolved.
Show resolved Hide resolved
},
{
name: "SIGTERM",
signal: func(proc *os.Process) error { return proc.Signal(syscall.SIGTERM) },
waitForExiting: true,
// On Windows, os.Process.Signal is not implemented; it will return an error instead of sending
// a signal.
skip: runtime.GOOS == OSWindows,
},
{
name: "kill",
// On Linux, we propagate SIGKILL to the child process as the configured SysProcAttr.Pdeathsig
// in proc_linux.go.
signal: func(proc *os.Process) error { return proc.Kill() },
waitForExiting: false, // since the process is killed, it is immediately exit.
// This works only for Linux, sending kill -9 on Darwin will not kill the process, we need to
// kill via pgid or kill the child first.
skip: runtime.GOOS != OSLinux,
},
}

// Ensure both processes are killed.
require.NoError(t, EnsureProcessDone(cmd.Process))
require.NoError(t, EnsureProcessDone(fakeEnvoyProcess))
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
if tc.skip {
t.Skip()
}

tempDir := t.TempDir()

// Build a fake envoy and pass the ENV hint so that fake func-e uses it
dio marked this conversation as resolved.
Show resolved Hide resolved
fakeEnvoy := filepath.Join(tempDir, "envoy"+Exe)
fakebinary.RequireFakeEnvoy(t, fakeEnvoy)
t.Setenv("ENVOY_PATH", fakeEnvoy)

fakeFuncE := filepath.Join(tempDir, "func-e"+Exe)
requireFakeFuncE(t, fakeFuncE)

stdout := new(bytes.Buffer)

// With an arg so fakeFuncE runs fakeEnvoy as its child and doesn't exit.
arg := string(version.LastKnownEnvoy)
cmd := exec.Command(fakeFuncE, "run", arg, "-c")
cmd.Stdout = stdout

stderr, err := cmd.StderrPipe()
require.NoError(t, err)
stderrScanner := bufio.NewScanner(stderr)

require.NoError(t, cmd.Start())

// Block until we reach an expected line or timeout.
requireScannedWaitFor(t, stderrScanner, "starting main dispatch loop")
require.Equal(t, Sprintf("starting: %s %s -c\n", fakeEnvoy, arg), stdout.String())

fakeFuncEProcess, err := process.NewProcess(int32(cmd.Process.Pid))
require.NoError(t, err)

// Get all fake func-e children processes.
children, err := fakeFuncEProcess.Children()
require.NoError(t, err)
require.Equal(t, len(children), 1) // Should have only one child process i.e. the fake envoy process.
fakeEnvoyProcess := &os.Process{Pid: int(children[0].Pid)}

tc.signal(cmd.Process)
if tc.waitForExiting {
requireScannedWaitFor(t, stderrScanner, "exiting")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe adding comment about where "exiting" comes from would help future contributor :D

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. A good call. I added it.

}

// Wait for the process to die; this could error due to the kill signal.
err = cmd.Wait()
dio marked this conversation as resolved.
Show resolved Hide resolved
if tc.waitForExiting {
// When it is terminated using interrupt or SIGTERM, we expect cmd.Wait to be properly stopped.
require.NoError(t, err)
}

// Wait and check if fake func-e and envoy processes are stopped.
requireFindProcessError(t, cmd.Process, process.ErrorProcessNotRunning)
requireFindProcessError(t, fakeEnvoyProcess, process.ErrorProcessNotRunning)

// Ensure both processes are killed.
require.NoError(t, EnsureProcessDone(cmd.Process))
require.NoError(t, EnsureProcessDone(fakeEnvoyProcess))
})
}
}

// requireFakeFuncE builds a func-e binary only depends on fakeFuncESrc and the sources in this package.
Expand Down Expand Up @@ -136,3 +175,14 @@ func requireFindProcessError(t *testing.T, proc *os.Process, expectedErr error)
return err == expectedErr
}, 100*time.Millisecond, 5*time.Millisecond, "timeout waiting for expected error %v", expectedErr)
}

func requireScannedWaitFor(t *testing.T, s *bufio.Scanner, waitFor string) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

later I will make a refactoring PR as we use scanner in the e2e for the same reasons, even if it could be documented better. Most notably we also avoid require.Eventually as it makes crap error messages when it fails. In other words, we used to use require.Eventually, that caused very difficult troubleshooting in CI, then we switched off of it.

Even if we fail to do refactoring for whatever reason, when there is copy/paste or something very similar, it is worth commenting the other function or file this is almost the same as. That way we can know to look at it in worst case. Folks who read those comments can help prevent adding a third similar thing later. It also helps reviewers who may have forgotten the other thing.

require.Eventually(t, func() bool {
for s.Scan() {
if strings.HasPrefix(s.Text(), waitFor) {
return true
}
}
return false
}, 5*time.Second, 100*time.Millisecond, "timeout waiting for scanner to find %q", waitFor)
}
6 changes: 5 additions & 1 deletion internal/moreos/testdata/fake_func-e.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func main() {

// Like envoy.Run.
waitCtx, waitCancel := context.WithCancel(context.Background())
sigCtx, _ := signal.NotifyContext(waitCtx, syscall.SIGINT, syscall.SIGTERM)
sigCtx, stop := signal.NotifyContext(waitCtx, os.Interrupt, syscall.SIGTERM)
defer waitCancel()
dio marked this conversation as resolved.
Show resolved Hide resolved

moreos.Fprintf(os.Stdout, "starting: %s\n", strings.Join(cmd.Args, " ")) //nolint
Expand All @@ -56,6 +56,10 @@ func main() {

// Block until we receive SIGINT or are canceled because Envoy has died.
<-sigCtx.Done()
stop()
dio marked this conversation as resolved.
Show resolved Hide resolved

// Simulate handleShutdown like in envoy.Run.
_ = moreos.Interrupt(cmd.Process)
Copy link
Collaborator Author

@dio dio Sep 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably this one causing -1073741510 on Windows? If we don't have this, we waste 5 secs waiting for the child process to terminate.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because I didn't set the cmd.SysProcAttr for windows.


// Block until it exits to ensure file descriptors are closed prior to archival.
// Allow up to 5 seconds for a clean stop, killing if it can't for any reason.
Expand Down