Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/cachewd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/block/cachew/internal/jobscheduler"
"github.com/block/cachew/internal/logging"
"github.com/block/cachew/internal/metrics"
"github.com/block/cachew/internal/reaper"
"github.com/block/cachew/internal/strategy"
"github.com/block/cachew/internal/strategy/git"
"github.com/block/cachew/internal/strategy/gomod"
Expand Down Expand Up @@ -64,6 +65,8 @@ func main() {
ctx := context.Background()
logger, ctx := logging.Configure(ctx, globalConfig.LoggingConfig)

reaper.Start(ctx)

// Start initialising
tokenManagerProvider := githubapp.NewTokenManagerProvider(globalConfig.GithubAppConfig, logger)
managerProvider := gitclone.NewManagerProvider(ctx, globalConfig.GitCloneConfig, func() (gitclone.CredentialProvider, error) {
Expand Down
62 changes: 62 additions & 0 deletions internal/reaper/reaper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Package reaper provides a background zombie process reaper.
//
// When a Go process runs as PID 1 (e.g. inside a container), it inherits
// orphaned child processes. If those children exit without being waited on,
// they accumulate as zombies. This package periodically calls waitpid(-1)
// with WNOHANG to reap any such zombies.
package reaper

import (
"context"
"log/slog"
"os"
"syscall"
"time"

"github.com/block/cachew/internal/logging"
)

// Start begins background zombie reaping when, and only when, the current
// process is PID 1. In that case it logs at info level and spawns a goroutine
// running the periodic reap loop, which stops once ctx is cancelled. For any
// other PID it logs a debug message and does nothing else.
//
// The logger is taken from ctx via logging.FromContext.
func Start(ctx context.Context) {
	log := logging.FromContext(ctx)
	if os.Getpid() == 1 {
		log.InfoContext(ctx, "Running as PID 1, starting zombie reaper")
		go run(ctx)
		return
	}
	log.DebugContext(ctx, "Zombie reaper not needed, not running as PID 1")
}

// StartForTest spawns the periodic reap loop in a background goroutine
// unconditionally, i.e. without the PID 1 check that Start performs. The
// goroutine stops once ctx is cancelled.
func StartForTest(ctx context.Context) {
	go func() {
		run(ctx)
	}()
}

// Reap drains every child process that has already exited, without blocking.
//
// It calls wait4(-1, WNOHANG) in a loop and logs each collected PID together
// with its exit status at debug level. The loop ends as soon as the call
// returns an error or a non-positive PID; the error itself is not reported.
//
// NOTE(review): wait4(-1) collects any child of this process, which
// presumably includes children started elsewhere via os/exec; a later Wait on
// such a command would then fail. Confirm this cannot race with other
// subprocess users when running as PID 1.
func Reap(ctx context.Context) {
	log := logging.FromContext(ctx)

	// Wait4 overwrites ws on every call, so a single variable can be reused.
	var ws syscall.WaitStatus
	for {
		reaped, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, nil)
		if err != nil || reaped <= 0 {
			return
		}
		log.DebugContext(ctx, "Reaped zombie process", slog.Int("pid", reaped), slog.Int("status", ws.ExitStatus()))
	}
}

// run is the reaper loop: it calls Reap once per tick of a five-second ticker
// and returns when ctx is cancelled. The ticker is stopped on return.
func run(ctx context.Context) {
	const interval = 5 * time.Second

	tick := time.NewTicker(interval)
	defer tick.Stop()

	done := ctx.Done()
	for {
		select {
		case <-tick.C:
			Reap(ctx)
		case <-done:
			return
		}
	}
}
68 changes: 68 additions & 0 deletions internal/reaper/reaper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package reaper_test

import (
	"context"
	"errors"
	"log/slog"
	"os"
	"os/exec"
	"runtime"
	"syscall"
	"testing"
	"time"

	"github.com/alecthomas/assert/v2"

	"github.com/block/cachew/internal/logging"
	"github.com/block/cachew/internal/reaper"
)

// testContext returns a context derived from t.Context() after passing it
// through logging.Configure with the level set to debug. The logger that
// Configure also returns is discarded.
func testContext(t *testing.T) context.Context {
	t.Helper()
	cfg := logging.Config{Level: slog.LevelDebug}
	_, ctx := logging.Configure(t.Context(), cfg)
	return ctx
}

// TestStartReapsZombies verifies that the reaper collects a child process
// that exited without anyone calling Wait on it.
func TestStartReapsZombies(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("zombie reaping not applicable on Windows")
	}
	if os.Getpid() == 1 {
		t.Skip("test assumes we are not PID 1")
	}

	ctx, cancel := context.WithCancel(testContext(t))
	defer cancel()

	reaper.StartForTest(ctx)

	// Create a child process that exits immediately. We deliberately
	// don't call cmd.Wait(), so the exited process becomes a zombie
	// that the reaper should collect.
	cmd := exec.Command("true")
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	err := cmd.Start()
	assert.NoError(t, err)

	pid := cmd.Process.Pid

	// Poll instead of sleeping for a fixed time: the child may not have
	// exited yet when Reap first runs. Existence is probed with signal 0,
	// which delivers nothing and, unlike waitpid, never reaps the child
	// itself. A zombie still answers the probe; ESRCH is only reported once
	// the process entry is gone, i.e. after it has been reaped.
	deadline := time.Now().Add(2 * time.Second)
	for {
		reaper.Reap(ctx)
		if err := syscall.Kill(pid, 0); errors.Is(err, syscall.ESRCH) {
			return
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected zombie %d to have been reaped", pid)
		}
		time.Sleep(10 * time.Millisecond)
	}
}

// TestStartSkipsWhenNotPID1 calls Start from a process that is not PID 1.
// It makes no assertions, so it only checks that the call returns without
// panicking.
func TestStartSkipsWhenNotPID1(t *testing.T) {
	if pid := os.Getpid(); pid == 1 {
		t.Skip("unexpectedly running as PID 1")
	}

	ctx, stop := context.WithCancel(testContext(t))
	defer stop()

	// Start is expected to take its non-PID-1 branch and return right away.
	reaper.Start(ctx)
}