Files
coder/agent/reaper/reaper_test.go
T
Mathias Fredriksson 1adc22fffd fix(agent/reaper): skip reaper tests in CI (#23068)
ForkReap's syscall.ForkExec and process-directed signals remain
flaky in CI despite the subprocess isolation added in #22894.
Restore the testutil.InCI() skip guard that was removed in that
change.

Fixes coder/internal#1402
2026-03-14 21:15:47 +01:00

215 lines
5.7 KiB
Go

//go:build linux
package reaper_test
import (
"fmt"
"os"
"os/exec"
"os/signal"
"sync"
"syscall"
"testing"
"time"
"github.com/hashicorp/go-reap"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/agent/reaper"
"github.com/coder/coder/v2/testutil"
)
// subprocessEnvKey is set when a test re-execs itself as an
// isolated subprocess. Tests that call ForkReap or send signals
// to their own process check this to decide whether to run real
// test logic or launch the subprocess and wait for it.
const subprocessEnvKey = "CODER_REAPER_TEST_SUBPROCESS"
// runSubprocess re-execs the current test binary in a new process
// running only the named test. This isolates ForkReap's
// syscall.ForkExec and any process-directed signals (e.g. SIGINT)
// from the parent test binary, making these tests safe to run in
// CI and alongside other tests.
//
// Returns true inside the subprocess (caller should proceed with
// the real test logic). Returns false in the parent after the
// subprocess exits successfully (caller should return).
func runSubprocess(t *testing.T) bool {
t.Helper()
if os.Getenv(subprocessEnvKey) == "1" {
return true
}
ctx := testutil.Context(t, testutil.WaitMedium)
//nolint:gosec // Test-controlled arguments.
cmd := exec.CommandContext(ctx, os.Args[0],
"-test.run=^"+t.Name()+"$",
"-test.v",
)
cmd.Env = append(os.Environ(), subprocessEnvKey+"=1")
out, err := cmd.CombinedOutput()
t.Logf("Subprocess output:\n%s", out)
require.NoError(t, err, "subprocess failed")
return false
}
// withDone returns options that stop the reaper goroutine when t
// completes and wait for it to fully exit, preventing
// overlapping reapers across sequential subtests.
func withDone(t *testing.T) []reaper.Option {
t.Helper()
stop := make(chan struct{})
stopped := make(chan struct{})
t.Cleanup(func() {
close(stop)
<-stopped
})
return []reaper.Option{
reaper.WithReaperStop(stop),
reaper.WithReaperStopped(stopped),
}
}
// TestReap checks that the reaper successfully reaps exited
// processes and passes their PIDs through the shared channel.
func TestReap(t *testing.T) {
t.Parallel()
if testutil.InCI() {
t.Skip("Detected CI, skipping reaper tests")
}
if !runSubprocess(t) {
return
}
pids := make(reap.PidCh, 1)
var reapLock sync.RWMutex
opts := append([]reaper.Option{
reaper.WithPIDCallback(pids),
reaper.WithExecArgs("/bin/sh", "-c", "exit 0"),
reaper.WithReapLock(&reapLock),
}, withDone(t)...)
reapLock.RLock()
exitCode, err := reaper.ForkReap(opts...)
reapLock.RUnlock()
require.NoError(t, err)
require.Equal(t, 0, exitCode)
cmd := exec.Command("tail", "-f", "/dev/null")
err = cmd.Start()
require.NoError(t, err)
cmd2 := exec.Command("tail", "-f", "/dev/null")
err = cmd2.Start()
require.NoError(t, err)
err = cmd.Process.Kill()
require.NoError(t, err)
err = cmd2.Process.Kill()
require.NoError(t, err)
expectedPIDs := []int{cmd.Process.Pid, cmd2.Process.Pid}
for range len(expectedPIDs) {
select {
case <-time.After(testutil.WaitShort):
t.Fatalf("Timed out waiting for process")
case pid := <-pids:
require.Contains(t, expectedPIDs, pid)
}
}
}
//nolint:tparallel // Subtests must be sequential, each starts its own reaper.
func TestForkReapExitCodes(t *testing.T) {
t.Parallel()
if testutil.InCI() {
t.Skip("Detected CI, skipping reaper tests")
}
if !runSubprocess(t) {
return
}
tests := []struct {
name string
command string
expectedCode int
}{
{"exit 0", "exit 0", 0},
{"exit 1", "exit 1", 1},
{"exit 42", "exit 42", 42},
{"exit 255", "exit 255", 255},
{"SIGKILL", "kill -9 $$", 128 + 9},
{"SIGTERM", "kill -15 $$", 128 + 15},
}
//nolint:paralleltest // Subtests must be sequential, each starts its own reaper.
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var reapLock sync.RWMutex
opts := append([]reaper.Option{
reaper.WithExecArgs("/bin/sh", "-c", tt.command),
reaper.WithReapLock(&reapLock),
}, withDone(t)...)
reapLock.RLock()
exitCode, err := reaper.ForkReap(opts...)
reapLock.RUnlock()
require.NoError(t, err)
require.Equal(t, tt.expectedCode, exitCode, "exit code mismatch for %q", tt.command)
})
}
}
// TestReapInterrupt verifies that ForkReap forwards caught signals
// to the child process. The test sends SIGINT to its own process
// and checks that the child receives it. Running in a subprocess
// ensures SIGINT cannot kill the parent test binary.
func TestReapInterrupt(t *testing.T) {
t.Parallel()
if testutil.InCI() {
t.Skip("Detected CI, skipping reaper tests")
}
if !runSubprocess(t) {
return
}
errC := make(chan error, 1)
pids := make(reap.PidCh, 1)
// Use signals to notify when the child process is ready for the
// next step of our test.
usrSig := make(chan os.Signal, 1)
signal.Notify(usrSig, syscall.SIGUSR1, syscall.SIGUSR2)
defer signal.Stop(usrSig)
go func() {
opts := append([]reaper.Option{
reaper.WithPIDCallback(pids),
reaper.WithCatchSignals(os.Interrupt),
// Signal propagation does not extend to children of children, so
// we create a little bash script to ensure sleep is interrupted.
reaper.WithExecArgs("/bin/sh", "-c", fmt.Sprintf(
"pid=0; trap 'kill -USR2 %d; kill -TERM $pid' INT; sleep 10 &\npid=$!; kill -USR1 %d; wait",
os.Getpid(), os.Getpid(),
)),
}, withDone(t)...)
exitCode, err := reaper.ForkReap(opts...)
// The child exits with 128 + SIGTERM (15) = 143, but the trap catches
// SIGINT and sends SIGTERM to the sleep process, so exit code varies.
_ = exitCode
errC <- err
}()
require.Equal(t, syscall.SIGUSR1, <-usrSig)
err := syscall.Kill(os.Getpid(), syscall.SIGINT)
require.NoError(t, err)
require.Equal(t, syscall.SIGUSR2, <-usrSig)
require.NoError(t, <-errC)
}