mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
1adc22fffd
ForkReap's syscall.ForkExec and process-directed signals remain flaky in CI despite the subprocess isolation added in #22894. Restore the testutil.InCI() skip guard that was removed in that change. Fixes coder/internal#1402
215 lines
5.7 KiB
Go
215 lines
5.7 KiB
Go
//go:build linux
|
|
|
|
package reaper_test
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"os/signal"
|
|
"sync"
|
|
"syscall"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-reap"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/coder/coder/v2/agent/reaper"
|
|
"github.com/coder/coder/v2/testutil"
|
|
)
|
|
|
|
// subprocessEnvKey is set when a test re-execs itself as an
|
|
// isolated subprocess. Tests that call ForkReap or send signals
|
|
// to their own process check this to decide whether to run real
|
|
// test logic or launch the subprocess and wait for it.
|
|
const subprocessEnvKey = "CODER_REAPER_TEST_SUBPROCESS"
|
|
|
|
// runSubprocess re-execs the current test binary in a new process
|
|
// running only the named test. This isolates ForkReap's
|
|
// syscall.ForkExec and any process-directed signals (e.g. SIGINT)
|
|
// from the parent test binary, making these tests safe to run in
|
|
// CI and alongside other tests.
|
|
//
|
|
// Returns true inside the subprocess (caller should proceed with
|
|
// the real test logic). Returns false in the parent after the
|
|
// subprocess exits successfully (caller should return).
|
|
func runSubprocess(t *testing.T) bool {
|
|
t.Helper()
|
|
|
|
if os.Getenv(subprocessEnvKey) == "1" {
|
|
return true
|
|
}
|
|
|
|
ctx := testutil.Context(t, testutil.WaitMedium)
|
|
|
|
//nolint:gosec // Test-controlled arguments.
|
|
cmd := exec.CommandContext(ctx, os.Args[0],
|
|
"-test.run=^"+t.Name()+"$",
|
|
"-test.v",
|
|
)
|
|
cmd.Env = append(os.Environ(), subprocessEnvKey+"=1")
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
t.Logf("Subprocess output:\n%s", out)
|
|
require.NoError(t, err, "subprocess failed")
|
|
|
|
return false
|
|
}
|
|
|
|
// withDone returns options that stop the reaper goroutine when t
|
|
// completes and wait for it to fully exit, preventing
|
|
// overlapping reapers across sequential subtests.
|
|
func withDone(t *testing.T) []reaper.Option {
|
|
t.Helper()
|
|
stop := make(chan struct{})
|
|
stopped := make(chan struct{})
|
|
t.Cleanup(func() {
|
|
close(stop)
|
|
<-stopped
|
|
})
|
|
return []reaper.Option{
|
|
reaper.WithReaperStop(stop),
|
|
reaper.WithReaperStopped(stopped),
|
|
}
|
|
}
|
|
|
|
// TestReap checks that the reaper successfully reaps exited
|
|
// processes and passes their PIDs through the shared channel.
|
|
func TestReap(t *testing.T) {
|
|
t.Parallel()
|
|
if testutil.InCI() {
|
|
t.Skip("Detected CI, skipping reaper tests")
|
|
}
|
|
if !runSubprocess(t) {
|
|
return
|
|
}
|
|
|
|
pids := make(reap.PidCh, 1)
|
|
var reapLock sync.RWMutex
|
|
opts := append([]reaper.Option{
|
|
reaper.WithPIDCallback(pids),
|
|
reaper.WithExecArgs("/bin/sh", "-c", "exit 0"),
|
|
reaper.WithReapLock(&reapLock),
|
|
}, withDone(t)...)
|
|
reapLock.RLock()
|
|
exitCode, err := reaper.ForkReap(opts...)
|
|
reapLock.RUnlock()
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, exitCode)
|
|
|
|
cmd := exec.Command("tail", "-f", "/dev/null")
|
|
err = cmd.Start()
|
|
require.NoError(t, err)
|
|
|
|
cmd2 := exec.Command("tail", "-f", "/dev/null")
|
|
err = cmd2.Start()
|
|
require.NoError(t, err)
|
|
|
|
err = cmd.Process.Kill()
|
|
require.NoError(t, err)
|
|
|
|
err = cmd2.Process.Kill()
|
|
require.NoError(t, err)
|
|
|
|
expectedPIDs := []int{cmd.Process.Pid, cmd2.Process.Pid}
|
|
|
|
for range len(expectedPIDs) {
|
|
select {
|
|
case <-time.After(testutil.WaitShort):
|
|
t.Fatalf("Timed out waiting for process")
|
|
case pid := <-pids:
|
|
require.Contains(t, expectedPIDs, pid)
|
|
}
|
|
}
|
|
}
|
|
|
|
//nolint:tparallel // Subtests must be sequential, each starts its own reaper.
|
|
func TestForkReapExitCodes(t *testing.T) {
|
|
t.Parallel()
|
|
if testutil.InCI() {
|
|
t.Skip("Detected CI, skipping reaper tests")
|
|
}
|
|
if !runSubprocess(t) {
|
|
return
|
|
}
|
|
|
|
tests := []struct {
|
|
name string
|
|
command string
|
|
expectedCode int
|
|
}{
|
|
{"exit 0", "exit 0", 0},
|
|
{"exit 1", "exit 1", 1},
|
|
{"exit 42", "exit 42", 42},
|
|
{"exit 255", "exit 255", 255},
|
|
{"SIGKILL", "kill -9 $$", 128 + 9},
|
|
{"SIGTERM", "kill -15 $$", 128 + 15},
|
|
}
|
|
|
|
//nolint:paralleltest // Subtests must be sequential, each starts its own reaper.
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
var reapLock sync.RWMutex
|
|
opts := append([]reaper.Option{
|
|
reaper.WithExecArgs("/bin/sh", "-c", tt.command),
|
|
reaper.WithReapLock(&reapLock),
|
|
}, withDone(t)...)
|
|
reapLock.RLock()
|
|
exitCode, err := reaper.ForkReap(opts...)
|
|
reapLock.RUnlock()
|
|
require.NoError(t, err)
|
|
require.Equal(t, tt.expectedCode, exitCode, "exit code mismatch for %q", tt.command)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestReapInterrupt verifies that ForkReap forwards caught signals
|
|
// to the child process. The test sends SIGINT to its own process
|
|
// and checks that the child receives it. Running in a subprocess
|
|
// ensures SIGINT cannot kill the parent test binary.
|
|
func TestReapInterrupt(t *testing.T) {
|
|
t.Parallel()
|
|
if testutil.InCI() {
|
|
t.Skip("Detected CI, skipping reaper tests")
|
|
}
|
|
if !runSubprocess(t) {
|
|
return
|
|
}
|
|
|
|
errC := make(chan error, 1)
|
|
pids := make(reap.PidCh, 1)
|
|
|
|
// Use signals to notify when the child process is ready for the
|
|
// next step of our test.
|
|
usrSig := make(chan os.Signal, 1)
|
|
signal.Notify(usrSig, syscall.SIGUSR1, syscall.SIGUSR2)
|
|
defer signal.Stop(usrSig)
|
|
|
|
go func() {
|
|
opts := append([]reaper.Option{
|
|
reaper.WithPIDCallback(pids),
|
|
reaper.WithCatchSignals(os.Interrupt),
|
|
// Signal propagation does not extend to children of children, so
|
|
// we create a little bash script to ensure sleep is interrupted.
|
|
reaper.WithExecArgs("/bin/sh", "-c", fmt.Sprintf(
|
|
"pid=0; trap 'kill -USR2 %d; kill -TERM $pid' INT; sleep 10 &\npid=$!; kill -USR1 %d; wait",
|
|
os.Getpid(), os.Getpid(),
|
|
)),
|
|
}, withDone(t)...)
|
|
exitCode, err := reaper.ForkReap(opts...)
|
|
// The child exits with 128 + SIGTERM (15) = 143, but the trap catches
|
|
// SIGINT and sends SIGTERM to the sleep process, so exit code varies.
|
|
_ = exitCode
|
|
errC <- err
|
|
}()
|
|
|
|
require.Equal(t, syscall.SIGUSR1, <-usrSig)
|
|
|
|
err := syscall.Kill(os.Getpid(), syscall.SIGINT)
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, syscall.SIGUSR2, <-usrSig)
|
|
require.NoError(t, <-errC)
|
|
}
|