test: fix failnow in goroutine in TestServer_TelemetryDisabled_FinalReport (#21973)

closes: https://github.com/coder/internal/issues/1331 Fixes up an issue in the test where we end up calling `FailNow` outside the main test goroutine. Also adds the ability to name a `ptytest.PTY` for cases like this one where we start multiple commands. This will help debugging if we see the issue again. This doesn't address the root cause of the failure, but I think we should close the flake issue. I think we'd need like a stacktrace of all goroutines at the point of failing the test, but that's way too much effort unless we see this again.
2026-06-02 20:48:20 +00:00 · 2026-02-09 14:20:57 +04:00
parent 93b000776f
commit 95b3bc9c7a
2 changed files with 37 additions and 22 deletions
@@ -2244,6 +2244,7 @@ type runServerOpts struct {
 	waitForSnapshot               bool
 	telemetryDisabled             bool
 	waitForTelemetryDisabledCheck bool
+	name                          string
 }

 func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
@@ -2266,25 +2267,23 @@ func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
 			"--cache-dir", cacheDir,
 			"--log-filter", ".*",
 		)
-		finished := make(chan bool, 2)
+		inv.Logger = inv.Logger.Named(opts.name)
+
 		errChan := make(chan error, 1)
-		pty := ptytest.New(t).Attach(inv)
+		pty := ptytest.New(t).Named(opts.name).Attach(inv)
 		go func() {
 			errChan <- inv.WithContext(ctx).Run()
-			finished <- true
+			// close the pty here so that we can start tearing down resources. This test creates multiple servers with
+			// associated ptys. There is a `t.Cleanup()` that does this, but it waits until the whole test is complete.
+			_ = pty.Close()
 		}()
-		go func() {
-			defer func() {
-				finished <- true
-			}()
-			if opts.waitForSnapshot {
-				pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "submitted snapshot")
-			}
-			if opts.waitForTelemetryDisabledCheck {
-				pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "finished telemetry status check")
-			}
-		}()
-		<-finished
+
+		if opts.waitForSnapshot {
+			pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "submitted snapshot")
+		}
+		if opts.waitForTelemetryDisabledCheck {
+			pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "finished telemetry status check")
+		}
 		return errChan, cancelFunc
 	}
 	waitForShutdown := func(t *testing.T, errChan chan error) error {
@@ -2298,7 +2297,9 @@ func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
 		return nil
 	}

-	errChan, cancelFunc := runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
+	errChan, cancelFunc := runServer(t, runServerOpts{
+		telemetryDisabled: true, waitForTelemetryDisabledCheck: true, name: "0disabled",
+	})
 	cancelFunc()
 	require.NoError(t, waitForShutdown(t, errChan))

@@ -2306,7 +2307,7 @@ func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
 	require.Empty(t, deployment)
 	require.Empty(t, snapshot)

-	errChan, cancelFunc = runServer(t, runServerOpts{waitForSnapshot: true})
+	errChan, cancelFunc = runServer(t, runServerOpts{waitForSnapshot: true, name: "1enabled"})
 	cancelFunc()
 	require.NoError(t, waitForShutdown(t, errChan))
 	// we expect to see a deployment and a snapshot twice:
@@ -2325,7 +2326,9 @@ func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
 		}
 	}

-	errChan, cancelFunc = runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
+	errChan, cancelFunc = runServer(t, runServerOpts{
+		telemetryDisabled: true, waitForTelemetryDisabledCheck: true, name: "2disabled",
+	})
 	cancelFunc()
 	require.NoError(t, waitForShutdown(t, errChan))

@@ -2341,7 +2344,9 @@ func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
 		t.Fatalf("timed out waiting for snapshot")
 	}

-	errChan, cancelFunc = runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
+	errChan, cancelFunc = runServer(t, runServerOpts{
+		telemetryDisabled: true, waitForTelemetryDisabledCheck: true, name: "3disabled",
+	})
 	cancelFunc()
 	require.NoError(t, waitForShutdown(t, errChan))
 	// Since telemetry is disabled and we've already sent a snapshot, we expect no
@@ -17,6 +17,7 @@ import (

 	"github.com/acarl005/stripansi"
 	"github.com/stretchr/testify/require"
+	"go.uber.org/atomic"
 	"golang.org/x/xerrors"

 	"github.com/coder/coder/v2/pty"
@@ -78,7 +79,7 @@ func newExpecter(t *testing.T, r io.Reader, name string) outExpecter {
 	ex := outExpecter{
 		t:    t,
 		out:  out,
-		name: name,
+		name: atomic.NewString(name),

 		runeReader: bufio.NewReaderSize(out, utf8.UTFMax),
 	}
@@ -140,7 +141,7 @@ type outExpecter struct {
 	t     *testing.T
 	close func(reason string) error
 	out   *stdbuf
-	name  string
+	name  *atomic.String

 	runeReader *bufio.Reader
 }
@@ -361,7 +362,7 @@ func (e *outExpecter) logf(format string, args ...interface{}) {

 	// Match regular logger timestamp format, we seem to be logging in
 	// UTC in other places as well, so match here.
-	e.t.Logf("%s: %s: %s", time.Now().UTC().Format("2006-01-02 15:04:05.000"), e.name, fmt.Sprintf(format, args...))
+	e.t.Logf("%s: %s: %s", time.Now().UTC().Format("2006-01-02 15:04:05.000"), e.name.Load(), fmt.Sprintf(format, args...))
 }

 func (e *outExpecter) fatalf(reason string, format string, args ...interface{}) {
@@ -430,6 +431,15 @@ func (p *PTY) WriteLine(str string) {
 	require.NoError(p.t, err, "write line failed")
 }

+// Named sets the PTY name in the logs. Defaults to "cmd". Make sure you set this before anything starts writing to the
+// pty, or it may not be named consistently. E.g.
+//
+// p := New(t).Named("myCmd")
+func (p *PTY) Named(name string) *PTY {
+	p.name.Store(name)
+	return p
+}
+
 type PTYCmd struct {
 	outExpecter
 	pty.PTYCmd