From 5807fe01e4c46aeb4de5680f1e793a2b8d20914b Mon Sep 17 00:00:00 2001 From: Spike Curtis Date: Wed, 8 Oct 2025 16:23:46 +0400 Subject: [PATCH] test: prevent TestAgent_ReconnectingPTY connection reporting check from interfering (#20210) When we added support for connection tracking in the Workspace agent, we modified the ReconnectingPTY tests to add an initial connection that we immediately hang up and check that connections are logged. In the case of `screen`-based pty handling, hanging up the initial connection can race with the initial attachment to the `screen` process, and cause that process to exit early. This leaves subsequent connections to the same session ID to fail. In this PR we just use different pty session IDs so that the initial connections we do to verify logging don't interfere with the rest of the test. _Arguably_ it's a bug in our Reconnecting PTY code that hanging up immediately can leave the system in a weird state, but we do eventually recover and error out, so I don't think it's worth trying to fix. --- agent/agent_test.go | 3 ++- agent/reconnectingpty/screen.go | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index d4c857dc39..ca00180767 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1807,11 +1807,12 @@ func TestAgent_ReconnectingPTY(t *testing.T) { //nolint:dogsled conn, agentClient, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0) + idConnectionReport := uuid.New() id := uuid.New() // Test that the connection is reported. This must be tested in the // first connection because we care about verifying all of these. - netConn0, err := conn.ReconnectingPTY(ctx, id, 80, 80, "bash --norc") + netConn0, err := conn.ReconnectingPTY(ctx, idConnectionReport, 80, 80, "bash --norc") require.NoError(t, err) _ = netConn0.Close() assertConnectionReport(t, agentClient, proto.Connection_RECONNECTING_PTY, 0, "") diff --git a/agent/reconnectingpty/screen.go b/agent/reconnectingpty/screen.go index 04e1861ead..ffab2f7d5b 100644 --- a/agent/reconnectingpty/screen.go +++ b/agent/reconnectingpty/screen.go @@ -25,6 +25,7 @@ import ( // screenReconnectingPTY provides a reconnectable PTY via `screen`. type screenReconnectingPTY struct { + logger slog.Logger execer agentexec.Execer command *pty.Cmd @@ -62,6 +63,7 @@ type screenReconnectingPTY struct { // own which causes it to spawn with the specified size. func newScreen(ctx context.Context, logger slog.Logger, execer agentexec.Execer, cmd *pty.Cmd, options *Options) *screenReconnectingPTY { rpty := &screenReconnectingPTY{ + logger: logger, execer: execer, command: cmd, metrics: options.Metrics, @@ -173,6 +175,7 @@ func (rpty *screenReconnectingPTY) Attach(ctx context.Context, _ string, conn ne ptty, process, err := rpty.doAttach(ctx, conn, height, width, logger) if err != nil { + logger.Debug(ctx, "unable to attach to screen reconnecting pty", slog.Error(err)) if errors.Is(err, context.Canceled) { // Likely the process was too short-lived and canceled the version command. // TODO: Is it worth distinguishing between that and a cancel from the @@ -182,6 +185,7 @@ func (rpty *screenReconnectingPTY) Attach(ctx context.Context, _ string, conn ne } return err } + logger.Debug(ctx, "attached to screen reconnecting pty") defer func() { // Log only for debugging since the process might have already exited on its @@ -403,6 +407,7 @@ func (rpty *screenReconnectingPTY) Wait() { } func (rpty *screenReconnectingPTY) Close(err error) { + rpty.logger.Debug(context.Background(), "closing screen reconnecting pty", slog.Error(err)) // The closing state change will be handled by the lifecycle. rpty.state.setState(StateClosing, err) }