fix(coderd): allow agent auth during workspace shutdown (#21538)

Agents were losing authentication during workspace shutdown, causing
shutdown scripts to fail. The auth query required agents to belong to
the latest build, but during shutdown a `stop` build becomes latest while
the `start` build's agents are still running.

Modified the auth query to allow `start` build agents to authenticate
temporarily during `stop` execution. Besides the normal case (the agent's build is the latest build), the query allows auth when all of the following hold:

- Agent's `start` build job succeeded
- Latest build is `stop` with `pending`/`running` job status
- Builds are adjacent (`stop` is `build_number + 1`)
- Template versions match

Auth closes as soon as the `stop` build's job is no longer `pending` or `running`.

Renamed `GetWorkspaceAgentAndLatestBuildByAuthToken` to
`GetAuthenticatedWorkspaceAgentAndBuildByAuthToken` since it returns the
agent's build (not always latest) during shutdown.

Closes coder/internal#1249
Fixes #19467
This commit is contained in:
Mathias Fredriksson
2026-01-21 15:18:43 +02:00
committed by GitHub
parent a14a22eb54
commit 97e8a5b093
15 changed files with 726 additions and 72 deletions
+40 -12
View File
@@ -281,7 +281,12 @@ WHERE
-- Filter out deleted sub agents.
AND workspace_agents.deleted = FALSE;
-- name: GetWorkspaceAgentAndLatestBuildByAuthToken :one
-- GetAuthenticatedWorkspaceAgentAndBuildByAuthToken returns an authenticated
-- workspace agent and its associated build. During normal operation, this is
-- the latest build. During shutdown, this may be the previous START build while
-- the STOP build is executing, allowing shutdown scripts to authenticate (see
-- issue #19467).
-- name: GetAuthenticatedWorkspaceAgentAndBuildByAuthToken :one
SELECT
sqlc.embed(workspaces),
sqlc.embed(workspace_agents),
@@ -311,17 +316,40 @@ WHERE
AND workspaces.deleted = FALSE
-- Filter out deleted sub agents.
AND workspace_agents.deleted = FALSE
-- Filter out builds that are not the latest.
AND workspace_build_with_user.build_number = (
-- Select from workspace_builds as it's one less join compared
-- to workspace_build_with_user.
SELECT
MAX(build_number)
FROM
workspace_builds
WHERE
workspace_id = workspace_build_with_user.workspace_id
)
-- Filter out builds that are not the latest, with exception for shutdown case.
-- Use CASE for short-circuiting: check normal case first (most common), then shutdown case.
AND CASE
    -- Normal case: Agent's build is the latest build.
    WHEN workspace_build_with_user.build_number = (
        SELECT
            MAX(build_number)
        FROM
            workspace_builds
        WHERE
            workspace_id = workspace_build_with_user.workspace_id
    ) THEN TRUE
    -- Shutdown case: Agent from previous START build during STOP build execution.
    -- All of the conditions below must hold; otherwise the agent is rejected.
    WHEN workspace_build_with_user.transition = 'start'
        -- Agent's START build job succeeded.
        AND (SELECT job_status FROM provisioner_jobs WHERE id = workspace_build_with_user.job_id) = 'succeeded'
        -- Latest build is a STOP build whose job is still active,
        -- and agent's build is immediately previous.
        AND EXISTS (
            SELECT 1
            FROM workspace_builds latest
            JOIN provisioner_jobs pj ON pj.id = latest.job_id
            WHERE latest.workspace_id = workspace_build_with_user.workspace_id
                -- Builds are adjacent: the STOP build directly follows the agent's build.
                AND latest.build_number = workspace_build_with_user.build_number + 1
                -- The STOP build is the workspace's latest build.
                AND latest.build_number = (
                    SELECT MAX(build_number)
                    FROM workspace_builds l2
                    WHERE l2.workspace_id = latest.workspace_id
                )
                AND latest.transition = 'stop'
                -- Template versions match: the STOP build must run against the
                -- same template version as the agent's START build.
                AND latest.template_version_id = workspace_build_with_user.template_version_id
                -- Auth closes as soon as the STOP job is no longer pending/running.
                AND pj.job_status IN ('pending', 'running')
        ) THEN TRUE
    ELSE FALSE
END
;
-- name: InsertWorkspaceAgentScriptTimings :one