fix(coderd): auto-update workspace to active template version on chat start (#24424)

## Problem

When a template has `require_active_version` enabled and the chat agent
tries to start a workspace that is stopped on an older template version,
the agent gets stuck in an infinite loop: `start_workspace` fails with a
403 (the old version is not the active version and the user lacks
`ActionUpdate` on the template), then `create_workspace` sees the
existing stopped workspace and tells the agent to use `start_workspace`,
repeat forever.

The root cause is that `chatStartWorkspace()` passes the start build
request through without setting `TemplateVersionID`, so `wsbuilder`
defaults to the previous build's template version — which RBAC rejects
when `RequireActiveVersion` is true.

## Fix

In `chatStartWorkspace()` (`coderd/exp_chats.go`), when the template's
access control has `RequireActiveVersion` enabled, explicitly set
`req.TemplateVersionID` to `template.ActiveVersionID` before calling
`postWorkspaceBuildsInternal()`. This mirrors how the autobuild executor
handles the same scenario (`coderd/autobuild/lifecycle_executor.go`).

If the new active version introduces required parameters that cannot be
resolved automatically (no defaults, no previous values), the build
fails at parameter validation before a provisioner job is created. In
that case, a clear error message tells the user to update and start the
workspace from the UI instead of surfacing a raw internal error.

On successful auto-update, the tool response includes
`updated_to_active_version`, `update_reason`, and a human-readable
`message` so the model can explain to the user what happened.

<img width="782" height="122" alt="image"
src="https://github.com/user-attachments/assets/289430d6-066e-41cf-bc97-cd013dcf717d"
/>

### Changes

- **`coderd/exp_chats.go`**: `chatStartWorkspace()` loads the template,
checks `RequireActiveVersion` via `AccessControlStore`, and pins the
build to the active version when required. New
`isChatStartWorkspaceManualUpdateRequiredError()` classifies parameter
validation failures from both the dynamic parameters path
(`DiagnosticError`) and the classic path (`ErrParameterValidation`
sentinel).
- **`coderd/wsbuilder/wsbuilder.go`**: New `ErrParameterValidation`
sentinel error, wrapped into the classic parameter validation
`BuildError` so callers can use `errors.Is` instead of string matching.
- **`coderd/x/chatd/chattool/startworkspace.go`**:
`waitForAgentAndRespond` now returns `map[string]any` instead of
`fantasy.ToolResponse`, letting the caller annotate the result (e.g.
auto-update metadata) before converting. Error handling for `StartFn`
checks for `httperror.Responder` errors to surface clean messages for
the manual-update case.
- **`coderd/x/chatd/chattool/startworkspace_test.go`**: Two new tests —
`StoppedWorkspaceReportsAutoUpdate` (verifies auto-update fields in
response) and `ManualUpdateRequired` (verifies clean error message
without internal wrapping).

### Follow-up

The manual-update error message could include a direct link to the
workspace settings page, but the chattool layer does not currently have
access to the deployment's access URL. Plumbing it through is
straightforward but out of scope for this fix.


Closes CODAGT-192
This commit is contained in:
Ethan
2026-04-17 00:16:37 +10:00
committed by GitHub
parent f07a33225e
commit 91b35a25ee
4 changed files with 191 additions and 13 deletions
+36
View File
@@ -34,6 +34,7 @@ import (
"github.com/coder/coder/v2/coderd/database/db2sdk"
"github.com/coder/coder/v2/coderd/database/dbauthz"
dbpubsub "github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/dynamicparameters"
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/externalauth/gitprovider"
"github.com/coder/coder/v2/coderd/httpapi"
@@ -47,6 +48,7 @@ import (
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/util/xjson"
"github.com/coder/coder/v2/coderd/workspaceapps"
"github.com/coder/coder/v2/coderd/wsbuilder"
"github.com/coder/coder/v2/coderd/x/chatd"
"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
"github.com/coder/coder/v2/coderd/x/gitsync"
@@ -2656,6 +2658,25 @@ func (api *API) chatStartWorkspace(
return codersdk.WorkspaceBuild{}, xerrors.Errorf("get workspace: %w", err)
}
updatedToActiveVersion := false
if req.Transition == codersdk.WorkspaceTransitionStart {
template, err := api.Database.GetTemplateByID(ctx, workspace.TemplateID)
if err != nil {
return codersdk.WorkspaceBuild{}, xerrors.Errorf("get template: %w", err)
}
templateAccessControl := (*(api.AccessControlStore.Load())).GetTemplateAccessControl(template)
if templateAccessControl.RequireActiveVersion {
latestBuild, err := api.Database.GetLatestWorkspaceBuildByWorkspaceID(ctx, workspace.ID)
if err != nil {
return codersdk.WorkspaceBuild{}, xerrors.Errorf("get latest workspace build: %w", err)
}
updatedToActiveVersion = latestBuild.TemplateVersionID != template.ActiveVersionID
req.TemplateVersionID = template.ActiveVersionID
}
}
// Build a synthetic API key so postWorkspaceBuildsInternal can
// record the correct initiator.
syntheticKey := database.APIKey{
@@ -2675,12 +2696,27 @@ func (api *API) chatStartWorkspace(
audit.WorkspaceBuildBaggage{},
)
if err != nil {
if updatedToActiveVersion && isChatStartWorkspaceManualUpdateRequiredError(err) {
return codersdk.WorkspaceBuild{}, httperror.NewResponseError(http.StatusBadRequest, codersdk.Response{
Message: "The workspace needs to be updated before it can start because the template requires the active version, and the newer version has parameter changes that must be chosen manually. Please update and start the workspace from the UI.",
Detail: err.Error(),
})
}
return codersdk.WorkspaceBuild{}, xerrors.Errorf("create workspace build: %w", err)
}
return apiBuild, nil
}
func isChatStartWorkspaceManualUpdateRequiredError(err error) bool {
var diagnosticErr *dynamicparameters.DiagnosticError
if errors.As(err, &diagnosticErr) {
return true
}
return errors.Is(err, wsbuilder.ErrParameterValidation)
}
func chatWorkspaceAuditStatus(err error) int {
if responder, ok := httperror.IsResponder(err); ok {
status, _ := responder.Response()
+7 -1
View File
@@ -6,6 +6,7 @@ import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"net/http"
"time"
@@ -262,6 +263,11 @@ func (b Builder) BuildMetrics(m *Metrics) Builder {
return b
}
// ErrParameterValidation is a sentinel indicating that a workspace
// build failed because a template-version parameter could not be
// validated (missing required value, immutable change, etc.).
var ErrParameterValidation = xerrors.New("parameter validation failed")
type BuildError struct {
// Status is a suitable HTTP status code
Status int
@@ -929,7 +935,7 @@ func (b *Builder) getClassicParameters() (names, values []string, err error) {
// At this point, we've queried all the data we need from the database,
// so the only errors are problems with the request (missing data, failed
// validation, immutable parameters, etc.)
return nil, nil, BuildError{http.StatusBadRequest, fmt.Sprintf("Unable to validate parameter %q", templateVersionParameter.Name), err}
return nil, nil, BuildError{http.StatusBadRequest, fmt.Sprintf("Unable to validate parameter %q", templateVersionParameter.Name), errors.Join(ErrParameterValidation, err)}
}
names = append(names, templateVersionParameter.Name)
+35 -12
View File
@@ -10,6 +10,7 @@ import (
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/httpapi/httperror"
"github.com/coder/coder/v2/coderd/x/chatd/internal/agentselect"
"github.com/coder/coder/v2/codersdk"
)
@@ -134,7 +135,7 @@ func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
build.ID,
)), nil
}
resp, respErr := waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, build.ID)
result := waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, build.ID)
// Re-fire after the agent is fully ready so
// callers can load instruction files (AGENTS.md).
// This must happen after waitForAgentAndRespond —
@@ -144,12 +145,12 @@ func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
options.OnChatUpdated(latest)
}
}
return resp, respErr
return toolResponse(result), nil
case database.ProvisionerJobStatusSucceeded:
// If the latest successful build is a start
// transition, the workspace should be running.
if build.Transition == database.WorkspaceTransitionStart {
return waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, uuid.Nil)
return toolResponse(waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, uuid.Nil)), nil
}
// Otherwise it is stopped (or deleted) — proceed
// to start it below.
@@ -168,6 +169,12 @@ func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
Transition: codersdk.WorkspaceTransitionStart,
})
if err != nil {
if responseErr, ok := httperror.IsResponder(err); ok {
_, resp := responseErr.Response()
if resp.Message != "" {
return fantasy.NewTextErrorResponse(resp.Message), nil
}
}
return fantasy.NewTextErrorResponse(
xerrors.Errorf("start workspace: %w", err).Error(),
), nil
@@ -200,7 +207,19 @@ func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
)), nil
}
resp, respErr := waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, startBuild.ID)
result := waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws, startBuild.ID)
// If the template version changed, annotate the
// response so the model knows an auto-update
// occurred.
if startBuild.TemplateVersionID != uuid.Nil &&
build.TemplateVersionID != uuid.Nil &&
startBuild.TemplateVersionID != build.TemplateVersionID {
result["updated_to_active_version"] = true
result["update_reason"] = "template requires active versions"
result["message"] = "Workspace started and was updated to the active template version because the template requires active versions."
}
// Re-fire after the agent is fully ready so
// callers can load instruction files (AGENTS.md).
// This must happen after waitForAgentAndRespond —
@@ -210,24 +229,28 @@ func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
options.OnChatUpdated(latest)
}
}
return resp, respErr
return toolResponse(result), nil
})
}
// waitForAgentAndRespond selects the chat agent from the workspace's
// latest build, waits for it to become reachable, and returns a
// success response. When buildID is non-zero, it is included in the
// response so the frontend can fetch historical build logs. Pass
// result map. When buildID is non-zero, it is included in the
// result so the frontend can fetch historical build logs. Pass
// uuid.Nil when no build was triggered (e.g. workspace already
// running); the response will include no_build: true so the
// running); the result will include no_build: true so the
// frontend can suppress the build-log section.
//
// The caller is responsible for converting the returned map to a
// fantasy.ToolResponse via toolResponse(), and may add extra
// fields before doing so.
func waitForAgentAndRespond(
ctx context.Context,
db database.Store,
agentConnFn AgentConnFunc,
ws database.Workspace,
buildID uuid.UUID,
) (fantasy.ToolResponse, error) {
) map[string]any {
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, ws.ID)
if err != nil || len(agents) == 0 {
// Workspace started but no agent found - still report
@@ -239,7 +262,7 @@ func waitForAgentAndRespond(
}
setBuildID(result, buildID)
setNoBuild(result, buildID)
return toolResponse(result), nil
return result
}
selected, err := agentselect.FindChatAgent(agents)
@@ -252,7 +275,7 @@ func waitForAgentAndRespond(
}
setBuildID(result, buildID)
setNoBuild(result, buildID)
return toolResponse(result), nil
return result
}
result := map[string]any{
@@ -264,5 +287,5 @@ func waitForAgentAndRespond(
for k, v := range waitForAgentReady(ctx, db, selected.ID, agentConnFn) {
result[k] = v
}
return toolResponse(result), nil
return result
}
@@ -18,6 +18,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbfake"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/httpapi/httperror"
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
@@ -407,6 +408,118 @@ func TestStartWorkspace(t *testing.T) {
require.Nil(t, result["no_build"], "no_build should not be set when a build was triggered")
})
t.Run("StoppedWorkspaceReportsAutoUpdate", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
db, _ := dbtestutil.NewDB(t)
user := dbgen.User(t, db, database.User{})
modelCfg := seedModelConfig(ctx, t, db, user.ID)
org := dbgen.Organization(t, db, database.Organization{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
UserID: user.ID,
OrganizationID: org.ID,
})
wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: user.ID,
OrganizationID: org.ID,
}).Seed(database.WorkspaceBuild{
Transition: database.WorkspaceTransitionStop,
}).Do()
ws := wsResp.Workspace
chat, err := db.InsertChat(ctx, database.InsertChatParams{
OrganizationID: org.ID,
Status: database.ChatStatusWaiting,
OwnerID: user.ID,
WorkspaceID: uuid.NullUUID{UUID: ws.ID, Valid: true},
LastModelConfigID: modelCfg.ID,
Title: "test-stopped-workspace-auto-update",
})
require.NoError(t, err)
startFn := func(_ context.Context, _ uuid.UUID, wsID uuid.UUID, req codersdk.CreateWorkspaceBuildRequest) (codersdk.WorkspaceBuild, error) {
require.Equal(t, codersdk.WorkspaceTransitionStart, req.Transition)
require.Equal(t, ws.ID, wsID)
buildResp := dbfake.WorkspaceBuild(t, db, ws).Seed(database.WorkspaceBuild{
Transition: database.WorkspaceTransitionStart,
BuildNumber: 2,
}).Do()
return codersdk.WorkspaceBuild{
ID: buildResp.Build.ID,
TemplateVersionID: uuid.New(),
}, nil
}
tool := chattool.StartWorkspace(chattool.StartWorkspaceOptions{
DB: db,
OwnerID: user.ID,
ChatID: chat.ID,
StartFn: startFn,
AgentConnFn: func(_ context.Context, _ uuid.UUID) (workspacesdk.AgentConn, func(), error) {
return nil, func() {}, nil
},
WorkspaceMu: &sync.Mutex{},
})
resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"})
require.NoError(t, err)
var result map[string]any
require.NoError(t, json.Unmarshal([]byte(resp.Content), &result))
require.Equal(t, true, result["updated_to_active_version"])
require.Equal(t, "template requires active versions", result["update_reason"])
require.Contains(t, result["message"], "updated to the active template version")
})
t.Run("ManualUpdateRequired", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
db, _ := dbtestutil.NewDB(t)
user := dbgen.User(t, db, database.User{})
modelCfg := seedModelConfig(ctx, t, db, user.ID)
org := dbgen.Organization(t, db, database.Organization{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
UserID: user.ID,
OrganizationID: org.ID,
})
wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: user.ID,
OrganizationID: org.ID,
}).Seed(database.WorkspaceBuild{
Transition: database.WorkspaceTransitionStop,
}).Do()
ws := wsResp.Workspace
chat, err := db.InsertChat(ctx, database.InsertChatParams{
OrganizationID: org.ID,
Status: database.ChatStatusWaiting,
OwnerID: user.ID,
WorkspaceID: uuid.NullUUID{UUID: ws.ID, Valid: true},
LastModelConfigID: modelCfg.ID,
Title: "test-start-workspace-manual-update-required",
})
require.NoError(t, err)
tool := chattool.StartWorkspace(chattool.StartWorkspaceOptions{
DB: db,
OwnerID: user.ID,
ChatID: chat.ID,
StartFn: func(_ context.Context, _ uuid.UUID, _ uuid.UUID, _ codersdk.CreateWorkspaceBuildRequest) (codersdk.WorkspaceBuild, error) {
return codersdk.WorkspaceBuild{}, httperror.NewResponseError(400, codersdk.Response{Message: "The workspace needs to be updated before it can start because the template requires the active version, and the newer version has parameter changes that must be chosen manually. Please update and start the workspace from the UI."})
},
WorkspaceMu: &sync.Mutex{},
})
resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"})
require.NoError(t, err)
require.True(t, resp.IsError)
require.Contains(t, resp.Content, "template requires the active version")
require.Contains(t, resp.Content, "must be chosen manually")
require.NotContains(t, resp.Content, "start workspace:")
})
t.Run("InProgressBuild", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)