From ef0151601e4a282190516b2c064b3ca3c73fbd0c Mon Sep 17 00:00:00 2001 From: Ethan <39577870+ethanndickson@users.noreply.github.com> Date: Thu, 7 May 2026 15:01:58 +1000 Subject: [PATCH] feat: report insufficient quota build failures in chat tools (#24956) ## Summary When a workspace build fails because the user is over their group quota, the chat tools currently surface the failure as a bare `"workspace build failed: insufficient quota"` string with no machine-readable error code and no visibility into the user's current usage. Agents and the UI cannot distinguish quota failures from any other Terraform error, so users see an opaque message and have no clear path to recovery. This PR tags quota failures with a typed error code at the source and propagates it through the chat tool layer so callers can react to it explicitly. Relates to CODAGT-20 ## Changes **Provisioner runner** - Add `InsufficientQuotaErrorCode = "INSUFFICIENT_QUOTA"` and set it explicitly at the `commitQuota` failure site via a new `failedWorkspaceBuildfCode` helper, so `provisioner_jobs.error_code` is populated only on the genuine quota path. The substring matcher used for externally produced sentinels (e.g. `"missing parameter"`, `"required template variables"`) is intentionally not extended; provider errors that happen to mention "insufficient quota" stay classified as generic build failures. **SDK and API contract** - Add `JobErrorCodeInsufficientQuota` and a `JobIsInsufficientQuotaErrorCode` helper to `codersdk`. - Extend the swagger `enums` tag on `ProvisionerJob.ErrorCode` to include `INSUFFICIENT_QUOTA`. - Regenerate `coderd/apidoc`, `docs/reference/api/*`, and `site/src/api/typesGenerated.ts`. **chattool create_workspace / start_workspace** - `waitForBuild` now returns a typed `*workspaceBuildError` carrying both the message and the `JobErrorCode`, instead of a bare error string. 
- New `quotaerror.go` introduces a structured `quotaErrorResult` (with `error_code`, `title`, `message`, `build_id`, and optional `quota`) and a best-effort `workspaceQuotaDetails` lookup that wraps owner authorization internally and fetches `credits_consumed` and `budget` from the database. Quota lookup failures (including authorization failures) never block the failure payload. - On quota-coded build failures, both `create_workspace` and `start_workspace` now return the structured response (with the recovery guidance inlined into `message`) instead of the bare `"insufficient quota"` string. This applies to all three failure paths: post-creation, an in-progress existing build, and a freshly triggered start build. Non-quota build failures continue to use the existing `buildToolResponse` / `newBuildError` path. - Owner authorization is wrapped only on the call sites that need it (the `CreateFn` and `StartFn` invocations and the quota-detail lookup), so idempotent fast paths (already running, already in progress, existing-workspace early returns) do not pay for an extra RBAC round-trip or fail when role lookup is transient. ## Out of scope - No changes to quota math, allowances, or bypass behavior. - No automatic retries. - No new quota-inspection tools and no changes to MCP `coder_create_workspace` (which returns immediately and never observed the build outcome here). - No frontend UI changes; those will land in a follow-up PR that consumes the new `INSUFFICIENT_QUOTA` code. 
--- coderd/apidoc/docs.go | 9 +- coderd/apidoc/swagger.json | 6 +- coderd/x/chatd/chattool/chattool.go | 18 +- coderd/x/chatd/chattool/createworkspace.go | 54 +++- .../x/chatd/chattool/createworkspace_test.go | 277 +++++++++++++++++- coderd/x/chatd/chattool/quotaerror.go | 192 ++++++++++++ coderd/x/chatd/chattool/startworkspace.go | 24 +- .../x/chatd/chattool/startworkspace_test.go | 62 ++-- codersdk/provisionerdaemons.go | 10 +- docs/reference/api/builds.md | 2 +- docs/reference/api/organizations.md | 2 +- docs/reference/api/schemas.md | 8 +- docs/reference/api/templates.md | 4 +- provisionerd/provisionerd_test.go | 4 + provisionerd/runner/runner.go | 22 +- provisionerd/runner/runner_test.go | 20 ++ site/src/api/typesGenerated.ts | 7 +- 17 files changed, 655 insertions(+), 66 deletions(-) create mode 100644 coderd/x/chatd/chattool/quotaerror.go create mode 100644 provisionerd/runner/runner_test.go diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index e6dd2eefe8..4f6323e4b9 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -18670,10 +18670,12 @@ const docTemplate = `{ "codersdk.JobErrorCode": { "type": "string", "enum": [ - "REQUIRED_TEMPLATE_VARIABLES" + "REQUIRED_TEMPLATE_VARIABLES", + "INSUFFICIENT_QUOTA" ], "x-enum-varnames": [ - "RequiredTemplateVariables" + "RequiredTemplateVariables", + "InsufficientQuota" ] }, "codersdk.License": { @@ -20839,7 +20841,8 @@ const docTemplate = `{ }, "error_code": { "enum": [ - "REQUIRED_TEMPLATE_VARIABLES" + "REQUIRED_TEMPLATE_VARIABLES", + "INSUFFICIENT_QUOTA" ], "allOf": [ { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 2c9eac6b40..77c0817ec1 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -17013,8 +17013,8 @@ }, "codersdk.JobErrorCode": { "type": "string", - "enum": ["REQUIRED_TEMPLATE_VARIABLES"], - "x-enum-varnames": ["RequiredTemplateVariables"] + "enum": ["REQUIRED_TEMPLATE_VARIABLES", "INSUFFICIENT_QUOTA"], + "x-enum-varnames": 
["RequiredTemplateVariables", "InsufficientQuota"] }, "codersdk.License": { "type": "object", @@ -19102,7 +19102,7 @@ "type": "string" }, "error_code": { - "enum": ["REQUIRED_TEMPLATE_VARIABLES"], + "enum": ["REQUIRED_TEMPLATE_VARIABLES", "INSUFFICIENT_QUOTA"], "allOf": [ { "$ref": "#/definitions/codersdk.JobErrorCode" diff --git a/coderd/x/chatd/chattool/chattool.go b/coderd/x/chatd/chattool/chattool.go index b4eef8a50c..69b65f3e10 100644 --- a/coderd/x/chatd/chattool/chattool.go +++ b/coderd/x/chatd/chattool/chattool.go @@ -10,10 +10,7 @@ import ( "github.com/coder/coder/v2/codersdk" ) -// toolResponse builds a fantasy.ToolResponse from a JSON-serializable -// result map. The map constraint ensures all tool results serialize -// to JSON objects so the frontend can safely parse them. -func toolResponse(result map[string]any) fantasy.ToolResponse { +func marshalToolResponse(result any) fantasy.ToolResponse { data, err := json.Marshal(result) if err != nil { return fantasy.NewTextResponse("{}") @@ -21,15 +18,18 @@ func toolResponse(result map[string]any) fantasy.ToolResponse { return fantasy.NewTextResponse(string(data)) } +// toolResponse builds a fantasy.ToolResponse from a JSON-serializable +// result map. The map constraint ensures all tool results serialize +// to JSON objects so the frontend can safely parse them. +func toolResponse(result map[string]any) fantasy.ToolResponse { + return marshalToolResponse(result) +} + // buildToolResponse marshals a buildErrorResult into a tool response. // Separate from toolResponse to keep the map[string]any constraint // on the general helper while allowing typed error structs. 
func buildToolResponse(r buildErrorResult) fantasy.ToolResponse { - data, err := json.Marshal(r) - if err != nil { - return fantasy.NewTextResponse("{}") - } - return fantasy.NewTextResponse(string(data)) + return marshalToolResponse(r) } // responseErrorResult converts a codersdk.Response into a structured diff --git a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index 4a95a6e639..5d96731247 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -125,18 +125,28 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option defer options.WorkspaceMu.Unlock() } + ownerID := options.OwnerID + // Check for an existing workspace on the chat. check := options.checkExistingWorkspace(ctx, db, chatID) + if check.BuildErr != nil { + return buildFailureToolResponse( + ctx, + options.Logger, + db, + ownerID, + organizationID, + check.BuildAction, + check.BuildID, + check.BuildErr, + ), nil + } if check.Err != nil { - if check.FailedBuildID != uuid.Nil { - return buildToolResponse(newBuildError(check.Err.Error(), check.FailedBuildID)), nil - } return fantasy.NewTextErrorResponse(check.Err.Error()), nil } if check.Done { return toolResponse(check.Result), nil } - ownerID := options.OwnerID // Set up dbauthz context for DB lookups. 
ownerCtx, ownerErr := asOwner(ctx, db, ownerID) @@ -260,10 +270,16 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option buildID := workspace.LatestBuild.ID if buildID != uuid.Nil { if err := waitForBuild(ctx, db, buildID); err != nil { - return buildToolResponse(newBuildError( - xerrors.Errorf("workspace build failed: %w", err).Error(), + return buildFailureToolResponse( + ctx, + options.Logger, + db, + ownerID, + organizationID, + buildFailureActionCreate, buildID, - )), nil + xerrors.Errorf("workspace build failed: %w", err), + ), nil } } @@ -323,9 +339,12 @@ type existingWorkspaceResult struct { Result map[string]any // Done indicates the caller should return early. Done bool - // FailedBuildID is set when waitForBuild failed, so the - // caller can include it in a structured error response. - FailedBuildID uuid.UUID + // BuildAction, BuildID, and BuildErr are set together when + // waitForBuild failed, so the caller can render the build + // failure through the shared response path. + BuildAction buildFailureAction + BuildID uuid.UUID + BuildErr error // Err is non-nil when the check itself failed. 
Err error } @@ -397,9 +416,14 @@ func (o CreateWorkspaceOptions) checkExistingWorkspace( o.OnChatUpdated(updatedChat) } if err := waitForBuild(ctx, db, build.ID); err != nil { + action := buildFailureActionCreate + if build.Transition == database.WorkspaceTransitionStart { + action = buildFailureActionStart + } return existingWorkspaceResult{ - FailedBuildID: build.ID, - Err: xerrors.Errorf("existing workspace build failed: %w", err), + BuildAction: action, + BuildID: build.ID, + BuildErr: xerrors.Errorf("existing workspace build failed: %w", err), } } result := map[string]any{ @@ -517,7 +541,11 @@ func waitForBuild( if job.Error.Valid { errMsg = job.Error.String } - return xerrors.New(errMsg) + var code codersdk.JobErrorCode + if job.ErrorCode.Valid { + code = codersdk.JobErrorCode(job.ErrorCode.String) + } + return &workspaceBuildError{message: errMsg, code: code} case database.ProvisionerJobStatusCanceled: return xerrors.New("build was canceled") case database.ProvisionerJobStatusPending, diff --git a/coderd/x/chatd/chattool/createworkspace_test.go b/coderd/x/chatd/chattool/createworkspace_test.go index f7f9e7e0d3..7557bf4c01 100644 --- a/coderd/x/chatd/chattool/createworkspace_test.go +++ b/coderd/x/chatd/chattool/createworkspace_test.go @@ -17,6 +17,7 @@ import ( "cdr.dev/slog/v3/sloggers/slogtest" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbmock" "github.com/coder/coder/v2/coderd/httpapi/httperror" "github.com/coder/coder/v2/coderd/util/ptr" @@ -413,10 +414,261 @@ func TestCreateWorkspace_PostCreationBuildFailure(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) require.Contains(t, result["error"], "workspace build failed") require.Equal(t, buildID.String(), result["build_id"]) + require.NotContains(t, result, "error_code", + "generic build failures must not surface a quota error_code") + require.NotContains(t, result, "quota", + 
"generic build failures must not surface quota details") require.False(t, resp.IsError, "buildToolResponse must not set IsError; chatprompt strips structured fields from error responses") } +func TestCreateWorkspace_PostCreationQuotaFailure(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + db := dbmock.NewMockStore(ctrl) + + ownerID := uuid.New() + orgID := uuid.New() + chatID := uuid.New() + templateID := uuid.New() + workspaceID := uuid.New() + jobID := uuid.New() + buildID := uuid.New() + + db.EXPECT(). + GetChatByID(gomock.Any(), chatID). + Return(database.Chat{ID: chatID}, nil) + + db.EXPECT(). + UpdateChatWorkspaceBinding(gomock.Any(), gomock.Any()). + Return(database.Chat{ID: chatID}, nil) + + db.EXPECT(). + GetAuthorizationUserRoles(gomock.Any(), ownerID). + Return(database.GetAuthorizationUserRolesRow{ + ID: ownerID, + Roles: []string{}, + Groups: []string{}, + Status: database.UserStatusActive, + }, nil) + + db.EXPECT(). + GetTemplateByID(gomock.Any(), templateID). + Return(database.Template{ + ID: templateID, + OrganizationID: orgID, + }, nil) + + db.EXPECT(). + GetChatWorkspaceTTL(gomock.Any()). + Return("0s", nil) + + db.EXPECT(). + GetWorkspaceBuildByID(gomock.Any(), buildID). + Return(database.WorkspaceBuild{ + ID: buildID, + WorkspaceID: workspaceID, + JobID: jobID, + }, nil) + + db.EXPECT(). + GetProvisionerJobByID(gomock.Any(), jobID). + Return(database.ProvisionerJob{ + ID: jobID, + JobStatus: database.ProvisionerJobStatusFailed, + Error: sql.NullString{String: "insufficient quota", Valid: true}, + ErrorCode: sql.NullString{ + String: string(codersdk.InsufficientQuota), + Valid: true, + }, + }, nil) + + db.EXPECT(). + GetQuotaConsumedForUser(gomock.Any(), database.GetQuotaConsumedForUserParams{ + OwnerID: ownerID, + OrganizationID: orgID, + }). + Return(int64(40), nil) + db.EXPECT(). + GetQuotaAllowanceForUser(gomock.Any(), database.GetQuotaAllowanceForUserParams{ + UserID: ownerID, + OrganizationID: orgID, + }). 
+ Return(int64(40), nil) + + createFn := func(_ context.Context, _ uuid.UUID, req codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) { + return codersdk.Workspace{ + ID: workspaceID, + Name: req.Name, + OwnerName: "testuser", + LatestBuild: codersdk.WorkspaceBuild{ + ID: buildID, + }, + }, nil + } + + tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{ + OwnerID: ownerID, + CreateFn: createFn, + WorkspaceMu: &sync.Mutex{}, + Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}), + }) + + input := fmt.Sprintf(`{"template_id":%q,"name":"test-quota-fail"}`, templateID.String()) + resp, err := tool.Run(context.Background(), fantasy.ToolCall{ + ID: "call-1", + Name: "create_workspace", + Input: input, + }) + require.NoError(t, err) + + var result map[string]any + require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) + require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"]) + require.Equal(t, "Workspace quota reached", result["title"]) + require.Contains(t, result["error"], "workspace build failed") + require.Contains(t, result["message"], "workspace quota is full") + require.Contains(t, result["message"], "Delete a workspace") + require.Contains(t, result["message"], "raise your group quota allowance") + require.NotContains(t, result, "next_steps") + require.Equal(t, buildID.String(), result["build_id"]) + quota, ok := result["quota"].(map[string]any) + require.True(t, ok) + require.Equal(t, float64(40), quota["credits_consumed"]) + require.Equal(t, float64(40), quota["budget"]) + require.False(t, resp.IsError, + "quota responses must not set IsError; chatprompt strips structured fields from error responses") +} + +func TestCreateWorkspace_ExistingBuildQuotaFailure(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + db := dbmock.NewMockStore(ctrl) + + ownerID := uuid.New() + orgID := uuid.New() + chatID := uuid.New() + templateID := uuid.New() + workspaceID := uuid.New() + jobID := 
uuid.New() + buildID := uuid.New() + + db.EXPECT(). + GetAuthorizationUserRoles(gomock.Any(), ownerID). + Return(database.GetAuthorizationUserRolesRow{ + ID: ownerID, + Roles: []string{}, + Groups: []string{}, + Status: database.UserStatusActive, + }, nil) + + db.EXPECT(). + GetChatByID(gomock.Any(), chatID). + Return(database.Chat{ + ID: chatID, + WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true}, + }, nil) + db.EXPECT(). + GetWorkspaceByID(gomock.Any(), workspaceID). + Return(database.Workspace{ + ID: workspaceID, + Name: "existing-quota-workspace", + OrganizationID: orgID, + }, nil) + db.EXPECT(). + GetLatestWorkspaceBuildByWorkspaceID(gomock.Any(), workspaceID). + Return(database.WorkspaceBuild{ + ID: buildID, + WorkspaceID: workspaceID, + JobID: jobID, + Transition: database.WorkspaceTransitionStart, + }, nil) + firstJob := db.EXPECT(). + GetProvisionerJobByID(gomock.Any(), jobID). + Return(database.ProvisionerJob{ + ID: jobID, + JobStatus: database.ProvisionerJobStatusRunning, + }, nil) + db.EXPECT(). + UpdateChatWorkspaceBinding(gomock.Any(), database.UpdateChatWorkspaceBindingParams{ + ID: chatID, + WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true}, + BuildID: uuid.NullUUID{UUID: buildID, Valid: true}, + AgentID: uuid.NullUUID{}, + }). + Return(database.Chat{ + ID: chatID, + WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true}, + }, nil) + db.EXPECT(). + GetWorkspaceBuildByID(gomock.Any(), buildID). + Return(database.WorkspaceBuild{ + ID: buildID, + WorkspaceID: workspaceID, + JobID: jobID, + Transition: database.WorkspaceTransitionStart, + }, nil) + db.EXPECT(). + GetProvisionerJobByID(gomock.Any(), jobID). + Return(database.ProvisionerJob{ + ID: jobID, + JobStatus: database.ProvisionerJobStatusFailed, + Error: sql.NullString{String: "insufficient quota", Valid: true}, + ErrorCode: sql.NullString{ + String: string(codersdk.InsufficientQuota), + Valid: true, + }, + }, nil). 
+ After(firstJob) + ownerCtx := ownerContextMatcher{ownerID: ownerID} + db.EXPECT(). + GetQuotaConsumedForUser(ownerCtx, database.GetQuotaConsumedForUserParams{ + OwnerID: ownerID, + OrganizationID: orgID, + }). + Return(int64(40), nil) + db.EXPECT(). + GetQuotaAllowanceForUser(ownerCtx, database.GetQuotaAllowanceForUserParams{ + UserID: ownerID, + OrganizationID: orgID, + }). + Return(int64(40), nil) + + tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{ + OwnerID: ownerID, + CreateFn: func(context.Context, uuid.UUID, codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) { + t.Fatal("CreateFn should not be called when an existing build is in progress") + return codersdk.Workspace{}, nil + }, + WorkspaceMu: &sync.Mutex{}, + Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}), + }) + + input := fmt.Sprintf(`{"template_id":%q,"name":"test-existing-quota-fail"}`, templateID.String()) + resp, err := tool.Run(context.Background(), fantasy.ToolCall{ + ID: "call-1", + Name: "create_workspace", + Input: input, + }) + require.NoError(t, err) + + var result map[string]any + require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) + require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"]) + require.Equal(t, "Workspace quota reached", result["title"]) + require.Contains(t, result["error"], "existing workspace build failed") + require.Contains(t, result["message"], "could not start this workspace") + require.Contains(t, result["message"], "workspace quota is full") + require.Equal(t, buildID.String(), result["build_id"]) + quota, ok := result["quota"].(map[string]any) + require.True(t, ok) + require.Equal(t, float64(40), quota["credits_consumed"]) + require.Equal(t, float64(40), quota["budget"]) + require.False(t, resp.IsError) +} + func TestCreateWorkspace_ResponderErrorPreservesStructuredFields(t *testing.T) { t.Parallel() @@ -907,9 +1159,11 @@ func TestCheckExistingWorkspace_InProgressBuildFailureReturnsBuildID(t 
*testing. options := testCheckExistingWorkspaceOptions(nil) check := options.checkExistingWorkspace(context.Background(), db, chatID) - require.Error(t, check.Err) - require.Contains(t, check.Err.Error(), "existing workspace build failed") - require.Equal(t, buildID, check.FailedBuildID) + require.Error(t, check.BuildErr) + require.Contains(t, check.BuildErr.Error(), "existing workspace build failed") + require.Equal(t, buildID, check.BuildID) + require.Equal(t, buildFailureActionStart, check.BuildAction) + require.NoError(t, check.Err) } func TestCheckExistingWorkspace_ConnectingAgentWaits(t *testing.T) { @@ -1186,6 +1440,23 @@ func testCheckExistingWorkspaceOptions( } } +type ownerContextMatcher struct { + ownerID uuid.UUID +} + +func (m ownerContextMatcher) Matches(v any) bool { + ctx, ok := v.(context.Context) + if !ok { + return false + } + actor, ok := dbauthz.ActorFromContext(ctx) + return ok && actor.ID == m.ownerID.String() +} + +func (ownerContextMatcher) String() string { + return "context with owner actor" +} + func expectExistingWorkspaceLookup( db *dbmock.MockStore, chatID uuid.UUID, diff --git a/coderd/x/chatd/chattool/quotaerror.go b/coderd/x/chatd/chattool/quotaerror.go new file mode 100644 index 0000000000..435c5a7592 --- /dev/null +++ b/coderd/x/chatd/chattool/quotaerror.go @@ -0,0 +1,192 @@ +package chattool + +import ( + "context" + "errors" + "fmt" + + "charm.land/fantasy" + "github.com/google/uuid" + + "cdr.dev/slog/v3" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/codersdk" +) + +const workspaceQuotaErrorTitle = "Workspace quota reached" + +type buildFailureAction string + +const ( + buildFailureActionCreate buildFailureAction = "create" + buildFailureActionStart buildFailureAction = "start" +) + +type workspaceBuildError struct { + message string + code codersdk.JobErrorCode +} + +func (e *workspaceBuildError) Error() string { + return e.message +} + +func 
buildErrorCode(err error) codersdk.JobErrorCode { + var buildErr *workspaceBuildError + if errors.As(err, &buildErr) { + return buildErr.code + } + return "" +} + +// quotaErrorResult is the structured response returned when a workspace +// build fails because the user's workspace quota is exhausted. +type quotaErrorResult struct { + ErrorCode codersdk.JobErrorCode `json:"error_code"` + // Error is the raw build failure string used for debugging and + // frontend error detection. + Error string `json:"error"` + // Title is a short user-facing summary. + Title string `json:"title"` + // Message explains the failure and inlines the recovery guidance + // the model should relay to the user. + Message string `json:"message"` + BuildID string `json:"build_id,omitempty"` + Quota *quotaErrorDetails `json:"quota,omitempty"` +} + +type quotaErrorDetails struct { + CreditsConsumed int64 `json:"credits_consumed"` + Budget int64 `json:"budget"` +} + +func newQuotaError( + msg string, + buildID uuid.UUID, + action buildFailureAction, + quota *quotaErrorDetails, +) quotaErrorResult { + verb := "create" + if action == buildFailureActionStart { + verb = "start" + } + message := fmt.Sprintf( + "Coder could not %s this workspace because your workspace quota is "+ + "full. 
Delete a workspace you no longer need to free quota, or "+ + "ask an administrator to raise your group quota allowance.", + verb, + ) + + r := quotaErrorResult{ + ErrorCode: codersdk.InsufficientQuota, + Error: msg, + Title: workspaceQuotaErrorTitle, + Message: message, + Quota: quota, + } + if buildID != uuid.Nil { + r.BuildID = buildID.String() + } + return r +} + +func workspaceQuotaDetails( + ctx context.Context, + logger slog.Logger, + db database.Store, + ownerID uuid.UUID, + organizationID uuid.UUID, +) *quotaErrorDetails { + if db == nil || ownerID == uuid.Nil || organizationID == uuid.Nil { + return nil + } + + quotaCtx := ctx + if actor, ok := dbauthz.ActorFromContext(ctx); !ok || actor.ID != ownerID.String() { + ownerCtx, err := asOwner(ctx, db, ownerID) + if err != nil { + logger.Debug(ctx, "failed to load owner authorization for quota lookup", + slog.F("owner_id", ownerID), + slog.F("organization_id", organizationID), + slog.Error(err), + ) + return nil + } + quotaCtx = ownerCtx + } + + consumed, err := db.GetQuotaConsumedForUser(quotaCtx, database.GetQuotaConsumedForUserParams{ + OwnerID: ownerID, + OrganizationID: organizationID, + }) + if err != nil { + logger.Debug(ctx, "failed to load consumed workspace quota", + slog.F("owner_id", ownerID), + slog.F("organization_id", organizationID), + slog.Error(err), + ) + return nil + } + budget, err := db.GetQuotaAllowanceForUser(quotaCtx, database.GetQuotaAllowanceForUserParams{ + UserID: ownerID, + OrganizationID: organizationID, + }) + if err != nil { + logger.Debug(ctx, "failed to load workspace quota allowance", + slog.F("owner_id", ownerID), + slog.F("organization_id", organizationID), + slog.Error(err), + ) + return nil + } + return &quotaErrorDetails{ + CreditsConsumed: consumed, + Budget: budget, + } +} + +func quotaErrorToolResponse( + ctx context.Context, + logger slog.Logger, + db database.Store, + ownerID uuid.UUID, + organizationID uuid.UUID, + msg string, + buildID uuid.UUID, + action 
buildFailureAction, +) fantasy.ToolResponse { + quota := workspaceQuotaDetails(ctx, logger, db, ownerID, organizationID) + return marshalToolResponse(newQuotaError(msg, buildID, action, quota)) +} + +// buildFailureToolResponse keeps build failures as JSON carried in a normal +// text tool response. The chatprompt pipeline flattens IsError responses into +// a single string and drops structured fields, so quota and generic build +// failures both keep IsError false and let the frontend detect failures via +// the "error" key. +func buildFailureToolResponse( + ctx context.Context, + logger slog.Logger, + db database.Store, + ownerID uuid.UUID, + organizationID uuid.UUID, + action buildFailureAction, + buildID uuid.UUID, + err error, +) fantasy.ToolResponse { + msg := err.Error() + if codersdk.JobIsInsufficientQuotaErrorCode(buildErrorCode(err)) { + return quotaErrorToolResponse( + ctx, + logger, + db, + ownerID, + organizationID, + msg, + buildID, + action, + ) + } + return buildToolResponse(newBuildError(msg, buildID)) +} diff --git a/coderd/x/chatd/chattool/startworkspace.go b/coderd/x/chatd/chattool/startworkspace.go index 332beaa128..af388c2f2b 100644 --- a/coderd/x/chatd/chattool/startworkspace.go +++ b/coderd/x/chatd/chattool/startworkspace.go @@ -131,10 +131,16 @@ func StartWorkspace(db database.Store, chatID uuid.UUID, options StartWorkspaceO // viewer. The fantasy/chatprompt pipeline discards structured // fields from IsError content. // The frontend detects errors via the "error" key instead. 
- return buildToolResponse(newBuildError( - xerrors.Errorf("waiting for in-progress build: %w", err).Error(), + return buildFailureToolResponse( + ctx, + options.Logger, + db, + options.OwnerID, + ws.OrganizationID, + buildFailureActionStart, build.ID, - )), nil + xerrors.Errorf("waiting for in-progress build: %w", err), + ), nil } result := waitForAgentAndRespond(ctx, db, options.AgentConnFn, ws, build.ID) // Re-fire after the agent is fully ready so @@ -212,10 +218,16 @@ func StartWorkspace(db database.Store, chatID uuid.UUID, options StartWorkspaceO options.OnChatUpdated(updatedChat) } if err := waitForBuild(ctx, db, startBuild.ID); err != nil { - return buildToolResponse(newBuildError( - xerrors.Errorf("workspace start build failed: %w", err).Error(), + return buildFailureToolResponse( + ctx, + options.Logger, + db, + options.OwnerID, + ws.OrganizationID, + buildFailureActionStart, startBuild.ID, - )), nil + xerrors.Errorf("workspace start build failed: %w", err), + ), nil } result := waitForAgentAndRespond(ctx, db, options.AgentConnFn, ws, startBuild.ID) diff --git a/coderd/x/chatd/chattool/startworkspace_test.go b/coderd/x/chatd/chattool/startworkspace_test.go index bf1e1f966f..8955760e22 100644 --- a/coderd/x/chatd/chattool/startworkspace_test.go +++ b/coderd/x/chatd/chattool/startworkspace_test.go @@ -11,14 +11,17 @@ import ( "charm.land/fantasy" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "cdr.dev/slog/v3/sloggers/slogtest" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbfake" "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/httpapi/httperror" + "github.com/coder/coder/v2/coderd/rbac" "github.com/coder/coder/v2/coderd/x/chatd/chattool" "github.com/coder/coder/v2/codersdk" 
"github.com/coder/coder/v2/codersdk/workspacesdk" @@ -706,7 +709,7 @@ func TestStartWorkspace(t *testing.T) { require.True(t, onChatUpdatedCalled.Load(), "OnChatUpdated should be called to notify frontend of build ID") }) - t.Run("FailedBuild", func(t *testing.T) { + t.Run("FailedBuildQuota", func(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitLong) @@ -714,17 +717,18 @@ func TestStartWorkspace(t *testing.T) { user := dbgen.User(t, db, database.User{}) modelCfg := seedModelConfig(t, db) - org := dbgen.Organization(t, db, database.Organization{}) - _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ - UserID: user.ID, - OrganizationID: org.ID, - }) + orgResp := dbfake.Organization(t, db). + EveryoneAllowance(40). + Members(user). + Do() + org := orgResp.Org // Create a workspace with a build that is still running. wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{ OwnerID: user.ID, OrganizationID: org.ID, }).Seed(database.WorkspaceBuild{ Transition: database.WorkspaceTransitionStart, + DailyCost: 40, }).Starting().Do() ws := wsResp.Workspace @@ -736,8 +740,14 @@ func TestStartWorkspace(t *testing.T) { Title: "test-failed-build", }) + authzDB := dbauthz.New( + db, + rbac.NewStrictCachingAuthorizer(prometheus.NewRegistry()), + slogtest.Make(t, nil), + testAccessControlStorePointer(), + ) jobRead := make(chan struct{}, 1) - wrappedDB := &jobInterceptStore{Store: db, jobRead: jobRead} + wrappedDB := &jobInterceptStore{Store: authzDB, jobRead: jobRead} tool := chattool.StartWorkspace(wrappedDB, chat.ID, chattool.StartWorkspaceOptions{ OwnerID: user.ID, @@ -758,7 +768,10 @@ func TestStartWorkspace(t *testing.T) { } done := make(chan toolResult, 1) go func() { - resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"}) + resp, err := tool.Run( + dbauthz.AsChatd(ctx), + fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"}, + ) done <- toolResult{resp, err} }() @@ -771,7 
+784,11 @@ func TestStartWorkspace(t *testing.T) { ID: wsResp.Build.JobID, UpdatedAt: now, CompletedAt: sql.NullTime{Time: now, Valid: true}, - Error: sql.NullString{String: "terraform apply failed", Valid: true}, + Error: sql.NullString{String: "insufficient quota", Valid: true}, + ErrorCode: sql.NullString{ + String: string(codersdk.InsufficientQuota), + Valid: true, + }, })) res := testutil.TryReceive(ctx, t, done) @@ -780,9 +797,16 @@ func TestStartWorkspace(t *testing.T) { var result map[string]any require.NoError(t, json.Unmarshal([]byte(res.resp.Content), &result)) require.Contains(t, result["error"], "waiting for in-progress build") + require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"]) + require.Equal(t, "Workspace quota reached", result["title"]) + require.Contains(t, result["message"], "workspace quota is full") require.Equal(t, wsResp.Build.ID.String(), result["build_id"]) + quota, ok := result["quota"].(map[string]any) + require.True(t, ok) + require.Equal(t, float64(40), quota["credits_consumed"]) + require.Equal(t, float64(40), quota["budget"]) require.False(t, res.resp.IsError, - "buildToolResponse must not set IsError; chatprompt strips structured fields from error responses") + "quota responses must not set IsError; chatprompt strips structured fields from error responses") }) t.Run("StartTriggeredBuildFailure", func(t *testing.T) { @@ -797,7 +821,7 @@ func TestStartWorkspace(t *testing.T) { UserID: user.ID, OrganizationID: org.ID, }) - // Create a stopped workspace (succeeded stop transition). + // Create a stopped workspace with a succeeded stop transition. 
wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{ OwnerID: user.ID, OrganizationID: org.ID, @@ -811,10 +835,9 @@ func TestStartWorkspace(t *testing.T) { OwnerID: user.ID, WorkspaceID: uuid.NullUUID{UUID: ws.ID, Valid: true}, LastModelConfigID: modelCfg.ID, - Title: "test-start-triggered-build-failure", + Title: "test-start-triggered-generic-build-failure", }) - // StartFn creates a real in-progress build via dbfake. var startBuildJobID uuid.UUID var startBuildID uuid.UUID startFn := func(_ context.Context, _ uuid.UUID, wsID uuid.UUID, req codersdk.CreateWorkspaceBuildRequest) (codersdk.WorkspaceBuild, error) { @@ -852,13 +875,9 @@ func TestStartWorkspace(t *testing.T) { done <- toolResult{resp, err} }() - // First signal: initial GetProvisionerJobByID for the - // old stop build. Second signal: waitForBuild's first - // poll for the new start build. testutil.TryReceive(ctx, t, jobRead) testutil.TryReceive(ctx, t, jobRead) - // Fail the provisioner job. now := time.Now().UTC() require.NoError(t, db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{ ID: startBuildJobID, @@ -874,6 +893,8 @@ func TestStartWorkspace(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(res.resp.Content), &result)) require.Contains(t, result["error"], "workspace start build failed") require.Equal(t, startBuildID.String(), result["build_id"]) + require.NotContains(t, result, "error_code") + require.NotContains(t, result, "quota") require.False(t, res.resp.IsError, "buildToolResponse must not set IsError; chatprompt strips structured fields from error responses") }) @@ -952,3 +973,10 @@ func (s *jobInterceptStore) GetProvisionerJobByID(ctx context.Context, id uuid.U } return result, err } + +func testAccessControlStorePointer() *atomic.Pointer[dbauthz.AccessControlStore] { + acs := &atomic.Pointer[dbauthz.AccessControlStore]{} + var store dbauthz.AccessControlStore = dbauthz.AGPLTemplateAccessControlStore{} + acs.Store(&store) + 
return acs +} diff --git a/codersdk/provisionerdaemons.go b/codersdk/provisionerdaemons.go index cbc603d984..46238d7d48 100644 --- a/codersdk/provisionerdaemons.go +++ b/codersdk/provisionerdaemons.go @@ -167,6 +167,7 @@ type JobErrorCode string const ( RequiredTemplateVariables JobErrorCode = "REQUIRED_TEMPLATE_VARIABLES" + InsufficientQuota JobErrorCode = "INSUFFICIENT_QUOTA" ) // JobIsMissingParameterErrorCode returns whether the error is a missing parameter error. @@ -181,6 +182,13 @@ func JobIsMissingRequiredTemplateVariableErrorCode(code JobErrorCode) bool { return string(code) == runner.RequiredTemplateVariablesErrorCode } +// JobIsInsufficientQuotaErrorCode returns whether the error is an insufficient +// quota error. This can indicate to consumers that they should explain quota +// recovery options instead of treating the failure as a generic build error. +func JobIsInsufficientQuotaErrorCode(code JobErrorCode) bool { + return string(code) == runner.InsufficientQuotaErrorCode +} + // ProvisionerJob describes the job executed by the provisioning daemon. 
type ProvisionerJob struct { ID uuid.UUID `json:"id" format:"uuid" table:"id"` @@ -189,7 +197,7 @@ type ProvisionerJob struct { CompletedAt *time.Time `json:"completed_at,omitempty" format:"date-time" table:"completed at"` CanceledAt *time.Time `json:"canceled_at,omitempty" format:"date-time" table:"canceled at"` Error string `json:"error,omitempty" table:"error"` - ErrorCode JobErrorCode `json:"error_code,omitempty" enums:"REQUIRED_TEMPLATE_VARIABLES" table:"error code"` + ErrorCode JobErrorCode `json:"error_code,omitempty" enums:"REQUIRED_TEMPLATE_VARIABLES,INSUFFICIENT_QUOTA" table:"error code"` Status ProvisionerJobStatus `json:"status" enums:"pending,running,succeeded,canceling,canceled,failed" table:"status"` WorkerID *uuid.UUID `json:"worker_id,omitempty" format:"uuid" table:"worker id"` WorkerName string `json:"worker_name,omitempty" table:"worker name"` diff --git a/docs/reference/api/builds.md b/docs/reference/api/builds.md index 30e6a26d0a..00db92184d 100644 --- a/docs/reference/api/builds.md +++ b/docs/reference/api/builds.md @@ -1731,7 +1731,7 @@ Status Code **200** | Property | Value(s) | |------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` | +| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | | `workspace_build_transition` | `delete`, `start`, `stop` | | `status` | `canceled`, `canceling`, `connected`, `connecting`, `deleted`, `deleting`, `disconnected`, `exit_failure`, `failed`, `ok`, `pending`, `pipes_left_open`, `running`, `starting`, `stopped`, `stopping`, `succeeded`, `timed_out`, `timeout` | | `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` | diff --git a/docs/reference/api/organizations.md b/docs/reference/api/organizations.md 
index 63a7efcc4e..dbbe6b4fe5 100644 --- a/docs/reference/api/organizations.md +++ b/docs/reference/api/organizations.md @@ -388,7 +388,7 @@ Status Code **200** | Property | Value(s) | |------------------------------|--------------------------------------------------------------------------| -| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` | +| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | | `workspace_build_transition` | `delete`, `start`, `stop` | | `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` | | `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` | diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index 534003f5cd..976fba8186 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -7486,9 +7486,9 @@ Only certain features set these fields: - FeatureManagedAgentLimit| #### Enumerated Values -| Value(s) | -|-------------------------------| -| `REQUIRED_TEMPLATE_VARIABLES` | +| Value(s) | +|-----------------------------------------------------| +| `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | ## codersdk.License @@ -10007,7 +10007,7 @@ Only certain features set these fields: - FeatureManagedAgentLimit| | Property | Value(s) | |--------------|----------------------------------------------------------------------| -| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` | +| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | | `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` | ## codersdk.ProvisionerJobInput diff --git a/docs/reference/api/templates.md b/docs/reference/api/templates.md index ae9482eb58..37a19b3acf 100644 --- a/docs/reference/api/templates.md +++ b/docs/reference/api/templates.md @@ -1446,7 +1446,7 @@ Status Code **200** | Property | Value(s) | |------------------------------|--------------------------------------------------------------------------| -| `error_code` | 
`REQUIRED_TEMPLATE_VARIABLES` | +| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | | `workspace_build_transition` | `delete`, `start`, `stop` | | `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` | | `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` | @@ -1729,7 +1729,7 @@ Status Code **200** | Property | Value(s) | |------------------------------|--------------------------------------------------------------------------| -| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` | +| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` | | `workspace_build_transition` | `delete`, `start`, `stop` | | `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` | | `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` | diff --git a/provisionerd/provisionerd_test.go b/provisionerd/provisionerd_test.go index 4ac7553e80..c35e23608f 100644 --- a/provisionerd/provisionerd_test.go +++ b/provisionerd/provisionerd_test.go @@ -25,6 +25,7 @@ import ( "github.com/coder/coder/v2/codersdk/drpcsdk" "github.com/coder/coder/v2/provisionerd" "github.com/coder/coder/v2/provisionerd/proto" + "github.com/coder/coder/v2/provisionerd/runner" "github.com/coder/coder/v2/provisionersdk" sdkproto "github.com/coder/coder/v2/provisionersdk/proto" "github.com/coder/coder/v2/provisionersdk/tfpath" @@ -527,6 +528,7 @@ func TestProvisionerd(t *testing.T) { didComplete atomic.Bool didLog atomic.Bool didFail atomic.Bool + failedCode = atomic.NewString("") acq = newAcquireOne(t, &proto.AcquiredJob{ JobId: "test", Provisioner: "someprovisioner", @@ -561,6 +563,7 @@ func TestProvisionerd(t *testing.T) { }, failJob: func(ctx context.Context, job *proto.FailedJob) (*proto.Empty, error) { didFail.Store(true) + failedCode.Store(job.ErrorCode) return &proto.Empty{}, nil }, }), nil @@ -605,6 +608,7 @@ func TestProvisionerd(t *testing.T) { require.NoError(t, closer.Close()) 
assert.True(t, didLog.Load(), "should log some updates") assert.False(t, didComplete.Load(), "should not complete the job") + assert.Equal(t, runner.InsufficientQuotaErrorCode, failedCode.Load()) assert.True(t, didFail.Load(), "should fail the job") }) diff --git a/provisionerd/runner/runner.go b/provisionerd/runner/runner.go index 42ce41eb85..0ba7fcba2d 100644 --- a/provisionerd/runner/runner.go +++ b/provisionerd/runner/runner.go @@ -33,6 +33,9 @@ const ( RequiredTemplateVariablesErrorCode = "REQUIRED_TEMPLATE_VARIABLES" requiredTemplateVariablesErrorText = "required template variables" + + InsufficientQuotaErrorCode = "INSUFFICIENT_QUOTA" + insufficientQuotaErrorText = "insufficient quota" ) var errorCodes = map[string]string{ @@ -870,7 +873,10 @@ func (r *Runner) commitQuota(ctx context.Context, cost int32) *proto.FailedJob { Output: "This build would exceed your quota. Failing.", Stage: stage, }) - return r.failedWorkspaceBuildf("insufficient quota") + return r.failedWorkspaceBuildfCode( + InsufficientQuotaErrorCode, + insufficientQuotaErrorText, + ) } return nil } @@ -1110,6 +1116,20 @@ func (r *Runner) failedWorkspaceBuildf(format string, args ...interface{}) *prot return failedJob } +func (r *Runner) failedWorkspaceBuildfCode( + code string, + format string, + args ...interface{}, +) *proto.FailedJob { + failedJob := &proto.FailedJob{ + JobId: r.job.JobId, + Error: fmt.Sprintf(format, args...), + ErrorCode: code, + } + failedJob.Type = &proto.FailedJob_WorkspaceBuild_{} + return failedJob +} + func (r *Runner) failedJobf(format string, args ...interface{}) *proto.FailedJob { message := fmt.Sprintf(format, args...) var code string diff --git a/provisionerd/runner/runner_test.go b/provisionerd/runner/runner_test.go new file mode 100644 index 0000000000..18804d23f2 --- /dev/null +++ b/provisionerd/runner/runner_test.go @@ -0,0 +1,20 @@ +package runner //nolint:testpackage // Tests unexported failure classification helpers. 
+ +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/coder/coder/v2/provisionerd/proto" +) + +func TestFailedWorkspaceBuildfDoesNotInferQuotaErrorCode(t *testing.T) { + t.Parallel() + + r := &Runner{job: &proto.AcquiredJob{JobId: "job"}} + failed := r.failedWorkspaceBuildf( + "provider failed: insufficient quota in us-east1", + ) + + require.Empty(t, failed.ErrorCode) +} diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 12c18112b7..33ccbd03c9 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -4563,9 +4563,12 @@ export interface IssueReconnectingPTYSignedTokenResponse { } // From codersdk/provisionerdaemons.go -export type JobErrorCode = "REQUIRED_TEMPLATE_VARIABLES"; +export type JobErrorCode = "INSUFFICIENT_QUOTA" | "REQUIRED_TEMPLATE_VARIABLES"; -export const JobErrorCodes: JobErrorCode[] = ["REQUIRED_TEMPLATE_VARIABLES"]; +export const JobErrorCodes: JobErrorCode[] = [ + "INSUFFICIENT_QUOTA", + "REQUIRED_TEMPLATE_VARIABLES", +]; // From codersdk/licenses.go export interface License {