feat: report insufficient quota build failures in chat tools (#24956)

## Summary

When a workspace build fails because the user is over their group quota,
the chat tools currently surface the failure as a bare `"workspace build
failed: insufficient quota"` string with no machine-readable error code
and no visibility into the user's current usage. Agents and the UI
cannot distinguish quota failures from any other Terraform error, so
users see an opaque message and have no clear path to recovery.

This PR tags quota failures with a typed error code at the source and
propagates it through the chat tool layer so callers can react to it
explicitly.

Relates to CODAGT-20

## Changes

**Provisioner runner**

- Add `InsufficientQuotaErrorCode = "INSUFFICIENT_QUOTA"` and set it
explicitly at the `commitQuota` failure site via a new
`failedWorkspaceBuildfCode` helper, so `provisioner_jobs.error_code` is
populated only on the genuine quota path. The substring matcher used for
externally produced sentinels (e.g. `"missing parameter"`, `"required
template variables"`) is intentionally not extended; provider errors
that happen to mention "insufficient quota" stay classified as generic
build failures.

**SDK and API contract**

- Add `JobErrorCodeInsufficientQuota` and a
`JobIsInsufficientQuotaErrorCode` helper to `codersdk`.
- Extend the swagger `enums` tag on `ProvisionerJob.ErrorCode` to
include `INSUFFICIENT_QUOTA`.
- Regenerate `coderd/apidoc`, `docs/reference/api/*`, and
`site/src/api/typesGenerated.ts`.

**chattool create_workspace / start_workspace**

- `waitForBuild` now returns a typed `*workspaceBuildError` carrying
both the message and the `JobErrorCode`, instead of a bare error string.
- New `quotaerror.go` introduces a structured `quotaErrorResult` (with
`error_code`, `title`, `message`, `build_id`, and optional `quota`) and
a best-effort `workspaceQuotaDetails` lookup that wraps owner
authorization internally and fetches `credits_consumed` and `budget`
from the database. Quota lookup failures (including authorization
failures) never block the failure payload.
- On quota-coded build failures, both `create_workspace` and
`start_workspace` now return the structured response (with the recovery
guidance inlined into `message`) instead of the bare `"insufficient
quota"` string. This applies to all three failure paths: post-creation,
an in-progress existing build, and a freshly triggered start build.
Non-quota build failures continue to use the existing
`buildToolResponse` / `newBuildError` path.
- Owner authorization is wrapped only on the call sites that need it
(the `CreateFn` and `StartFn` invocations and the quota-detail lookup),
so idempotent fast paths (already running, already in progress,
existing-workspace early returns) do not pay for an extra RBAC
round-trip or fail when role lookup is transient.

## Out of scope

- No changes to quota math, allowances, or bypass behavior.
- No automatic retries.
- No new quota-inspection tools and no changes to MCP
`coder_create_workspace` (which returns immediately and never observes
the build outcome).
- No frontend UI changes; those will land in a follow-up PR that
consumes the new `INSUFFICIENT_QUOTA` code.
This commit is contained in:
Ethan
2026-05-07 15:01:58 +10:00
committed by GitHub
parent 3c3708f562
commit ef0151601e
17 changed files with 655 additions and 66 deletions
+6 -3
View File
@@ -18670,10 +18670,12 @@ const docTemplate = `{
"codersdk.JobErrorCode": {
"type": "string",
"enum": [
"REQUIRED_TEMPLATE_VARIABLES"
"REQUIRED_TEMPLATE_VARIABLES",
"INSUFFICIENT_QUOTA"
],
"x-enum-varnames": [
"RequiredTemplateVariables"
"RequiredTemplateVariables",
"InsufficientQuota"
]
},
"codersdk.License": {
@@ -20839,7 +20841,8 @@ const docTemplate = `{
},
"error_code": {
"enum": [
"REQUIRED_TEMPLATE_VARIABLES"
"REQUIRED_TEMPLATE_VARIABLES",
"INSUFFICIENT_QUOTA"
],
"allOf": [
{
+3 -3
View File
@@ -17013,8 +17013,8 @@
},
"codersdk.JobErrorCode": {
"type": "string",
"enum": ["REQUIRED_TEMPLATE_VARIABLES"],
"x-enum-varnames": ["RequiredTemplateVariables"]
"enum": ["REQUIRED_TEMPLATE_VARIABLES", "INSUFFICIENT_QUOTA"],
"x-enum-varnames": ["RequiredTemplateVariables", "InsufficientQuota"]
},
"codersdk.License": {
"type": "object",
@@ -19102,7 +19102,7 @@
"type": "string"
},
"error_code": {
"enum": ["REQUIRED_TEMPLATE_VARIABLES"],
"enum": ["REQUIRED_TEMPLATE_VARIABLES", "INSUFFICIENT_QUOTA"],
"allOf": [
{
"$ref": "#/definitions/codersdk.JobErrorCode"
+9 -9
View File
@@ -10,10 +10,7 @@ import (
"github.com/coder/coder/v2/codersdk"
)
// toolResponse builds a fantasy.ToolResponse from a JSON-serializable
// result map. The map constraint ensures all tool results serialize
// to JSON objects so the frontend can safely parse them.
func toolResponse(result map[string]any) fantasy.ToolResponse {
func marshalToolResponse(result any) fantasy.ToolResponse {
data, err := json.Marshal(result)
if err != nil {
return fantasy.NewTextResponse("{}")
@@ -21,15 +18,18 @@ func toolResponse(result map[string]any) fantasy.ToolResponse {
return fantasy.NewTextResponse(string(data))
}
// toolResponse builds a fantasy.ToolResponse from a JSON-serializable
// result map. The map constraint ensures all tool results serialize
// to JSON objects so the frontend can safely parse them.
// Serialization itself is delegated to marshalToolResponse.
func toolResponse(result map[string]any) fantasy.ToolResponse {
return marshalToolResponse(result)
}
// buildToolResponse marshals a buildErrorResult into a tool response.
// Separate from toolResponse to keep the map[string]any constraint
// on the general helper while allowing typed error structs.
func buildToolResponse(r buildErrorResult) fantasy.ToolResponse {
data, err := json.Marshal(r)
if err != nil {
return fantasy.NewTextResponse("{}")
}
return fantasy.NewTextResponse(string(data))
return marshalToolResponse(r)
}
// responseErrorResult converts a codersdk.Response into a structured
+41 -13
View File
@@ -125,18 +125,28 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option
defer options.WorkspaceMu.Unlock()
}
ownerID := options.OwnerID
// Check for an existing workspace on the chat.
check := options.checkExistingWorkspace(ctx, db, chatID)
if check.Err != nil {
if check.FailedBuildID != uuid.Nil {
return buildToolResponse(newBuildError(check.Err.Error(), check.FailedBuildID)), nil
if check.BuildErr != nil {
return buildFailureToolResponse(
ctx,
options.Logger,
db,
ownerID,
organizationID,
check.BuildAction,
check.BuildID,
check.BuildErr,
), nil
}
if check.Err != nil {
return fantasy.NewTextErrorResponse(check.Err.Error()), nil
}
if check.Done {
return toolResponse(check.Result), nil
}
ownerID := options.OwnerID
// Set up dbauthz context for DB lookups.
ownerCtx, ownerErr := asOwner(ctx, db, ownerID)
@@ -260,10 +270,16 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option
buildID := workspace.LatestBuild.ID
if buildID != uuid.Nil {
if err := waitForBuild(ctx, db, buildID); err != nil {
return buildToolResponse(newBuildError(
xerrors.Errorf("workspace build failed: %w", err).Error(),
return buildFailureToolResponse(
ctx,
options.Logger,
db,
ownerID,
organizationID,
buildFailureActionCreate,
buildID,
)), nil
xerrors.Errorf("workspace build failed: %w", err),
), nil
}
}
@@ -323,9 +339,12 @@ type existingWorkspaceResult struct {
Result map[string]any
// Done indicates the caller should return early.
Done bool
// FailedBuildID is set when waitForBuild failed, so the
// caller can include it in a structured error response.
FailedBuildID uuid.UUID
// BuildAction, BuildID, and BuildErr are set together when
// waitForBuild failed, so the caller can render the build
// failure through the shared response path.
BuildAction buildFailureAction
BuildID uuid.UUID
BuildErr error
// Err is non-nil when the check itself failed.
Err error
}
@@ -397,9 +416,14 @@ func (o CreateWorkspaceOptions) checkExistingWorkspace(
o.OnChatUpdated(updatedChat)
}
if err := waitForBuild(ctx, db, build.ID); err != nil {
action := buildFailureActionCreate
if build.Transition == database.WorkspaceTransitionStart {
action = buildFailureActionStart
}
return existingWorkspaceResult{
FailedBuildID: build.ID,
Err: xerrors.Errorf("existing workspace build failed: %w", err),
BuildAction: action,
BuildID: build.ID,
BuildErr: xerrors.Errorf("existing workspace build failed: %w", err),
}
}
result := map[string]any{
@@ -517,7 +541,11 @@ func waitForBuild(
if job.Error.Valid {
errMsg = job.Error.String
}
return xerrors.New(errMsg)
var code codersdk.JobErrorCode
if job.ErrorCode.Valid {
code = codersdk.JobErrorCode(job.ErrorCode.String)
}
return &workspaceBuildError{message: errMsg, code: code}
case database.ProvisionerJobStatusCanceled:
return xerrors.New("build was canceled")
case database.ProvisionerJobStatusPending,
+274 -3
View File
@@ -17,6 +17,7 @@ import (
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbmock"
"github.com/coder/coder/v2/coderd/httpapi/httperror"
"github.com/coder/coder/v2/coderd/util/ptr"
@@ -413,10 +414,261 @@ func TestCreateWorkspace_PostCreationBuildFailure(t *testing.T) {
require.NoError(t, json.Unmarshal([]byte(resp.Content), &result))
require.Contains(t, result["error"], "workspace build failed")
require.Equal(t, buildID.String(), result["build_id"])
require.NotContains(t, result, "error_code",
"generic build failures must not surface a quota error_code")
require.NotContains(t, result, "quota",
"generic build failures must not surface quota details")
require.False(t, resp.IsError,
"buildToolResponse must not set IsError; chatprompt strips structured fields from error responses")
}
// TestCreateWorkspace_PostCreationQuotaFailure drives the create_workspace
// tool against a mocked store where CreateFn succeeds but the resulting
// build's provisioner job has failed with the INSUFFICIENT_QUOTA error code.
// It asserts that the tool returns the structured quota payload (error_code,
// title, message, build_id, and quota usage) and does not set IsError.
func TestCreateWorkspace_PostCreationQuotaFailure(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
db := dbmock.NewMockStore(ctrl)
ownerID := uuid.New()
orgID := uuid.New()
chatID := uuid.New()
templateID := uuid.New()
workspaceID := uuid.New()
jobID := uuid.New()
buildID := uuid.New()
// The chat has no workspace bound yet, so the tool proceeds to creation.
db.EXPECT().
GetChatByID(gomock.Any(), chatID).
Return(database.Chat{ID: chatID}, nil)
db.EXPECT().
UpdateChatWorkspaceBinding(gomock.Any(), gomock.Any()).
Return(database.Chat{ID: chatID}, nil)
db.EXPECT().
GetAuthorizationUserRoles(gomock.Any(), ownerID).
Return(database.GetAuthorizationUserRolesRow{
ID: ownerID,
Roles: []string{},
Groups: []string{},
Status: database.UserStatusActive,
}, nil)
db.EXPECT().
GetTemplateByID(gomock.Any(), templateID).
Return(database.Template{
ID: templateID,
OrganizationID: orgID,
}, nil)
db.EXPECT().
GetChatWorkspaceTTL(gomock.Any()).
Return("0s", nil)
db.EXPECT().
GetWorkspaceBuildByID(gomock.Any(), buildID).
Return(database.WorkspaceBuild{
ID: buildID,
WorkspaceID: workspaceID,
JobID: jobID,
}, nil)
// The provisioner job is already failed and tagged with the quota code.
db.EXPECT().
GetProvisionerJobByID(gomock.Any(), jobID).
Return(database.ProvisionerJob{
ID: jobID,
JobStatus: database.ProvisionerJobStatusFailed,
Error: sql.NullString{String: "insufficient quota", Valid: true},
ErrorCode: sql.NullString{
String: string(codersdk.InsufficientQuota),
Valid: true,
},
}, nil)
// Quota usage lookups that populate the "quota" object in the response.
db.EXPECT().
GetQuotaConsumedForUser(gomock.Any(), database.GetQuotaConsumedForUserParams{
OwnerID: ownerID,
OrganizationID: orgID,
}).
Return(int64(40), nil)
db.EXPECT().
GetQuotaAllowanceForUser(gomock.Any(), database.GetQuotaAllowanceForUserParams{
UserID: ownerID,
OrganizationID: orgID,
}).
Return(int64(40), nil)
// CreateFn reports success; the failure only surfaces while waiting on
// the returned build.
createFn := func(_ context.Context, _ uuid.UUID, req codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) {
return codersdk.Workspace{
ID: workspaceID,
Name: req.Name,
OwnerName: "testuser",
LatestBuild: codersdk.WorkspaceBuild{
ID: buildID,
},
}, nil
}
tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{
OwnerID: ownerID,
CreateFn: createFn,
WorkspaceMu: &sync.Mutex{},
Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}),
})
input := fmt.Sprintf(`{"template_id":%q,"name":"test-quota-fail"}`, templateID.String())
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "call-1",
Name: "create_workspace",
Input: input,
})
require.NoError(t, err)
// The response content is JSON; decode it and check every structured field.
var result map[string]any
require.NoError(t, json.Unmarshal([]byte(resp.Content), &result))
require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"])
require.Equal(t, "Workspace quota reached", result["title"])
require.Contains(t, result["error"], "workspace build failed")
require.Contains(t, result["message"], "workspace quota is full")
require.Contains(t, result["message"], "Delete a workspace")
require.Contains(t, result["message"], "raise your group quota allowance")
require.NotContains(t, result, "next_steps")
require.Equal(t, buildID.String(), result["build_id"])
// JSON numbers decode to float64 in a map[string]any.
quota, ok := result["quota"].(map[string]any)
require.True(t, ok)
require.Equal(t, float64(40), quota["credits_consumed"])
require.Equal(t, float64(40), quota["budget"])
require.False(t, resp.IsError,
"quota responses must not set IsError; chatprompt strips structured fields from error responses")
}
// TestCreateWorkspace_ExistingBuildQuotaFailure covers the path where the
// chat already has a workspace whose latest start build is still running
// when create_workspace is invoked. The build then fails with the
// INSUFFICIENT_QUOTA code, and the tool must return the structured quota
// payload with "start" wording, without ever calling CreateFn.
func TestCreateWorkspace_ExistingBuildQuotaFailure(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
db := dbmock.NewMockStore(ctrl)
ownerID := uuid.New()
orgID := uuid.New()
chatID := uuid.New()
templateID := uuid.New()
workspaceID := uuid.New()
jobID := uuid.New()
buildID := uuid.New()
db.EXPECT().
GetAuthorizationUserRoles(gomock.Any(), ownerID).
Return(database.GetAuthorizationUserRolesRow{
ID: ownerID,
Roles: []string{},
Groups: []string{},
Status: database.UserStatusActive,
}, nil)
// The chat is already bound to a workspace.
db.EXPECT().
GetChatByID(gomock.Any(), chatID).
Return(database.Chat{
ID: chatID,
WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true},
}, nil)
db.EXPECT().
GetWorkspaceByID(gomock.Any(), workspaceID).
Return(database.Workspace{
ID: workspaceID,
Name: "existing-quota-workspace",
OrganizationID: orgID,
}, nil)
db.EXPECT().
GetLatestWorkspaceBuildByWorkspaceID(gomock.Any(), workspaceID).
Return(database.WorkspaceBuild{
ID: buildID,
WorkspaceID: workspaceID,
JobID: jobID,
Transition: database.WorkspaceTransitionStart,
}, nil)
// First job read: the build is still running, so the tool waits on it.
firstJob := db.EXPECT().
GetProvisionerJobByID(gomock.Any(), jobID).
Return(database.ProvisionerJob{
ID: jobID,
JobStatus: database.ProvisionerJobStatusRunning,
}, nil)
db.EXPECT().
UpdateChatWorkspaceBinding(gomock.Any(), database.UpdateChatWorkspaceBindingParams{
ID: chatID,
WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true},
BuildID: uuid.NullUUID{UUID: buildID, Valid: true},
AgentID: uuid.NullUUID{},
}).
Return(database.Chat{
ID: chatID,
WorkspaceID: uuid.NullUUID{UUID: workspaceID, Valid: true},
}, nil)
db.EXPECT().
GetWorkspaceBuildByID(gomock.Any(), buildID).
Return(database.WorkspaceBuild{
ID: buildID,
WorkspaceID: workspaceID,
JobID: jobID,
Transition: database.WorkspaceTransitionStart,
}, nil)
// Second job read (ordered after the first): the job has now failed with
// the quota error code.
db.EXPECT().
GetProvisionerJobByID(gomock.Any(), jobID).
Return(database.ProvisionerJob{
ID: jobID,
JobStatus: database.ProvisionerJobStatusFailed,
Error: sql.NullString{String: "insufficient quota", Valid: true},
ErrorCode: sql.NullString{
String: string(codersdk.InsufficientQuota),
Valid: true,
},
}, nil).
After(firstJob)
// The quota lookups must run with a context whose actor is the owner.
ownerCtx := ownerContextMatcher{ownerID: ownerID}
db.EXPECT().
GetQuotaConsumedForUser(ownerCtx, database.GetQuotaConsumedForUserParams{
OwnerID: ownerID,
OrganizationID: orgID,
}).
Return(int64(40), nil)
db.EXPECT().
GetQuotaAllowanceForUser(ownerCtx, database.GetQuotaAllowanceForUserParams{
UserID: ownerID,
OrganizationID: orgID,
}).
Return(int64(40), nil)
tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{
OwnerID: ownerID,
CreateFn: func(context.Context, uuid.UUID, codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) {
t.Fatal("CreateFn should not be called when an existing build is in progress")
return codersdk.Workspace{}, nil
},
WorkspaceMu: &sync.Mutex{},
Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}),
})
input := fmt.Sprintf(`{"template_id":%q,"name":"test-existing-quota-fail"}`, templateID.String())
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "call-1",
Name: "create_workspace",
Input: input,
})
require.NoError(t, err)
var result map[string]any
require.NoError(t, json.Unmarshal([]byte(resp.Content), &result))
require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"])
require.Equal(t, "Workspace quota reached", result["title"])
require.Contains(t, result["error"], "existing workspace build failed")
// The failed build was a start transition, so the message uses "start".
require.Contains(t, result["message"], "could not start this workspace")
require.Contains(t, result["message"], "workspace quota is full")
require.Equal(t, buildID.String(), result["build_id"])
// JSON numbers decode to float64 in a map[string]any.
quota, ok := result["quota"].(map[string]any)
require.True(t, ok)
require.Equal(t, float64(40), quota["credits_consumed"])
require.Equal(t, float64(40), quota["budget"])
require.False(t, resp.IsError)
}
func TestCreateWorkspace_ResponderErrorPreservesStructuredFields(t *testing.T) {
t.Parallel()
@@ -907,9 +1159,11 @@ func TestCheckExistingWorkspace_InProgressBuildFailureReturnsBuildID(t *testing.
options := testCheckExistingWorkspaceOptions(nil)
check := options.checkExistingWorkspace(context.Background(), db, chatID)
require.Error(t, check.Err)
require.Contains(t, check.Err.Error(), "existing workspace build failed")
require.Equal(t, buildID, check.FailedBuildID)
require.Error(t, check.BuildErr)
require.Contains(t, check.BuildErr.Error(), "existing workspace build failed")
require.Equal(t, buildID, check.BuildID)
require.Equal(t, buildFailureActionStart, check.BuildAction)
require.NoError(t, check.Err)
}
func TestCheckExistingWorkspace_ConnectingAgentWaits(t *testing.T) {
@@ -1186,6 +1440,23 @@ func testCheckExistingWorkspaceOptions(
}
}
// ownerContextMatcher is used as a mock-call argument matcher. It accepts
// only a context.Context whose dbauthz actor has the configured owner ID.
type ownerContextMatcher struct {
	ownerID uuid.UUID
}

// Matches reports whether v is a context carrying an actor whose ID equals
// the matcher's owner ID. Non-context values and contexts without an actor
// never match.
func (m ownerContextMatcher) Matches(v any) bool {
	if candidate, isCtx := v.(context.Context); isCtx {
		if actor, found := dbauthz.ActorFromContext(candidate); found {
			return actor.ID == m.ownerID.String()
		}
	}
	return false
}

// String returns the human-readable description of this matcher.
func (ownerContextMatcher) String() string {
	return "context with owner actor"
}
func expectExistingWorkspaceLookup(
db *dbmock.MockStore,
chatID uuid.UUID,
+192
View File
@@ -0,0 +1,192 @@
package chattool
import (
"context"
"errors"
"fmt"
"charm.land/fantasy"
"github.com/google/uuid"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/codersdk"
)
// workspaceQuotaErrorTitle is the short user-facing title placed in the
// Title field of every quotaErrorResult.
const workspaceQuotaErrorTitle = "Workspace quota reached"
// buildFailureAction identifies which user-visible operation a failed build
// belonged to. newQuotaError uses it to pick the verb in the quota message.
type buildFailureAction string
const (
// buildFailureActionCreate marks a failure while creating a workspace.
buildFailureActionCreate buildFailureAction = "create"
// buildFailureActionStart marks a failure while starting a workspace.
buildFailureActionStart buildFailureAction = "start"
)
// workspaceBuildError is the typed error for a failed workspace build. It
// pairs the failure message with the provisioner job's error code so
// callers can recover the code through buildErrorCode.
type workspaceBuildError struct {
// message is the text returned by Error.
message string
// code is the job error code; it is empty when the job recorded none.
code codersdk.JobErrorCode
}
// Error implements the error interface and returns the stored message.
func (e *workspaceBuildError) Error() string {
return e.message
}
// buildErrorCode extracts the job error code from err. It searches err's
// wrap chain for a *workspaceBuildError and returns that error's code, or
// the empty code when no such error is present.
func buildErrorCode(err error) codersdk.JobErrorCode {
	var target *workspaceBuildError
	if !errors.As(err, &target) {
		return ""
	}
	return target.code
}
// quotaErrorResult is the structured response returned when a workspace
// build fails because the user's workspace quota is exhausted.
type quotaErrorResult struct {
// ErrorCode is the machine-readable failure code; newQuotaError always
// sets it to codersdk.InsufficientQuota.
ErrorCode codersdk.JobErrorCode `json:"error_code"`
// Error is the raw build failure string used for debugging and
// frontend error detection.
Error string `json:"error"`
// Title is a short user-facing summary.
Title string `json:"title"`
// Message explains the failure and inlines the recovery guidance
// the model should relay to the user.
Message string `json:"message"`
// BuildID is the failed build's ID as a string. It is omitted from the
// JSON when no build ID was supplied.
BuildID string `json:"build_id,omitempty"`
// Quota holds the owner's current usage and allowance. It is nil, and
// omitted from the JSON, when the quota lookup did not succeed.
Quota *quotaErrorDetails `json:"quota,omitempty"`
}
// quotaErrorDetails reports the owner's quota usage in an organization, as
// loaded by workspaceQuotaDetails.
type quotaErrorDetails struct {
// CreditsConsumed is the value returned by GetQuotaConsumedForUser.
CreditsConsumed int64 `json:"credits_consumed"`
// Budget is the value returned by GetQuotaAllowanceForUser.
Budget int64 `json:"budget"`
}
// newQuotaError assembles the structured quota failure payload.
//
// msg is the raw build failure text and is copied into the Error field.
// buildID is included as a string unless it is uuid.Nil. action selects the
// verb in the user-facing message ("start" for buildFailureActionStart,
// "create" otherwise). quota may be nil, in which case the payload carries
// no usage details.
func newQuotaError(
	msg string,
	buildID uuid.UUID,
	action buildFailureAction,
	quota *quotaErrorDetails,
) quotaErrorResult {
	const guidanceFormat = "Coder could not %s this workspace because your workspace quota is " +
		"full. Delete a workspace you no longer need to free quota, or " +
		"ask an administrator to raise your group quota allowance."

	var verb string
	switch action {
	case buildFailureActionStart:
		verb = "start"
	default:
		verb = "create"
	}

	// Leave the build ID empty for uuid.Nil so omitempty drops the field.
	var renderedBuildID string
	if buildID != uuid.Nil {
		renderedBuildID = buildID.String()
	}

	return quotaErrorResult{
		ErrorCode: codersdk.InsufficientQuota,
		Error:     msg,
		Title:     workspaceQuotaErrorTitle,
		Message:   fmt.Sprintf(guidanceFormat, verb),
		BuildID:   renderedBuildID,
		Quota:     quota,
	}
}
// workspaceQuotaDetails performs a best-effort lookup of the owner's quota
// usage and allowance in the given organization.
//
// It returns nil when db is nil, when either ID is uuid.Nil, when owner
// authorization cannot be loaded, or when either database query fails.
// Failures are logged at debug level and never returned to the caller, so a
// failed lookup only removes the quota details from the response.
func workspaceQuotaDetails(
ctx context.Context,
logger slog.Logger,
db database.Store,
ownerID uuid.UUID,
organizationID uuid.UUID,
) *quotaErrorDetails {
if db == nil || ownerID == uuid.Nil || organizationID == uuid.Nil {
return nil
}
// Reuse ctx when its actor is already the owner; otherwise build an
// owner-scoped context for the quota queries.
quotaCtx := ctx
if actor, ok := dbauthz.ActorFromContext(ctx); !ok || actor.ID != ownerID.String() {
ownerCtx, err := asOwner(ctx, db, ownerID)
if err != nil {
logger.Debug(ctx, "failed to load owner authorization for quota lookup",
slog.F("owner_id", ownerID),
slog.F("organization_id", organizationID),
slog.Error(err),
)
return nil
}
quotaCtx = ownerCtx
}
consumed, err := db.GetQuotaConsumedForUser(quotaCtx, database.GetQuotaConsumedForUserParams{
OwnerID: ownerID,
OrganizationID: organizationID,
})
if err != nil {
logger.Debug(ctx, "failed to load consumed workspace quota",
slog.F("owner_id", ownerID),
slog.F("organization_id", organizationID),
slog.Error(err),
)
return nil
}
budget, err := db.GetQuotaAllowanceForUser(quotaCtx, database.GetQuotaAllowanceForUserParams{
UserID: ownerID,
OrganizationID: organizationID,
})
if err != nil {
logger.Debug(ctx, "failed to load workspace quota allowance",
slog.F("owner_id", ownerID),
slog.F("organization_id", organizationID),
slog.Error(err),
)
return nil
}
// Both queries succeeded; report them together.
return &quotaErrorDetails{
CreditsConsumed: consumed,
Budget: budget,
}
}
// quotaErrorToolResponse builds the tool response for a quota-coded build
// failure. It looks up the owner's quota details (which may be nil), wraps
// them with msg, buildID, and action in a quotaErrorResult, and marshals
// the result via marshalToolResponse.
func quotaErrorToolResponse(
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	ownerID uuid.UUID,
	organizationID uuid.UUID,
	msg string,
	buildID uuid.UUID,
	action buildFailureAction,
) fantasy.ToolResponse {
	payload := newQuotaError(
		msg,
		buildID,
		action,
		workspaceQuotaDetails(ctx, logger, db, ownerID, organizationID),
	)
	return marshalToolResponse(payload)
}
// buildFailureToolResponse renders a failed build as JSON inside a normal
// text tool response. IsError stays false for both quota and generic build
// failures: the chatprompt pipeline flattens IsError responses into a single
// string and drops structured fields, so the frontend detects failures via
// the "error" key instead.
//
// Errors carrying the insufficient-quota job code produce the structured
// quota payload; every other error goes through buildToolResponse and
// newBuildError.
func buildFailureToolResponse(
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	ownerID uuid.UUID,
	organizationID uuid.UUID,
	action buildFailureAction,
	buildID uuid.UUID,
	err error,
) fantasy.ToolResponse {
	failure := err.Error()
	code := buildErrorCode(err)
	if !codersdk.JobIsInsufficientQuotaErrorCode(code) {
		return buildToolResponse(newBuildError(failure, buildID))
	}
	return quotaErrorToolResponse(
		ctx, logger, db,
		ownerID, organizationID,
		failure, buildID, action,
	)
}
+18 -6
View File
@@ -131,10 +131,16 @@ func StartWorkspace(db database.Store, chatID uuid.UUID, options StartWorkspaceO
// viewer. The fantasy/chatprompt pipeline discards structured
// fields from IsError content.
// The frontend detects errors via the "error" key instead.
return buildToolResponse(newBuildError(
xerrors.Errorf("waiting for in-progress build: %w", err).Error(),
return buildFailureToolResponse(
ctx,
options.Logger,
db,
options.OwnerID,
ws.OrganizationID,
buildFailureActionStart,
build.ID,
)), nil
xerrors.Errorf("waiting for in-progress build: %w", err),
), nil
}
result := waitForAgentAndRespond(ctx, db, options.AgentConnFn, ws, build.ID)
// Re-fire after the agent is fully ready so
@@ -212,10 +218,16 @@ func StartWorkspace(db database.Store, chatID uuid.UUID, options StartWorkspaceO
options.OnChatUpdated(updatedChat)
}
if err := waitForBuild(ctx, db, startBuild.ID); err != nil {
return buildToolResponse(newBuildError(
xerrors.Errorf("workspace start build failed: %w", err).Error(),
return buildFailureToolResponse(
ctx,
options.Logger,
db,
options.OwnerID,
ws.OrganizationID,
buildFailureActionStart,
startBuild.ID,
)), nil
xerrors.Errorf("workspace start build failed: %w", err),
), nil
}
result := waitForAgentAndRespond(ctx, db, options.AgentConnFn, ws, startBuild.ID)
+45 -17
View File
@@ -11,14 +11,17 @@ import (
"charm.land/fantasy"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbfake"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/httpapi/httperror"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
@@ -706,7 +709,7 @@ func TestStartWorkspace(t *testing.T) {
require.True(t, onChatUpdatedCalled.Load(), "OnChatUpdated should be called to notify frontend of build ID")
})
t.Run("FailedBuild", func(t *testing.T) {
t.Run("FailedBuildQuota", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
@@ -714,17 +717,18 @@ func TestStartWorkspace(t *testing.T) {
user := dbgen.User(t, db, database.User{})
modelCfg := seedModelConfig(t, db)
org := dbgen.Organization(t, db, database.Organization{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
UserID: user.ID,
OrganizationID: org.ID,
})
orgResp := dbfake.Organization(t, db).
EveryoneAllowance(40).
Members(user).
Do()
org := orgResp.Org
// Create a workspace with a build that is still running.
wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: user.ID,
OrganizationID: org.ID,
}).Seed(database.WorkspaceBuild{
Transition: database.WorkspaceTransitionStart,
DailyCost: 40,
}).Starting().Do()
ws := wsResp.Workspace
@@ -736,8 +740,14 @@ func TestStartWorkspace(t *testing.T) {
Title: "test-failed-build",
})
authzDB := dbauthz.New(
db,
rbac.NewStrictCachingAuthorizer(prometheus.NewRegistry()),
slogtest.Make(t, nil),
testAccessControlStorePointer(),
)
jobRead := make(chan struct{}, 1)
wrappedDB := &jobInterceptStore{Store: db, jobRead: jobRead}
wrappedDB := &jobInterceptStore{Store: authzDB, jobRead: jobRead}
tool := chattool.StartWorkspace(wrappedDB, chat.ID, chattool.StartWorkspaceOptions{
OwnerID: user.ID,
@@ -758,7 +768,10 @@ func TestStartWorkspace(t *testing.T) {
}
done := make(chan toolResult, 1)
go func() {
resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"})
resp, err := tool.Run(
dbauthz.AsChatd(ctx),
fantasy.ToolCall{ID: "call-1", Name: "start_workspace", Input: "{}"},
)
done <- toolResult{resp, err}
}()
@@ -771,7 +784,11 @@ func TestStartWorkspace(t *testing.T) {
ID: wsResp.Build.JobID,
UpdatedAt: now,
CompletedAt: sql.NullTime{Time: now, Valid: true},
Error: sql.NullString{String: "terraform apply failed", Valid: true},
Error: sql.NullString{String: "insufficient quota", Valid: true},
ErrorCode: sql.NullString{
String: string(codersdk.InsufficientQuota),
Valid: true,
},
}))
res := testutil.TryReceive(ctx, t, done)
@@ -780,9 +797,16 @@ func TestStartWorkspace(t *testing.T) {
var result map[string]any
require.NoError(t, json.Unmarshal([]byte(res.resp.Content), &result))
require.Contains(t, result["error"], "waiting for in-progress build")
require.Equal(t, string(codersdk.InsufficientQuota), result["error_code"])
require.Equal(t, "Workspace quota reached", result["title"])
require.Contains(t, result["message"], "workspace quota is full")
require.Equal(t, wsResp.Build.ID.String(), result["build_id"])
quota, ok := result["quota"].(map[string]any)
require.True(t, ok)
require.Equal(t, float64(40), quota["credits_consumed"])
require.Equal(t, float64(40), quota["budget"])
require.False(t, res.resp.IsError,
"buildToolResponse must not set IsError; chatprompt strips structured fields from error responses")
"quota responses must not set IsError; chatprompt strips structured fields from error responses")
})
t.Run("StartTriggeredBuildFailure", func(t *testing.T) {
@@ -797,7 +821,7 @@ func TestStartWorkspace(t *testing.T) {
UserID: user.ID,
OrganizationID: org.ID,
})
// Create a stopped workspace (succeeded stop transition).
// Create a stopped workspace with a succeeded stop transition.
wsResp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: user.ID,
OrganizationID: org.ID,
@@ -811,10 +835,9 @@ func TestStartWorkspace(t *testing.T) {
OwnerID: user.ID,
WorkspaceID: uuid.NullUUID{UUID: ws.ID, Valid: true},
LastModelConfigID: modelCfg.ID,
Title: "test-start-triggered-build-failure",
Title: "test-start-triggered-generic-build-failure",
})
// StartFn creates a real in-progress build via dbfake.
var startBuildJobID uuid.UUID
var startBuildID uuid.UUID
startFn := func(_ context.Context, _ uuid.UUID, wsID uuid.UUID, req codersdk.CreateWorkspaceBuildRequest) (codersdk.WorkspaceBuild, error) {
@@ -852,13 +875,9 @@ func TestStartWorkspace(t *testing.T) {
done <- toolResult{resp, err}
}()
// First signal: initial GetProvisionerJobByID for the
// old stop build. Second signal: waitForBuild's first
// poll for the new start build.
testutil.TryReceive(ctx, t, jobRead)
testutil.TryReceive(ctx, t, jobRead)
// Fail the provisioner job.
now := time.Now().UTC()
require.NoError(t, db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{
ID: startBuildJobID,
@@ -874,6 +893,8 @@ func TestStartWorkspace(t *testing.T) {
require.NoError(t, json.Unmarshal([]byte(res.resp.Content), &result))
require.Contains(t, result["error"], "workspace start build failed")
require.Equal(t, startBuildID.String(), result["build_id"])
require.NotContains(t, result, "error_code")
require.NotContains(t, result, "quota")
require.False(t, res.resp.IsError,
"buildToolResponse must not set IsError; chatprompt strips structured fields from error responses")
})
@@ -952,3 +973,10 @@ func (s *jobInterceptStore) GetProvisionerJobByID(ctx context.Context, id uuid.U
}
return result, err
}
// testAccessControlStorePointer returns a fresh atomic pointer preloaded
// with an AGPLTemplateAccessControlStore.
func testAccessControlStorePointer() *atomic.Pointer[dbauthz.AccessControlStore] {
	store := dbauthz.AccessControlStore(dbauthz.AGPLTemplateAccessControlStore{})
	holder := new(atomic.Pointer[dbauthz.AccessControlStore])
	holder.Store(&store)
	return holder
}
+9 -1
View File
@@ -167,6 +167,7 @@ type JobErrorCode string
const (
RequiredTemplateVariables JobErrorCode = "REQUIRED_TEMPLATE_VARIABLES"
InsufficientQuota JobErrorCode = "INSUFFICIENT_QUOTA"
)
// JobIsMissingParameterErrorCode returns whether the error is a missing parameter error.
@@ -181,6 +182,13 @@ func JobIsMissingRequiredTemplateVariableErrorCode(code JobErrorCode) bool {
return string(code) == runner.RequiredTemplateVariablesErrorCode
}
// JobIsInsufficientQuotaErrorCode reports whether code is the insufficient
// quota error code defined by the provisioner runner. Consumers can use it
// to explain quota recovery options instead of treating the failure as a
// generic build error.
func JobIsInsufficientQuotaErrorCode(code JobErrorCode) bool {
	raw := string(code)
	return raw == runner.InsufficientQuotaErrorCode
}
// ProvisionerJob describes the job executed by the provisioning daemon.
type ProvisionerJob struct {
ID uuid.UUID `json:"id" format:"uuid" table:"id"`
@@ -189,7 +197,7 @@ type ProvisionerJob struct {
CompletedAt *time.Time `json:"completed_at,omitempty" format:"date-time" table:"completed at"`
CanceledAt *time.Time `json:"canceled_at,omitempty" format:"date-time" table:"canceled at"`
Error string `json:"error,omitempty" table:"error"`
ErrorCode JobErrorCode `json:"error_code,omitempty" enums:"REQUIRED_TEMPLATE_VARIABLES" table:"error code"`
ErrorCode JobErrorCode `json:"error_code,omitempty" enums:"REQUIRED_TEMPLATE_VARIABLES,INSUFFICIENT_QUOTA" table:"error code"`
Status ProvisionerJobStatus `json:"status" enums:"pending,running,succeeded,canceling,canceled,failed" table:"status"`
WorkerID *uuid.UUID `json:"worker_id,omitempty" format:"uuid" table:"worker id"`
WorkerName string `json:"worker_name,omitempty" table:"worker name"`
+1 -1
View File
@@ -1731,7 +1731,7 @@ Status Code **200**
| Property | Value(s) |
|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` |
| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
| `workspace_build_transition` | `delete`, `start`, `stop` |
| `status` | `canceled`, `canceling`, `connected`, `connecting`, `deleted`, `deleting`, `disconnected`, `exit_failure`, `failed`, `ok`, `pending`, `pipes_left_open`, `running`, `starting`, `stopped`, `stopping`, `succeeded`, `timed_out`, `timeout` |
| `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` |
+1 -1
View File
@@ -388,7 +388,7 @@ Status Code **200**
| Property | Value(s) |
|------------------------------|--------------------------------------------------------------------------|
| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` |
| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
| `workspace_build_transition` | `delete`, `start`, `stop` |
| `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` |
| `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` |
+3 -3
View File
@@ -7487,8 +7487,8 @@ Only certain features set these fields: - FeatureManagedAgentLimit|
#### Enumerated Values
| Value(s) |
|-------------------------------|
| `REQUIRED_TEMPLATE_VARIABLES` |
|-----------------------------------------------------|
| `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
## codersdk.License
@@ -10007,7 +10007,7 @@ Only certain features set these fields: - FeatureManagedAgentLimit|
| Property | Value(s) |
|--------------|----------------------------------------------------------------------|
| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` |
| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
| `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` |
## codersdk.ProvisionerJobInput
+2 -2
View File
@@ -1446,7 +1446,7 @@ Status Code **200**
| Property | Value(s) |
|------------------------------|--------------------------------------------------------------------------|
| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` |
| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
| `workspace_build_transition` | `delete`, `start`, `stop` |
| `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` |
| `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` |
@@ -1729,7 +1729,7 @@ Status Code **200**
| Property | Value(s) |
|------------------------------|--------------------------------------------------------------------------|
| `error_code` | `REQUIRED_TEMPLATE_VARIABLES` |
| `error_code` | `INSUFFICIENT_QUOTA`, `REQUIRED_TEMPLATE_VARIABLES` |
| `workspace_build_transition` | `delete`, `start`, `stop` |
| `status` | `canceled`, `canceling`, `failed`, `pending`, `running`, `succeeded` |
| `type` | `template_version_dry_run`, `template_version_import`, `workspace_build` |
+4
View File
@@ -25,6 +25,7 @@ import (
"github.com/coder/coder/v2/codersdk/drpcsdk"
"github.com/coder/coder/v2/provisionerd"
"github.com/coder/coder/v2/provisionerd/proto"
"github.com/coder/coder/v2/provisionerd/runner"
"github.com/coder/coder/v2/provisionersdk"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/coder/v2/provisionersdk/tfpath"
@@ -527,6 +528,7 @@ func TestProvisionerd(t *testing.T) {
didComplete atomic.Bool
didLog atomic.Bool
didFail atomic.Bool
failedCode = atomic.NewString("")
acq = newAcquireOne(t, &proto.AcquiredJob{
JobId: "test",
Provisioner: "someprovisioner",
@@ -561,6 +563,7 @@ func TestProvisionerd(t *testing.T) {
},
failJob: func(ctx context.Context, job *proto.FailedJob) (*proto.Empty, error) {
didFail.Store(true)
failedCode.Store(job.ErrorCode)
return &proto.Empty{}, nil
},
}), nil
@@ -605,6 +608,7 @@ func TestProvisionerd(t *testing.T) {
require.NoError(t, closer.Close())
assert.True(t, didLog.Load(), "should log some updates")
assert.False(t, didComplete.Load(), "should not complete the job")
assert.Equal(t, runner.InsufficientQuotaErrorCode, failedCode.Load())
assert.True(t, didFail.Load(), "should fail the job")
})
+21 -1
View File
@@ -33,6 +33,9 @@ const (
RequiredTemplateVariablesErrorCode = "REQUIRED_TEMPLATE_VARIABLES"
requiredTemplateVariablesErrorText = "required template variables"
InsufficientQuotaErrorCode = "INSUFFICIENT_QUOTA"
insufficientQuotaErrorText = "insufficient quota"
)
var errorCodes = map[string]string{
@@ -870,7 +873,10 @@ func (r *Runner) commitQuota(ctx context.Context, cost int32) *proto.FailedJob {
Output: "This build would exceed your quota. Failing.",
Stage: stage,
})
return r.failedWorkspaceBuildf("insufficient quota")
return r.failedWorkspaceBuildfCode(
InsufficientQuotaErrorCode,
insufficientQuotaErrorText,
)
}
return nil
}
@@ -1110,6 +1116,20 @@ func (r *Runner) failedWorkspaceBuildf(format string, args ...interface{}) *prot
return failedJob
}
// failedWorkspaceBuildfCode builds a workspace-build FailedJob for the
// runner's current job with an explicitly supplied error code.
//
// The error message is produced by formatting format with args. The given
// code is stored as-is in ErrorCode; nothing is inferred from the message
// text.
func (r *Runner) failedWorkspaceBuildfCode(
	code string,
	format string,
	args ...interface{},
) *proto.FailedJob {
	return &proto.FailedJob{
		JobId:     r.job.JobId,
		Error:     fmt.Sprintf(format, args...),
		ErrorCode: code,
		// Mark the failure as belonging to a workspace build.
		Type: &proto.FailedJob_WorkspaceBuild_{},
	}
}
func (r *Runner) failedJobf(format string, args ...interface{}) *proto.FailedJob {
message := fmt.Sprintf(format, args...)
var code string
+20
View File
@@ -0,0 +1,20 @@
package runner //nolint:testpackage // Tests unexported failure classification helpers.
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/provisionerd/proto"
)
func TestFailedWorkspaceBuildfDoesNotInferQuotaErrorCode(t *testing.T) {
t.Parallel()
r := &Runner{job: &proto.AcquiredJob{JobId: "job"}}
failed := r.failedWorkspaceBuildf(
"provider failed: insufficient quota in us-east1",
)
require.Empty(t, failed.ErrorCode)
}
+5 -2
View File
@@ -4563,9 +4563,12 @@ export interface IssueReconnectingPTYSignedTokenResponse {
}
// From codersdk/provisionerdaemons.go
export type JobErrorCode = "REQUIRED_TEMPLATE_VARIABLES";
export type JobErrorCode = "INSUFFICIENT_QUOTA" | "REQUIRED_TEMPLATE_VARIABLES";
export const JobErrorCodes: JobErrorCode[] = ["REQUIRED_TEMPLATE_VARIABLES"];
export const JobErrorCodes: JobErrorCode[] = [
"INSUFFICIENT_QUOTA",
"REQUIRED_TEMPLATE_VARIABLES",
];
// From codersdk/licenses.go
export interface License {