Files
coder/coderd/x/chatd/chattool/quotaerror.go
T
Ethan ef0151601e feat: report insufficient quota build failures in chat tools (#24956)
## Summary

When a workspace build fails because the user is over their group quota,
the chat tools currently surface the failure as a bare `"workspace build
failed: insufficient quota"` string with no machine-readable error code
and no visibility into the user's current usage. Agents and the UI
cannot distinguish quota failures from any other Terraform error, so
users see an opaque message and have no clear path to recovery.

This PR tags quota failures with a typed error code at the source and
propagates it through the chat tool layer so callers can react to it
explicitly.

Relates to CODAGT-20

## Changes

**Provisioner runner**

- Add `InsufficientQuotaErrorCode = "INSUFFICIENT_QUOTA"` and set it
explicitly at the `commitQuota` failure site via a new
`failedWorkspaceBuildfCode` helper, so `provisioner_jobs.error_code` is
populated only on the genuine quota path. The substring matcher used for
externally produced sentinels (e.g. `"missing parameter"`, `"required
template variables"`) is intentionally not extended; provider errors
that happen to mention "insufficient quota" stay classified as generic
build failures.

**SDK and API contract**

- Add `JobErrorCodeInsufficientQuota` and a
`JobIsInsufficientQuotaErrorCode` helper to `codersdk`.
- Extend the swagger `enums` tag on `ProvisionerJob.ErrorCode` to
include `INSUFFICIENT_QUOTA`.
- Regenerate `coderd/apidoc`, `docs/reference/api/*`, and
`site/src/api/typesGenerated.ts`.

**chattool create_workspace / start_workspace**

- `waitForBuild` now returns a typed `*workspaceBuildError` carrying
both the message and the `JobErrorCode`, instead of a bare error string.
- New `quotaerror.go` introduces a structured `quotaErrorResult` (with
`error_code`, `title`, `message`, `build_id`, and optional `quota`) and
a best-effort `workspaceQuotaDetails` lookup that wraps owner
authorization internally and fetches `credits_consumed` and `budget`
from the database. Quota lookup failures (including authorization
failures) never block the failure payload.
- On quota-coded build failures, both `create_workspace` and
`start_workspace` now return the structured response (with the recovery
guidance inlined into `message`) instead of the bare `"insufficient
quota"` string. This applies to all three failure paths: post-creation,
an in-progress existing build, and a freshly triggered start build.
Non-quota build failures continue to use the existing
`buildToolResponse` / `newBuildError` path.
- Owner authorization is wrapped only on the call sites that need it
(the `CreateFn` and `StartFn` invocations and the quota-detail lookup),
so idempotent fast paths (already running, already in progress,
existing-workspace early returns) do not pay for an extra RBAC
round-trip or fail when role lookup is transient.

## Out of scope

- No changes to quota math, allowances, or bypass behavior.
- No automatic retries.
- No new quota-inspection tools and no changes to MCP
`coder_create_workspace` (which returns immediately and never observes
the build outcome here).
- No frontend UI changes; those will land in a follow-up PR that
consumes the new `INSUFFICIENT_QUOTA` code.
2026-05-07 15:01:58 +10:00

193 lines
4.8 KiB
Go

package chattool
import (
"context"
"errors"
"fmt"
"charm.land/fantasy"
"github.com/google/uuid"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/codersdk"
)
// workspaceQuotaErrorTitle is the short user-facing title that
// newQuotaError places in the "title" field of quota failure responses.
const workspaceQuotaErrorTitle = "Workspace quota reached"
// buildFailureAction names the workspace operation whose build failed. It
// selects the verb used in the user-facing quota failure message.
type buildFailureAction string

// Operations that can produce a build failure response.
const (
	buildFailureActionCreate = buildFailureAction("create")
	buildFailureActionStart  = buildFailureAction("start")
)
// workspaceBuildError carries a build failure message together with a
// codersdk.JobErrorCode, so callers can branch on the code through
// buildErrorCode instead of inspecting the message text.
type workspaceBuildError struct {
// message is the text returned by Error.
message string
// code is the job error code attached to the failure; buildErrorCode
// returns it.
code codersdk.JobErrorCode
}
// Error implements the error interface by returning the stored build
// failure message.
func (e *workspaceBuildError) Error() string {
return e.message
}
// buildErrorCode extracts the job error code from err. It returns the code
// of the first *workspaceBuildError found in err's chain, or the empty code
// when the chain contains none (which includes a nil err).
func buildErrorCode(err error) codersdk.JobErrorCode {
	var target *workspaceBuildError
	if !errors.As(err, &target) {
		return ""
	}
	return target.code
}
// quotaErrorResult is the structured response returned when a workspace
// build fails because the user's workspace quota is exhausted.
type quotaErrorResult struct {
// ErrorCode is the machine-readable job error code; newQuotaError sets
// it to codersdk.InsufficientQuota.
ErrorCode codersdk.JobErrorCode `json:"error_code"`
// Error is the raw build failure string used for debugging and
// frontend error detection.
Error string `json:"error"`
// Title is a short user-facing summary.
Title string `json:"title"`
// Message explains the failure and inlines the recovery guidance
// the model should relay to the user.
Message string `json:"message"`
// BuildID is the string form of the build ID passed to newQuotaError.
// It stays empty, and is omitted from the JSON, when that ID is uuid.Nil.
BuildID string `json:"build_id,omitempty"`
// Quota holds the usage and budget figures when they could be loaded.
// It is omitted from the JSON when nil.
Quota *quotaErrorDetails `json:"quota,omitempty"`
}
// quotaErrorDetails reports workspace quota usage alongside the allowance
// it is measured against.
type quotaErrorDetails struct {
// CreditsConsumed is the value workspaceQuotaDetails reads from
// GetQuotaConsumedForUser.
CreditsConsumed int64 `json:"credits_consumed"`
// Budget is the value workspaceQuotaDetails reads from
// GetQuotaAllowanceForUser.
Budget int64 `json:"budget"`
}
// newQuotaError assembles the structured payload for a build that failed on
// insufficient quota.
//
// msg is the raw build failure text and is stored in Error unchanged. action
// picks the verb in the user-facing message; any value other than
// buildFailureActionStart is worded as "create". buildID is included only
// when it is not uuid.Nil. quota is stored as given and may be nil.
func newQuotaError(
	msg string,
	buildID uuid.UUID,
	action buildFailureAction,
	quota *quotaErrorDetails,
) quotaErrorResult {
	var verb string
	switch action {
	case buildFailureActionStart:
		verb = "start"
	default:
		verb = "create"
	}

	guidance := fmt.Sprintf(
		"Coder could not %s this workspace because your workspace quota is "+
			"full. Delete a workspace you no longer need to free quota, or "+
			"ask an administrator to raise your group quota allowance.",
		verb,
	)

	// A nil build ID leaves the field empty so omitempty drops it.
	var id string
	if buildID != uuid.Nil {
		id = buildID.String()
	}

	return quotaErrorResult{
		ErrorCode: codersdk.InsufficientQuota,
		Error:     msg,
		Title:     workspaceQuotaErrorTitle,
		Message:   guidance,
		BuildID:   id,
		Quota:     quota,
	}
}
// workspaceQuotaDetails loads the consumed credits and quota allowance for
// ownerID within organizationID.
//
// The lookup is best-effort: it returns nil when db is nil, when either ID
// is uuid.Nil, or when any step fails. Failures are logged at debug level
// and never surfaced to the caller. When ctx has no actor, or its actor is
// not the owner, the queries run with a context obtained from asOwner.
func workspaceQuotaDetails(
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	ownerID uuid.UUID,
	organizationID uuid.UUID,
) *quotaErrorDetails {
	switch {
	case db == nil, ownerID == uuid.Nil, organizationID == uuid.Nil:
		return nil
	}

	// Reuse the caller's context only when it already acts as the owner.
	lookupCtx := ctx
	actor, hasActor := dbauthz.ActorFromContext(ctx)
	alreadyOwner := hasActor && actor.ID == ownerID.String()
	if !alreadyOwner {
		var err error
		lookupCtx, err = asOwner(ctx, db, ownerID)
		if err != nil {
			logger.Debug(ctx, "failed to load owner authorization for quota lookup",
				slog.F("owner_id", ownerID),
				slog.F("organization_id", organizationID),
				slog.Error(err),
			)
			return nil
		}
	}

	used, err := db.GetQuotaConsumedForUser(lookupCtx, database.GetQuotaConsumedForUserParams{
		OwnerID:        ownerID,
		OrganizationID: organizationID,
	})
	if err != nil {
		logger.Debug(ctx, "failed to load consumed workspace quota",
			slog.F("owner_id", ownerID),
			slog.F("organization_id", organizationID),
			slog.Error(err),
		)
		return nil
	}

	allowance, err := db.GetQuotaAllowanceForUser(lookupCtx, database.GetQuotaAllowanceForUserParams{
		UserID:         ownerID,
		OrganizationID: organizationID,
	})
	if err != nil {
		logger.Debug(ctx, "failed to load workspace quota allowance",
			slog.F("owner_id", ownerID),
			slog.F("organization_id", organizationID),
			slog.Error(err),
		)
		return nil
	}

	details := quotaErrorDetails{
		CreditsConsumed: used,
		Budget:          allowance,
	}
	return &details
}
// quotaErrorToolResponse builds the tool response for a quota-coded build
// failure. It looks up quota details on a best-effort basis (they may be
// nil), combines them with msg, buildID and action through newQuotaError,
// and passes the result to marshalToolResponse.
func quotaErrorToolResponse(
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	ownerID uuid.UUID,
	organizationID uuid.UUID,
	msg string,
	buildID uuid.UUID,
	action buildFailureAction,
) fantasy.ToolResponse {
	result := newQuotaError(
		msg,
		buildID,
		action,
		workspaceQuotaDetails(ctx, logger, db, ownerID, organizationID),
	)
	return marshalToolResponse(result)
}
// buildFailureToolResponse turns a failed build into a tool response whose
// payload is JSON carried in a normal text response. IsError stays false for
// both quota and generic failures because the chatprompt pipeline flattens
// IsError responses into a single string and drops structured fields; the
// frontend detects failures via the "error" key instead.
//
// Failures coded as insufficient quota get the structured quota response;
// every other failure uses buildToolResponse with newBuildError. err must be
// non-nil, since its message is read unconditionally.
func buildFailureToolResponse(
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	ownerID uuid.UUID,
	organizationID uuid.UUID,
	action buildFailureAction,
	buildID uuid.UUID,
	err error,
) fantasy.ToolResponse {
	msg := err.Error()
	code := buildErrorCode(err)
	if !codersdk.JobIsInsufficientQuotaErrorCode(code) {
		return buildToolResponse(newBuildError(msg, buildID))
	}
	return quotaErrorToolResponse(ctx, logger, db, ownerID, organizationID, msg, buildID, action)
}