feat: classify provider_disabled 503 as non-retryable (#25800)

Builds on top of https://github.com/coder/coder/pull/25794

Adds a new `provider_disabled` error classification in `chatd` with the
corresponding plumbing to classify it as non-retryable. Also adds a
story for how this particular error kind is displayed in the UI.
This commit is contained in:
Cian Johnston
2026-05-29 13:14:04 +01:00
committed by GitHub
parent 4144eb3c4f
commit d0a51da0a9
13 changed files with 200 additions and 64 deletions
+4 -2
View File
@@ -16498,7 +16498,8 @@ const docTemplate = `{
"auth",
"config",
"usage_limit",
"missing_key"
"missing_key",
"provider_disabled"
],
"x-enum-varnames": [
"ChatErrorKindGeneric",
@@ -16509,7 +16510,8 @@ const docTemplate = `{
"ChatErrorKindAuth",
"ChatErrorKindConfig",
"ChatErrorKindUsageLimit",
"ChatErrorKindMissingKey"
"ChatErrorKindMissingKey",
"ChatErrorKindProviderDisabled"
]
},
"codersdk.ChatFileMetadata": {
+4 -2
View File
@@ -14848,7 +14848,8 @@
"auth",
"config",
"usage_limit",
"missing_key"
"missing_key",
"provider_disabled"
],
"x-enum-varnames": [
"ChatErrorKindGeneric",
@@ -14859,7 +14860,8 @@
"ChatErrorKindAuth",
"ChatErrorKindConfig",
"ChatErrorKindUsageLimit",
"ChatErrorKindMissingKey"
"ChatErrorKindMissingKey",
"ChatErrorKindProviderDisabled"
]
},
"codersdk.ChatFileMetadata": {
+8
View File
@@ -195,6 +195,7 @@ func Classify(err error) ClassifiedError {
}
retryableHTTP2StreamReset, hasHTTP2StreamReset := classifyHTTP2StreamReset(err)
providerDisabledMatch := containsAny(lower, providerDisabledPatterns...)
deadline := errors.Is(err, context.DeadlineExceeded) || strings.Contains(lower, "context deadline exceeded")
overloadedMatch := statusCode == 529 || containsAny(lower, overloadedPatterns...)
usageLimitMatch := containsAny(lower, usageLimitPatterns...)
@@ -221,6 +222,8 @@ func Classify(err error) ClassifiedError {
// over whatever HTTP status code the provider happened to use.
// Strong auth still stays above config because bad credentials are
// the root cause when both signals appear.
// Provider-disabled must precede timeout because disabled providers
// return 503, which would otherwise match the retryable timeout rule.
rules := []struct {
match bool
kind codersdk.ChatErrorKind
@@ -251,6 +254,11 @@ func Classify(err error) ClassifiedError {
kind: codersdk.ChatErrorKindRateLimit,
retryable: true,
},
{
match: providerDisabledMatch,
kind: codersdk.ChatErrorKindProviderDisabled,
retryable: false,
},
{
match: timeoutMatch && !configMatch,
kind: codersdk.ChatErrorKindTimeout,
+81
View File
@@ -2,6 +2,7 @@ package chaterror_test
import (
"context"
"fmt"
"io"
"net/http"
"strings"
@@ -218,6 +219,85 @@ func TestClassify(t *testing.T) {
StatusCode: 0,
},
},
// The next cases model the error that fantasy produces
// when aibridge's disabledProviderHandler returns a 503
// plain-text sentinel. Fantasy sets Title from the HTTP
// status text and Message from the response body (including
// the trailing newline written by http.Error).
{
name: "ProviderDisabled503ClassifiesAsProviderDisabled",
err: &fantasy.ProviderError{
Title: fantasy.ErrorTitleForStatusCode(http.StatusServiceUnavailable),
Message: fmt.Sprintf("%s: AI provider %q is disabled\n", codersdk.ChatErrorKindProviderDisabled, "openai"),
StatusCode: http.StatusServiceUnavailable,
},
want: chaterror.ClassifiedError{
Message: "The OpenAI provider has been disabled. Contact your Coder administrator.",
Detail: fmt.Sprintf("%s: AI provider %q is disabled", codersdk.ChatErrorKindProviderDisabled, "openai"),
Kind: codersdk.ChatErrorKindProviderDisabled,
Provider: "openai",
Retryable: false,
StatusCode: 503,
},
},
{
name: "ProviderDisabled503UnknownProvider",
err: &fantasy.ProviderError{
Title: fantasy.ErrorTitleForStatusCode(http.StatusServiceUnavailable),
Message: fmt.Sprintf("%s: AI provider %q is disabled\n", codersdk.ChatErrorKindProviderDisabled, "mycustomprovider"),
StatusCode: http.StatusServiceUnavailable,
},
want: chaterror.ClassifiedError{
Message: "The AI provider has been disabled. Contact your Coder administrator.",
Detail: fmt.Sprintf("%s: AI provider %q is disabled", codersdk.ChatErrorKindProviderDisabled, "mycustomprovider"),
Kind: codersdk.ChatErrorKindProviderDisabled,
Provider: "",
Retryable: false,
StatusCode: 503,
},
},
{
name: "ProviderDisabledPlainErrorString",
err: xerrors.New(fmt.Sprintf("%s: AI provider %q is disabled", codersdk.ChatErrorKindProviderDisabled, "anthropic")),
want: chaterror.ClassifiedError{
Message: "The Anthropic provider has been disabled. Contact your Coder administrator.",
Kind: codersdk.ChatErrorKindProviderDisabled,
Provider: "anthropic",
Retryable: false,
StatusCode: 0,
},
},
{
name: "ProviderDisabledBeatsTimeout503",
err: &fantasy.ProviderError{
Title: fantasy.ErrorTitleForStatusCode(http.StatusServiceUnavailable),
Message: fmt.Sprintf("%s: AI provider %q is disabled\n", codersdk.ChatErrorKindProviderDisabled, "google"),
StatusCode: http.StatusServiceUnavailable,
},
want: chaterror.ClassifiedError{
Message: "The Google provider has been disabled. Contact your Coder administrator.",
Detail: fmt.Sprintf("%s: AI provider %q is disabled", codersdk.ChatErrorKindProviderDisabled, "google"),
Kind: codersdk.ChatErrorKindProviderDisabled,
Provider: "google",
Retryable: false,
StatusCode: 503,
},
},
{
name: "Generic503StillClassifiesAsTimeout",
err: &fantasy.ProviderError{
Message: "service unavailable",
StatusCode: 503,
},
want: chaterror.ClassifiedError{
Message: "The AI provider is temporarily unavailable.",
Detail: "service unavailable",
Kind: codersdk.ChatErrorKindTimeout,
Provider: "",
Retryable: true,
StatusCode: 503,
},
},
}
for _, tt := range tests {
@@ -363,6 +443,7 @@ func TestClassify_PatternCoverage(t *testing.T) {
{name: "OperationInterruptedLiteral", err: "operation interrupted", wantKind: codersdk.ChatErrorKindGeneric, wantRetry: false},
{name: "Status408", err: "status 408", wantKind: codersdk.ChatErrorKindTimeout, wantRetry: true},
{name: "Status500", err: "status 500", wantKind: codersdk.ChatErrorKindGeneric, wantRetry: true},
{name: "ProviderDisabledLiteral", err: "provider_disabled", wantKind: codersdk.ChatErrorKindProviderDisabled, wantRetry: false},
}
for _, tt := range tests {
+39 -38
View File
@@ -4,6 +4,7 @@ import (
"fmt"
"strings"
stringutil "github.com/coder/coder/v2/coderd/util/strings"
"github.com/coder/coder/v2/codersdk"
)
@@ -16,60 +17,58 @@ func terminalMessage(classified ClassifiedError) string {
subject := providerSubject(classified.Provider)
switch classified.Kind {
case codersdk.ChatErrorKindOverloaded:
return fmt.Sprintf("%s is temporarily overloaded.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is temporarily overloaded.", subject))
case codersdk.ChatErrorKindRateLimit:
return fmt.Sprintf("%s is rate limiting requests.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is rate limiting requests.", subject))
case codersdk.ChatErrorKindTimeout:
if !classified.Retryable && classified.StatusCode == 0 {
return "The request timed out before it completed."
}
return fmt.Sprintf("%s is temporarily unavailable.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is temporarily unavailable.", subject))
case codersdk.ChatErrorKindStartupTimeout:
return fmt.Sprintf(
return stringutil.Capitalize(fmt.Sprintf(
"%s did not start responding in time.", subject,
)
))
case codersdk.ChatErrorKindUsageLimit:
displayName := providerDisplayName(classified.Provider)
if displayName == "" {
displayName = "the AI provider"
}
return fmt.Sprintf(
return stringutil.Capitalize(fmt.Sprintf(
"The usage quota for %s has been exceeded."+
" Check the billing and quota settings for the provider account.",
displayName,
)
subject,
))
case codersdk.ChatErrorKindAuth:
displayName := providerDisplayName(classified.Provider)
if displayName == "" {
displayName = "the AI provider"
}
return fmt.Sprintf(
"Authentication with %s failed."+
" Check the API key and permissions.",
displayName,
subject,
)
case codersdk.ChatErrorKindConfig:
return fmt.Sprintf(
return stringutil.Capitalize(fmt.Sprintf(
"%s rejected the model configuration."+
" Check the selected model and provider settings.",
subject,
)
))
case codersdk.ChatErrorKindMissingKey:
return "This conversation was started with an API key that is no longer available." +
" Send your message again to continue."
case codersdk.ChatErrorKindProviderDisabled:
displayName := providerDisplayName(classified.Provider)
return fmt.Sprintf(
"The %s provider has been disabled."+
" Contact your Coder administrator.",
displayName,
)
default:
if !classified.Retryable && classified.StatusCode == 0 {
return "The chat request failed unexpectedly."
}
return fmt.Sprintf("%s returned an unexpected error.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s returned an unexpected error.", subject))
}
}
@@ -85,41 +84,43 @@ func retryMessage(classified ClassifiedError) string {
subject := providerSubject(classified.Provider)
switch classified.Kind {
case codersdk.ChatErrorKindOverloaded:
return fmt.Sprintf("%s is temporarily overloaded.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is temporarily overloaded.", subject))
case codersdk.ChatErrorKindRateLimit:
return fmt.Sprintf("%s is rate limiting requests.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is rate limiting requests.", subject))
case codersdk.ChatErrorKindTimeout:
return fmt.Sprintf("%s is temporarily unavailable.", subject)
return stringutil.Capitalize(fmt.Sprintf("%s is temporarily unavailable.", subject))
case codersdk.ChatErrorKindStartupTimeout:
return fmt.Sprintf(
return stringutil.Capitalize(fmt.Sprintf(
"%s did not start responding in time.", subject,
)
))
case codersdk.ChatErrorKindAuth:
displayName := providerDisplayName(classified.Provider)
if displayName == "" {
displayName = "the AI provider"
}
return fmt.Sprintf(
"Authentication with %s failed.", displayName,
"Authentication with %s failed.", subject,
)
case codersdk.ChatErrorKindConfig:
return fmt.Sprintf(
return stringutil.Capitalize(fmt.Sprintf(
"%s rejected the model configuration.", subject,
)
))
case codersdk.ChatErrorKindMissingKey:
return "The API key for this conversation is no longer available."
default:
case codersdk.ChatErrorKindProviderDisabled:
displayName := providerDisplayName(classified.Provider)
return fmt.Sprintf(
"%s returned an unexpected error.", subject,
"The %s provider has been disabled by an administrator.",
displayName,
)
default:
return stringutil.Capitalize(fmt.Sprintf(
"%s returned an unexpected error.", subject,
))
}
}
func providerSubject(provider string) string {
if displayName := providerDisplayName(provider); displayName != "" {
if displayName := providerDisplayName(provider); displayName != "AI" && displayName != "" {
return displayName
}
return "The AI provider"
return "the AI provider"
}
func providerDisplayName(provider string) string {
@@ -141,7 +142,7 @@ func providerDisplayName(provider string) string {
case "vercel":
return "Vercel AI Gateway"
default:
return ""
return "AI"
}
}
+3
View File
@@ -4,6 +4,8 @@ import (
"regexp"
"strconv"
"strings"
"github.com/coder/coder/v2/aibridge"
)
type providerHint struct {
@@ -83,6 +85,7 @@ var (
}
genericRetryablePatterns = []string{"server error", "internal server error"}
interruptedPatterns = []string{"chat interrupted", "request interrupted", "operation interrupted"}
providerDisabledPatterns = []string{aibridge.ErrorCodeProviderDisabled}
)
func extractStatusCode(lower string) int {
+11 -9
View File
@@ -1525,15 +1525,16 @@ type ChatStreamStatus struct {
type ChatErrorKind string
const (
ChatErrorKindGeneric ChatErrorKind = "generic"
ChatErrorKindOverloaded ChatErrorKind = "overloaded"
ChatErrorKindRateLimit ChatErrorKind = "rate_limit"
ChatErrorKindTimeout ChatErrorKind = "timeout"
ChatErrorKindStartupTimeout ChatErrorKind = "startup_timeout"
ChatErrorKindAuth ChatErrorKind = "auth"
ChatErrorKindConfig ChatErrorKind = "config"
ChatErrorKindUsageLimit ChatErrorKind = "usage_limit"
ChatErrorKindMissingKey ChatErrorKind = "missing_key"
ChatErrorKindGeneric ChatErrorKind = "generic"
ChatErrorKindOverloaded ChatErrorKind = "overloaded"
ChatErrorKindRateLimit ChatErrorKind = "rate_limit"
ChatErrorKindTimeout ChatErrorKind = "timeout"
ChatErrorKindStartupTimeout ChatErrorKind = "startup_timeout"
ChatErrorKindAuth ChatErrorKind = "auth"
ChatErrorKindConfig ChatErrorKind = "config"
ChatErrorKindUsageLimit ChatErrorKind = "usage_limit"
ChatErrorKindMissingKey ChatErrorKind = "missing_key"
ChatErrorKindProviderDisabled ChatErrorKind = "provider_disabled"
)
// AllChatErrorKinds contains every ChatErrorKind value.
@@ -1548,6 +1549,7 @@ var AllChatErrorKinds = []ChatErrorKind{
ChatErrorKindConfig,
ChatErrorKindUsageLimit,
ChatErrorKindMissingKey,
ChatErrorKindProviderDisabled,
}
// ChatError represents a terminal chat error in persisted chat state or the
+7 -7
View File
@@ -292,13 +292,13 @@ Status Code **200**
#### Enumerated Values
| Property | Value(s) |
|---------------|---------------------------------------------------------------------------------------------------------------------|
| `client_type` | `api`, `ui` |
| `kind` | `auth`, `config`, `generic`, `missing_key`, `overloaded`, `rate_limit`, `startup_timeout`, `timeout`, `usage_limit` |
| `type` | `context-file`, `file`, `file-reference`, `reasoning`, `skill`, `source`, `text`, `tool-call`, `tool-result` |
| `plan_mode` | `plan` |
| `status` | `completed`, `error`, `paused`, `pending`, `requires_action`, `running`, `waiting` |
| Property | Value(s) |
|---------------|------------------------------------------------------------------------------------------------------------------------------------------|
| `client_type` | `api`, `ui` |
| `kind` | `auth`, `config`, `generic`, `missing_key`, `overloaded`, `provider_disabled`, `rate_limit`, `startup_timeout`, `timeout`, `usage_limit` |
| `type` | `context-file`, `file`, `file-reference`, `reasoning`, `skill`, `source`, `text`, `tool-call`, `tool-result` |
| `plan_mode` | `plan` |
| `status` | `completed`, `error`, `paused`, `pending`, `requires_action`, `running`, `waiting` |
To perform this operation, you must be authenticated. [Learn more](authentication.md).
+3 -3
View File
@@ -2681,9 +2681,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in
#### Enumerated Values
| Value(s) |
|---------------------------------------------------------------------------------------------------------------------|
| `auth`, `config`, `generic`, `missing_key`, `overloaded`, `rate_limit`, `startup_timeout`, `timeout`, `usage_limit` |
| Value(s) |
|------------------------------------------------------------------------------------------------------------------------------------------|
| `auth`, `config`, `generic`, `missing_key`, `overloaded`, `provider_disabled`, `rate_limit`, `startup_timeout`, `timeout`, `usage_limit` |
## codersdk.ChatFileMetadata
+2
View File
@@ -1969,6 +1969,7 @@ export type ChatErrorKind =
| "generic"
| "missing_key"
| "overloaded"
| "provider_disabled"
| "rate_limit"
| "startup_timeout"
| "timeout"
@@ -1980,6 +1981,7 @@ export const ChatErrorKinds: ChatErrorKind[] = [
"generic",
"missing_key",
"overloaded",
"provider_disabled",
"rate_limit",
"startup_timeout",
"timeout",
@@ -288,6 +288,40 @@ export const TerminalStartupTimeoutError: Story = {
},
};
/**
 * Terminal error state for a disabled provider: the story expects the
 * "Provider disabled" heading, the administrator-contact message, and the
 * HTTP 503 label to be visible, and expects no retry text or status link.
 */
export const TerminalProviderDisabledError: Story = {
	args: {
		...defaultArgs,
		liveStatus: buildLiveStatus({
			streamError: {
				kind: "provider_disabled",
				message:
					"The OpenAI provider has been disabled. Contact your Coder administrator.",
				provider: "openai",
				retryable: false,
				statusCode: 503,
			},
		}),
	},
	play: async ({ canvasElement }) => {
		const view = within(canvasElement);

		// Elements that must be rendered for this error kind.
		const heading = view.getByRole("heading", { name: /provider disabled/i });
		expect(heading).toBeVisible();

		const adminMessage = view.getByText(
			/the openai provider has been disabled.*contact your coder administrator/i,
		);
		expect(adminMessage).toBeVisible();

		const httpStatus = view.getByText(/^HTTP 503$/);
		expect(httpStatus).toBeVisible();

		// Elements that must be absent: the error is marked non-retryable, so
		// neither a retry indicator nor a status link should appear.
		const retryNotice = view.queryByText(/retrying/i);
		expect(retryNotice).not.toBeInTheDocument();

		const statusLink = view.queryByRole("link", { name: /status/i });
		expect(statusLink).not.toBeInTheDocument();
	},
};
/** Generic failures do not show usage or provider CTAs. */
export const GenericErrorDoesNotShowUsageAction: Story = {
args: {
@@ -44,6 +44,8 @@ export const getErrorTitle = (
return "Usage limit reached";
case "missing_key":
return "Chat interrupted";
case "provider_disabled":
return "Provider disabled";
default:
return mode === "retry" ? "Retrying request" : "Request failed";
}
@@ -11,9 +11,8 @@ type UsageLimitData = Partial<
/**
* Typed classification for errors surfaced in the agent detail view.
* - "usage_limit": the user hit a spending cap (409 + valid usage data).
* - other kinds come from normalized stream/provider failures such as
* "generic", "overloaded", "rate_limit", "timeout",
* "startup_timeout", "auth", and "config".
* - other kinds come from normalized stream/provider failures.
* See ChatErrorKind for the full set.
*/
export type ChatDetailError = {
message: string;