fix!: persist structured chat errors (#24919)

**Breaking change for changelog:**

> `codersdk.Chat.last_error` now returns a structured `ChatError` object
(`{message, kind, provider, retryable, status_code, detail}`) instead of
a plain string. The chats API is experimental
(`/api/experimental/chats`), so this ships without a deprecation cycle;
consumers reading `chat.last_error` as a string must update to read
`chat.last_error.message`. SDK/generated TypeScript terminal error
payloads now use the single `ChatError` type; the live stream error
payload type is renamed from `ChatStreamError` to `ChatError`.

Persisted chat errors now carry the same provider-specific detail (kind,
provider, retryable, HTTP status, optional detail) as the live stream,
so refreshing a failed chat rehydrates with the full structured error
instead of a one-line headline.

Existing rows are migrated in place: legacy text errors are wrapped into
`{message, kind: "generic"}` so already-errored chats still render, and
rows with `last_error IS NULL` stay NULL. Internally, persisted fallback
decoding now reuses the existing `chaterror.KindGeneric` constant, with
no JSON value change.

Closes CODAGT-239
This commit is contained in:
Ethan
2026-05-05 12:56:06 +10:00
committed by GitHub
parent 7e01edeb8e
commit 4751416b29
47 changed files with 599 additions and 256 deletions
+1 -1
View File
@@ -1287,7 +1287,7 @@ func (m chatViewModel) handleStreamEvent(event codersdk.ChatStreamEvent) (chatVi
chatID: m.activeChatID,
event: codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
Error: &codersdk.ChatStreamError{
Error: &codersdk.ChatError{
Message: fmt.Sprintf(
"failed to parse ask_user_question: %v",
err,
+4 -3
View File
@@ -78,7 +78,7 @@ func (m chatListModel) filteredChats() []codersdk.Chat {
filtered = append(filtered, chat)
continue
}
if chat.LastError != nil && strings.Contains(strings.ToLower(*chat.LastError), query) {
if chat.LastError != nil && strings.Contains(strings.ToLower(chat.LastError.Message), query) {
filtered = append(filtered, chat)
}
}
@@ -445,13 +445,14 @@ func (m chatListModel) View() string {
rowText := fmt.Sprintf("%s%s %s %s%s", rowPrefix, rowStyle.Render(title), status, m.styles.dimmedText.Render(timeAgo(row.chat.UpdatedAt)), extra)
lines = append(lines, rowText)
if row.chat.Status == codersdk.ChatStatusError && row.chat.LastError != nil {
if row.chat.Status == codersdk.ChatStatusError && row.chat.LastError != nil && row.chat.LastError.Message != "" {
lastError := row.chat.LastError.Message
errWidth := max(m.width-4, 20)
errPrefix := " "
if row.depth > 0 {
errPrefix += strings.Repeat(" ", row.depth)
}
lines = append(lines, errPrefix+m.styles.dimmedText.Render(m.styles.truncate(sanitizeTerminalRenderableText(*row.chat.LastError), errWidth)))
lines = append(lines, errPrefix+m.styles.dimmedText.Render(m.styles.truncate(sanitizeTerminalRenderableText(lastError), errWidth)))
}
}
+1 -1
View File
@@ -116,7 +116,7 @@ func TestConsumeChatStreamText(t *testing.T) {
{Type: codersdk.ChatStreamEventTypeMessage, Message: &codersdk.ChatMessage{ID: 1, ChatID: uuid.New(), Role: codersdk.ChatMessageRoleAssistant, Content: []codersdk.ChatMessagePart{{Type: codersdk.ChatMessagePartTypeText, Text: "Hello world"}}}},
{Type: codersdk.ChatStreamEventTypeStatus, Status: &codersdk.ChatStreamStatus{Status: codersdk.ChatStatusRunning}},
{Type: codersdk.ChatStreamEventTypeRetry, Retry: &codersdk.ChatStreamRetry{Attempt: 2, Error: "rate limited"}},
{Type: codersdk.ChatStreamEventTypeError, Error: &codersdk.ChatStreamError{Message: "boom"}},
{Type: codersdk.ChatStreamEventTypeError, Error: &codersdk.ChatError{Message: "boom"}},
} {
events <- event
}
+1 -1
View File
@@ -1070,7 +1070,7 @@ func TestAgents(t *testing.T) {
t.Parallel()
updated, cmd := applyStream(newTestChatViewModel(nil), codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
Error: &codersdk.ChatStreamError{Message: "stream blew up"},
Error: &codersdk.ChatError{Message: "stream blew up"},
})
require.Nil(t, cmd)
require.Equal(t, "stream error: stream blew up", updated.err.Error())
+31 -3
View File
@@ -28,6 +28,7 @@ import (
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/util/slice"
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
"github.com/coder/coder/v2/coderd/x/chatd/chaterror"
"github.com/coder/coder/v2/coderd/x/chatd/chatprompt"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/provisionersdk/proto"
@@ -1607,6 +1608,34 @@ func nullTimePtr(v sql.NullTime) *time.Time {
return &value
}
const fallbackChatLastErrorMessage = "The chat request failed unexpectedly."
func decodeChatLastError(raw pqtype.NullRawMessage) *codersdk.ChatError {
if !raw.Valid {
return nil
}
var payload codersdk.ChatError
if err := json.Unmarshal(raw.RawMessage, &payload); err != nil {
return &codersdk.ChatError{
Message: fallbackChatLastErrorMessage,
Kind: chaterror.KindGeneric,
}
}
payload.Message = strings.TrimSpace(payload.Message)
payload.Detail = strings.TrimSpace(payload.Detail)
payload.Kind = strings.TrimSpace(payload.Kind)
payload.Provider = strings.TrimSpace(payload.Provider)
if payload.Kind == "" {
payload.Kind = chaterror.KindGeneric
}
if payload.Message == "" {
payload.Message = fallbackChatLastErrorMessage
}
return &payload
}
// Chat converts a database.Chat to a codersdk.Chat. It coalesces
// nil slices and maps to empty values for JSON serialization and
// derives RootChatID from the parent chain when not explicitly set.
@@ -1622,6 +1651,7 @@ func Chat(c database.Chat, diffStatus *database.ChatDiffStatus, files []database
if labels == nil {
labels = map[string]string{}
}
lastError := decodeChatLastError(c.LastError)
chat := codersdk.Chat{
ID: c.ID,
OrganizationID: c.OrganizationID,
@@ -1636,9 +1666,7 @@ func Chat(c database.Chat, diffStatus *database.ChatDiffStatus, files []database
MCPServerIDs: mcpServerIDs,
Labels: labels,
ClientType: codersdk.ChatClientType(c.ClientType),
}
if c.LastError.Valid {
chat.LastError = &c.LastError.String
LastError: lastError,
}
if c.PlanMode.Valid {
chat.PlanMode = codersdk.ChatPlanMode(c.PlanMode.ChatPlanMode)
+93 -1
View File
@@ -19,6 +19,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/x/chatd/chaterror"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/provisionersdk/proto"
)
@@ -916,6 +917,17 @@ func TestChat_AllFieldsPopulated(t *testing.T) {
// field to codersdk.Chat, this test will fail until the
// converter is updated.
now := dbtime.Now()
lastErrorPayload := codersdk.ChatError{
Message: "boom",
Detail: "provider detail",
Kind: chaterror.KindGeneric,
Provider: "openai",
Retryable: true,
StatusCode: 503,
}
lastErrorRaw, err := json.Marshal(lastErrorPayload)
require.NoError(t, err)
input := database.Chat{
ID: uuid.New(),
OwnerID: uuid.New(),
@@ -929,7 +941,7 @@ func TestChat_AllFieldsPopulated(t *testing.T) {
Title: "all-fields-test",
Status: database.ChatStatusRunning,
ClientType: database.ChatClientTypeUi,
LastError: sql.NullString{String: "boom", Valid: true},
LastError: pqtype.NullRawMessage{RawMessage: lastErrorRaw, Valid: true},
CreatedAt: now,
UpdatedAt: now,
Archived: true,
@@ -970,6 +982,8 @@ func TestChat_AllFieldsPopulated(t *testing.T) {
got := db2sdk.Chat(input, diffStatus, fileRows)
require.Equal(t, &lastErrorPayload, got.LastError)
v := reflect.ValueOf(got)
typ := v.Type()
// HasUnread is populated by ChatRowsWithChildren (which joins the
@@ -1053,6 +1067,84 @@ func TestChat_NilFilesOmitted(t *testing.T) {
require.Empty(t, result.Files)
}
func TestChat_LastErrorFallback(t *testing.T) {
t.Parallel()
const fallbackMessage = "The chat request failed unexpectedly."
tests := []struct {
name string
raw json.RawMessage
expectPayload *codersdk.ChatError
}{
{
name: "MalformedJSON",
raw: json.RawMessage(`{`),
expectPayload: &codersdk.ChatError{
Message: fallbackMessage,
Kind: chaterror.KindGeneric,
Retryable: false,
},
},
{
name: "MessageMissingPreservesMetadata",
raw: json.RawMessage(`{"kind":"timeout","provider":"openai","status_code":504}`),
expectPayload: &codersdk.ChatError{
Message: fallbackMessage,
Kind: "timeout",
Provider: "openai",
Retryable: false,
StatusCode: 504,
},
},
{
name: "WhitespaceMessageDefaultsKind",
raw: json.RawMessage(`{"message":" ","provider":"openai"}`),
expectPayload: &codersdk.ChatError{
Message: fallbackMessage,
Kind: chaterror.KindGeneric,
Provider: "openai",
Retryable: false,
},
},
{
name: "KindMissingDefaultsGeneric",
raw: json.RawMessage(`{"message":"OpenAI returned an unexpected error.","provider":"openai","status_code":502}`),
expectPayload: &codersdk.ChatError{
Message: "OpenAI returned an unexpected error.",
Kind: chaterror.KindGeneric,
Provider: "openai",
Retryable: false,
StatusCode: 502,
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
chat := database.Chat{
ID: uuid.New(),
OwnerID: uuid.New(),
LastModelConfigID: uuid.New(),
Title: "fallback payload",
Status: database.ChatStatusError,
CreatedAt: dbtime.Now(),
UpdatedAt: dbtime.Now(),
LastError: pqtype.NullRawMessage{
RawMessage: tc.raw,
Valid: true,
},
}
result := db2sdk.Chat(chat, nil, nil)
require.Equal(t, tc.expectPayload, result.LastError)
})
}
}
func TestChat_MultipleFiles(t *testing.T) {
t.Parallel()
+1 -1
View File
@@ -1438,7 +1438,7 @@ CREATE TABLE chats (
root_chat_id uuid,
last_model_config_id uuid NOT NULL,
archived boolean DEFAULT false NOT NULL,
last_error text,
last_error jsonb,
mode chat_mode,
mcp_server_ids uuid[] DEFAULT '{}'::uuid[] NOT NULL,
labels jsonb DEFAULT '{}'::jsonb NOT NULL,
@@ -0,0 +1,3 @@
ALTER TABLE chats
ALTER COLUMN last_error TYPE text
USING last_error ->> 'message';
@@ -0,0 +1,9 @@
ALTER TABLE chats
ALTER COLUMN last_error TYPE jsonb
USING CASE
WHEN last_error IS NULL THEN NULL
ELSE jsonb_build_object(
'message', last_error,
'kind', 'generic'
)
END;
@@ -0,0 +1,27 @@
-- Migration 424 adds chats.last_error as text. Seed one existing fixture
-- chat with a legacy plain-text error so migration 485 has a non-null row
-- to backfill, and add a second chat that leaves last_error NULL so the
-- migration fixture can assert both branches of the CASE expression.
UPDATE chats
SET last_error = 'Legacy provider failure'
WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96';
INSERT INTO chats (
id,
owner_id,
last_model_config_id,
title,
status,
created_at,
updated_at
)
SELECT
'5a4ac6a3-9dc5-440f-ae6b-5805e477bc59',
owner_id,
last_model_config_id,
'Fixture Chat With Null Error',
'waiting',
'2024-01-01 00:00:00+00',
'2024-01-01 00:00:00+00'
FROM chats
WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96';
@@ -0,0 +1,28 @@
-- Migration 485 retypes chats.last_error to jsonb and backfills legacy
-- text rows into the structured persisted payload shape.
DO $$
DECLARE
payload jsonb;
BEGIN
SELECT last_error INTO STRICT payload
FROM chats
WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96';
IF payload ->> 'message' <> 'Legacy provider failure' THEN
RAISE EXCEPTION 'expected migrated last_error message, got %',
payload ->> 'message';
END IF;
IF payload ->> 'kind' <> 'generic' THEN
RAISE EXCEPTION 'expected migrated last_error kind, got %',
payload ->> 'kind';
END IF;
PERFORM 1
FROM chats
WHERE id = '5a4ac6a3-9dc5-440f-ae6b-5805e477bc59'
AND last_error IS NULL;
IF NOT FOUND THEN
RAISE EXCEPTION 'expected null last_error row to remain NULL after migration';
END IF;
END $$;
+1 -1
View File
@@ -4367,7 +4367,7 @@ type Chat struct {
RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"`
LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"`
Archived bool `db:"archived" json:"archived"`
LastError sql.NullString `db:"last_error" json:"last_error"`
LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"`
Mode NullChatMode `db:"mode" json:"mode"`
MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"`
Labels StringMap `db:"labels" json:"labels"`
+5 -5
View File
@@ -5400,7 +5400,7 @@ type AutoArchiveInactiveChatsRow struct {
RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"`
LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"`
Archived bool `db:"archived" json:"archived"`
LastError sql.NullString `db:"last_error" json:"last_error"`
LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"`
Mode NullChatMode `db:"mode" json:"mode"`
MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"`
Labels json.RawMessage `db:"labels" json:"labels"`
@@ -8701,7 +8701,7 @@ SET
worker_id = $2::uuid,
started_at = $3::timestamptz,
heartbeat_at = $4::timestamptz,
last_error = $5::text,
last_error = $5::jsonb,
updated_at = NOW()
WHERE
id = $6::uuid
@@ -8714,7 +8714,7 @@ type UpdateChatStatusParams struct {
WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"`
StartedAt sql.NullTime `db:"started_at" json:"started_at"`
HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"`
LastError sql.NullString `db:"last_error" json:"last_error"`
LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"`
ID uuid.UUID `db:"id" json:"id"`
}
@@ -8768,7 +8768,7 @@ SET
worker_id = $2::uuid,
started_at = $3::timestamptz,
heartbeat_at = $4::timestamptz,
last_error = $5::text,
last_error = $5::jsonb,
updated_at = $6::timestamptz
WHERE
id = $7::uuid
@@ -8781,7 +8781,7 @@ type UpdateChatStatusPreserveUpdatedAtParams struct {
WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"`
StartedAt sql.NullTime `db:"started_at" json:"started_at"`
HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"`
LastError sql.NullString `db:"last_error" json:"last_error"`
LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
ID uuid.UUID `db:"id" json:"id"`
}
+2 -2
View File
@@ -718,7 +718,7 @@ SET
worker_id = sqlc.narg('worker_id')::uuid,
started_at = sqlc.narg('started_at')::timestamptz,
heartbeat_at = sqlc.narg('heartbeat_at')::timestamptz,
last_error = sqlc.narg('last_error')::text,
last_error = sqlc.narg('last_error')::jsonb,
updated_at = NOW()
WHERE
id = @id::uuid
@@ -733,7 +733,7 @@ SET
worker_id = sqlc.narg('worker_id')::uuid,
started_at = sqlc.narg('started_at')::timestamptz,
heartbeat_at = sqlc.narg('heartbeat_at')::timestamptz,
last_error = sqlc.narg('last_error')::text,
last_error = sqlc.narg('last_error')::jsonb,
updated_at = @updated_at::timestamptz
WHERE
id = @id::uuid
+1 -1
View File
@@ -3252,7 +3252,7 @@ func (api *API) interruptChat(rw http.ResponseWriter, r *http.Request) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
if updateErr != nil {
logger.Error(ctx, "failed to mark chat as waiting", slog.Error(updateErr))
+9 -12
View File
@@ -4765,7 +4765,6 @@ func TestPatchChat(t *testing.T) {
client := newChatClient(t)
firstUser := coderdtest.CreateFirstUser(t, client.Client)
_ = createChatModelConfig(t, client)
chat := createChat(ctx, t, client, firstUser.OrganizationID, "boundary baseline")
waitChatSettled(ctx, t, client, chat.ID)
@@ -5882,7 +5881,7 @@ func TestSendMessageQueuesEffectiveModelConfigID(t *testing.T) {
WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
StartedAt: sql.NullTime{Time: time.Now(), Valid: true},
HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -5933,7 +5932,7 @@ func TestQueuedMessageWithoutOverrideCapturesEnqueueTimeModel(t *testing.T) {
WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
StartedAt: sql.NullTime{Time: time.Now(), Valid: true},
HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -6059,7 +6058,7 @@ func TestWatchChatsStatusChangeCarriesUpdatedLastModelConfigID(t *testing.T) {
WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
StartedAt: sql.NullTime{Time: time.Now(), Valid: true},
HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -6081,7 +6080,7 @@ func TestWatchChatsStatusChangeCarriesUpdatedLastModelConfigID(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -7564,7 +7563,7 @@ func TestRegenerateChatTitle(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -7604,8 +7603,7 @@ func TestRegenerateChatTitle(t *testing.T) {
WorkerID: uuid.NullUUID{UUID: uuid.MustParse("00000000-0000-0000-0000-000000000001"), Valid: true},
StartedAt: sql.NullTime{Time: time.Now(), Valid: true},
HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -7646,8 +7644,7 @@ func TestRegenerateChatTitle(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -7711,7 +7708,7 @@ func TestRegenerateChatTitle(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -8237,7 +8234,7 @@ func TestPromoteChatQueuedMessage(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
+1 -1
View File
@@ -40,7 +40,7 @@ type ChatStreamNotifyMessage struct {
// ErrorPayload carries a structured error event for cross-replica
// live delivery. Keep Error for backward compatibility with older
// replicas during rolling deploys.
ErrorPayload *codersdk.ChatStreamError `json:"error_payload,omitempty"`
ErrorPayload *codersdk.ChatError `json:"error_payload,omitempty"`
// Error is the legacy string-only error payload kept for mixed-
// version compatibility during rollout.
+65 -29
View File
@@ -1761,7 +1761,7 @@ func (p *Server) EditMessage(
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
if err != nil {
return xerrors.Errorf("set chat pending: %w", err)
@@ -1849,7 +1849,7 @@ func (p *Server) ArchiveChat(ctx context.Context, chat database.Chat) error {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
if updateErr != nil {
return xerrors.Errorf("set archived chat waiting before cleanup: %w", updateErr)
@@ -2365,7 +2365,7 @@ func (p *Server) SubmitToolResults(
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
}); updateErr != nil {
return xerrors.Errorf("update chat status: %w", updateErr)
}
@@ -3369,7 +3369,7 @@ func (p *Server) setChatWaiting(ctx context.Context, chatID uuid.UUID) (database
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
return updateErr
}, nil)
@@ -3569,7 +3569,7 @@ func insertUserMessageAndSetPending(
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
if err != nil {
return database.ChatMessage{}, database.Chat{}, xerrors.Errorf("set chat pending: %w", err)
@@ -3846,7 +3846,7 @@ func (p *Server) processOnce(ctx context.Context) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
if updateErr != nil {
p.logger.Error(ctx, "failed to release chat acquired during shutdown",
@@ -4364,7 +4364,7 @@ func (p *Server) Subscribe(
initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"},
Error: &codersdk.ChatError{Message: "failed to load initial snapshot"},
})
} else {
for _, msg := range messages {
@@ -4387,7 +4387,7 @@ func (p *Server) Subscribe(
initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"},
Error: &codersdk.ChatError{Message: "failed to load initial snapshot"},
})
} else if len(queued) > 0 {
initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{
@@ -4412,7 +4412,7 @@ func (p *Server) Subscribe(
initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"},
Error: &codersdk.ChatError{Message: "failed to load initial snapshot"},
})
} else {
statusEvent := codersdk.ChatStreamEvent{
@@ -4490,7 +4490,7 @@ func (p *Server) Subscribe(
case mergedEvents <- codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{
Error: &codersdk.ChatError{
Message: psErr.Error(),
},
}:
@@ -4593,7 +4593,7 @@ func (p *Server) Subscribe(
case mergedEvents <- codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{
Error: &codersdk.ChatError{
Message: notify.Error,
},
}:
@@ -4856,7 +4856,7 @@ func (p *Server) publishRetry(chatID uuid.UUID, payload *codersdk.ChatStreamRetr
}
func (p *Server) publishError(chatID uuid.UUID, classified chaterror.ClassifiedError) {
payload := chaterror.StreamErrorPayload(classified)
payload := chaterror.TerminalErrorPayload(classified)
if payload == nil {
return
}
@@ -4882,6 +4882,17 @@ func processingFailure(err error) (chaterror.ClassifiedError, bool) {
return classified, true
}
func encodeChatLastErrorPayload(payload *codersdk.ChatError) (pqtype.NullRawMessage, error) {
if payload == nil {
return pqtype.NullRawMessage{}, nil
}
encoded, err := json.Marshal(payload)
if err != nil {
return pqtype.NullRawMessage{}, err
}
return pqtype.NullRawMessage{RawMessage: encoded, Valid: true}, nil
}
func panicFailureReason(recovered any) string {
var reason string
switch typed := recovered.(type) {
@@ -5156,7 +5167,7 @@ func (p *Server) finishActiveChat(
logger slog.Logger,
chat database.Chat,
status database.ChatStatus,
lastError string,
lastError pqtype.NullRawMessage,
) (finishActiveChatResult, error) {
result := finishActiveChatResult{}
@@ -5204,7 +5215,7 @@ func (p *Server) finishActiveChat(
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{String: lastError, Valid: lastError != ""},
LastError: lastError,
})
return updateErr
}, nil)
@@ -5330,10 +5341,10 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) {
// interrupt processing.
close(controlArmed)
// Determine the final status and last error to set when we're done.
// Determine the final status and last error payload to set when we're done.
status := database.ChatStatusWaiting
wasInterrupted := false
lastError := ""
var lastErrorPayload *codersdk.ChatError
generatedTitle := &generatedChatTitle{}
runResult := runChatResult{}
remainingQueuedMessages := []database.ChatQueuedMessage{}
@@ -5349,20 +5360,30 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) {
// Handle panics gracefully.
if r := recover(); r != nil {
logger.Error(cleanupCtx, "panic during chat processing", slog.F("panic", r))
lastError = panicFailureReason(r)
p.publishError(chat.ID, chaterror.ClassifiedError{
Message: lastError,
classified := chaterror.ClassifiedError{
Message: panicFailureReason(r),
Kind: chaterror.KindGeneric,
})
}
lastErrorPayload = chaterror.TerminalErrorPayload(classified)
p.publishError(chat.ID, classified)
status = database.ChatStatusError
}
encodedLastError, err := encodeChatLastErrorPayload(lastErrorPayload)
if err != nil {
logger.Warn(cleanupCtx, "failed to marshal chat last error payload",
slog.Error(err),
)
lastErrorPayload = nil
encodedLastError = pqtype.NullRawMessage{}
}
// Check for queued messages and auto-promote the next one.
// This must be done atomically with the status update to avoid
// races with the promote endpoint (which also sets status to
// pending). We use a transaction with FOR UPDATE to ensure we
// don't overwrite a status change made by another caller.
finishResult, err := p.finishActiveChat(cleanupCtx, logger, chat, status, lastError)
finishResult, err := p.finishActiveChat(cleanupCtx, logger, chat, status, encodedLastError)
if errors.Is(err, errChatTakenByOtherWorker) {
// Another worker owns this chat now — skip all
// post-TX side effects (status publish, pubsub,
@@ -5425,7 +5446,11 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) {
p.publishChatActionRequired(finishResult.updatedChat, runResult.PendingDynamicToolCalls)
}
if !wasInterrupted {
p.maybeSendPushNotification(cleanupCtx, finishResult.updatedChat, status, lastError, runResult, logger)
lastErrorMessage := ""
if lastErrorPayload != nil {
lastErrorMessage = lastErrorPayload.Message
}
p.maybeSendPushNotification(cleanupCtx, finishResult.updatedChat, status, lastErrorMessage, runResult, logger)
}
}()
@@ -5440,18 +5465,19 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) {
if errors.Is(err, chatloop.ErrInterrupted) || errors.Is(context.Cause(chatCtx), chatloop.ErrInterrupted) {
logger.Info(ctx, "chat interrupted")
status = database.ChatStatusWaiting
lastErrorPayload = nil
wasInterrupted = true
return
}
if isShutdownCancellation(ctx, chatCtx, err) {
logger.Info(ctx, "chat canceled during shutdown; returning to pending")
status = database.ChatStatusPending
lastError = ""
lastErrorPayload = nil
return
}
logger.Error(ctx, "failed to process chat", slog.Error(err))
if classified, ok := processingFailure(err); ok {
lastError = classified.Message
lastErrorPayload = chaterror.TerminalErrorPayload(classified)
p.publishError(chat.ID, classified)
}
status = database.ChatStatusError
@@ -5476,7 +5502,7 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) {
if ctx.Err() != nil {
logger.Info(ctx, "chat completed during shutdown; returning to pending")
status = database.ChatStatusPending
lastError = ""
lastErrorPayload = nil
return
}
}
@@ -7983,11 +8009,21 @@ func (p *Server) recoverStaleChats(ctx context.Context) {
return nil
}
lastError := sql.NullString{}
lastError := pqtype.NullRawMessage{}
if locked.Status == database.ChatStatusRequiresAction {
lastError = sql.NullString{
String: "Dynamic tool execution timed out",
Valid: true,
lastErrorPayload, marshalErr := encodeChatLastErrorPayload(
chaterror.TerminalErrorPayload(chaterror.ClassifiedError{
Message: "Dynamic tool execution timed out",
Kind: chaterror.KindGeneric,
}),
)
if marshalErr != nil {
p.logger.Warn(ctx, "failed to marshal stale recovery last error payload",
slog.F("chat_id", chat.ID),
slog.Error(marshalErr),
)
} else {
lastError = lastErrorPayload
}
}
+12 -9
View File
@@ -2322,7 +2322,7 @@ func TestSubscribeDoesNotReplayRetryAfterTerminalError(t *testing.T) {
server.publishRetry(chatID, newTestRetryPayload())
server.publishError(chatID, chaterror.ClassifiedError{
Message: "OpenAI is rate limiting requests (HTTP 429).",
Message: "OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "openai",
Retryable: true,
@@ -2398,7 +2398,7 @@ func TestSubscribePrefersStructuredErrorPayloadViaPubsub(t *testing.T) {
defer cancel()
classified := chaterror.ClassifiedError{
Message: "OpenAI is rate limiting requests (HTTP 429).",
Message: "OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "openai",
Retryable: true,
@@ -2407,7 +2407,7 @@ func TestSubscribePrefersStructuredErrorPayloadViaPubsub(t *testing.T) {
server.publishError(chatID, classified)
event := requireStreamErrorEvent(t, events)
require.Equal(t, chaterror.StreamErrorPayload(classified), event.Error)
require.Equal(t, chaterror.TerminalErrorPayload(classified), event.Error)
requireNoStreamEvent(t, events, 200*time.Millisecond)
}
@@ -2442,20 +2442,23 @@ func TestSubscribeFallsBackToLegacyErrorStringViaPubsub(t *testing.T) {
})
event := requireStreamErrorEvent(t, events)
require.Equal(t, &codersdk.ChatStreamError{Message: "legacy error only"}, event.Error)
require.Equal(t, &codersdk.ChatError{Message: "legacy error only"}, event.Error)
requireNoStreamEvent(t, events, 200*time.Millisecond)
}
func newTestRetryPayload() *codersdk.ChatStreamRetry {
return &codersdk.ChatStreamRetry{
Attempt: 1,
DelayMs: (1500 * time.Millisecond).Milliseconds(),
Error: "OpenAI is rate limiting requests (HTTP 429).",
payload := chaterror.StreamRetryPayload(1, 1500*time.Millisecond, chaterror.ClassifiedError{
Message: "OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "openai",
Retryable: true,
StatusCode: 429,
RetryingAt: time.Unix(1_700_000_000, 0).UTC(),
})
if payload == nil {
panic("expected retry payload")
}
payload.RetryingAt = time.Unix(1_700_000_000, 0).UTC()
return payload
}
func newSubscribeTestServer(t *testing.T, db database.Store) *Server {
+77 -36
View File
@@ -46,6 +46,7 @@ import (
"github.com/coder/coder/v2/coderd/workspacestats"
"github.com/coder/coder/v2/coderd/x/chatd"
"github.com/coder/coder/v2/coderd/x/chatd/chatadvisor"
"github.com/coder/coder/v2/coderd/x/chatd/chaterror"
"github.com/coder/coder/v2/coderd/x/chatd/chatprompt"
"github.com/coder/coder/v2/coderd/x/chatd/chattest"
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
@@ -70,6 +71,35 @@ func openAIToolName(tool chattest.OpenAITool) string {
return cmp.Or(tool.Function.Name, tool.Name, tool.Type)
}
func mustChatLastErrorRawMessage(t testing.TB, payload codersdk.ChatError) pqtype.NullRawMessage {
t.Helper()
encoded, err := json.Marshal(payload)
require.NoError(t, err)
return pqtype.NullRawMessage{RawMessage: encoded, Valid: true}
}
func requireChatLastErrorPayload(t testing.TB, raw pqtype.NullRawMessage) codersdk.ChatError {
t.Helper()
require.True(t, raw.Valid, "last error should be set")
var payload codersdk.ChatError
require.NoError(t, json.Unmarshal(raw.RawMessage, &payload))
return payload
}
func chatLastErrorMessage(raw pqtype.NullRawMessage) string {
if !raw.Valid {
return ""
}
var payload codersdk.ChatError
if err := json.Unmarshal(raw.RawMessage, &payload); err == nil && payload.Message != "" {
return payload.Message
}
return string(raw.RawMessage)
}
func recordOpenAIRequest(req *chattest.OpenAIRequest) recordedOpenAIRequest {
messages := append([]chattest.OpenAIMessage(nil), req.Messages...)
tools := make([]string, 0, len(req.Tools))
@@ -867,7 +897,7 @@ func TestExploreChatUsesPersistedMCPSnapshot(t *testing.T) {
chatResult := waitForTerminalChat(ctx, t, db, exploreChat.ID)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
requestsMu.Lock()
@@ -960,7 +990,7 @@ func TestRootExploreChatStaysBuiltinOnlyAtRuntime(t *testing.T) {
storedChat, err := db.GetChatByID(ctx, exploreChat.ID)
require.NoError(t, err)
if storedChat.Status == database.ChatStatusError {
require.FailNowf(t, "explore chat failed", "last_error=%q", storedChat.LastError.String)
require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(storedChat.LastError))
}
require.Equal(t, database.ChatStatusWaiting, storedChat.Status)
require.ElementsMatch(t, []uuid.UUID{mcpConfig.ID}, storedChat.MCPServerIDs)
@@ -1044,7 +1074,7 @@ func TestRootExploreChatExcludesWebSearchProviderToolAtRuntime(t *testing.T) {
storedChat, err := db.GetChatByID(ctx, exploreChat.ID)
require.NoError(t, err)
if storedChat.Status == database.ChatStatusError {
require.FailNowf(t, "explore chat failed", "last_error=%q", storedChat.LastError.String)
require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(storedChat.LastError))
}
require.Equal(t, database.ChatStatusWaiting, storedChat.Status)
@@ -1179,7 +1209,7 @@ func TestExploreChatSendMessageCannotMutateMCPSnapshot(t *testing.T) {
chatResult := waitForTerminalChat(ctx, t, db, exploreChat.ID)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
exploreChat, err = db.GetChatByID(ctx, exploreChat.ID)
@@ -1208,7 +1238,7 @@ func TestExploreChatSendMessageCannotMutateMCPSnapshot(t *testing.T) {
chatResult = waitForTerminalChat(ctx, t, db, exploreChat.ID)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
recordedChildRequests := childRequests()
@@ -1481,7 +1511,7 @@ func TestArchiveChatMovesPendingChatToWaiting(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -1953,7 +1983,7 @@ func TestSendMessageQueuesWhenWaitingWithQueuedBacklog(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -2418,7 +2448,7 @@ func TestPromoteQueuedAllowsAlreadyQueuedMessageWhenUsageLimitReached(t *testing
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -3657,7 +3687,11 @@ func TestRecoverStaleRequiresActionChat(t *testing.T) {
return chatResult.Status == database.ChatStatusError
}, testutil.WaitMedium, testutil.IntervalFast)
require.Contains(t, chatResult.LastError.String, "Dynamic tool execution timed out")
persistedError := requireChatLastErrorPayload(t, chatResult.LastError)
require.Equal(t, codersdk.ChatError{
Message: "Dynamic tool execution timed out",
Kind: chaterror.KindGeneric,
}, persistedError)
require.False(t, chatResult.WorkerID.Valid)
}
@@ -3764,25 +3798,30 @@ func TestUpdateChatStatusPersistsLastError(t *testing.T) {
LastModelConfigID: model.ID,
})
// Simulate a chat that failed with an error.
// Write a minimal structured last_error payload through the
// query layer, then verify it round-trips through storage.
errorMessage := "stream response: status 500: internal server error"
wantPayload := codersdk.ChatError{
Message: errorMessage,
Kind: chaterror.KindGeneric,
}
chat, err := db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{
ID: chat.ID,
Status: database.ChatStatusError,
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{String: errorMessage, Valid: true},
LastError: mustChatLastErrorRawMessage(t, wantPayload),
})
require.NoError(t, err)
require.Equal(t, database.ChatStatusError, chat.Status)
require.Equal(t, sql.NullString{String: errorMessage, Valid: true}, chat.LastError)
require.Equal(t, wantPayload, requireChatLastErrorPayload(t, chat.LastError))
// Verify the error is persisted when re-read from the database.
fromDB, err := db.GetChatByID(ctx, chat.ID)
require.NoError(t, err)
require.Equal(t, database.ChatStatusError, fromDB.Status)
require.Equal(t, sql.NullString{String: errorMessage, Valid: true}, fromDB.LastError)
require.Equal(t, wantPayload, requireChatLastErrorPayload(t, fromDB.LastError))
// Verify the error is cleared when the chat transitions to a
// non-error status (e.g. pending after a retry).
@@ -3792,7 +3831,7 @@ func TestUpdateChatStatusPersistsLastError(t *testing.T) {
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
require.Equal(t, database.ChatStatusPending, chat.Status)
@@ -3949,7 +3988,7 @@ func TestPersistToolResultWithBinaryData(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
var toolMessage *database.ChatMessage
@@ -4100,7 +4139,7 @@ func TestDynamicToolCallPausesAndResumes(t *testing.T) {
require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status,
"expected requires_action, got %s (last_error=%q)",
chatResult.Status, chatResult.LastError.String)
chatResult.Status, chatLastErrorMessage(chatResult.LastError))
// 2. Read the assistant message to find the tool-call ID.
var toolCallID string
@@ -4160,7 +4199,7 @@ func TestDynamicToolCallPausesAndResumes(t *testing.T) {
// 5. Verify the chat completed successfully.
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// 6. Verify the mock received exactly 2 streaming calls.
@@ -4264,7 +4303,7 @@ func TestDynamicToolNamedProposePlanRemainsAvailableOutsidePlanMode(t *testing.T
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
streamedCallsMu.Lock()
@@ -4381,7 +4420,7 @@ func TestDynamicToolCallMixedWithBuiltIn(t *testing.T) {
require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status,
"expected requires_action, got %s (last_error=%q)",
chatResult.Status, chatResult.LastError.String)
chatResult.Status, chatLastErrorMessage(chatResult.LastError))
// 2. Verify the built-in tool (read_file) was already
// executed by checking that a tool result message
@@ -4443,7 +4482,7 @@ func TestDynamicToolCallMixedWithBuiltIn(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// 5. Verify the LLM received exactly 2 streaming calls.
@@ -4519,7 +4558,7 @@ func TestSubmitToolResultsConcurrency(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status,
"expected requires_action, got %s (last_error=%q)",
chatResult.Status, chatResult.LastError.String)
chatResult.Status, chatLastErrorMessage(chatResult.LastError))
// Find the tool call ID from the assistant message.
var toolCallID string
@@ -4842,7 +4881,7 @@ func TestCreateWorkspaceTool_EndToEnd(t *testing.T) {
if chatResult.Status == codersdk.ChatStatusError {
lastError := ""
if chatResult.LastError != nil {
lastError = *chatResult.LastError
lastError = chatResult.LastError.Message
}
require.FailNowf(t, "chat run failed", "last_error=%q", lastError)
}
@@ -5014,7 +5053,7 @@ func TestStartWorkspaceTool_EndToEnd(t *testing.T) {
if chatResult.Status == codersdk.ChatStatusError {
lastError := ""
if chatResult.LastError != nil {
lastError = *chatResult.LastError
lastError = chatResult.LastError.Message
}
require.FailNowf(t, "chat run failed", "last_error=%q", lastError)
}
@@ -5138,7 +5177,7 @@ func TestStoppedWorkspaceWithPersistedAgentBindingDoesNotBlockChat(t *testing.T)
WorkerID: uuid.NullUUID{},
StartedAt: sql.NullTime{},
HeartbeatAt: sql.NullTime{},
LastError: sql.NullString{},
LastError: pqtype.NullRawMessage{},
})
require.NoError(t, err)
@@ -5177,7 +5216,7 @@ func TestStoppedWorkspaceWithPersistedAgentBindingDoesNotBlockChat(t *testing.T)
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
require.EqualValues(t, 1, dialCalls.Load())
@@ -7035,9 +7074,10 @@ func TestProcessChat_UserProviderKey_MissingKeyError(t *testing.T) {
chatResult := waitForTerminalChat(ctx, t, db, chat.ID)
require.Equal(t, database.ChatStatusError, chatResult.Status)
require.True(t, chatResult.LastError.Valid, "LastError should be set")
require.NotEmpty(t, chatResult.LastError.String)
require.NotContains(t, chatResult.LastError.String, "panicked")
persistedError := requireChatLastErrorPayload(t, chatResult.LastError)
require.NotEmpty(t, persistedError.Message)
require.NotContains(t, persistedError.Message, "panicked")
require.Equal(t, chaterror.KindGeneric, persistedError.Kind)
require.NotEqual(t, database.ChatStatusRunning, chatResult.Status)
require.Zero(t, llmCalls.Load(), "missing user key should fail before any LLM request")
}
@@ -7099,9 +7139,10 @@ func TestProcessChatPanicRecovery(t *testing.T) {
return got.Status == database.ChatStatusError
}, testutil.WaitLong, testutil.IntervalFast)
require.True(t, chatResult.LastError.Valid, "LastError should be set")
require.Contains(t, chatResult.LastError.String, "chat processing panicked")
require.Contains(t, chatResult.LastError.String, "intentional test panic")
persistedError := requireChatLastErrorPayload(t, chatResult.LastError)
require.Contains(t, persistedError.Message, "chat processing panicked")
require.Contains(t, persistedError.Message, "intentional test panic")
require.Equal(t, chaterror.KindGeneric, persistedError.Kind)
}
// panicOnInTxDB wraps a database.Store and panics on the first InTx
@@ -7265,7 +7306,7 @@ func TestMCPServerToolInvocation(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// The MCP tool (test-mcp__echo) should appear in the tool
@@ -7765,7 +7806,7 @@ func TestMCPServerOAuth2TokenRefresh(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// The token should have been refreshed.
@@ -7873,7 +7914,7 @@ func TestMCPServerOAuth2TokenRefreshFailureGraceful(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat should not fail", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat should not fail", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// The LLM should have been called at least once.
@@ -7996,7 +8037,7 @@ func TestChatTemplateAllowlistEnforcement(t *testing.T) {
}, testutil.WaitLong, testutil.IntervalFast)
if chatResult.Status == database.ChatStatusError {
require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String)
require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError))
}
// Collect all tool results keyed by tool name. Each tool may
@@ -9285,7 +9326,7 @@ func TestAdvisorChainMode_SnapshotKeepsFullHistory(t *testing.T) {
turn1Chat, err := db.GetChatByID(ctx, chat.ID)
require.NoError(t, err)
require.Equal(t, database.ChatStatusWaiting, turn1Chat.Status,
"turn 1 must complete before turn 2 can be sent; last_error=%q", turn1Chat.LastError.String)
"turn 1 must complete before turn 2 can be sent; last_error=%q", chatLastErrorMessage(turn1Chat.LastError))
_, err = server.SendMessage(ctx, chatd.SendMessageOptions{
ChatID: chat.ID,
+7 -7
View File
@@ -26,7 +26,7 @@ func TestClassify(t *testing.T) {
name: "AmbiguousOverloadKeepsProviderUnknown",
err: xerrors.New("status 529 from upstream"),
want: chaterror.ClassifiedError{
Message: "The AI provider is temporarily overloaded (HTTP 529).",
Message: "The AI provider is temporarily overloaded.",
Kind: chaterror.KindOverloaded,
Provider: "",
Retryable: true,
@@ -114,7 +114,7 @@ func TestClassify(t *testing.T) {
name: "ExplicitStatus429ClassifiesAsRateLimit",
err: xerrors.New("status 429 from upstream"),
want: chaterror.ClassifiedError{
Message: "The AI provider is rate limiting requests (HTTP 429).",
Message: "The AI provider is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "",
Retryable: true,
@@ -561,7 +561,7 @@ func TestWithProviderUsesExplicitHint(t *testing.T) {
enriched := classified.WithProvider("azure openai")
require.Equal(t, chaterror.ClassifiedError{
Message: "Azure OpenAI is rate limiting requests (HTTP 429).",
Message: "Azure OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "azure",
Retryable: true,
@@ -577,7 +577,7 @@ func TestWithProviderAddsProviderWhenUnknown(t *testing.T) {
enriched := classified.WithProvider("openai")
require.Equal(t, chaterror.ClassifiedError{
Message: "OpenAI is rate limiting requests (HTTP 429).",
Message: "OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "openai",
Retryable: true,
@@ -595,7 +595,7 @@ func TestClassify_UsesStructuredProviderStatusAndRetryAfter(t *testing.T) {
))
require.Equal(t, chaterror.ClassifiedError{
Message: "The AI provider is rate limiting requests (HTTP 429).",
Message: "The AI provider is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "",
Retryable: true,
@@ -659,7 +659,7 @@ func TestWithProviderPreservesRetryAfter(t *testing.T) {
enriched := classified.WithProvider("openai")
require.Equal(t, 30*time.Second, enriched.RetryAfter)
require.Equal(t, chaterror.ClassifiedError{
Message: "OpenAI is rate limiting requests (HTTP 429).",
Message: "OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "openai",
Retryable: true,
@@ -679,7 +679,7 @@ func TestClassify_UsesStructuredProviderDetailFromResponseDump(t *testing.T) {
))
require.Equal(t, chaterror.ClassifiedError{
Message: "The AI provider returned an unexpected error (HTTP 400).",
Message: "The AI provider returned an unexpected error.",
Detail: "Image exceeds 5 MB maximum.",
Kind: chaterror.KindGeneric,
Provider: "",
+6 -28
View File
@@ -6,37 +6,21 @@ import (
)
// terminalMessage produces the user-facing error description shown
// when retries are exhausted. It includes HTTP status codes and
// actionable remediation guidance.
// when retries are exhausted. HTTP status codes are carried in the
// classified payload's StatusCode field and rendered as a separate
// footer chip by the UI, so they are intentionally omitted here to
// avoid duplicating the same information in two places.
func terminalMessage(classified ClassifiedError) string {
subject := providerSubject(classified.Provider)
switch classified.Kind {
case KindOverloaded:
if classified.StatusCode > 0 {
return fmt.Sprintf(
"%s is temporarily overloaded (HTTP %d).",
subject, classified.StatusCode,
)
}
return fmt.Sprintf("%s is temporarily overloaded.", subject)
case KindRateLimit:
if classified.StatusCode > 0 {
return fmt.Sprintf(
"%s is rate limiting requests (HTTP %d).",
subject, classified.StatusCode,
)
}
return fmt.Sprintf("%s is rate limiting requests.", subject)
case KindTimeout:
if classified.StatusCode > 0 {
return fmt.Sprintf(
"%s is temporarily unavailable (HTTP %d).",
subject, classified.StatusCode,
)
}
if !classified.Retryable {
if !classified.Retryable && classified.StatusCode == 0 {
return "The request timed out before it completed."
}
return fmt.Sprintf("%s is temporarily unavailable.", subject)
@@ -65,13 +49,7 @@ func terminalMessage(classified ClassifiedError) string {
)
default:
if classified.StatusCode > 0 {
return fmt.Sprintf(
"%s returned an unexpected error (HTTP %d).",
subject, classified.StatusCode,
)
}
if !classified.Retryable {
if !classified.Retryable && classified.StatusCode == 0 {
return "The chat request failed unexpectedly."
}
return fmt.Sprintf("%s returned an unexpected error.", subject)
+2 -2
View File
@@ -6,11 +6,11 @@ import (
"github.com/coder/coder/v2/codersdk"
)
func StreamErrorPayload(classified ClassifiedError) *codersdk.ChatStreamError {
func TerminalErrorPayload(classified ClassifiedError) *codersdk.ChatError {
if classified.Message == "" {
return nil
}
return &codersdk.ChatStreamError{
return &codersdk.ChatError{
Message: classified.Message,
Detail: classified.Detail,
Kind: classified.Kind,
+9 -9
View File
@@ -11,16 +11,16 @@ import (
"github.com/coder/coder/v2/codersdk"
)
func TestStreamErrorPayloadUsesNormalizedClassification(t *testing.T) {
func TestTerminalErrorPayloadUsesNormalizedClassification(t *testing.T) {
t.Parallel()
classified := chaterror.Classify(
xerrors.New("azure openai received status 429 from upstream"),
)
payload := chaterror.StreamErrorPayload(classified)
payload := chaterror.TerminalErrorPayload(classified)
require.Equal(t, &codersdk.ChatStreamError{
Message: "Azure OpenAI is rate limiting requests (HTTP 429).",
require.Equal(t, &codersdk.ChatError{
Message: "Azure OpenAI is rate limiting requests.",
Kind: chaterror.KindRateLimit,
Provider: "azure",
Retryable: true,
@@ -28,10 +28,10 @@ func TestStreamErrorPayloadUsesNormalizedClassification(t *testing.T) {
}, payload)
}
func TestStreamErrorPayloadIncludesProviderDetail(t *testing.T) {
func TestTerminalErrorPayloadIncludesProviderDetail(t *testing.T) {
t.Parallel()
payload := chaterror.StreamErrorPayload(chaterror.Classify(testProviderError(
payload := chaterror.TerminalErrorPayload(chaterror.Classify(testProviderError(
"",
400,
nil,
@@ -41,10 +41,10 @@ func TestStreamErrorPayloadIncludesProviderDetail(t *testing.T) {
require.Equal(t, "Image exceeds 5 MB maximum.", payload.Detail)
}
func TestStreamErrorPayloadNilForEmptyClassification(t *testing.T) {
func TestTerminalErrorPayloadNilForEmptyClassification(t *testing.T) {
t.Parallel()
require.Nil(t, chaterror.StreamErrorPayload(chaterror.ClassifiedError{}))
require.Nil(t, chaterror.TerminalErrorPayload(chaterror.ClassifiedError{}))
}
func TestStreamRetryPayloadUsesNormalizedClassification(t *testing.T) {
@@ -53,7 +53,7 @@ func TestStreamRetryPayloadUsesNormalizedClassification(t *testing.T) {
delay := 3 * time.Second
startedAt := time.Now()
payload := chaterror.StreamRetryPayload(2, delay, chaterror.ClassifiedError{
Message: "OpenAI returned an unexpected error (HTTP 503).",
Message: "OpenAI returned an unexpected error.",
Kind: chaterror.KindGeneric,
Provider: "openai",
Retryable: true,
+1 -1
View File
@@ -576,7 +576,7 @@ func TestRun_OnRetryEnrichesProvider(t *testing.T) {
require.Equal(t, 429, records[0].classified.StatusCode)
require.Equal(
t,
"OpenAI is rate limiting requests (HTTP 429).",
"OpenAI is rate limiting requests.",
records[0].classified.Message,
)
}
+1 -1
View File
@@ -493,7 +493,7 @@ func requireResponsesChatWaiting(
chat, err := db.GetChatByID(ctx, chatID)
require.NoError(t, err)
if chat.Status == database.ChatStatusError {
require.FailNowf(t, "chat failed", "last_error=%q", chat.LastError.String)
require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chat.LastError))
}
require.Equal(t, database.ChatStatusWaiting, chat.Status)
}
+7 -2
View File
@@ -2,7 +2,6 @@ package chatd
import (
"context"
"database/sql"
"encoding/json"
"sync"
"testing"
@@ -22,6 +21,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/pubsub"
coderdpubsub "github.com/coder/coder/v2/coderd/pubsub"
"github.com/coder/coder/v2/coderd/x/chatd/chaterror"
"github.com/coder/coder/v2/coderd/x/chatd/chatloop"
"github.com/coder/coder/v2/coderd/x/chatd/chatprompt"
"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
@@ -2717,7 +2717,12 @@ func setChatStatus(
Status: status,
}
if lastError != "" {
params.LastError = sql.NullString{String: lastError, Valid: true}
encodedLastError, err := json.Marshal(codersdk.ChatError{
Message: lastError,
Kind: chaterror.KindGeneric,
})
require.NoError(t, err)
params.LastError = pqtype.NullRawMessage{RawMessage: encodedLastError, Valid: true}
}
_, err := db.UpdateChatStatus(ctx, params)
require.NoError(t, err)
+6 -5
View File
@@ -109,7 +109,7 @@ type Chat struct {
Title string `json:"title"`
Status ChatStatus `json:"status"`
PlanMode ChatPlanMode `json:"plan_mode,omitempty"`
LastError *string `json:"last_error"`
LastError *ChatError `json:"last_error,omitempty"`
DiffStatus *ChatDiffStatus `json:"diff_status,omitempty"`
CreatedAt time.Time `json:"created_at" format:"date-time"`
UpdatedAt time.Time `json:"updated_at" format:"date-time"`
@@ -1425,8 +1425,9 @@ type ChatStreamStatus struct {
Status ChatStatus `json:"status"`
}
// ChatStreamError represents an error event in the stream.
type ChatStreamError struct {
// ChatError represents a terminal chat error in persisted chat state or the
// live stream.
type ChatError struct {
// Message is the normalized, user-facing error message.
Message string `json:"message"`
// Detail is optional provider-specific context shown alongside the
@@ -1574,7 +1575,7 @@ type ChatStreamEvent struct {
Message *ChatMessage `json:"message,omitempty"`
MessagePart *ChatStreamMessagePart `json:"message_part,omitempty"`
Status *ChatStreamStatus `json:"status,omitempty"`
Error *ChatStreamError `json:"error,omitempty"`
Error *ChatError `json:"error,omitempty"`
Retry *ChatStreamRetry `json:"retry,omitempty"`
QueuedMessages []ChatQueuedMessage `json:"queued_messages,omitempty"`
ActionRequired *ChatStreamActionRequired `json:"action_required,omitempty"`
@@ -2651,7 +2652,7 @@ func (c *ExperimentalClient) StreamChat(ctx context.Context, chatID uuid.UUID, o
}
_ = send(ChatStreamEvent{
Type: ChatStreamEventTypeError,
Error: &ChatStreamError{
Error: &ChatError{
Message: fmt.Sprintf("read chat stream: %v", err),
},
})
+9 -2
View File
@@ -447,7 +447,14 @@ func TestChat_JSONRoundTrip(t *testing.T) {
reviewerCount := int32(2)
refreshedAt := now
staleAt := now.Add(time.Hour)
lastError := "boom"
lastError := &codersdk.ChatError{
Message: "boom",
Detail: "provider detail",
Kind: "generic",
Provider: "openai",
Retryable: true,
StatusCode: 503,
}
prURL := "https://github.com/coder/coder/pull/42"
workspaceID := uuid.New()
buildID := uuid.New()
@@ -466,7 +473,7 @@ func TestChat_JSONRoundTrip(t *testing.T) {
LastModelConfigID: uuid.New(),
Title: "round-trip-test",
Status: codersdk.ChatStatusRunning,
LastError: &lastError,
LastError: lastError,
CreatedAt: now,
UpdatedAt: now,
Archived: true,
+27 -1
View File
@@ -48,7 +48,6 @@ The response is the newly created `Chat` object:
"last_model_config_id": "...",
"title": "hello world",
"status": "waiting",
"last_error": null,
"diff_status": null,
"created_at": "2025-07-17T00:00:00Z",
"updated_at": "2025-07-17T00:00:00Z",
@@ -61,6 +60,33 @@ The response is the newly created `Chat` object:
}
```
If a chat later ends in error, the same `Chat` shape includes a structured
`last_error` object. For brevity, unchanged nullable IDs are omitted here:
```json
{
"id": "a1b2c3d4-...",
"title": "hello world",
"status": "error",
"last_error": {
"message": "Azure OpenAI is rate limiting requests.",
"kind": "rate_limit",
"provider": "azure",
"retryable": true,
"status_code": 429,
"detail": "Retry after 30 seconds."
},
"created_at": "2025-07-17T00:00:00Z",
"updated_at": "2025-07-17T00:00:30Z",
"archived": false,
"pin_order": 0,
"mcp_server_ids": [],
"labels": {},
"has_unread": false,
"client_type": "api"
}
```
The agent begins processing the prompt asynchronously. Use the
[stream endpoint](#stream-updates) to follow its progress.
+1 -1
View File
@@ -394,7 +394,7 @@ func NewMultiReplicaSubscribeFn(
case mergedEvents <- codersdk.ChatStreamEvent{
Type: codersdk.ChatStreamEventTypeError,
ChatID: chatID,
Error: &codersdk.ChatStreamError{Message: msg},
Error: &codersdk.ChatError{Message: msg},
}:
case <-ctx.Done():
}
+15 -2
View File
@@ -13,6 +13,7 @@ import (
"time"
"github.com/google/uuid"
"github.com/sqlc-dev/pqtype"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"
@@ -30,6 +31,18 @@ import (
"github.com/coder/quartz"
)
func chatLastErrorMessage(raw pqtype.NullRawMessage) string {
if !raw.Valid {
return ""
}
var payload codersdk.ChatError
if err := json.Unmarshal(raw.RawMessage, &payload); err == nil && payload.Message != "" {
return payload.Message
}
return string(raw.RawMessage)
}
func newTestServer(
t *testing.T,
db database.Store,
@@ -1712,14 +1725,14 @@ waitForStream:
currentChat, dbErr := db.GetChatByID(ctx, chat.ID)
if dbErr == nil && currentChat.Status == database.ChatStatusError {
t.Fatalf("worker failed to process chat: status=%s last_error=%s",
currentChat.Status, currentChat.LastError.String)
currentChat.Status, chatLastErrorMessage(currentChat.LastError))
}
case <-ctx.Done():
// Dump the final chat status for debugging.
currentChat, dbErr := db.GetChatByID(context.Background(), chat.ID)
if dbErr == nil {
t.Fatalf("timed out waiting for worker to start streaming (chat status=%s, last_error=%q)",
currentChat.Status, currentChat.LastError.String)
currentChat.Status, chatLastErrorMessage(currentChat.LastError))
}
t.Fatal("timed out waiting for worker to start streaming")
}
-1
View File
@@ -112,7 +112,6 @@ const makeChat = (
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
...overrides,
});
+35 -34
View File
@@ -1286,7 +1286,7 @@ export interface Chat {
readonly title: string;
readonly status: ChatStatus;
readonly plan_mode?: ChatPlanMode;
readonly last_error: string | null;
readonly last_error?: ChatError;
readonly diff_status?: ChatDiffStatus;
readonly created_at: string;
readonly updated_at: string;
@@ -1679,6 +1679,39 @@ export interface ChatDiffStatus {
readonly stale_at?: string;
}
// From codersdk/chats.go
/**
* ChatError represents a terminal chat error in persisted chat state or the
* live stream.
*/
export interface ChatError {
/**
* Message is the normalized, user-facing error message.
*/
readonly message: string;
/**
* Detail is optional provider-specific context shown alongside the
* normalized error message when available.
*/
readonly detail?: string;
/**
* Kind classifies the error for consistent client rendering.
*/
readonly kind?: string;
/**
* Provider identifies the upstream model provider when known.
*/
readonly provider?: string;
/**
* Retryable reports whether the underlying error is transient.
*/
readonly retryable: boolean;
/**
* StatusCode is the best-effort upstream HTTP status code.
*/
readonly status_code?: number;
}
// From codersdk/chats.go
/**
* ChatFileMetadata contains lightweight metadata about a file
@@ -2381,38 +2414,6 @@ export interface ChatStreamActionRequired {
readonly tool_calls: readonly ChatStreamToolCall[];
}
// From codersdk/chats.go
/**
* ChatStreamError represents an error event in the stream.
*/
export interface ChatStreamError {
/**
* Message is the normalized, user-facing error message.
*/
readonly message: string;
/**
* Detail is optional provider-specific context shown alongside the
* normalized error message when available.
*/
readonly detail?: string;
/**
* Kind classifies the error for consistent client rendering.
*/
readonly kind?: string;
/**
* Provider identifies the upstream model provider when known.
*/
readonly provider?: string;
/**
* Retryable reports whether the underlying error is transient.
*/
readonly retryable: boolean;
/**
* StatusCode is the best-effort upstream HTTP status code.
*/
readonly status_code?: number;
}
// From codersdk/chats.go
/**
* ChatStreamEvent represents a real-time update for chat streaming.
@@ -2423,7 +2424,7 @@ export interface ChatStreamEvent {
readonly message?: ChatMessage;
readonly message_part?: ChatStreamMessagePart;
readonly status?: ChatStreamStatus;
readonly error?: ChatStreamError;
readonly error?: ChatError;
readonly retry?: ChatStreamRetry;
readonly queued_messages?: readonly ChatQueuedMessage[];
readonly action_required?: ChatStreamActionRequired;
@@ -137,7 +137,6 @@ const baseChatFields = {
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
} as const;
@@ -1188,6 +1187,42 @@ export const ArchivedOtherUserChat: Story = {
},
};
/** Persisted structured errors rehydrate the failed callout after refresh. */
export const PersistedStructuredError: Story = {
parameters: {
queries: buildQueries(
{
id: CHAT_ID,
...baseChatFields,
title: "Persisted provider error",
status: "error",
last_error: {
message: "Anthropic returned an unexpected error.",
detail:
"messages.0.content.1.image.source.base64: image exceeds 5 MB maximum.",
kind: "generic",
provider: "anthropic",
retryable: false,
status_code: 400,
},
},
{ messages: [], queued_messages: [], has_more: false },
{ diffUrl: undefined },
),
},
play: async ({ canvasElement }) => {
const canvas = within(canvasElement);
expect(
canvas.getByRole("heading", { name: /request failed/i }),
).toBeVisible();
expect(
canvas.getByText(/anthropic returned an unexpected error\./i),
).toBeVisible();
expect(canvas.getByText(/^HTTP 400$/)).toBeVisible();
expect(canvas.getByText(/image exceeds 5 mb maximum/i)).toBeVisible();
},
};
export const PlanModeFromChatState: Story = {
parameters: {
queries: buildQueries(
+5 -7
View File
@@ -57,6 +57,7 @@ import {
} from "./AgentChatPageView";
import type { AgentsOutletContext } from "./AgentsPage";
import type { ChatMessageInputRef } from "./components/AgentChatInput";
import { normalizeChatErrorPayload } from "./components/ChatConversation/chatError";
import {
getParentChatID,
getWorkspaceAgent,
@@ -547,16 +548,13 @@ const getPersistedDetailError = ({
if (cachedError?.kind === "usage_limit") {
return cachedError;
}
if (chatStatus === "error") {
if (chatStatus !== "error") {
return undefined;
}
if (cachedError) {
return cachedError;
}
const lastError = chatRecord?.last_error?.trim();
if (lastError) {
return { kind: "generic", message: lastError };
}
}
return undefined;
return normalizeChatErrorPayload(chatRecord?.last_error);
};
/**
@@ -62,7 +62,6 @@ const buildChat = (overrides: Partial<TypesGen.Chat> = {}): TypesGen.Chat => ({
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
...overrides,
});
@@ -295,7 +294,7 @@ export const WithError: Story = {
<StoryAgentChatPageView
persistedError={{
kind: "overloaded",
message: "Anthropic is temporarily overloaded (HTTP 529).",
message: "Anthropic is temporarily overloaded.",
provider: "anthropic",
retryable: true,
statusCode: 529,
@@ -308,8 +307,9 @@ export const WithError: Story = {
canvas.getByRole("heading", { name: /service overloaded/i }),
).toBeVisible();
expect(
canvas.getByText(/anthropic is temporarily overloaded \(http 529\)/i),
canvas.getByText(/anthropic is temporarily overloaded\./i),
).toBeVisible();
expect(canvas.getByText(/^HTTP 529$/)).toBeVisible();
expect(canvas.queryByText(/please try again/i)).not.toBeInTheDocument();
expect(canvas.queryByText(/^retryable$/i)).not.toBeInTheDocument();
},
@@ -145,7 +145,6 @@ const buildChat = (overrides: Partial<Chat> = {}): Chat => ({
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
...overrides,
});
@@ -426,7 +425,11 @@ export const WithChatList: Story = {
id: "chat-3",
title: "Fix database migration issue",
status: "error",
last_error: "Connection timeout",
last_error: {
message: "Connection timeout",
kind: "generic",
retryable: false,
},
updated_at: todayTimestamp,
}),
buildChat({
@@ -133,6 +133,9 @@ const StatusAlert: FC<{ status: RetryOrFailedStatus }> = ({ status }) => {
if (status.phase === "retrying") {
metadataItems.push(<span key="attempt">Attempt {status.attempt}</span>);
}
if (status.phase === "failed" && status.statusCode != null) {
metadataItems.push(<span key="code">HTTP {status.statusCode}</span>);
}
return (
<Alert
@@ -81,7 +81,7 @@ export const TerminalOverloadedError: Story = {
liveStatus: buildLiveStatus({
persistedError: {
kind: "overloaded",
message: "Anthropic is temporarily overloaded (HTTP 529).",
message: "Anthropic is temporarily overloaded.",
provider: "anthropic",
retryable: true,
statusCode: 529,
@@ -94,8 +94,9 @@ export const TerminalOverloadedError: Story = {
canvas.getByRole("heading", { name: /service overloaded/i }),
).toBeVisible();
expect(
canvas.getByText(/anthropic is temporarily overloaded \(http 529\)/i),
canvas.getByText(/anthropic is temporarily overloaded\./i),
).toBeVisible();
expect(canvas.getByText(/^HTTP 529$/)).toBeVisible();
expect(canvas.queryByText(/please try again/i)).not.toBeInTheDocument();
expect(canvas.queryByText(/^retryable$/i)).not.toBeInTheDocument();
expect(canvas.getByRole("link", { name: /status/i })).toBeVisible();
@@ -254,7 +255,7 @@ export const GenericErrorShowsProviderDetail: Story = {
liveStatus: buildLiveStatus({
streamError: {
kind: "generic",
message: "Anthropic returned an unexpected error (HTTP 400).",
message: "Anthropic returned an unexpected error.",
detail:
"messages.0.content.1.image.source.base64: image exceeds 5 MB maximum.",
provider: "anthropic",
@@ -269,8 +270,9 @@ export const GenericErrorShowsProviderDetail: Story = {
canvas.getByRole("heading", { name: /request failed/i }),
).toBeVisible();
expect(
canvas.getByText(/anthropic returned an unexpected error \(http 400\)/i),
canvas.getByText(/anthropic returned an unexpected error\./i),
).toBeVisible();
expect(canvas.getByText(/^HTTP 400$/)).toBeVisible();
expect(canvas.getByText(/image exceeds 5 mb maximum/i)).toBeVisible();
},
};
@@ -0,0 +1,24 @@
import type * as TypesGen from "#/api/typesGenerated";
import type { ChatDetailError } from "../../utils/usageLimitMessage";
export const normalizeChatErrorPayload = (
error: TypesGen.ChatError | undefined,
): ChatDetailError | undefined => {
const message = error?.message?.trim();
if (!message) {
return undefined;
}
const detail = error?.detail?.trim();
const statusCode =
typeof error?.status_code === "number" && error.status_code > 0
? error.status_code
: undefined;
return {
message,
kind: error?.kind?.trim() || "generic",
provider: error?.provider?.trim() || undefined,
retryable: error?.retryable,
statusCode,
...(detail ? { detail } : {}),
};
};
@@ -218,7 +218,6 @@ const makeChat = (chatID: string): TypesGen.Chat => ({
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
});
@@ -1805,9 +1804,6 @@ describe("useChatStore", () => {
expect(result.current.streamError).toEqual({
kind: "generic",
message: "Chat processing failed.",
provider: undefined,
retryable: false,
statusCode: undefined,
});
});
});
@@ -12,6 +12,7 @@ import type * as TypesGen from "#/api/typesGenerated";
import type { OneWayMessageEvent } from "#/utils/OneWayWebSocket";
import { createReconnectingWebSocket } from "#/utils/reconnectingWebSocket";
import type { ChatDetailError } from "../../utils/usageLimitMessage";
import { normalizeChatErrorPayload } from "./chatError";
import {
type ChatStore,
type ChatStoreState,
@@ -22,20 +23,6 @@ import {
} from "./chatStore";
import type { RetryState } from "./types";
const normalizeChatDetailError = (
error: TypesGen.ChatStreamError | undefined,
): ChatDetailError => {
const detail = error?.detail?.trim();
return {
message: error?.message.trim() || "Chat processing failed.",
kind: error?.kind?.trim() || "generic",
provider: error?.provider?.trim() || undefined,
retryable: error?.retryable,
statusCode: error?.status_code,
...(detail ? { detail } : {}),
};
};
const normalizeRetryState = (retry: TypesGen.ChatStreamRetry): RetryState => ({
attempt: Math.max(1, retry.attempt),
error: retry.error.trim() || "Retrying request shortly.",
@@ -527,7 +514,10 @@ export const useChatStore = (
if (streamEvent.chat_id && streamEvent.chat_id !== chatID) {
continue;
}
const reason = normalizeChatDetailError(streamEvent.error);
const reason = normalizeChatErrorPayload(streamEvent.error) ?? {
kind: "generic",
message: "Chat processing failed.",
};
store.setChatStatus("error");
store.setStreamError(reason);
store.clearRetryState();
@@ -65,7 +65,6 @@ export const WithParentChat: Story = {
labels: {},
title: "Set up CI/CD pipeline",
status: "completed",
last_error: null,
created_at: "2026-02-18T00:00:00.000Z",
updated_at: "2026-02-18T00:00:00.000Z",
archived: false,
@@ -69,7 +69,6 @@ const buildChat = (overrides: Partial<Chat> = {}): Chat => ({
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
children: [],
...overrides,
});
@@ -66,7 +66,6 @@ const buildChat = (overrides: Partial<Chat> = {}): Chat => ({
pin_order: 0,
has_unread: false,
client_type: "ui",
last_error: null,
mcp_server_ids: [],
labels: {},
children: [],
@@ -503,7 +503,7 @@ const ChatTreeNode: FC<ChatTreeNodeProps> = ({ chat, isChildNode }) => {
);
const errorReason =
chat.status === "error"
? chatErrorReasons[chat.id] || chat.last_error || undefined
? chatErrorReasons[chat.id] || chat.last_error?.message || undefined
: undefined;
const subtitle = errorReason || modelName;
const diffStatus = getChatDiffStatus(chat);