diff --git a/cli/agents_chat.go b/cli/agents_chat.go index f3bde23257..8211659003 100644 --- a/cli/agents_chat.go +++ b/cli/agents_chat.go @@ -1287,7 +1287,7 @@ func (m chatViewModel) handleStreamEvent(event codersdk.ChatStreamEvent) (chatVi chatID: m.activeChatID, event: codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, - Error: &codersdk.ChatStreamError{ + Error: &codersdk.ChatError{ Message: fmt.Sprintf( "failed to parse ask_user_question: %v", err, diff --git a/cli/agents_list.go b/cli/agents_list.go index d6ea6a1d5f..1d9912365c 100644 --- a/cli/agents_list.go +++ b/cli/agents_list.go @@ -78,7 +78,7 @@ func (m chatListModel) filteredChats() []codersdk.Chat { filtered = append(filtered, chat) continue } - if chat.LastError != nil && strings.Contains(strings.ToLower(*chat.LastError), query) { + if chat.LastError != nil && strings.Contains(strings.ToLower(chat.LastError.Message), query) { filtered = append(filtered, chat) } } @@ -445,13 +445,14 @@ func (m chatListModel) View() string { rowText := fmt.Sprintf("%s%s %s %s%s", rowPrefix, rowStyle.Render(title), status, m.styles.dimmedText.Render(timeAgo(row.chat.UpdatedAt)), extra) lines = append(lines, rowText) - if row.chat.Status == codersdk.ChatStatusError && row.chat.LastError != nil { + if row.chat.Status == codersdk.ChatStatusError && row.chat.LastError != nil && row.chat.LastError.Message != "" { + lastError := row.chat.LastError.Message errWidth := max(m.width-4, 20) errPrefix := " " if row.depth > 0 { errPrefix += strings.Repeat(" ", row.depth) } - lines = append(lines, errPrefix+m.styles.dimmedText.Render(m.styles.truncate(sanitizeTerminalRenderableText(*row.chat.LastError), errWidth))) + lines = append(lines, errPrefix+m.styles.dimmedText.Render(m.styles.truncate(sanitizeTerminalRenderableText(lastError), errWidth))) } } diff --git a/cli/agents_stream_test.go b/cli/agents_stream_test.go index 95826db474..169e5118a0 100644 --- a/cli/agents_stream_test.go +++ b/cli/agents_stream_test.go @@ -116,7 +116,7 @@ func TestConsumeChatStreamText(t *testing.T) { {Type: codersdk.ChatStreamEventTypeMessage, Message: &codersdk.ChatMessage{ID: 1, ChatID: uuid.New(), Role: codersdk.ChatMessageRoleAssistant, Content: []codersdk.ChatMessagePart{{Type: codersdk.ChatMessagePartTypeText, Text: "Hello world"}}}}, {Type: codersdk.ChatStreamEventTypeStatus, Status: &codersdk.ChatStreamStatus{Status: codersdk.ChatStatusRunning}}, {Type: codersdk.ChatStreamEventTypeRetry, Retry: &codersdk.ChatStreamRetry{Attempt: 2, Error: "rate limited"}}, - {Type: codersdk.ChatStreamEventTypeError, Error: &codersdk.ChatStreamError{Message: "boom"}}, + {Type: codersdk.ChatStreamEventTypeError, Error: &codersdk.ChatError{Message: "boom"}}, } { events <- event } diff --git a/cli/agents_test.go b/cli/agents_test.go index cc77da20a0..8d08145f2c 100644 --- a/cli/agents_test.go +++ b/cli/agents_test.go @@ -1070,7 +1070,7 @@ func TestAgents(t *testing.T) { t.Parallel() updated, cmd := applyStream(newTestChatViewModel(nil), codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, - Error: &codersdk.ChatStreamError{Message: "stream blew up"}, + Error: &codersdk.ChatError{Message: "stream blew up"}, }) require.Nil(t, cmd) require.Equal(t, "stream error: stream blew up", updated.err.Error()) diff --git a/coderd/database/db2sdk/db2sdk.go b/coderd/database/db2sdk/db2sdk.go index 77b6bd0867..94eae63b69 100644 --- a/coderd/database/db2sdk/db2sdk.go +++ b/coderd/database/db2sdk/db2sdk.go @@ -28,6 +28,7 @@ import ( "github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/coderd/util/slice" "github.com/coder/coder/v2/coderd/workspaceapps/appurl" + "github.com/coder/coder/v2/coderd/x/chatd/chaterror" "github.com/coder/coder/v2/coderd/x/chatd/chatprompt" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/provisionersdk/proto" @@ -1607,6 +1608,34 @@ func nullTimePtr(v sql.NullTime) *time.Time { return &value } +const fallbackChatLastErrorMessage = "The chat request failed unexpectedly." + +func decodeChatLastError(raw pqtype.NullRawMessage) *codersdk.ChatError { + if !raw.Valid { + return nil + } + + var payload codersdk.ChatError + if err := json.Unmarshal(raw.RawMessage, &payload); err != nil { + return &codersdk.ChatError{ + Message: fallbackChatLastErrorMessage, + Kind: chaterror.KindGeneric, + } + } + + payload.Message = strings.TrimSpace(payload.Message) + payload.Detail = strings.TrimSpace(payload.Detail) + payload.Kind = strings.TrimSpace(payload.Kind) + payload.Provider = strings.TrimSpace(payload.Provider) + if payload.Kind == "" { + payload.Kind = chaterror.KindGeneric + } + if payload.Message == "" { + payload.Message = fallbackChatLastErrorMessage + } + return &payload +} + // Chat converts a database.Chat to a codersdk.Chat. It coalesces // nil slices and maps to empty values for JSON serialization and // derives RootChatID from the parent chain when not explicitly set. @@ -1622,6 +1651,7 @@ func Chat(c database.Chat, diffStatus *database.ChatDiffStatus, files []database if labels == nil { labels = map[string]string{} } + lastError := decodeChatLastError(c.LastError) chat := codersdk.Chat{ ID: c.ID, OrganizationID: c.OrganizationID, @@ -1636,9 +1666,7 @@ func Chat(c database.Chat, diffStatus *database.ChatDiffStatus, files []database MCPServerIDs: mcpServerIDs, Labels: labels, ClientType: codersdk.ChatClientType(c.ClientType), - } - if c.LastError.Valid { - chat.LastError = &c.LastError.String + LastError: lastError, } if c.PlanMode.Valid { chat.PlanMode = codersdk.ChatPlanMode(c.PlanMode.ChatPlanMode) diff --git a/coderd/database/db2sdk/db2sdk_test.go b/coderd/database/db2sdk/db2sdk_test.go index acdb43b03e..41a0d0e3d5 100644 --- a/coderd/database/db2sdk/db2sdk_test.go +++ b/coderd/database/db2sdk/db2sdk_test.go @@ -19,6 +19,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/coderd/x/chatd/chaterror" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/provisionersdk/proto" ) @@ -916,6 +917,17 @@ func TestChat_AllFieldsPopulated(t *testing.T) { // field to codersdk.Chat, this test will fail until the // converter is updated. now := dbtime.Now() + lastErrorPayload := codersdk.ChatError{ + Message: "boom", + Detail: "provider detail", + Kind: chaterror.KindGeneric, + Provider: "openai", + Retryable: true, + StatusCode: 503, + } + lastErrorRaw, err := json.Marshal(lastErrorPayload) + require.NoError(t, err) + input := database.Chat{ ID: uuid.New(), OwnerID: uuid.New(), @@ -929,7 +941,7 @@ func TestChat_AllFieldsPopulated(t *testing.T) { Title: "all-fields-test", Status: database.ChatStatusRunning, ClientType: database.ChatClientTypeUi, - LastError: sql.NullString{String: "boom", Valid: true}, + LastError: pqtype.NullRawMessage{RawMessage: lastErrorRaw, Valid: true}, CreatedAt: now, UpdatedAt: now, Archived: true, @@ -970,6 +982,8 @@ func TestChat_AllFieldsPopulated(t *testing.T) { got := db2sdk.Chat(input, diffStatus, fileRows) + require.Equal(t, &lastErrorPayload, got.LastError) + v := reflect.ValueOf(got) typ := v.Type() // HasUnread is populated by ChatRowsWithChildren (which joins the @@ -1053,6 +1067,84 @@ func TestChat_NilFilesOmitted(t *testing.T) { require.Empty(t, result.Files) } +func TestChat_LastErrorFallback(t *testing.T) { + t.Parallel() + + const fallbackMessage = "The chat request failed unexpectedly." + + tests := []struct { + name string + raw json.RawMessage + expectPayload *codersdk.ChatError + }{ + { + name: "MalformedJSON", + raw: json.RawMessage(`{`), + expectPayload: &codersdk.ChatError{ + Message: fallbackMessage, + Kind: chaterror.KindGeneric, + Retryable: false, + }, + }, + { + name: "MessageMissingPreservesMetadata", + raw: json.RawMessage(`{"kind":"timeout","provider":"openai","status_code":504}`), + expectPayload: &codersdk.ChatError{ + Message: fallbackMessage, + Kind: "timeout", + Provider: "openai", + Retryable: false, + StatusCode: 504, + }, + }, + { + name: "WhitespaceMessageDefaultsKind", + raw: json.RawMessage(`{"message":" ","provider":"openai"}`), + expectPayload: &codersdk.ChatError{ + Message: fallbackMessage, + Kind: chaterror.KindGeneric, + Provider: "openai", + Retryable: false, + }, + }, + { + name: "KindMissingDefaultsGeneric", + raw: json.RawMessage(`{"message":"OpenAI returned an unexpected error.","provider":"openai","status_code":502}`), + expectPayload: &codersdk.ChatError{ + Message: "OpenAI returned an unexpected error.", + Kind: chaterror.KindGeneric, + Provider: "openai", + Retryable: false, + StatusCode: 502, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + chat := database.Chat{ + ID: uuid.New(), + OwnerID: uuid.New(), + LastModelConfigID: uuid.New(), + Title: "fallback payload", + Status: database.ChatStatusError, + CreatedAt: dbtime.Now(), + UpdatedAt: dbtime.Now(), + LastError: pqtype.NullRawMessage{ + RawMessage: tc.raw, + Valid: true, + }, + } + + result := db2sdk.Chat(chat, nil, nil) + require.Equal(t, tc.expectPayload, result.LastError) + }) + } +} + func TestChat_MultipleFiles(t *testing.T) { t.Parallel() diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 99109c9486..8b5162ba43 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -1438,7 +1438,7 @@ CREATE TABLE chats ( root_chat_id uuid, last_model_config_id uuid NOT NULL, archived boolean DEFAULT false NOT NULL, - last_error text, + last_error jsonb, mode chat_mode, mcp_server_ids uuid[] DEFAULT '{}'::uuid[] NOT NULL, labels jsonb DEFAULT '{}'::jsonb NOT NULL, diff --git a/coderd/database/migrations/000485_chat_last_error_jsonb.down.sql b/coderd/database/migrations/000485_chat_last_error_jsonb.down.sql new file mode 100644 index 0000000000..f3a565a331 --- /dev/null +++ b/coderd/database/migrations/000485_chat_last_error_jsonb.down.sql @@ -0,0 +1,3 @@ +ALTER TABLE chats + ALTER COLUMN last_error TYPE text + USING last_error ->> 'message'; diff --git a/coderd/database/migrations/000485_chat_last_error_jsonb.up.sql b/coderd/database/migrations/000485_chat_last_error_jsonb.up.sql new file mode 100644 index 0000000000..7ab895c8b7 --- /dev/null +++ b/coderd/database/migrations/000485_chat_last_error_jsonb.up.sql @@ -0,0 +1,9 @@ +ALTER TABLE chats + ALTER COLUMN last_error TYPE jsonb + USING CASE + WHEN last_error IS NULL THEN NULL + ELSE jsonb_build_object( + 'message', last_error, + 'kind', 'generic' + ) + END; diff --git a/coderd/database/migrations/testdata/fixtures/000424_chat_last_error.up.sql b/coderd/database/migrations/testdata/fixtures/000424_chat_last_error.up.sql new file mode 100644 index 0000000000..1feeacebc7 --- /dev/null +++ b/coderd/database/migrations/testdata/fixtures/000424_chat_last_error.up.sql @@ -0,0 +1,27 @@ +-- Migration 424 adds chats.last_error as text. Seed one existing fixture +-- chat with a legacy plain-text error so migration 485 has a non-null row +-- to backfill, and add a second chat that leaves last_error NULL so the +-- migration fixture can assert both branches of the CASE expression. +UPDATE chats +SET last_error = 'Legacy provider failure' +WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96'; + +INSERT INTO chats ( + id, + owner_id, + last_model_config_id, + title, + status, + created_at, + updated_at +) +SELECT + '5a4ac6a3-9dc5-440f-ae6b-5805e477bc59', + owner_id, + last_model_config_id, + 'Fixture Chat With Null Error', + 'waiting', + '2024-01-01 00:00:00+00', + '2024-01-01 00:00:00+00' +FROM chats +WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96'; diff --git a/coderd/database/migrations/testdata/fixtures/000485_chat_last_error_jsonb.up.sql b/coderd/database/migrations/testdata/fixtures/000485_chat_last_error_jsonb.up.sql new file mode 100644 index 0000000000..d7d86cf17c --- /dev/null +++ b/coderd/database/migrations/testdata/fixtures/000485_chat_last_error_jsonb.up.sql @@ -0,0 +1,28 @@ +-- Migration 485 retypes chats.last_error to jsonb and backfills legacy +-- text rows into the structured persisted payload shape. +DO $$ +DECLARE + payload jsonb; +BEGIN + SELECT last_error INTO STRICT payload + FROM chats + WHERE id = '72c0438a-18eb-4688-ab80-e4c6a126ef96'; + + IF payload ->> 'message' <> 'Legacy provider failure' THEN + RAISE EXCEPTION 'expected migrated last_error message, got %', + payload ->> 'message'; + END IF; + + IF payload ->> 'kind' <> 'generic' THEN + RAISE EXCEPTION 'expected migrated last_error kind, got %', + payload ->> 'kind'; + END IF; + + PERFORM 1 + FROM chats + WHERE id = '5a4ac6a3-9dc5-440f-ae6b-5805e477bc59' + AND last_error IS NULL; + IF NOT FOUND THEN + RAISE EXCEPTION 'expected null last_error row to remain NULL after migration'; + END IF; +END $$; diff --git a/coderd/database/models.go b/coderd/database/models.go index 65d3cfb2b4..65e6d5a142 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -4367,7 +4367,7 @@ type Chat struct { RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"` LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"` Archived bool `db:"archived" json:"archived"` - LastError sql.NullString `db:"last_error" json:"last_error"` + LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"` Mode NullChatMode `db:"mode" json:"mode"` MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"` Labels StringMap `db:"labels" json:"labels"` diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index d4f2feb71a..120976f3c8 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -5400,7 +5400,7 @@ type AutoArchiveInactiveChatsRow struct { RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"` LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"` Archived bool `db:"archived" json:"archived"` - LastError sql.NullString `db:"last_error" json:"last_error"` + LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"` Mode NullChatMode `db:"mode" json:"mode"` MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"` Labels json.RawMessage `db:"labels" json:"labels"` @@ -8701,7 +8701,7 @@ SET worker_id = $2::uuid, started_at = $3::timestamptz, heartbeat_at = $4::timestamptz, - last_error = $5::text, + last_error = $5::jsonb, updated_at = NOW() WHERE id = $6::uuid @@ -8710,12 +8710,12 @@ RETURNING ` type UpdateChatStatusParams struct { - Status ChatStatus `db:"status" json:"status"` - WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"` - StartedAt sql.NullTime `db:"started_at" json:"started_at"` - HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"` - LastError sql.NullString `db:"last_error" json:"last_error"` - ID uuid.UUID `db:"id" json:"id"` + Status ChatStatus `db:"status" json:"status"` + WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"` + StartedAt sql.NullTime `db:"started_at" json:"started_at"` + HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"` + LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"` + ID uuid.UUID `db:"id" json:"id"` } func (q *sqlQuerier) UpdateChatStatus(ctx context.Context, arg UpdateChatStatusParams) (Chat, error) { @@ -8768,7 +8768,7 @@ SET worker_id = $2::uuid, started_at = $3::timestamptz, heartbeat_at = $4::timestamptz, - last_error = $5::text, + last_error = $5::jsonb, updated_at = $6::timestamptz WHERE id = $7::uuid @@ -8777,13 +8777,13 @@ RETURNING ` type UpdateChatStatusPreserveUpdatedAtParams struct { - Status ChatStatus `db:"status" json:"status"` - WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"` - StartedAt sql.NullTime `db:"started_at" json:"started_at"` - HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"` - LastError sql.NullString `db:"last_error" json:"last_error"` - UpdatedAt time.Time `db:"updated_at" json:"updated_at"` - ID uuid.UUID `db:"id" json:"id"` + Status ChatStatus `db:"status" json:"status"` + WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"` + StartedAt sql.NullTime `db:"started_at" json:"started_at"` + HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"` + LastError pqtype.NullRawMessage `db:"last_error" json:"last_error"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + ID uuid.UUID `db:"id" json:"id"` } func (q *sqlQuerier) UpdateChatStatusPreserveUpdatedAt(ctx context.Context, arg UpdateChatStatusPreserveUpdatedAtParams) (Chat, error) { diff --git a/coderd/database/queries/chats.sql b/coderd/database/queries/chats.sql index 8efde5ae7a..4f3e6935ad 100644 --- a/coderd/database/queries/chats.sql +++ b/coderd/database/queries/chats.sql @@ -718,7 +718,7 @@ SET worker_id = sqlc.narg('worker_id')::uuid, started_at = sqlc.narg('started_at')::timestamptz, heartbeat_at = sqlc.narg('heartbeat_at')::timestamptz, - last_error = sqlc.narg('last_error')::text, + last_error = sqlc.narg('last_error')::jsonb, updated_at = NOW() WHERE id = @id::uuid @@ -733,7 +733,7 @@ SET worker_id = sqlc.narg('worker_id')::uuid, started_at = sqlc.narg('started_at')::timestamptz, heartbeat_at = sqlc.narg('heartbeat_at')::timestamptz, - last_error = sqlc.narg('last_error')::text, + last_error = sqlc.narg('last_error')::jsonb, updated_at = @updated_at::timestamptz WHERE id = @id::uuid diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index 88b93338a9..2069081bd1 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -3252,7 +3252,7 @@ func (api *API) interruptChat(rw http.ResponseWriter, r *http.Request) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) if updateErr != nil { logger.Error(ctx, "failed to mark chat as waiting", slog.Error(updateErr)) diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go index 68ea5f58ab..e3de80e28e 100644 --- a/coderd/exp_chats_test.go +++ b/coderd/exp_chats_test.go @@ -4765,7 +4765,6 @@ func TestPatchChat(t *testing.T) { client := newChatClient(t) firstUser := coderdtest.CreateFirstUser(t, client.Client) _ = createChatModelConfig(t, client) - chat := createChat(ctx, t, client, firstUser.OrganizationID, "boundary baseline") waitChatSettled(ctx, t, client, chat.ID) @@ -5882,7 +5881,7 @@ func TestSendMessageQueuesEffectiveModelConfigID(t *testing.T) { WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -5933,7 +5932,7 @@ func TestQueuedMessageWithoutOverrideCapturesEnqueueTimeModel(t *testing.T) { WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -6059,7 +6058,7 @@ func TestWatchChatsStatusChangeCarriesUpdatedLastModelConfigID(t *testing.T) { WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -6081,7 +6080,7 @@ func TestWatchChatsStatusChangeCarriesUpdatedLastModelConfigID(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -7564,7 +7563,7 @@ func TestRegenerateChatTitle(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -7604,8 +7603,7 @@ func TestRegenerateChatTitle(t *testing.T) { WorkerID: uuid.NullUUID{UUID: uuid.MustParse("00000000-0000-0000-0000-000000000001"), Valid: true}, StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -7646,8 +7644,7 @@ func TestRegenerateChatTitle(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -7711,7 +7708,7 @@ func TestRegenerateChatTitle(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -8237,7 +8234,7 @@ func TestPromoteChatQueuedMessage(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) diff --git a/coderd/pubsub/chatstreamnotify.go b/coderd/pubsub/chatstreamnotify.go index b39ca2cda7..d53605d29c 100644 --- a/coderd/pubsub/chatstreamnotify.go +++ b/coderd/pubsub/chatstreamnotify.go @@ -40,7 +40,7 @@ type ChatStreamNotifyMessage struct { // ErrorPayload carries a structured error event for cross-replica // live delivery. Keep Error for backward compatibility with older // replicas during rolling deploys. - ErrorPayload *codersdk.ChatStreamError `json:"error_payload,omitempty"` + ErrorPayload *codersdk.ChatError `json:"error_payload,omitempty"` // Error is the legacy string-only error payload kept for mixed- // version compatibility during rollout. diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index e3788f265c..adebbc834b 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -1761,7 +1761,7 @@ func (p *Server) EditMessage( WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) if err != nil { return xerrors.Errorf("set chat pending: %w", err) @@ -1849,7 +1849,7 @@ func (p *Server) ArchiveChat(ctx context.Context, chat database.Chat) error { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) if updateErr != nil { return xerrors.Errorf("set archived chat waiting before cleanup: %w", updateErr) @@ -2365,7 +2365,7 @@ func (p *Server) SubmitToolResults( WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }); updateErr != nil { return xerrors.Errorf("update chat status: %w", updateErr) } @@ -3369,7 +3369,7 @@ func (p *Server) setChatWaiting(ctx context.Context, chatID uuid.UUID) (database WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) return updateErr }, nil) @@ -3569,7 +3569,7 @@ func insertUserMessageAndSetPending( WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) if err != nil { return database.ChatMessage{}, database.Chat{}, xerrors.Errorf("set chat pending: %w", err) @@ -3846,7 +3846,7 @@ func (p *Server) processOnce(ctx context.Context) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) if updateErr != nil { p.logger.Error(ctx, "failed to release chat acquired during shutdown", @@ -4364,7 +4364,7 @@ func (p *Server) Subscribe( initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"}, + Error: &codersdk.ChatError{Message: "failed to load initial snapshot"}, }) } else { for _, msg := range messages { @@ -4387,7 +4387,7 @@ func (p *Server) Subscribe( initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"}, + Error: &codersdk.ChatError{Message: "failed to load initial snapshot"}, }) } else if len(queued) > 0 { initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{ @@ -4412,7 +4412,7 @@ func (p *Server) Subscribe( initialSnapshot = append(initialSnapshot, codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{Message: "failed to load initial snapshot"}, + Error: &codersdk.ChatError{Message: "failed to load initial snapshot"}, }) } else { statusEvent := codersdk.ChatStreamEvent{ @@ -4490,7 +4490,7 @@ func (p *Server) Subscribe( case mergedEvents <- codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{ + Error: &codersdk.ChatError{ Message: psErr.Error(), }, }: @@ -4593,7 +4593,7 @@ func (p *Server) Subscribe( case mergedEvents <- codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{ + Error: &codersdk.ChatError{ Message: notify.Error, }, }: @@ -4856,7 +4856,7 @@ func (p *Server) publishRetry(chatID uuid.UUID, payload *codersdk.ChatStreamRetr } func (p *Server) publishError(chatID uuid.UUID, classified chaterror.ClassifiedError) { - payload := chaterror.StreamErrorPayload(classified) + payload := chaterror.TerminalErrorPayload(classified) if payload == nil { return } @@ -4882,6 +4882,17 @@ func processingFailure(err error) (chaterror.ClassifiedError, bool) { return classified, true } +func encodeChatLastErrorPayload(payload *codersdk.ChatError) (pqtype.NullRawMessage, error) { + if payload == nil { + return pqtype.NullRawMessage{}, nil + } + encoded, err := json.Marshal(payload) + if err != nil { + return pqtype.NullRawMessage{}, err + } + return pqtype.NullRawMessage{RawMessage: encoded, Valid: true}, nil +} + func panicFailureReason(recovered any) string { var reason string switch typed := recovered.(type) { @@ -5156,7 +5167,7 @@ func (p *Server) finishActiveChat( logger slog.Logger, chat database.Chat, status database.ChatStatus, - lastError string, + lastError pqtype.NullRawMessage, ) (finishActiveChatResult, error) { result := finishActiveChatResult{} @@ -5204,7 +5215,7 @@ func (p *Server) finishActiveChat( WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{String: lastError, Valid: lastError != ""}, + LastError: lastError, }) return updateErr }, nil) @@ -5330,10 +5341,10 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) { // interrupt processing. close(controlArmed) - // Determine the final status and last error to set when we're done. + // Determine the final status and last error payload to set when we're done. status := database.ChatStatusWaiting wasInterrupted := false - lastError := "" + var lastErrorPayload *codersdk.ChatError generatedTitle := &generatedChatTitle{} runResult := runChatResult{} remainingQueuedMessages := []database.ChatQueuedMessage{} @@ -5349,20 +5360,30 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) { // Handle panics gracefully. if r := recover(); r != nil { logger.Error(cleanupCtx, "panic during chat processing", slog.F("panic", r)) - lastError = panicFailureReason(r) - p.publishError(chat.ID, chaterror.ClassifiedError{ - Message: lastError, + classified := chaterror.ClassifiedError{ + Message: panicFailureReason(r), Kind: chaterror.KindGeneric, - }) + } + lastErrorPayload = chaterror.TerminalErrorPayload(classified) + p.publishError(chat.ID, classified) status = database.ChatStatusError } + encodedLastError, err := encodeChatLastErrorPayload(lastErrorPayload) + if err != nil { + logger.Warn(cleanupCtx, "failed to marshal chat last error payload", + slog.Error(err), + ) + lastErrorPayload = nil + encodedLastError = pqtype.NullRawMessage{} + } + // Check for queued messages and auto-promote the next one. // This must be done atomically with the status update to avoid // races with the promote endpoint (which also sets status to // pending). We use a transaction with FOR UPDATE to ensure we // don't overwrite a status change made by another caller. - finishResult, err := p.finishActiveChat(cleanupCtx, logger, chat, status, lastError) + finishResult, err := p.finishActiveChat(cleanupCtx, logger, chat, status, encodedLastError) if errors.Is(err, errChatTakenByOtherWorker) { // Another worker owns this chat now — skip all // post-TX side effects (status publish, pubsub, @@ -5425,7 +5446,11 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) { p.publishChatActionRequired(finishResult.updatedChat, runResult.PendingDynamicToolCalls) } if !wasInterrupted { - p.maybeSendPushNotification(cleanupCtx, finishResult.updatedChat, status, lastError, runResult, logger) + lastErrorMessage := "" + if lastErrorPayload != nil { + lastErrorMessage = lastErrorPayload.Message + } + p.maybeSendPushNotification(cleanupCtx, finishResult.updatedChat, status, lastErrorMessage, runResult, logger) } }() @@ -5440,18 +5465,19 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) { if errors.Is(err, chatloop.ErrInterrupted) || errors.Is(context.Cause(chatCtx), chatloop.ErrInterrupted) { logger.Info(ctx, "chat interrupted") status = database.ChatStatusWaiting + lastErrorPayload = nil wasInterrupted = true return } if isShutdownCancellation(ctx, chatCtx, err) { logger.Info(ctx, "chat canceled during shutdown; returning to pending") status = database.ChatStatusPending - lastError = "" + lastErrorPayload = nil return } logger.Error(ctx, "failed to process chat", slog.Error(err)) if classified, ok := processingFailure(err); ok { - lastError = classified.Message + lastErrorPayload = chaterror.TerminalErrorPayload(classified) p.publishError(chat.ID, classified) } status = database.ChatStatusError @@ -5476,7 +5502,7 @@ func (p *Server) processChat(ctx context.Context, chat database.Chat) { if ctx.Err() != nil { logger.Info(ctx, "chat completed during shutdown; returning to pending") status = database.ChatStatusPending - lastError = "" + lastErrorPayload = nil return } } @@ -7983,11 +8009,21 @@ func (p *Server) recoverStaleChats(ctx context.Context) { return nil } - lastError := sql.NullString{} + lastError := pqtype.NullRawMessage{} if locked.Status == database.ChatStatusRequiresAction { - lastError = sql.NullString{ - String: "Dynamic tool execution timed out", - Valid: true, + lastErrorPayload, marshalErr := encodeChatLastErrorPayload( + chaterror.TerminalErrorPayload(chaterror.ClassifiedError{ + Message: "Dynamic tool execution timed out", + Kind: chaterror.KindGeneric, + }), + ) + if marshalErr != nil { + p.logger.Warn(ctx, "failed to marshal stale recovery last error payload", + slog.F("chat_id", chat.ID), + slog.Error(marshalErr), + ) + } else { + lastError = lastErrorPayload } } diff --git a/coderd/x/chatd/chatd_internal_test.go b/coderd/x/chatd/chatd_internal_test.go index 5a351f3371..dd3180eae2 100644 --- a/coderd/x/chatd/chatd_internal_test.go +++ b/coderd/x/chatd/chatd_internal_test.go @@ -2322,7 +2322,7 @@ func TestSubscribeDoesNotReplayRetryAfterTerminalError(t *testing.T) { server.publishRetry(chatID, newTestRetryPayload()) server.publishError(chatID, chaterror.ClassifiedError{ - Message: "OpenAI is rate limiting requests (HTTP 429).", + Message: "OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "openai", Retryable: true, @@ -2398,7 +2398,7 @@ func TestSubscribePrefersStructuredErrorPayloadViaPubsub(t *testing.T) { defer cancel() classified := chaterror.ClassifiedError{ - Message: "OpenAI is rate limiting requests (HTTP 429).", + Message: "OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "openai", Retryable: true, @@ -2407,7 +2407,7 @@ func TestSubscribePrefersStructuredErrorPayloadViaPubsub(t *testing.T) { server.publishError(chatID, classified) event := requireStreamErrorEvent(t, events) - require.Equal(t, chaterror.StreamErrorPayload(classified), event.Error) + require.Equal(t, chaterror.TerminalErrorPayload(classified), event.Error) requireNoStreamEvent(t, events, 200*time.Millisecond) } @@ -2442,20 +2442,23 @@ func TestSubscribeFallsBackToLegacyErrorStringViaPubsub(t *testing.T) { }) event := requireStreamErrorEvent(t, events) - require.Equal(t, &codersdk.ChatStreamError{Message: "legacy error only"}, event.Error) + require.Equal(t, &codersdk.ChatError{Message: "legacy error only"}, event.Error) requireNoStreamEvent(t, events, 200*time.Millisecond) } func newTestRetryPayload() *codersdk.ChatStreamRetry { - return &codersdk.ChatStreamRetry{ - Attempt: 1, - DelayMs: (1500 * time.Millisecond).Milliseconds(), - Error: "OpenAI is rate limiting requests (HTTP 429).", + payload := chaterror.StreamRetryPayload(1, 1500*time.Millisecond, chaterror.ClassifiedError{ + Message: "OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "openai", + Retryable: true, StatusCode: 429, - RetryingAt: time.Unix(1_700_000_000, 0).UTC(), + }) + if payload == nil { + panic("expected retry payload") } + payload.RetryingAt = time.Unix(1_700_000_000, 0).UTC() + return payload } func newSubscribeTestServer(t *testing.T, db database.Store) *Server { diff --git a/coderd/x/chatd/chatd_test.go b/coderd/x/chatd/chatd_test.go index 46e99fbf89..f64a013fa3 100644 --- a/coderd/x/chatd/chatd_test.go +++ b/coderd/x/chatd/chatd_test.go @@ -46,6 +46,7 @@ import ( "github.com/coder/coder/v2/coderd/workspacestats" "github.com/coder/coder/v2/coderd/x/chatd" "github.com/coder/coder/v2/coderd/x/chatd/chatadvisor" + "github.com/coder/coder/v2/coderd/x/chatd/chaterror" "github.com/coder/coder/v2/coderd/x/chatd/chatprompt" "github.com/coder/coder/v2/coderd/x/chatd/chattest" "github.com/coder/coder/v2/coderd/x/chatd/chattool" @@ -70,6 +71,35 @@ func openAIToolName(tool chattest.OpenAITool) string { return cmp.Or(tool.Function.Name, tool.Name, tool.Type) } +func mustChatLastErrorRawMessage(t testing.TB, payload codersdk.ChatError) pqtype.NullRawMessage { + t.Helper() + + encoded, err := json.Marshal(payload) + require.NoError(t, err) + return pqtype.NullRawMessage{RawMessage: encoded, Valid: true} +} + +func requireChatLastErrorPayload(t testing.TB, raw pqtype.NullRawMessage) codersdk.ChatError { + t.Helper() + require.True(t, raw.Valid, "last error should be set") + + var payload codersdk.ChatError + require.NoError(t, json.Unmarshal(raw.RawMessage, &payload)) + return payload +} + +func chatLastErrorMessage(raw pqtype.NullRawMessage) string { + if !raw.Valid { + return "" + } + + var payload codersdk.ChatError + if err := json.Unmarshal(raw.RawMessage, &payload); err == nil && payload.Message != "" { + return payload.Message + } + return string(raw.RawMessage) +} + func recordOpenAIRequest(req *chattest.OpenAIRequest) recordedOpenAIRequest { messages := append([]chattest.OpenAIMessage(nil), req.Messages...) tools := make([]string, 0, len(req.Tools)) @@ -867,7 +897,7 @@ func TestExploreChatUsesPersistedMCPSnapshot(t *testing.T) { chatResult := waitForTerminalChat(ctx, t, db, exploreChat.ID) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } requestsMu.Lock() @@ -960,7 +990,7 @@ func TestRootExploreChatStaysBuiltinOnlyAtRuntime(t *testing.T) { storedChat, err := db.GetChatByID(ctx, exploreChat.ID) require.NoError(t, err) if storedChat.Status == database.ChatStatusError { - require.FailNowf(t, "explore chat failed", "last_error=%q", storedChat.LastError.String) + require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(storedChat.LastError)) } require.Equal(t, database.ChatStatusWaiting, storedChat.Status) require.ElementsMatch(t, []uuid.UUID{mcpConfig.ID}, storedChat.MCPServerIDs) @@ -1044,7 +1074,7 @@ func TestRootExploreChatExcludesWebSearchProviderToolAtRuntime(t *testing.T) { storedChat, err := db.GetChatByID(ctx, exploreChat.ID) require.NoError(t, err) if storedChat.Status == database.ChatStatusError { - require.FailNowf(t, "explore chat failed", "last_error=%q", storedChat.LastError.String) + require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(storedChat.LastError)) } require.Equal(t, database.ChatStatusWaiting, storedChat.Status) @@ -1179,7 +1209,7 @@ func TestExploreChatSendMessageCannotMutateMCPSnapshot(t *testing.T) { chatResult := waitForTerminalChat(ctx, t, db, exploreChat.ID) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } exploreChat, err = db.GetChatByID(ctx, exploreChat.ID) @@ -1208,7 +1238,7 @@ func TestExploreChatSendMessageCannotMutateMCPSnapshot(t *testing.T) { chatResult = waitForTerminalChat(ctx, t, db, exploreChat.ID) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "explore chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "explore chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } recordedChildRequests := childRequests() @@ -1481,7 +1511,7 @@ func TestArchiveChatMovesPendingChatToWaiting(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -1953,7 +1983,7 @@ func TestSendMessageQueuesWhenWaitingWithQueuedBacklog(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -2418,7 +2448,7 @@ func TestPromoteQueuedAllowsAlreadyQueuedMessageWhenUsageLimitReached(t *testing WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -3657,7 +3687,11 @@ func TestRecoverStaleRequiresActionChat(t *testing.T) { return chatResult.Status == database.ChatStatusError }, testutil.WaitMedium, testutil.IntervalFast) - require.Contains(t, chatResult.LastError.String, "Dynamic tool execution timed out") + persistedError := requireChatLastErrorPayload(t, chatResult.LastError) + require.Equal(t, codersdk.ChatError{ + Message: "Dynamic tool execution timed out", + Kind: chaterror.KindGeneric, + }, persistedError) require.False(t, chatResult.WorkerID.Valid) } @@ -3764,25 +3798,30 @@ func TestUpdateChatStatusPersistsLastError(t *testing.T) { LastModelConfigID: model.ID, }) - // Simulate a chat that failed with an error. + // Write a minimal structured last_error payload through the + // query layer, then verify it round-trips through storage. errorMessage := "stream response: status 500: internal server error" + wantPayload := codersdk.ChatError{ + Message: errorMessage, + Kind: chaterror.KindGeneric, + } chat, err := db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ ID: chat.ID, Status: database.ChatStatusError, WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{String: errorMessage, Valid: true}, + LastError: mustChatLastErrorRawMessage(t, wantPayload), }) require.NoError(t, err) require.Equal(t, database.ChatStatusError, chat.Status) - require.Equal(t, sql.NullString{String: errorMessage, Valid: true}, chat.LastError) + require.Equal(t, wantPayload, requireChatLastErrorPayload(t, chat.LastError)) // Verify the error is persisted when re-read from the database. fromDB, err := db.GetChatByID(ctx, chat.ID) require.NoError(t, err) require.Equal(t, database.ChatStatusError, fromDB.Status) - require.Equal(t, sql.NullString{String: errorMessage, Valid: true}, fromDB.LastError) + require.Equal(t, wantPayload, requireChatLastErrorPayload(t, fromDB.LastError)) // Verify the error is cleared when the chat transitions to a // non-error status (e.g. pending after a retry). @@ -3792,7 +3831,7 @@ func TestUpdateChatStatusPersistsLastError(t *testing.T) { WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) require.Equal(t, database.ChatStatusPending, chat.Status) @@ -3949,7 +3988,7 @@ func TestPersistToolResultWithBinaryData(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } var toolMessage *database.ChatMessage @@ -4100,7 +4139,7 @@ func TestDynamicToolCallPausesAndResumes(t *testing.T) { require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status, "expected requires_action, got %s (last_error=%q)", - chatResult.Status, chatResult.LastError.String) + chatResult.Status, chatLastErrorMessage(chatResult.LastError)) // 2. Read the assistant message to find the tool-call ID. var toolCallID string @@ -4160,7 +4199,7 @@ func TestDynamicToolCallPausesAndResumes(t *testing.T) { // 5. Verify the chat completed successfully. if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // 6. Verify the mock received exactly 2 streaming calls. @@ -4264,7 +4303,7 @@ func TestDynamicToolNamedProposePlanRemainsAvailableOutsidePlanMode(t *testing.T }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } streamedCallsMu.Lock() @@ -4381,7 +4420,7 @@ func TestDynamicToolCallMixedWithBuiltIn(t *testing.T) { require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status, "expected requires_action, got %s (last_error=%q)", - chatResult.Status, chatResult.LastError.String) + chatResult.Status, chatLastErrorMessage(chatResult.LastError)) // 2. Verify the built-in tool (read_file) was already // executed by checking that a tool result message @@ -4443,7 +4482,7 @@ func TestDynamicToolCallMixedWithBuiltIn(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // 5. Verify the LLM received exactly 2 streaming calls. @@ -4519,7 +4558,7 @@ func TestSubmitToolResultsConcurrency(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) require.Equal(t, database.ChatStatusRequiresAction, chatResult.Status, "expected requires_action, got %s (last_error=%q)", - chatResult.Status, chatResult.LastError.String) + chatResult.Status, chatLastErrorMessage(chatResult.LastError)) // Find the tool call ID from the assistant message. var toolCallID string @@ -4842,7 +4881,7 @@ func TestCreateWorkspaceTool_EndToEnd(t *testing.T) { if chatResult.Status == codersdk.ChatStatusError { lastError := "" if chatResult.LastError != nil { - lastError = *chatResult.LastError + lastError = chatResult.LastError.Message } require.FailNowf(t, "chat run failed", "last_error=%q", lastError) } @@ -5014,7 +5053,7 @@ func TestStartWorkspaceTool_EndToEnd(t *testing.T) { if chatResult.Status == codersdk.ChatStatusError { lastError := "" if chatResult.LastError != nil { - lastError = *chatResult.LastError + lastError = chatResult.LastError.Message } require.FailNowf(t, "chat run failed", "last_error=%q", lastError) } @@ -5138,7 +5177,7 @@ func TestStoppedWorkspaceWithPersistedAgentBindingDoesNotBlockChat(t *testing.T) WorkerID: uuid.NullUUID{}, StartedAt: sql.NullTime{}, HeartbeatAt: sql.NullTime{}, - LastError: sql.NullString{}, + LastError: pqtype.NullRawMessage{}, }) require.NoError(t, err) @@ -5177,7 +5216,7 @@ func TestStoppedWorkspaceWithPersistedAgentBindingDoesNotBlockChat(t *testing.T) }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } require.EqualValues(t, 1, dialCalls.Load()) @@ -7035,9 +7074,10 @@ func TestProcessChat_UserProviderKey_MissingKeyError(t *testing.T) { chatResult := waitForTerminalChat(ctx, t, db, chat.ID) require.Equal(t, database.ChatStatusError, chatResult.Status) - require.True(t, chatResult.LastError.Valid, "LastError should be set") - require.NotEmpty(t, chatResult.LastError.String) - require.NotContains(t, chatResult.LastError.String, "panicked") + persistedError := requireChatLastErrorPayload(t, chatResult.LastError) + require.NotEmpty(t, persistedError.Message) + require.NotContains(t, persistedError.Message, "panicked") + require.Equal(t, chaterror.KindGeneric, persistedError.Kind) require.NotEqual(t, database.ChatStatusRunning, chatResult.Status) require.Zero(t, llmCalls.Load(), "missing user key should fail before any LLM request") } @@ -7099,9 +7139,10 @@ func TestProcessChatPanicRecovery(t *testing.T) { return got.Status == database.ChatStatusError }, testutil.WaitLong, testutil.IntervalFast) - require.True(t, chatResult.LastError.Valid, "LastError should be set") - require.Contains(t, chatResult.LastError.String, "chat processing panicked") - require.Contains(t, chatResult.LastError.String, "intentional test panic") + persistedError := requireChatLastErrorPayload(t, chatResult.LastError) + require.Contains(t, persistedError.Message, "chat processing panicked") + require.Contains(t, persistedError.Message, "intentional test panic") + require.Equal(t, chaterror.KindGeneric, persistedError.Kind) } // panicOnInTxDB wraps a database.Store and panics on the first InTx @@ -7265,7 +7306,7 @@ func TestMCPServerToolInvocation(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // The MCP tool (test-mcp__echo) should appear in the tool @@ -7765,7 +7806,7 @@ func TestMCPServerOAuth2TokenRefresh(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // The token should have been refreshed. @@ -7873,7 +7914,7 @@ func TestMCPServerOAuth2TokenRefreshFailureGraceful(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat should not fail", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat should not fail", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // The LLM should have been called at least once. @@ -7996,7 +8037,7 @@ func TestChatTemplateAllowlistEnforcement(t *testing.T) { }, testutil.WaitLong, testutil.IntervalFast) if chatResult.Status == database.ChatStatusError { - require.FailNowf(t, "chat run failed", "last_error=%q", chatResult.LastError.String) + require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } // Collect all tool results keyed by tool name. Each tool may @@ -9285,7 +9326,7 @@ func TestAdvisorChainMode_SnapshotKeepsFullHistory(t *testing.T) { turn1Chat, err := db.GetChatByID(ctx, chat.ID) require.NoError(t, err) require.Equal(t, database.ChatStatusWaiting, turn1Chat.Status, - "turn 1 must complete before turn 2 can be sent; last_error=%q", turn1Chat.LastError.String) + "turn 1 must complete before turn 2 can be sent; last_error=%q", chatLastErrorMessage(turn1Chat.LastError)) _, err = server.SendMessage(ctx, chatd.SendMessageOptions{ ChatID: chat.ID, diff --git a/coderd/x/chatd/chaterror/classify_test.go b/coderd/x/chatd/chaterror/classify_test.go index 4d3f654d37..353c758134 100644 --- a/coderd/x/chatd/chaterror/classify_test.go +++ b/coderd/x/chatd/chaterror/classify_test.go @@ -26,7 +26,7 @@ func TestClassify(t *testing.T) { name: "AmbiguousOverloadKeepsProviderUnknown", err: xerrors.New("status 529 from upstream"), want: chaterror.ClassifiedError{ - Message: "The AI provider is temporarily overloaded (HTTP 529).", + Message: "The AI provider is temporarily overloaded.", Kind: chaterror.KindOverloaded, Provider: "", Retryable: true, @@ -114,7 +114,7 @@ func TestClassify(t *testing.T) { name: "ExplicitStatus429ClassifiesAsRateLimit", err: xerrors.New("status 429 from upstream"), want: chaterror.ClassifiedError{ - Message: "The AI provider is rate limiting requests (HTTP 429).", + Message: "The AI provider is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "", Retryable: true, @@ -561,7 +561,7 @@ func TestWithProviderUsesExplicitHint(t *testing.T) { enriched := classified.WithProvider("azure openai") require.Equal(t, chaterror.ClassifiedError{ - Message: "Azure OpenAI is rate limiting requests (HTTP 429).", + Message: "Azure OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "azure", Retryable: true, @@ -577,7 +577,7 @@ func TestWithProviderAddsProviderWhenUnknown(t *testing.T) { enriched := classified.WithProvider("openai") require.Equal(t, chaterror.ClassifiedError{ - Message: "OpenAI is rate limiting requests (HTTP 429).", + Message: "OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "openai", Retryable: true, @@ -595,7 +595,7 @@ func TestClassify_UsesStructuredProviderStatusAndRetryAfter(t *testing.T) { )) require.Equal(t, chaterror.ClassifiedError{ - Message: "The AI provider is rate limiting requests (HTTP 429).", + Message: "The AI provider is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "", Retryable: true, @@ -659,7 +659,7 @@ func TestWithProviderPreservesRetryAfter(t *testing.T) { enriched := classified.WithProvider("openai") require.Equal(t, 30*time.Second, enriched.RetryAfter) require.Equal(t, chaterror.ClassifiedError{ - Message: "OpenAI is rate limiting requests (HTTP 429).", + Message: "OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "openai", Retryable: true, @@ -679,7 +679,7 @@ func TestClassify_UsesStructuredProviderDetailFromResponseDump(t *testing.T) { )) require.Equal(t, chaterror.ClassifiedError{ - Message: "The AI provider returned an unexpected error (HTTP 400).", + Message: "The AI provider returned an unexpected error.", Detail: "Image exceeds 5 MB maximum.", Kind: chaterror.KindGeneric, Provider: "", diff --git a/coderd/x/chatd/chaterror/message.go b/coderd/x/chatd/chaterror/message.go index 850c7ab461..a3e361773a 100644 --- a/coderd/x/chatd/chaterror/message.go +++ b/coderd/x/chatd/chaterror/message.go @@ -6,37 +6,21 @@ import ( ) // terminalMessage produces the user-facing error description shown -// when retries are exhausted. It includes HTTP status codes and -// actionable remediation guidance. +// when retries are exhausted. HTTP status codes are carried in the +// classified payload's StatusCode field and rendered as a separate +// footer chip by the UI, so they are intentionally omitted here to +// avoid duplicating the same information in two places. func terminalMessage(classified ClassifiedError) string { subject := providerSubject(classified.Provider) switch classified.Kind { case KindOverloaded: - if classified.StatusCode > 0 { - return fmt.Sprintf( - "%s is temporarily overloaded (HTTP %d).", - subject, classified.StatusCode, - ) - } return fmt.Sprintf("%s is temporarily overloaded.", subject) case KindRateLimit: - if classified.StatusCode > 0 { - return fmt.Sprintf( - "%s is rate limiting requests (HTTP %d).", - subject, classified.StatusCode, - ) - } return fmt.Sprintf("%s is rate limiting requests.", subject) case KindTimeout: - if classified.StatusCode > 0 { - return fmt.Sprintf( - "%s is temporarily unavailable (HTTP %d).", - subject, classified.StatusCode, - ) - } - if !classified.Retryable { + if !classified.Retryable && classified.StatusCode == 0 { return "The request timed out before it completed." } return fmt.Sprintf("%s is temporarily unavailable.", subject) @@ -65,13 +49,7 @@ func terminalMessage(classified ClassifiedError) string { ) default: - if classified.StatusCode > 0 { - return fmt.Sprintf( - "%s returned an unexpected error (HTTP %d).", - subject, classified.StatusCode, - ) - } - if !classified.Retryable { + if !classified.Retryable && classified.StatusCode == 0 { return "The chat request failed unexpectedly." } return fmt.Sprintf("%s returned an unexpected error.", subject) diff --git a/coderd/x/chatd/chaterror/payload.go b/coderd/x/chatd/chaterror/payload.go index ca7dc214f4..6262384525 100644 --- a/coderd/x/chatd/chaterror/payload.go +++ b/coderd/x/chatd/chaterror/payload.go @@ -6,11 +6,11 @@ import ( "github.com/coder/coder/v2/codersdk" ) -func StreamErrorPayload(classified ClassifiedError) *codersdk.ChatStreamError { +func TerminalErrorPayload(classified ClassifiedError) *codersdk.ChatError { if classified.Message == "" { return nil } - return &codersdk.ChatStreamError{ + return &codersdk.ChatError{ Message: classified.Message, Detail: classified.Detail, Kind: classified.Kind, diff --git a/coderd/x/chatd/chaterror/payload_test.go b/coderd/x/chatd/chaterror/payload_test.go index c41bf7cd0d..7aa21e6500 100644 --- a/coderd/x/chatd/chaterror/payload_test.go +++ b/coderd/x/chatd/chaterror/payload_test.go @@ -11,16 +11,16 @@ import ( "github.com/coder/coder/v2/codersdk" ) -func TestStreamErrorPayloadUsesNormalizedClassification(t *testing.T) { +func TestTerminalErrorPayloadUsesNormalizedClassification(t *testing.T) { t.Parallel() classified := chaterror.Classify( xerrors.New("azure openai received status 429 from upstream"), ) - payload := chaterror.StreamErrorPayload(classified) + payload := chaterror.TerminalErrorPayload(classified) - require.Equal(t, &codersdk.ChatStreamError{ - Message: "Azure OpenAI is rate limiting requests (HTTP 429).", + require.Equal(t, &codersdk.ChatError{ + Message: "Azure OpenAI is rate limiting requests.", Kind: chaterror.KindRateLimit, Provider: "azure", Retryable: true, @@ -28,10 +28,10 @@ func TestStreamErrorPayloadUsesNormalizedClassification(t *testing.T) { }, payload) } -func TestStreamErrorPayloadIncludesProviderDetail(t *testing.T) { +func TestTerminalErrorPayloadIncludesProviderDetail(t *testing.T) { t.Parallel() - payload := chaterror.StreamErrorPayload(chaterror.Classify(testProviderError( + payload := chaterror.TerminalErrorPayload(chaterror.Classify(testProviderError( "", 400, nil, @@ -41,10 +41,10 @@ func TestStreamErrorPayloadIncludesProviderDetail(t *testing.T) { require.Equal(t, "Image exceeds 5 MB maximum.", payload.Detail) } -func TestStreamErrorPayloadNilForEmptyClassification(t *testing.T) { +func TestTerminalErrorPayloadNilForEmptyClassification(t *testing.T) { t.Parallel() - require.Nil(t, chaterror.StreamErrorPayload(chaterror.ClassifiedError{})) + require.Nil(t, chaterror.TerminalErrorPayload(chaterror.ClassifiedError{})) } func TestStreamRetryPayloadUsesNormalizedClassification(t *testing.T) { @@ -53,7 +53,7 @@ func TestStreamRetryPayloadUsesNormalizedClassification(t *testing.T) { delay := 3 * time.Second startedAt := time.Now() payload := chaterror.StreamRetryPayload(2, delay, chaterror.ClassifiedError{ - Message: "OpenAI returned an unexpected error (HTTP 503).", + Message: "OpenAI returned an unexpected error.", Kind: chaterror.KindGeneric, Provider: "openai", Retryable: true, diff --git a/coderd/x/chatd/chatloop/chatloop_test.go b/coderd/x/chatd/chatloop/chatloop_test.go index 89f5996e1a..862aa9ed05 100644 --- a/coderd/x/chatd/chatloop/chatloop_test.go +++ b/coderd/x/chatd/chatloop/chatloop_test.go @@ -576,7 +576,7 @@ func TestRun_OnRetryEnrichesProvider(t *testing.T) { require.Equal(t, 429, records[0].classified.StatusCode) require.Equal( t, - "OpenAI is rate limiting requests (HTTP 429).", + "OpenAI is rate limiting requests.", records[0].classified.Message, ) } diff --git a/coderd/x/chatd/integration_responses_test.go b/coderd/x/chatd/integration_responses_test.go index adb4c1f708..ecb99539ba 100644 --- a/coderd/x/chatd/integration_responses_test.go +++ b/coderd/x/chatd/integration_responses_test.go @@ -493,7 +493,7 @@ func requireResponsesChatWaiting( chat, err := db.GetChatByID(ctx, chatID) require.NoError(t, err) if chat.Status == database.ChatStatusError { - require.FailNowf(t, "chat failed", "last_error=%q", chat.LastError.String) + require.FailNowf(t, "chat failed", "last_error=%q", chatLastErrorMessage(chat.LastError)) } require.Equal(t, database.ChatStatusWaiting, chat.Status) } diff --git a/coderd/x/chatd/subagent_internal_test.go b/coderd/x/chatd/subagent_internal_test.go index e46e3189ce..f536f44d33 100644 --- a/coderd/x/chatd/subagent_internal_test.go +++ b/coderd/x/chatd/subagent_internal_test.go @@ -2,7 +2,6 @@ package chatd import ( "context" - "database/sql" "encoding/json" "sync" "testing" @@ -22,6 +21,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/pubsub" coderdpubsub "github.com/coder/coder/v2/coderd/pubsub" + "github.com/coder/coder/v2/coderd/x/chatd/chaterror" "github.com/coder/coder/v2/coderd/x/chatd/chatloop" "github.com/coder/coder/v2/coderd/x/chatd/chatprompt" "github.com/coder/coder/v2/coderd/x/chatd/chatprovider" @@ -2717,7 +2717,12 @@ func setChatStatus( Status: status, } if lastError != "" { - params.LastError = sql.NullString{String: lastError, Valid: true} + encodedLastError, err := json.Marshal(codersdk.ChatError{ + Message: lastError, + Kind: chaterror.KindGeneric, + }) + require.NoError(t, err) + params.LastError = pqtype.NullRawMessage{RawMessage: encodedLastError, Valid: true} } _, err := db.UpdateChatStatus(ctx, params) require.NoError(t, err) diff --git a/codersdk/chats.go b/codersdk/chats.go index 7f829332b7..cc372f72fd 100644 --- a/codersdk/chats.go +++ b/codersdk/chats.go @@ -109,7 +109,7 @@ type Chat struct { Title string `json:"title"` Status ChatStatus `json:"status"` PlanMode ChatPlanMode `json:"plan_mode,omitempty"` - LastError *string `json:"last_error"` + LastError *ChatError `json:"last_error,omitempty"` DiffStatus *ChatDiffStatus `json:"diff_status,omitempty"` CreatedAt time.Time `json:"created_at" format:"date-time"` UpdatedAt time.Time `json:"updated_at" format:"date-time"` @@ -1425,8 +1425,9 @@ type ChatStreamStatus struct { Status ChatStatus `json:"status"` } -// ChatStreamError represents an error event in the stream. -type ChatStreamError struct { +// ChatError represents a terminal chat error in persisted chat state or the +// live stream. +type ChatError struct { // Message is the normalized, user-facing error message. Message string `json:"message"` // Detail is optional provider-specific context shown alongside the @@ -1574,7 +1575,7 @@ type ChatStreamEvent struct { Message *ChatMessage `json:"message,omitempty"` MessagePart *ChatStreamMessagePart `json:"message_part,omitempty"` Status *ChatStreamStatus `json:"status,omitempty"` - Error *ChatStreamError `json:"error,omitempty"` + Error *ChatError `json:"error,omitempty"` Retry *ChatStreamRetry `json:"retry,omitempty"` QueuedMessages []ChatQueuedMessage `json:"queued_messages,omitempty"` ActionRequired *ChatStreamActionRequired `json:"action_required,omitempty"` @@ -2651,7 +2652,7 @@ func (c *ExperimentalClient) StreamChat(ctx context.Context, chatID uuid.UUID, o } _ = send(ChatStreamEvent{ Type: ChatStreamEventTypeError, - Error: &ChatStreamError{ + Error: &ChatError{ Message: fmt.Sprintf("read chat stream: %v", err), }, }) diff --git a/codersdk/chats_test.go b/codersdk/chats_test.go index 6d0fb732b7..68d6e0ecce 100644 --- a/codersdk/chats_test.go +++ b/codersdk/chats_test.go @@ -447,7 +447,14 @@ func TestChat_JSONRoundTrip(t *testing.T) { reviewerCount := int32(2) refreshedAt := now staleAt := now.Add(time.Hour) - lastError := "boom" + lastError := &codersdk.ChatError{ + Message: "boom", + Detail: "provider detail", + Kind: "generic", + Provider: "openai", + Retryable: true, + StatusCode: 503, + } prURL := "https://github.com/coder/coder/pull/42" workspaceID := uuid.New() buildID := uuid.New() @@ -466,7 +473,7 @@ func TestChat_JSONRoundTrip(t *testing.T) { LastModelConfigID: uuid.New(), Title: "round-trip-test", Status: codersdk.ChatStatusRunning, - LastError: &lastError, + LastError: lastError, CreatedAt: now, UpdatedAt: now, Archived: true, diff --git a/docs/ai-coder/agents/chats-api.md b/docs/ai-coder/agents/chats-api.md index a2b523516a..568b04a7d6 100644 --- a/docs/ai-coder/agents/chats-api.md +++ b/docs/ai-coder/agents/chats-api.md @@ -48,7 +48,6 @@ The response is the newly created `Chat` object: "last_model_config_id": "...", "title": "hello world", "status": "waiting", - "last_error": null, "diff_status": null, "created_at": "2025-07-17T00:00:00Z", "updated_at": "2025-07-17T00:00:00Z", @@ -61,6 +60,33 @@ The response is the newly created `Chat` object: } ``` +If a chat later ends in error, the same `Chat` shape includes a structured +`last_error` object. For brevity, unchanged nullable IDs are omitted here: + +```json +{ + "id": "a1b2c3d4-...", + "title": "hello world", + "status": "error", + "last_error": { + "message": "Azure OpenAI is rate limiting requests.", + "kind": "rate_limit", + "provider": "azure", + "retryable": true, + "status_code": 429, + "detail": "Retry after 30 seconds." + }, + "created_at": "2025-07-17T00:00:00Z", + "updated_at": "2025-07-17T00:00:30Z", + "archived": false, + "pin_order": 0, + "mcp_server_ids": [], + "labels": {}, + "has_unread": false, + "client_type": "api" +} +``` + The agent begins processing the prompt asynchronously. Use the [stream endpoint](#stream-updates) to follow its progress. diff --git a/enterprise/coderd/x/chatd/chatd.go b/enterprise/coderd/x/chatd/chatd.go index ce1a002723..e407e0e23d 100644 --- a/enterprise/coderd/x/chatd/chatd.go +++ b/enterprise/coderd/x/chatd/chatd.go @@ -394,7 +394,7 @@ func NewMultiReplicaSubscribeFn( case mergedEvents <- codersdk.ChatStreamEvent{ Type: codersdk.ChatStreamEventTypeError, ChatID: chatID, - Error: &codersdk.ChatStreamError{Message: msg}, + Error: &codersdk.ChatError{Message: msg}, }: case <-ctx.Done(): } diff --git a/enterprise/coderd/x/chatd/chatd_test.go b/enterprise/coderd/x/chatd/chatd_test.go index 37d30e23c2..3345819696 100644 --- a/enterprise/coderd/x/chatd/chatd_test.go +++ b/enterprise/coderd/x/chatd/chatd_test.go @@ -13,6 +13,7 @@ import ( "time" "github.com/google/uuid" + "github.com/sqlc-dev/pqtype" "github.com/stretchr/testify/require" "golang.org/x/xerrors" @@ -30,6 +31,18 @@ import ( "github.com/coder/quartz" ) +func chatLastErrorMessage(raw pqtype.NullRawMessage) string { + if !raw.Valid { + return "" + } + + var payload codersdk.ChatError + if err := json.Unmarshal(raw.RawMessage, &payload); err == nil && payload.Message != "" { + return payload.Message + } + return string(raw.RawMessage) +} + func newTestServer( t *testing.T, db database.Store, @@ -1712,14 +1725,14 @@ waitForStream: currentChat, dbErr := db.GetChatByID(ctx, chat.ID) if dbErr == nil && currentChat.Status == database.ChatStatusError { t.Fatalf("worker failed to process chat: status=%s last_error=%s", - currentChat.Status, currentChat.LastError.String) + currentChat.Status, chatLastErrorMessage(currentChat.LastError)) } case <-ctx.Done(): // Dump the final chat status for debugging. currentChat, dbErr := db.GetChatByID(context.Background(), chat.ID) if dbErr == nil { t.Fatalf("timed out waiting for worker to start streaming (chat status=%s, last_error=%q)", - currentChat.Status, currentChat.LastError.String) + currentChat.Status, chatLastErrorMessage(currentChat.LastError)) } t.Fatal("timed out waiting for worker to start streaming") } diff --git a/site/src/api/queries/chats.test.ts b/site/src/api/queries/chats.test.ts index eb0fb9c2bb..5ba7549c31 100644 --- a/site/src/api/queries/chats.test.ts +++ b/site/src/api/queries/chats.test.ts @@ -112,7 +112,6 @@ const makeChat = ( pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], ...overrides, }); diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 39c5d2f56c..00f9617ab0 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1286,7 +1286,7 @@ export interface Chat { readonly title: string; readonly status: ChatStatus; readonly plan_mode?: ChatPlanMode; - readonly last_error: string | null; + readonly last_error?: ChatError; readonly diff_status?: ChatDiffStatus; readonly created_at: string; readonly updated_at: string; @@ -1679,6 +1679,39 @@ export interface ChatDiffStatus { readonly stale_at?: string; } +// From codersdk/chats.go +/** + * ChatError represents a terminal chat error in persisted chat state or the + * live stream. + */ +export interface ChatError { + /** + * Message is the normalized, user-facing error message. + */ + readonly message: string; + /** + * Detail is optional provider-specific context shown alongside the + * normalized error message when available. + */ + readonly detail?: string; + /** + * Kind classifies the error for consistent client rendering. + */ + readonly kind?: string; + /** + * Provider identifies the upstream model provider when known. + */ + readonly provider?: string; + /** + * Retryable reports whether the underlying error is transient. + */ + readonly retryable: boolean; + /** + * StatusCode is the best-effort upstream HTTP status code. + */ + readonly status_code?: number; +} + // From codersdk/chats.go /** * ChatFileMetadata contains lightweight metadata about a file @@ -2381,38 +2414,6 @@ export interface ChatStreamActionRequired { readonly tool_calls: readonly ChatStreamToolCall[]; } -// From codersdk/chats.go -/** - * ChatStreamError represents an error event in the stream. - */ -export interface ChatStreamError { - /** - * Message is the normalized, user-facing error message. - */ - readonly message: string; - /** - * Detail is optional provider-specific context shown alongside the - * normalized error message when available. - */ - readonly detail?: string; - /** - * Kind classifies the error for consistent client rendering. - */ - readonly kind?: string; - /** - * Provider identifies the upstream model provider when known. - */ - readonly provider?: string; - /** - * Retryable reports whether the underlying error is transient. - */ - readonly retryable: boolean; - /** - * StatusCode is the best-effort upstream HTTP status code. - */ - readonly status_code?: number; -} - // From codersdk/chats.go /** * ChatStreamEvent represents a real-time update for chat streaming. @@ -2423,7 +2424,7 @@ export interface ChatStreamEvent { readonly message?: ChatMessage; readonly message_part?: ChatStreamMessagePart; readonly status?: ChatStreamStatus; - readonly error?: ChatStreamError; + readonly error?: ChatError; readonly retry?: ChatStreamRetry; readonly queued_messages?: readonly ChatQueuedMessage[]; readonly action_required?: ChatStreamActionRequired; diff --git a/site/src/pages/AgentsPage/AgentChatPage.stories.tsx b/site/src/pages/AgentsPage/AgentChatPage.stories.tsx index f51acb8712..7dc7d12d79 100644 --- a/site/src/pages/AgentsPage/AgentChatPage.stories.tsx +++ b/site/src/pages/AgentsPage/AgentChatPage.stories.tsx @@ -137,7 +137,6 @@ const baseChatFields = { pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], } as const; @@ -1188,6 +1187,42 @@ export const ArchivedOtherUserChat: Story = { }, }; +/** Persisted structured errors rehydrate the failed callout after refresh. */ +export const PersistedStructuredError: Story = { + parameters: { + queries: buildQueries( + { + id: CHAT_ID, + ...baseChatFields, + title: "Persisted provider error", + status: "error", + last_error: { + message: "Anthropic returned an unexpected error.", + detail: + "messages.0.content.1.image.source.base64: image exceeds 5 MB maximum.", + kind: "generic", + provider: "anthropic", + retryable: false, + status_code: 400, + }, + }, + { messages: [], queued_messages: [], has_more: false }, + { diffUrl: undefined }, + ), + }, + play: async ({ canvasElement }) => { + const canvas = within(canvasElement); + expect( + canvas.getByRole("heading", { name: /request failed/i }), + ).toBeVisible(); + expect( + canvas.getByText(/anthropic returned an unexpected error\./i), + ).toBeVisible(); + expect(canvas.getByText(/^HTTP 400$/)).toBeVisible(); + expect(canvas.getByText(/image exceeds 5 mb maximum/i)).toBeVisible(); + }, +}; + export const PlanModeFromChatState: Story = { parameters: { queries: buildQueries( diff --git a/site/src/pages/AgentsPage/AgentChatPage.tsx b/site/src/pages/AgentsPage/AgentChatPage.tsx index 68a25564a4..2d7b20132b 100644 --- a/site/src/pages/AgentsPage/AgentChatPage.tsx +++ b/site/src/pages/AgentsPage/AgentChatPage.tsx @@ -57,6 +57,7 @@ import { } from "./AgentChatPageView"; import type { AgentsOutletContext } from "./AgentsPage"; import type { ChatMessageInputRef } from "./components/AgentChatInput"; +import { normalizeChatErrorPayload } from "./components/ChatConversation/chatError"; import { getParentChatID, getWorkspaceAgent, @@ -547,16 +548,13 @@ const getPersistedDetailError = ({ if (cachedError?.kind === "usage_limit") { return cachedError; } - if (chatStatus === "error") { - if (cachedError) { - return cachedError; - } - const lastError = chatRecord?.last_error?.trim(); - if (lastError) { - return { kind: "generic", message: lastError }; - } + if (chatStatus !== "error") { + return undefined; } - return undefined; + if (cachedError) { + return cachedError; + } + return normalizeChatErrorPayload(chatRecord?.last_error); }; /** diff --git a/site/src/pages/AgentsPage/AgentChatPageView.stories.tsx b/site/src/pages/AgentsPage/AgentChatPageView.stories.tsx index f5500bb4fc..ce06574414 100644 --- a/site/src/pages/AgentsPage/AgentChatPageView.stories.tsx +++ b/site/src/pages/AgentsPage/AgentChatPageView.stories.tsx @@ -62,7 +62,6 @@ const buildChat = (overrides: Partial = {}): TypesGen.Chat => ({ pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], ...overrides, }); @@ -295,7 +294,7 @@ export const WithError: Story = { = {}): Chat => ({ pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], ...overrides, }); @@ -426,7 +425,11 @@ export const WithChatList: Story = { id: "chat-3", title: "Fix database migration issue", status: "error", - last_error: "Connection timeout", + last_error: { + message: "Connection timeout", + kind: "generic", + retryable: false, + }, updated_at: todayTimestamp, }), buildChat({ diff --git a/site/src/pages/AgentsPage/components/ChatConversation/ChatStatusCallout.tsx b/site/src/pages/AgentsPage/components/ChatConversation/ChatStatusCallout.tsx index b2971d2a07..38c2964128 100644 --- a/site/src/pages/AgentsPage/components/ChatConversation/ChatStatusCallout.tsx +++ b/site/src/pages/AgentsPage/components/ChatConversation/ChatStatusCallout.tsx @@ -133,6 +133,9 @@ const StatusAlert: FC<{ status: RetryOrFailedStatus }> = ({ status }) => { if (status.phase === "retrying") { metadataItems.push(Attempt {status.attempt}); } + if (status.phase === "failed" && status.statusCode != null) { + metadataItems.push(HTTP {status.statusCode}); + } return ( { + const message = error?.message?.trim(); + if (!message) { + return undefined; + } + const detail = error?.detail?.trim(); + const statusCode = + typeof error?.status_code === "number" && error.status_code > 0 + ? error.status_code + : undefined; + return { + message, + kind: error?.kind?.trim() || "generic", + provider: error?.provider?.trim() || undefined, + retryable: error?.retryable, + statusCode, + ...(detail ? { detail } : {}), + }; +}; diff --git a/site/src/pages/AgentsPage/components/ChatConversation/chatStore.test.tsx b/site/src/pages/AgentsPage/components/ChatConversation/chatStore.test.tsx index 9f9de11388..baf68d6c23 100644 --- a/site/src/pages/AgentsPage/components/ChatConversation/chatStore.test.tsx +++ b/site/src/pages/AgentsPage/components/ChatConversation/chatStore.test.tsx @@ -218,7 +218,6 @@ const makeChat = (chatID: string): TypesGen.Chat => ({ pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], }); @@ -1805,9 +1804,6 @@ describe("useChatStore", () => { expect(result.current.streamError).toEqual({ kind: "generic", message: "Chat processing failed.", - provider: undefined, - retryable: false, - statusCode: undefined, }); }); }); diff --git a/site/src/pages/AgentsPage/components/ChatConversation/useChatStore.ts b/site/src/pages/AgentsPage/components/ChatConversation/useChatStore.ts index 0e6e18a917..7fbaf98c4c 100644 --- a/site/src/pages/AgentsPage/components/ChatConversation/useChatStore.ts +++ b/site/src/pages/AgentsPage/components/ChatConversation/useChatStore.ts @@ -12,6 +12,7 @@ import type * as TypesGen from "#/api/typesGenerated"; import type { OneWayMessageEvent } from "#/utils/OneWayWebSocket"; import { createReconnectingWebSocket } from "#/utils/reconnectingWebSocket"; import type { ChatDetailError } from "../../utils/usageLimitMessage"; +import { normalizeChatErrorPayload } from "./chatError"; import { type ChatStore, type ChatStoreState, @@ -22,20 +23,6 @@ import { } from "./chatStore"; import type { RetryState } from "./types"; -const normalizeChatDetailError = ( - error: TypesGen.ChatStreamError | undefined, -): ChatDetailError => { - const detail = error?.detail?.trim(); - return { - message: error?.message.trim() || "Chat processing failed.", - kind: error?.kind?.trim() || "generic", - provider: error?.provider?.trim() || undefined, - retryable: error?.retryable, - statusCode: error?.status_code, - ...(detail ? { detail } : {}), - }; -}; - const normalizeRetryState = (retry: TypesGen.ChatStreamRetry): RetryState => ({ attempt: Math.max(1, retry.attempt), error: retry.error.trim() || "Retrying request shortly.", @@ -527,7 +514,10 @@ export const useChatStore = ( if (streamEvent.chat_id && streamEvent.chat_id !== chatID) { continue; } - const reason = normalizeChatDetailError(streamEvent.error); + const reason = normalizeChatErrorPayload(streamEvent.error) ?? { + kind: "generic", + message: "Chat processing failed.", + }; store.setChatStatus("error"); store.setStreamError(reason); store.clearRetryState(); diff --git a/site/src/pages/AgentsPage/components/ChatTopBar.stories.tsx b/site/src/pages/AgentsPage/components/ChatTopBar.stories.tsx index 37d88aa1f2..38dc30283d 100644 --- a/site/src/pages/AgentsPage/components/ChatTopBar.stories.tsx +++ b/site/src/pages/AgentsPage/components/ChatTopBar.stories.tsx @@ -65,7 +65,6 @@ export const WithParentChat: Story = { labels: {}, title: "Set up CI/CD pipeline", status: "completed", - last_error: null, created_at: "2026-02-18T00:00:00.000Z", updated_at: "2026-02-18T00:00:00.000Z", archived: false, diff --git a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.stories.tsx b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.stories.tsx index ed9aa34eb0..11b5ba4fd3 100644 --- a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.stories.tsx +++ b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.stories.tsx @@ -69,7 +69,6 @@ const buildChat = (overrides: Partial = {}): Chat => ({ pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, children: [], ...overrides, }); diff --git a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.test.tsx b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.test.tsx index ca42fa0084..946dca2748 100644 --- a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.test.tsx +++ b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.test.tsx @@ -66,7 +66,6 @@ const buildChat = (overrides: Partial = {}): Chat => ({ pin_order: 0, has_unread: false, client_type: "ui", - last_error: null, mcp_server_ids: [], labels: {}, children: [], diff --git a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.tsx b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.tsx index 78faff52d2..c13065ab16 100644 --- a/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.tsx +++ b/site/src/pages/AgentsPage/components/Sidebar/AgentsSidebar.tsx @@ -503,7 +503,7 @@ const ChatTreeNode: FC = ({ chat, isChildNode }) => { ); const errorReason = chat.status === "error" - ? chatErrorReasons[chat.id] || chat.last_error || undefined + ? chatErrorReasons[chat.id] || chat.last_error?.message || undefined : undefined; const subtitle = errorReason || modelName; const diffStatus = getChatDiffStatus(chat);