diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index aba2c0d52f..47676cee9a 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -818,6 +818,42 @@ const docTemplate = `{ ] } }, + "/api/experimental/chats/{chat}/reconcile-invalid": { + "post": { + "description": "Experimental: this endpoint is subject to change.", + "produces": [ + "application/json" + ], + "tags": [ + "Chats" + ], + "summary": "Reconcile invalid chat state", + "operationId": "reconcile-invalid-chat-state", + "parameters": [ + { + "type": "string", + "format": "uuid", + "description": "Chat ID", + "name": "chat", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/codersdk.Chat" + } + } + }, + "security": [ + { + "CoderSessionToken": [] + } + ] + } + }, "/api/experimental/chats/{chat}/stream": { "get": { "description": "Experimental: this endpoint is subject to change.", diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 0a6290d7fe..cefdd86e77 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -723,6 +723,38 @@ ] } }, + "/api/experimental/chats/{chat}/reconcile-invalid": { + "post": { + "description": "Experimental: this endpoint is subject to change.", + "produces": ["application/json"], + "tags": ["Chats"], + "summary": "Reconcile invalid chat state", + "operationId": "reconcile-invalid-chat-state", + "parameters": [ + { + "type": "string", + "format": "uuid", + "description": "Chat ID", + "name": "chat", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/codersdk.Chat" + } + } + }, + "security": [ + { + "CoderSessionToken": [] + } + ] + } + }, "/api/experimental/chats/{chat}/stream": { "get": { "description": "Experimental: this endpoint is subject to change.", diff --git a/coderd/coderd.go b/coderd/coderd.go index 91d95c5ef2..73fc861bf2 100644 --- 
a/coderd/coderd.go +++ b/coderd/coderd.go @@ -1331,6 +1331,7 @@ func New(options *Options) *API { r.Get("/git", api.watchChatGit) }) r.Post("/interrupt", api.interruptChat) + r.Post("/reconcile-invalid", api.reconcileInvalidChatState) r.Post("/tool-results", api.postChatToolResults) r.Post("/title/regenerate", api.regenerateChatTitle) r.Post("/title/propose", api.proposeChatTitle) diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index 01b1ae386f..9ed96a9793 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -49,6 +49,7 @@ import ( "github.com/coder/coder/v2/coderd/wsbuilder" "github.com/coder/coder/v2/coderd/x/chatd" "github.com/coder/coder/v2/coderd/x/chatd/chatprovider" + "github.com/coder/coder/v2/coderd/x/chatd/chatstate" "github.com/coder/coder/v2/coderd/x/chatd/chattool" "github.com/coder/coder/v2/coderd/x/chatfiles" "github.com/coder/coder/v2/coderd/x/gitsync" @@ -113,30 +114,6 @@ func maybeWriteLimitErr(ctx context.Context, rw http.ResponseWriter, err error) return false } -func publishChatTitleChange(logger slog.Logger, ps dbpubsub.Pubsub, chat database.Chat) { - if ps == nil { - return - } - event := codersdk.ChatWatchEvent{ - Kind: codersdk.ChatWatchEventKindTitleChange, - Chat: db2sdk.Chat(chat, nil, nil), - } - payload, err := json.Marshal(event) - if err != nil { - logger.Error(context.Background(), "failed to marshal chat title change event", - slog.F("chat_id", chat.ID), - slog.Error(err), - ) - return - } - if err := ps.Publish(pubsub.ChatWatchEventChannel(chat.OwnerID), payload); err != nil { - logger.Error(context.Background(), "failed to publish chat title change event", - slog.F("chat_id", chat.ID), - slog.Error(err), - ) - } -} - func publishChatConfigEvent(logger slog.Logger, ps dbpubsub.Pubsub, kind pubsub.ChatConfigEventKind, entityID uuid.UUID) { payload, err := json.Marshal(pubsub.ChatConfigEvent{ Kind: kind, @@ -1086,14 +1063,6 @@ func (api *API) postChats(rw http.ResponseWriter, r *http.Request) { title := 
chatTitleFromMessage(titleSource) - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - modelConfigID, modelConfigStatus, modelConfigError := api.resolveCreateChatModelConfigID(ctx, apiKey.UserID, req) if modelConfigError != nil { httpapi.Write(ctx, rw, modelConfigStatus, *modelConfigError) @@ -1304,14 +1273,6 @@ func (api *API) postChats(rw http.ResponseWriter, r *http.Request) { func (api *API) listChatModels(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() apiKey := httpmw.APIKey(r) - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - availability, err := api.getUserChatProviderAvailability(ctx, apiKey.UserID) if err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ @@ -2579,35 +2540,7 @@ func (api *API) applyChatTitleUpdate( return chat, false } - var ( - updatedChat database.Chat - wrote bool - err error - ) - if api.chatDaemon != nil { - updatedChat, wrote, err = api.chatDaemon.RenameChatTitle(ctx, chat, trimmedTitle) - } else { - err = api.Database.InTx(func(tx database.Store) error { - currentChat, txErr := tx.GetChatByID(ctx, chat.ID) - if txErr != nil { - return txErr - } - if trimmedTitle == currentChat.Title { - updatedChat = currentChat - wrote = false - return nil - } - updatedChat, txErr = tx.UpdateChatTitleByID(ctx, database.UpdateChatTitleByIDParams{ - ID: chat.ID, - Title: trimmedTitle, - }) - if txErr != nil { - return txErr - } - wrote = true - return nil - }, nil) - } + updatedChat, wrote, err := api.chatDaemon.RenameChatTitle(ctx, chat, trimmedTitle) if err != nil { if errors.Is(err, chatd.ErrManualTitleRegenerationInProgress) { httpapi.Write(ctx, rw, http.StatusConflict, 
codersdk.Response{ @@ -2626,11 +2559,7 @@ func (api *API) applyChatTitleUpdate( return chat, true } if wrote { - if api.chatDaemon != nil { - api.chatDaemon.PublishTitleChange(updatedChat) - } else { - publishChatTitleChange(api.Logger, api.Pubsub, updatedChat) - } + api.chatDaemon.PublishTitleChange(updatedChat) } return updatedChat, false } @@ -2727,6 +2656,21 @@ func (api *API) patchChat(rw http.ResponseWriter, r *http.Request) { if req.Archived != nil { archived := *req.Archived + + // Archive invariant is one-way: parent archived implies + // child archived. Archive state changes target the root + // chat and cascade atomically across the family; child + // chats cannot be archived or unarchived independently. + // This check precedes the no-op check so any child attempt + // surfaces the root-only error regardless of the chat's + // current archived value. + if chat.ParentChatID.Valid { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Chat archive state can only be changed on the root chat.", + }) + return + } + if archived == chat.Archived { state := "archived" if !archived { @@ -2738,37 +2682,30 @@ func (api *API) patchChat(rw http.ResponseWriter, r *http.Request) { return } - // Archive invariant is one-way: parent archived implies - // child archived. Parent archive/unarchive cascade via - // root_chat_id; individual child archive is permitted; - // child unarchive while the parent is archived is rejected - // (enforced atomically in chatd.Server.UnarchiveChat). - if chat.ParentChatID.Valid && !archived { - if done := api.writeChildUnarchiveGuard(ctx, rw, chat); done { - return - } - } var err error - // Use chatDaemon when available so it can interrupt active - // processing before broadcasting archive state. Fall back to - // direct DB when no daemon is running. 
if archived { - if api.chatDaemon != nil { - err = api.chatDaemon.ArchiveChat(ctx, chat) - } else { - _, err = api.Database.ArchiveChatByID(ctx, chat.ID) - } + err = api.chatDaemon.ArchiveChat(ctx, chat) } else { - if api.chatDaemon != nil { - err = api.chatDaemon.UnarchiveChat(ctx, chat) - } else { - _, err = api.Database.UnarchiveChatByID(ctx, chat.ID) - } + err = api.chatDaemon.UnarchiveChat(ctx, chat) } if err != nil { - if errors.Is(err, chatd.ErrChildUnarchiveParentArchived) { + if errors.Is(err, chatd.ErrArchiveRequiresRootChat) || errors.Is(err, chatstate.ErrChatNotRoot) { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ - Message: "Cannot unarchive a child chat while its parent is archived. Unarchive the parent chat to cascade.", + Message: "Chat archive state can only be changed on the root chat.", + }) + return + } + if writeChatInvalidState(ctx, rw, err) { + return + } + if errors.Is(err, chatstate.ErrTransitionNotAllowed) { + // Archive only succeeds from idle / error execution + // states (W, E0, E1) per the chatd RFC; active + // chats refuse archive instead of being silently + // transitioned to waiting first. + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Cannot archive an active chat. Interrupt or wait for the chat to finish first.", + Detail: err.Error(), }) return } @@ -2918,36 +2855,17 @@ func (api *API) patchChat(rw http.ResponseWriter, r *http.Request) { rw.WriteHeader(http.StatusNoContent) } -// writeChildUnarchiveGuard returns a 400 early when a child unarchive -// request obviously races an archived parent. The durable invariant -// is enforced atomically in chatd.Server.UnarchiveChat; this guard -// just surfaces the error before we take any locks. -// +// writeChatInvalidState writes the shared invalid-state response for +// chatstate.ErrInvalidState across every chat mutation endpoint. // Returns true when a response has been written. 
-func (api *API) writeChildUnarchiveGuard( - ctx context.Context, - rw http.ResponseWriter, - chat database.Chat, -) bool { - parent, err := api.Database.GetChatByID(ctx, chat.ParentChatID.UUID) - if err != nil { - if errors.Is(err, sql.ErrNoRows) { - httpapi.ResourceNotFound(rw) - return true - } - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Failed to load parent chat.", - Detail: err.Error(), - }) - return true +func writeChatInvalidState(ctx context.Context, rw http.ResponseWriter, err error) bool { + if !errors.Is(err, chatstate.ErrInvalidState) { + return false } - if parent.Archived { - httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ - Message: "Cannot unarchive a child chat while its parent is archived. Unarchive the parent chat to cascade.", - }) - return true - } - return false + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is in an invalid state.", + }) + return true } // EXPERIMENTAL: this endpoint is experimental and is subject to change. 
@@ -2996,14 +2914,6 @@ func (api *API) postChatMessages(rw http.ResponseWriter, r *http.Request) { return } - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - var req codersdk.CreateChatMessageRequest if !httpapi.Read(ctx, rw, r, &req) { return @@ -3104,10 +3014,15 @@ func (api *API) postChatMessages(rw http.ResponseWriter, r *http.Request) { }) return } - if xerrors.Is(sendErr, chatd.ErrMessageQueueFull) { + if xerrors.Is(sendErr, chatstate.ErrMessageQueueFull) { + var queueFull *chatstate.MessageQueueFullError + detail := "" + if errors.As(sendErr, &queueFull) { + detail = fmt.Sprintf("Maximum %d messages can be queued.", queueFull.Max) + } httpapi.Write(ctx, rw, http.StatusTooManyRequests, codersdk.Response{ Message: "Message queue is full.", - Detail: fmt.Sprintf("Maximum %d messages can be queued.", chatd.MaxQueueSize), + Detail: detail, }) return } @@ -3117,6 +3032,20 @@ func (api *API) postChatMessages(rw http.ResponseWriter, r *http.Request) { }) return } + if errors.Is(sendErr, chatstate.ErrChatNotFound) { + httpapi.ResourceNotFound(rw) + return + } + if writeChatInvalidState(ctx, rw, sendErr) { + return + } + if errors.Is(sendErr, chatstate.ErrTransitionNotAllowed) { + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is not in a state that accepts new messages.", + Detail: sendErr.Error(), + }) + return + } httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to create chat message.", Detail: sendErr.Error(), @@ -3187,14 +3116,6 @@ func (api *API) patchChatMessage(rw http.ResponseWriter, r *http.Request) { return } - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - 
} - messageIDStr := chi.URLParam(r, "message") messageID, err := strconv.ParseInt(messageIDStr, 10, 64) if err != nil || messageID <= 0 { @@ -3254,6 +3175,15 @@ func (api *API) patchChatMessage(rw http.ResponseWriter, r *http.Request) { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "Invalid model config ID.", }) + case errors.Is(editErr, chatstate.ErrChatNotFound): + httpapi.ResourceNotFound(rw) + case writeChatInvalidState(ctx, rw, editErr): + // response already written + case errors.Is(editErr, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is not in a state that accepts message edits.", + Detail: editErr.Error(), + }) default: httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to edit chat message.", @@ -3300,19 +3230,28 @@ func (api *API) deleteChatQueuedMessage(rw http.ResponseWriter, r *http.Request) return } - if api.chatDaemon != nil { - err = api.chatDaemon.DeleteQueued(ctx, chatID, queuedMessageID) - } else { - err = api.Database.DeleteChatQueuedMessage(ctx, database.DeleteChatQueuedMessageParams{ - ID: queuedMessageID, - ChatID: chatID, - }) - } + err = api.chatDaemon.DeleteQueued(ctx, chatID, queuedMessageID) if err != nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Failed to delete queued message.", - Detail: err.Error(), - }) + switch { + case xerrors.Is(err, chatstate.ErrQueuedMessageNotFound), xerrors.Is(err, sql.ErrNoRows): + httpapi.Write(ctx, rw, http.StatusNotFound, codersdk.Response{ + Message: "Queued message not found.", + }) + case errors.Is(err, chatstate.ErrChatNotFound): + httpapi.ResourceNotFound(rw) + case writeChatInvalidState(ctx, rw, err): + // response already written + case errors.Is(err, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat has no queued messages to delete.", + Detail: 
err.Error(), + }) + default: + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to delete queued message.", + Detail: err.Error(), + }) + } return } @@ -3359,14 +3298,6 @@ func (api *API) promoteChatQueuedMessage(rw http.ResponseWriter, r *http.Request return } - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - _, txErr := api.chatDaemon.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ ChatID: chatID, CreatedBy: apiKey.UserID, @@ -3377,16 +3308,30 @@ func (api *API) promoteChatQueuedMessage(rw http.ResponseWriter, r *http.Request if maybeWriteLimitErr(ctx, rw, txErr) { return } - if xerrors.Is(txErr, chatd.ErrChatArchived) { + switch { + case xerrors.Is(txErr, chatd.ErrChatArchived): httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "Cannot promote queued messages in an archived chat.", }) - return + case xerrors.Is(txErr, chatstate.ErrQueuedMessageNotFound): + httpapi.Write(ctx, rw, http.StatusNotFound, codersdk.Response{ + Message: "Queued message not found.", + }) + case errors.Is(txErr, chatstate.ErrChatNotFound): + httpapi.ResourceNotFound(rw) + case writeChatInvalidState(ctx, rw, txErr): + // response already written + case errors.Is(txErr, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat has no queued messages to promote.", + Detail: txErr.Error(), + }) + default: + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to promote queued message.", + Detail: txErr.Error(), + }) } - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Failed to promote queued message.", - Detail: txErr.Error(), - }) return } @@ -3445,14 +3390,6 @@ func (api *API) streamChat(rw http.ResponseWriter, r 
*http.Request) { chatID := chat.ID logger := api.Logger.Named("chat_streamer").With(slog.F("chat_id", chatID)) - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat streaming is not available.", - Detail: "Chat processor is not configured.", - }) - return - } - var afterMessageID int64 if v := r.URL.Query().Get("after_id"); v != "" { var err error @@ -3469,9 +3406,7 @@ func (api *API) streamChat(rw http.ResponseWriter, r *http.Request) { // Subscribe before accepting the WebSocket so that failures // can still be reported as normal HTTP errors. snapshot, events, cancelSub, ok := api.chatDaemon.SubscribeAuthorized(ctx, chat, r.Header, afterMessageID) - // Subscribe only fails today when the receiver is nil, which - // the chatDaemon == nil guard above already catches. This is - // defensive against future Subscribe failure modes. + // Defensive against future SubscribeAuthorized failure modes. if !ok { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Chat streaming is not available.", @@ -3597,31 +3532,86 @@ func (api *API) interruptChat(rw http.ResponseWriter, r *http.Request) { return } - if api.chatDaemon != nil { - chat = api.chatDaemon.InterruptChat(ctx, chat) - } else { - updatedChat, updateErr := api.Database.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chatID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - if updateErr != nil { - logger.Error(ctx, "failed to mark chat as waiting", slog.Error(updateErr)) - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Failed to interrupt chat.", - Detail: updateErr.Error(), - }) - return - } - chat = updatedChat + if !api.Authorize(r, policy.ActionUpdate, chat.RBACObject()) { + httpapi.ResourceNotFound(rw) + return } + updated, err := 
api.chatDaemon.InterruptChat(ctx, chat) + if err != nil { + switch { + case xerrors.Is(err, chatd.ErrChatArchived): + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Cannot interrupt an archived chat.", + }) + case writeChatInvalidState(ctx, rw, err): + // response already written + case errors.Is(err, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is not in an interruptible state.", + Detail: err.Error(), + }) + default: + logger.Error(ctx, "failed to interrupt chat", slog.Error(err)) + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to interrupt chat.", + Detail: err.Error(), + }) + } + return + } + chat = updated + httpapi.Write(ctx, rw, http.StatusOK, db2sdk.Chat(chat, nil, nil)) } +// EXPERIMENTAL: this endpoint is experimental and is subject to change. +// +// @Summary Reconcile invalid chat state +// @ID reconcile-invalid-chat-state +// @Security CoderSessionToken +// @Tags Chats +// @Produce json +// @Param chat path string true "Chat ID" format(uuid) +// @Success 200 {object} codersdk.Chat +// @Router /api/experimental/chats/{chat}/reconcile-invalid [post] +// @Description Experimental: this endpoint is subject to change. +// +//nolint:revive // HTTP handler writes to ResponseWriter. 
+func (api *API) reconcileInvalidChatState(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + chat := httpmw.ChatParam(r) + chatID := chat.ID + logger := api.Logger.Named("chat_reconcile_invalid").With(slog.F("chat_id", chatID)) + + if !api.Authorize(r, policy.ActionUpdate, chat.RBACObject()) { + httpapi.ResourceNotFound(rw) + return + } + + updated, err := api.chatDaemon.ReconcileInvalidStateChat(ctx, chat) + if err != nil { + switch { + case errors.Is(err, chatstate.ErrChatNotFound), httpapi.Is404Error(err): + httpapi.ResourceNotFound(rw) + case errors.Is(err, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is not in an invalid state.", + Detail: err.Error(), + }) + default: + logger.Error(ctx, "failed to reconcile invalid chat state", slog.Error(err)) + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to reconcile chat state.", + Detail: err.Error(), + }) + } + return + } + + httpapi.Write(ctx, rw, http.StatusOK, db2sdk.Chat(updated, nil, nil)) +} + // EXPERIMENTAL: this endpoint is experimental and is subject to change. 
// // @Summary Regenerate chat title @@ -3654,14 +3644,6 @@ func (api *API) regenerateChatTitle(rw http.ResponseWriter, r *http.Request) { return } - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - updatedChat, err := api.chatDaemon.RegenerateChatTitle(ctx, chat) if err != nil { if errors.Is(err, chatd.ErrManualTitleRegenerationInProgress) { @@ -3707,14 +3689,6 @@ func (api *API) proposeChatTitle(rw http.ResponseWriter, r *http.Request) { return } - if api.chatDaemon == nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Chat processor is unavailable.", - Detail: "Chat processor is not configured.", - }) - return - } - title, err := api.chatDaemon.ProposeChatTitle(ctx, chat) if err != nil { if errors.Is(err, chatd.ErrManualTitleRegenerationInProgress) { @@ -7741,15 +7715,10 @@ func (api *API) postChatToolResults(rw http.ResponseWriter, r *http.Request) { return } - // Fast-path check outside the transaction. The authoritative - // check happens inside SubmitToolResults under a row lock. - if chat.Status != database.ChatStatusRequiresAction { - httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ - Message: "Chat is not waiting for tool results.", - Detail: fmt.Sprintf("Chat status is %q, expected %q.", chat.Status, database.ChatStatusRequiresAction), - }) - return - } + // The authoritative status check happens inside SubmitToolResults + // under the row lock; that path also surfaces the shared + // invalid-state response for chats that are not in a valid + // execution state at all. 
var dynamicTools json.RawMessage if chat.DynamicTools.Valid { @@ -7781,6 +7750,15 @@ func (api *API) postChatToolResults(rw http.ResponseWriter, r *http.Request) { Message: validationErr.Message, Detail: validationErr.Detail, }) + case errors.Is(err, chatstate.ErrChatNotFound): + httpapi.ResourceNotFound(rw) + case writeChatInvalidState(ctx, rw, err): + // response already written + case errors.Is(err, chatstate.ErrTransitionNotAllowed): + httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ + Message: "Chat is not waiting for tool results.", + Detail: err.Error(), + }) default: api.Logger.Error(ctx, "tool results submission failed", slog.F("chat_id", chat.ID), diff --git a/coderd/exp_chats_chatstate_test.go b/coderd/exp_chats_chatstate_test.go new file mode 100644 index 0000000000..e48cc086ba --- /dev/null +++ b/coderd/exp_chats_chatstate_test.go @@ -0,0 +1,780 @@ +package coderd_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + + "github.com/coder/coder/v2/coderd" + "github.com/coder/coder/v2/coderd/coderdtest" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/x/chatd/chatprompt" + "github.com/coder/coder/v2/coderd/x/chatd/chatstate" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/testutil" +) + +// driveChatToWaiting transitions the chat from `running` (its initial +// state per the RFC) to `waiting` by running chatstate.FinishTurn. +// Tests use this when they need to exercise endpoint behavior that +// only succeeds from idle execution states (W, E0). 
+func driveChatToWaiting(ctx context.Context, t *testing.T, api *coderd.API, chatID uuid.UUID) { + t.Helper() + // chatstate writes through the raw database/pubsub, so we use the + // system-restricted context to bypass dbauthz which is keyed on a + // user identity. This mirrors how chatd itself drives transitions + // from background work. + sysCtx := dbauthz.AsSystemRestricted(ctx) //nolint:gocritic // Test fixture composes chatstate transitions outside a request. + machine := chatstate.NewChatMachine(api.Database, api.Pubsub, chatID, chatstate.Options{}) + require.NoError(t, machine.Update(sysCtx, func(tx *chatstate.Tx) error { + _, err := tx.FinishTurn(chatstate.FinishTurnInput{}) + return err + })) +} + +// driveChatToRequiresAction commits an assistant message with a single +// dynamic tool_call part and then transitions the chat to +// `requires_action`. The tool_call_id returned lets the caller +// assemble a valid SubmitToolResultsRequest. +func driveChatToRequiresAction( + ctx context.Context, + t *testing.T, + api *coderd.API, + chat codersdk.Chat, + toolName string, +) (toolCallID string) { + t.Helper() + sysCtx := dbauthz.AsSystemRestricted(ctx) //nolint:gocritic // Test fixture composes chatstate transitions outside a request. 
+ + toolCallID = "call-" + uuid.NewString() + assistantContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ + codersdk.ChatMessageText("dispatching dynamic tool"), + { + Type: codersdk.ChatMessagePartTypeToolCall, + ToolCallID: toolCallID, + ToolName: toolName, + Args: json.RawMessage(`{}`), + }, + }) + require.NoError(t, err) + + machine := chatstate.NewChatMachine(api.Database, api.Pubsub, chat.ID, chatstate.Options{}) + require.NoError(t, machine.Update(sysCtx, func(tx *chatstate.Tx) error { + _, err := tx.CommitStep(chatstate.CommitStepInput{ + Messages: []chatstate.Message{{ + Role: database.ChatMessageRoleAssistant, + Content: assistantContent, + Visibility: database.ChatMessageVisibilityBoth, + ModelConfigID: uuid.NullUUID{UUID: chat.LastModelConfigID, Valid: true}, + ContentVersion: chatprompt.CurrentContentVersion, + }}, + }) + if err != nil { + return err + } + _, err = tx.EnterRequiresAction(chatstate.EnterRequiresActionInput{}) + return err + })) + return toolCallID +} + +// TestPostChatsStartsRunning verifies the RFC-mandated `running` +// initial status surfaced by the create-chat endpoint. +func TestPostChatsStartsRunning(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client, api := newChatClientWithAPI(t) + firstUser := coderdtest.CreateFirstUser(t, client.Client) + _ = createChatModelConfig(t, client) + + chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{ + OrganizationID: firstUser.OrganizationID, + Content: []codersdk.ChatInputPart{{ + Type: codersdk.ChatInputPartTypeText, + Text: "hello", + }}, + }) + require.NoError(t, err) + require.Equal(t, codersdk.ChatStatusRunning, chat.Status, + "new chats must start in `running` per chatd RFC") + + // Re-reading also reports `running` because the chat row is + // authoritative and no worker has advanced it. 
+ gotChat, err := client.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.Equal(t, codersdk.ChatStatusRunning, gotChat.Status) + require.NotNil(t, api.Pubsub) +} + +// TestArchiveChatStateTransitions covers the two RFC-mandated archive +// behaviors at the endpoint contract level: archiving from an idle +// chat (W) succeeds, and archiving from an active chat (R0) returns +// a state conflict and leaves the chat unarchived. +func TestArchiveChatStateTransitions(t *testing.T) { + t.Parallel() + + t.Run("IdleSucceeds", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client, api := newChatClientWithAPI(t) + firstUser := coderdtest.CreateFirstUser(t, client.Client) + _ = createChatModelConfig(t, client) + + chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{ + OrganizationID: firstUser.OrganizationID, + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "archive me"}}, + }) + require.NoError(t, err) + + driveChatToWaiting(ctx, t, api, chat.ID) + + err = client.UpdateChat(ctx, chat.ID, codersdk.UpdateChatRequest{Archived: ptrTo(true)}) + require.NoError(t, err) + + got, err := client.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.True(t, got.Archived) + }) + + t.Run("ActiveChatReturnsConflict", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client := newChatClient(t) + firstUser := coderdtest.CreateFirstUser(t, client.Client) + _ = createChatModelConfig(t, client) + + chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{ + OrganizationID: firstUser.OrganizationID, + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "no archive"}}, + }) + require.NoError(t, err) + + err = client.UpdateChat(ctx, chat.ID, codersdk.UpdateChatRequest{Archived: ptrTo(true)}) + requireSDKError(t, err, http.StatusConflict) + + got, err := client.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.False(t, 
got.Archived, "active chat must remain unarchived after a conflict") + }) +} + +// TestPostChatMessagesBusyInterrupt verifies that a busy-interrupt +// send returns a queued response and leaves the chat in `interrupting` +// from the endpoint's perspective. +func TestPostChatMessagesBusyInterrupt(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client := newChatClient(t) + firstUser := coderdtest.CreateFirstUser(t, client.Client) + _ = createChatModelConfig(t, client) + + chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{ + OrganizationID: firstUser.OrganizationID, + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}}, + }) + require.NoError(t, err) + require.Equal(t, codersdk.ChatStatusRunning, chat.Status) + + // CreateChat leaves the chat in `running`; an interrupt-style + // follow-up should land it in `interrupting`. + resp, err := client.CreateChatMessage(ctx, chat.ID, codersdk.CreateChatMessageRequest{ + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "stop"}}, + BusyBehavior: codersdk.ChatBusyBehaviorInterrupt, + }) + require.NoError(t, err) + require.True(t, resp.Queued, "busy interrupt must return queued=true") + require.NotNil(t, resp.QueuedMessage) + + got, err := client.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.Equal(t, codersdk.ChatStatusInterrupting, got.Status, + "busy interrupt send must land the chat in `interrupting`") +} + +// TestDeleteChatQueuedMessageMissingReturns404 covers the new +// chatstate-driven 404 path for missing queued IDs. The chat must +// have at least one queued message so the request is in a state where +// DeleteQueuedMessage is allowed; the looked-up ID then mismatches +// and the endpoint returns 404 instead of a state-conflict 409. 
+func TestDeleteChatQueuedMessageMissingReturns404(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client := newChatClient(t) + firstUser := coderdtest.CreateFirstUser(t, client.Client) + _ = createChatModelConfig(t, client) + + chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{ + OrganizationID: firstUser.OrganizationID, + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}}, + }) + require.NoError(t, err) + + // Seed one queued message via the public endpoint (the chat + // starts in R0, so a queue send lands in R1). + _, err = client.CreateChatMessage(ctx, chat.ID, codersdk.CreateChatMessageRequest{ + Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "queued"}}, + BusyBehavior: codersdk.ChatBusyBehaviorQueue, + }) + require.NoError(t, err) + + res, err := client.Request( + ctx, + http.MethodDelete, + fmt.Sprintf("/api/experimental/chats/%s/queue/99999999", chat.ID), + nil, + ) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, http.StatusNotFound, res.StatusCode) +} + +// TestDeleteChatQueuedMessageEmptyQueueReturnsConflict covers the +// state-conflict 409 path when the chat has no queued messages. 
+func TestDeleteChatQueuedMessageEmptyQueueReturnsConflict(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	res, err := client.Request( // No queue send precedes this call, so the queue is still empty.
+		ctx,
+		http.MethodDelete,
+		fmt.Sprintf("/api/experimental/chats/%s/queue/99999999", chat.ID),
+		nil, // No request body.
+	)
+	require.NoError(t, err)
+	defer res.Body.Close()
+	require.Equal(t, http.StatusConflict, res.StatusCode)
+}
+
+// TestPromoteChatQueuedMessageMissingReturns404 mirrors the delete
+// test for the promote endpoint: with a non-empty queue, an unknown
+// queued-message ID returns 404 rather than a 409.
+func TestPromoteChatQueuedMessageMissingReturns404(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	// Seed one queued message so the promote transition is allowed.
+	_, err = client.CreateChatMessage(ctx, chat.ID, codersdk.CreateChatMessageRequest{
+		Content:      []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "queued"}},
+		BusyBehavior: codersdk.ChatBusyBehaviorQueue,
+	})
+	require.NoError(t, err)
+
+	res, err := client.Request( // Raw request so the HTTP status code can be asserted directly.
+		ctx,
+		http.MethodPost,
+		fmt.Sprintf("/api/experimental/chats/%s/queue/99999999/promote", chat.ID), // 99999999: an ID the seeded row is assumed not to have.
+		nil, // No request body.
+	)
+	require.NoError(t, err)
+	defer res.Body.Close()
+	require.Equal(t, http.StatusNotFound, res.StatusCode)
+}
+
+// TestPromoteChatQueuedMessageEmptyQueueReturnsConflict verifies the
+// state-conflict 409 path when the chat has no queued messages.
+func TestPromoteChatQueuedMessageEmptyQueueReturnsConflict(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	res, err := client.Request( // No queue send precedes this call, so the queue is still empty.
+		ctx,
+		http.MethodPost,
+		fmt.Sprintf("/api/experimental/chats/%s/queue/99999999/promote", chat.ID),
+		nil, // No request body.
+	)
+	require.NoError(t, err)
+	defer res.Body.Close()
+	require.Equal(t, http.StatusConflict, res.StatusCode)
+}
+
+// TestInterruptChatIdleReturnsConflict verifies that interrupting an
+// idle chat is now rejected. The fixture composes chatstate
+// transitions to reach the W state without depending on the
+// background worker.
+func TestInterruptChatIdleReturnsConflict(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, api := newChatClientWithAPI(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "interrupt me"}},
+	})
+	require.NoError(t, err)
+
+	driveChatToWaiting(ctx, t, api, chat.ID) // Move the chat out of running into waiting (W) before interrupting.
+
+	_, err = client.InterruptChat(ctx, chat.ID)
+	requireSDKError(t, err, http.StatusConflict)
+}
+
+// TestSubmitToolResultsWrongStateReturnsConflict covers the 409
+// response returned when the chat is not in requires_action.
+func TestSubmitToolResultsWrongStateReturnsConflict(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+	require.Equal(t, codersdk.ChatStatusRunning, chat.Status) // Precondition: running, i.e. not requires_action.
+
+	err = client.SubmitToolResults(ctx, chat.ID, codersdk.SubmitToolResultsRequest{
+		Results: []codersdk.ToolResult{{
+			ToolCallID: "unknown-call", // Arbitrary ID; the expected rejection is based on chat status.
+			Output:     json.RawMessage(`{}`),
+		}},
+	})
+	requireSDKError(t, err, http.StatusConflict)
+}
+
+// TestSubmitToolResultsRequiresActionSucceeds drives a chat into
+// requires_action with a single dynamic tool call and verifies a
+// matching SubmitToolResults call returns 204 with the tool result
+// persisted.
+func TestSubmitToolResultsRequiresActionSucceeds(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, api := newChatClientWithAPI(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client) // Result unused here; presumably seeds the model config the chat needs.
+
+	dynamicTools := []codersdk.DynamicTool{{
+		Name:        "echo", // Same tool name is passed to driveChatToRequiresAction below.
+		Description: "test echo tool",
+		InputSchema: json.RawMessage(`{"type":"object"}`),
+	}}
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID:     firstUser.OrganizationID,
+		Content:            []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+		UnsafeDynamicTools: dynamicTools,
+	})
+	require.NoError(t, err)
+
+	toolCallID := driveChatToRequiresAction(ctx, t, api, chat, "echo") // Return value is used below as the pending call's ID.
+
+	err = client.SubmitToolResults(ctx, chat.ID, codersdk.SubmitToolResultsRequest{
+		Results: []codersdk.ToolResult{{
+			ToolCallID: toolCallID,
+			Output:     json.RawMessage(`{"ok":true}`),
+		}},
+	})
+	require.NoError(t, err)
+
+	// The tool result must be persisted as a visible tool message.
+	got, err := client.GetChatMessages(ctx, chat.ID, nil)
+	require.NoError(t, err)
+	foundToolResult := false
+	for _, msg := range got.Messages {
+		if msg.Role != codersdk.ChatMessageRoleTool {
+			continue // Only tool-role messages can carry the result.
+		}
+		for _, part := range msg.Content {
+			if part.Type == codersdk.ChatMessagePartTypeToolResult && part.ToolCallID == toolCallID {
+				foundToolResult = true
+				break // Exits the inner loop only; remaining messages are still scanned.
+			}
+		}
+	}
+	require.True(t, foundToolResult, "tool result message must be visible in chat history")
+}
+
+// TestPatchChatArchiveChildRejected verifies that PATCH /api/experimental/chats/{child}
+// with archived=true returns the root-only error regardless of the
+// child's current archived value, and does not change archive state on
+// any family member.
+func TestPatchChatArchiveChildRejected(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, db, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	modelConfig := createChatModelConfig(t, client)
+
+	root, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "root"}},
+	})
+	require.NoError(t, err)
+	driveChatToWaiting(ctx, t, api, root.ID)
+
+	// Sibling children A and B, inserted directly; both unarchived.
+	childA := dbgen.Chat(t, db, database.Chat{
+		OrganizationID:    firstUser.OrganizationID,
+		OwnerID:           firstUser.UserID,
+		LastModelConfigID: modelConfig.ID,
+		Title:             "child-a",
+		Status:            database.ChatStatusWaiting,
+		ParentChatID:      uuid.NullUUID{UUID: root.ID, Valid: true},
+		RootChatID:        uuid.NullUUID{UUID: root.ID, Valid: true},
+	})
+	childB := dbgen.Chat(t, db, database.Chat{
+		OrganizationID:    firstUser.OrganizationID,
+		OwnerID:           firstUser.UserID,
+		LastModelConfigID: modelConfig.ID,
+		Title:             "child-b",
+		Status:            database.ChatStatusWaiting,
+		ParentChatID:      uuid.NullUUID{UUID: root.ID, Valid: true},
+		RootChatID:        uuid.NullUUID{UUID: root.ID, Valid: true},
+	})
+
+	err = client.UpdateChat(ctx, childA.ID, codersdk.UpdateChatRequest{Archived: ptrTo(true)})
+	requireSDKError(t, err, http.StatusBadRequest)
+
+	for _, id := range []uuid.UUID{root.ID, childA.ID, childB.ID} {
+		got, gerr := loadChatRow(ctx, db, id)
+		require.NoError(t, gerr)
+		require.False(t, got.Archived, "no family member may flip archive state after a rejected child archive")
+	}
+}
+
+// TestPatchChatUnarchiveChildRejected verifies that PATCH /api/experimental/chats/{child}
+// with archived=false on an archived family is rejected with the
+// root-only error and leaves every family member archived. The request
+// is not a no-op: the child is archived at that point, so the test
+// exercises unarchiving a single child rather than an unchanged value.
+func TestPatchChatUnarchiveChildRejected(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, db, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	modelConfig := createChatModelConfig(t, client)
+
+	root, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "root"}},
+	})
+	require.NoError(t, err)
+	driveChatToWaiting(ctx, t, api, root.ID)
+
+	childA := dbgen.Chat(t, db, database.Chat{
+		OrganizationID:    firstUser.OrganizationID,
+		OwnerID:           firstUser.UserID,
+		LastModelConfigID: modelConfig.ID,
+		Title:             "child-a",
+		Status:            database.ChatStatusWaiting,
+		ParentChatID:      uuid.NullUUID{UUID: root.ID, Valid: true},
+		RootChatID:        uuid.NullUUID{UUID: root.ID, Valid: true},
+	})
+	childB := dbgen.Chat(t, db, database.Chat{
+		OrganizationID:    firstUser.OrganizationID,
+		OwnerID:           firstUser.UserID,
+		LastModelConfigID: modelConfig.ID,
+		Title:             "child-b",
+		Status:            database.ChatStatusWaiting,
+		ParentChatID:      uuid.NullUUID{UUID: root.ID, Valid: true},
+		RootChatID:        uuid.NullUUID{UUID: root.ID, Valid: true},
+	})
+
+	// Archive the whole family via the root.
+	err = client.UpdateChat(ctx, root.ID, codersdk.UpdateChatRequest{Archived: ptrTo(true)})
+	require.NoError(t, err)
+	for _, id := range []uuid.UUID{root.ID, childA.ID, childB.ID} {
+		got, gerr := loadChatRow(ctx, db, id)
+		require.NoError(t, gerr)
+		require.True(t, got.Archived, "precondition: family archived after root archive")
+	}
+
+	// Unarchiving a child must be rejected.
+	err = client.UpdateChat(ctx, childA.ID, codersdk.UpdateChatRequest{Archived: ptrTo(false)})
+	requireSDKError(t, err, http.StatusBadRequest)
+
+	for _, id := range []uuid.UUID{root.ID, childA.ID, childB.ID} {
+		got, gerr := loadChatRow(ctx, db, id)
+		require.NoError(t, gerr)
+		require.True(t, got.Archived, "no family member may flip archive state after a rejected child unarchive")
+	}
+}
+
+// TestPatchChatArchiveRootRollsBackWhenChildCannotArchive verifies the
+// family-archive atomicity guarantee surfaced through the endpoint:
+// when a child is in a state that rejects SetArchived (running here),
+// the whole cascade rolls back and no family member changes archive
+// state.
+func TestPatchChatArchiveRootRollsBackWhenChildCannotArchive(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, db, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	modelConfig := createChatModelConfig(t, client)
+
+	root, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "root"}},
+	})
+	require.NoError(t, err)
+	driveChatToWaiting(ctx, t, api, root.ID)
+
+	// Child is running (R0) which is NOT archive-eligible.
+	child := dbgen.Chat(t, db, database.Chat{
+		OrganizationID:    firstUser.OrganizationID,
+		OwnerID:           firstUser.UserID,
+		LastModelConfigID: modelConfig.ID,
+		Title:             "child",
+		Status:            database.ChatStatusRunning,
+		ParentChatID:      uuid.NullUUID{UUID: root.ID, Valid: true},
+		RootChatID:        uuid.NullUUID{UUID: root.ID, Valid: true},
+	})
+
+	err = client.UpdateChat(ctx, root.ID, codersdk.UpdateChatRequest{Archived: ptrTo(true)})
+	requireSDKError(t, err, http.StatusConflict)
+
+	for _, id := range []uuid.UUID{root.ID, child.ID} {
+		got, gerr := loadChatRow(ctx, db, id)
+		require.NoError(t, gerr)
+		require.False(t, got.Archived, "rolled-back family archive must not leave any member archived")
+	}
+}
+
+// TestPostChatMessagesInvalidStateReturnsSharedResponse drives a chat
+// into the chatstate-invalid state (waiting with a queued backlog)
+// and asserts the shared invalid-state response. The send-message
+// endpoint serves as the representative case for that response.
+func TestPostChatMessagesInvalidStateReturnsSharedResponse(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, _, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client)
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	// Drive the chat to an invalid combination: status=waiting (W),
+	// archived=false, and a queued message. ClassifyExecutionState
+	// returns StateInvalid for (waiting, queue=true).
+	driveChatToInvalidWaitingWithQueue(ctx, t, api, chat.ID)
+
+	_, err = client.CreateChatMessage(ctx, chat.ID, codersdk.CreateChatMessageRequest{
+		Content: []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "send"}},
+	})
+	sdkErr := requireSDKError(t, err, http.StatusConflict)
+	require.Equal(t, "Chat is in an invalid state.", sdkErr.Message,
+		"invalid-state endpoint response uses the shared message")
+}
+
+// TestPostChatToolResultsInvalidStateReturnsSharedResponse drives a
+// chat into the chatstate-invalid state and asserts that the tool
+// results endpoint returns the shared invalid-state response instead
+// of the old "Chat is not waiting for tool results." status-conflict
+// message. This locks the fix that removes the endpoint fast-path
+// and routes invalid chats through the chatstate-backed transaction.
+func TestPostChatToolResultsInvalidStateReturnsSharedResponse(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, _, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client)
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	// Drive the chat to an invalid combination so the tool-results
+	// endpoint must surface the shared invalid-state response rather
+	// than the requires_action status conflict.
+	driveChatToInvalidWaitingWithQueue(ctx, t, api, chat.ID)
+
+	err = client.SubmitToolResults(ctx, chat.ID, codersdk.SubmitToolResultsRequest{
+		Results: []codersdk.ToolResult{{
+			ToolCallID: "call-irrelevant",
+			Output:     json.RawMessage(`{}`),
+		}},
+	})
+	sdkErr := requireSDKError(t, err, http.StatusConflict)
+	require.Equal(t, "Chat is in an invalid state.", sdkErr.Message,
+		"tool-results invalid-state response uses the shared message")
+}
+
+// TestReconcileInvalidChatStateSucceeds drives a chat into the
+// chatstate-invalid combination (waiting with a queued backlog) and
+// verifies the reconcile endpoint moves it into a valid error state
+// while preserving the queued message.
+func TestReconcileInvalidChatStateSucceeds(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client, db, api := newChatClientWithAPIAndDatabase(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client)
+
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+
+	// Drive the chat to an invalid combination: status=waiting (W),
+	// archived=false, with a queued message. ClassifyExecutionState
+	// returns StateInvalid for (waiting, queue=true).
+	driveChatToInvalidWaitingWithQueue(ctx, t, api, chat.ID)
+
+	reconciled, err := client.ReconcileInvalidChatState(ctx, chat.ID)
+	require.NoError(t, err)
+	require.Equal(t, chat.ID, reconciled.ID)
+	require.Equal(t, codersdk.ChatStatusError, reconciled.Status)
+
+	// The persisted row must reflect a valid error state with the
+	// queued message preserved (E1) and a populated last_error.
+	persisted, err := loadChatRow(ctx, db, chat.ID)
+	require.NoError(t, err)
+	require.Equal(t, database.ChatStatusError, persisted.Status)
+	require.False(t, persisted.Archived)
+	require.True(t, persisted.LastError.Valid)
+
+	queueCount, err := db.CountChatQueuedMessages(dbauthz.AsSystemRestricted(ctx), chat.ID) //nolint:gocritic // Test fixture reads the queue count.
+	require.NoError(t, err)
+	require.Equal(t, int64(1), queueCount, "queued message is preserved by reconcile")
+}
+
+// TestReconcileInvalidChatStateNotInvalidReturnsConflict verifies that
+// reconciling a chat that is in a valid execution state is rejected
+// with a 409 conflict.
+func TestReconcileInvalidChatStateNotInvalidReturnsConflict(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	firstUser := coderdtest.CreateFirstUser(t, client.Client)
+	_ = createChatModelConfig(t, client)
+
+	// A freshly created chat starts in the valid running state (R0).
+	chat, err := client.CreateChat(ctx, codersdk.CreateChatRequest{
+		OrganizationID: firstUser.OrganizationID,
+		Content:        []codersdk.ChatInputPart{{Type: codersdk.ChatInputPartTypeText, Text: "hello"}},
+	})
+	require.NoError(t, err)
+	require.Equal(t, codersdk.ChatStatusRunning, chat.Status)
+
+	_, err = client.ReconcileInvalidChatState(ctx, chat.ID)
+	sdkErr := requireSDKError(t, err, http.StatusConflict)
+	require.Equal(t, "Chat is not in an invalid state.", sdkErr.Message)
+}
+
+// TestReconcileInvalidChatStateNotFound verifies the reconcile
+// endpoint returns 404 for a chat that does not exist.
+func TestReconcileInvalidChatStateNotFound(t *testing.T) {
+	t.Parallel()
+
+	ctx := testutil.Context(t, testutil.WaitLong)
+	client := newChatClient(t)
+	_ = coderdtest.CreateFirstUser(t, client.Client) // First-user result unused; no chat is created in this test.
+
+	_, err := client.ReconcileInvalidChatState(ctx, uuid.New()) // Fresh random UUID, so no chat row should match.
+	requireSDKError(t, err, http.StatusNotFound)
+}
+
+// loadChatRow reads a chat row by ID using a dbauthz.AsSystemRestricted
+// context so endpoint tests can verify side effects without
+// authorization rules interfering.
+func loadChatRow(ctx context.Context, db database.Store, id uuid.UUID) (database.Chat, error) {
+	sysCtx := dbauthz.AsSystemRestricted(ctx) //nolint:gocritic // Test fixture reads chat rows.
+	return db.GetChatByID(sysCtx, id)
+}
+
+// driveChatToInvalidWaitingWithQueue forces a chat into the
+// chatstate-invalid combination (status=waiting, archived=false,
+// queue non-empty) by writing directly through the database. This is
+// an intentional invalid fixture: chatstate transitions reject
+// driving toward this combination, and ChatMachine.Update must not
+// be composed inside a caller-owned transaction because it owns its
+// own transaction lifecycle.
+func driveChatToInvalidWaitingWithQueue(
+	ctx context.Context,
+	t *testing.T,
+	api *coderd.API,
+	chatID uuid.UUID,
+) {
+	t.Helper()
+	sysCtx := dbauthz.AsSystemRestricted(ctx) //nolint:gocritic // Test fixture writes invalid combination by design.
+
+	// Seed the queue with one row attributed to the chat owner. The
+	// content is a minimal valid JSON payload; only the row's
+	// presence matters for ClassifyExecutionState. The owner_id is
+	// filled from the chat row by the SQL.
+	rawContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{
+		codersdk.ChatMessageText("queued"),
+	})
+	require.NoError(t, err)
+	_, err = api.Database.InsertChatQueuedMessage(sysCtx, database.InsertChatQueuedMessageParams{
+		ChatID:        chatID,
+		Content:       rawContent.RawMessage,
+		ModelConfigID: uuid.NullUUID{}, // Zero value (Valid=false): no model config is pinned on the queued row.
+	})
+	require.NoError(t, err)
+
+	// Flip the chat's status to waiting via a raw execution-state
+	// update. This bypasses the transition matrix to produce the
+	// (waiting, queued) invalid pairing.
+	_, err = api.Database.UpdateChatExecutionState(sysCtx, database.UpdateChatExecutionStateParams{
+		ID:       chatID,
+		Status:   database.ChatStatusWaiting,
+		Archived: false,
+	})
+	require.NoError(t, err)
+}
+
+// ptrTo returns a pointer to a copy of the given value. Helper for the
+// pointer-shaped UpdateChatRequest fields (e.g. Archived).
+func ptrTo[T any](v T) *T { return &v }
diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go
index b9718c996e..fa4ec5154e 100644
--- a/coderd/exp_chats_test.go
+++ b/coderd/exp_chats_test.go
@@ -14644,6 +14644,15 @@ func TestChatReadOnlySharedWriteHandlers(t *testing.T) {
 		requireSDKError(t, err, http.StatusNotFound)
 	})
 
+	t.Run("ReconcileInvalidChatState", func(t *testing.T) {
+		t.Parallel()
+
+		ctx, _, sharedClient, chat, _ := setup(t)
+		_, err := sharedClient.ReconcileInvalidChatState(ctx, chat.ID)
+
+		requireSDKError(t, err, http.StatusNotFound)
+	})
+
 	t.Run("RegenerateChatTitle", func(t *testing.T) {
 		t.Parallel()
 
diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go
index 991108a02b..a7dd49a7d0 100644
--- a/coderd/x/chatd/chatd.go
+++ b/coderd/x/chatd/chatd.go
@@ -49,6 +49,7 @@ import (
 	"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
 	"github.com/coder/coder/v2/coderd/x/chatd/chatretry"
 	"github.com/coder/coder/v2/coderd/x/chatd/chatsanitize"
+	"github.com/coder/coder/v2/coderd/x/chatd/chatstate"
 	"github.com/coder/coder/v2/coderd/x/chatd/chattool"
 	"github.com/coder/coder/v2/coderd/x/chatd/internal/agentselect"
"github.com/coder/coder/v2/coderd/x/chatd/mcpclient" @@ -1374,14 +1375,9 @@ func (c *streamStateCollector) Collect(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric(streamSubscribersDesc, prometheus.GaugeValue, float64(totalSubs)) } -// MaxQueueSize is the maximum number of queued user messages per chat. -const MaxQueueSize = 20 - var ( // ErrInvalidModelConfigID indicates the requested model config does not exist. ErrInvalidModelConfigID = xerrors.New("invalid model config ID") - // ErrMessageQueueFull indicates the per-chat queue limit was reached. - ErrMessageQueueFull = xerrors.New("chat message queue is full") // ErrEditedMessageNotFound indicates the edited message does not exist // in the target chat. ErrEditedMessageNotFound = xerrors.New("edited message not found") @@ -1506,8 +1502,9 @@ type PromoteQueuedResult struct { PromotedMessage database.ChatMessage } -// CreateChat creates a chat, inserts optional system prompt and initial user -// message, and moves the chat into pending status. +// CreateChat creates a chat with its initial history through +// chatstate.CreateChat. The new chat starts in `running` status per +// the RFC. Ownership hints wake chat workers. func (p *Server) CreateChat(ctx context.Context, opts CreateOptions) (database.Chat, error) { if opts.OrganizationID == uuid.Nil { return database.Chat{}, xerrors.New("organization_id is required") @@ -1530,148 +1527,109 @@ func (p *Server) CreateChat(ctx context.Context, opts CreateOptions) (database.C if opts.Labels == nil { opts.Labels = database.StringMap{} } + opts.ClientType = cmp.Or(opts.ClientType, database.ChatClientTypeApi) + if !opts.ClientType.Valid() { + return database.Chat{}, xerrors.Errorf("invalid client_type: %q", opts.ClientType) + } // Resolve the deployment prompt before opening the transaction so // chat creation does not hold one DB connection while waiting for // another pool checkout. 
deploymentPrompt := p.resolveDeploymentSystemPrompt(ctx) - effectivePlanMode := opts.PlanMode - opts.ClientType = cmp.Or(opts.ClientType, database.ChatClientTypeApi) - if !opts.ClientType.Valid() { - return database.Chat{}, xerrors.Errorf("invalid client_type: %q", opts.ClientType) - } - var chat database.Chat - txErr := p.db.InTx(func(tx database.Store) error { - if limitErr := p.checkUsageLimit(ctx, tx, opts.OwnerID, uuid.NullUUID{UUID: opts.OrganizationID, Valid: true}); limitErr != nil { - return limitErr - } - - labelsJSON, err := json.Marshal(opts.Labels) - if err != nil { - return xerrors.Errorf("marshal labels: %w", err) - } - - insertedChat, err := tx.InsertChat(ctx, database.InsertChatParams{ - OrganizationID: opts.OrganizationID, - OwnerID: opts.OwnerID, - WorkspaceID: opts.WorkspaceID, - BuildID: opts.BuildID, - AgentID: opts.AgentID, - ParentChatID: opts.ParentChatID, - RootChatID: opts.RootChatID, - LastModelConfigID: opts.ModelConfigID, - Title: opts.Title, - Mode: opts.ChatMode, - PlanMode: effectivePlanMode, - ClientType: opts.ClientType, - // Chats created with an initial user message start pending. - // Waiting is reserved for idle chats with no pending work. 
- Status: database.ChatStatusPending, - MCPServerIDs: opts.MCPServerIDs, - Labels: pqtype.NullRawMessage{ - RawMessage: labelsJSON, - Valid: true, - }, - DynamicTools: pqtype.NullRawMessage{ - RawMessage: opts.DynamicTools, - Valid: len(opts.DynamicTools) > 0, - }, - }) - if err != nil { - return xerrors.Errorf("insert chat: %w", err) - } - - userPrompt := SanitizePromptText(opts.SystemPrompt) - workspaceAwareness := workspaceDetachedAwareness - if opts.WorkspaceID.Valid { - workspaceAwareness = workspaceAttachedAwareness - } - workspaceAwarenessContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText(workspaceAwareness), - }) - if err != nil { - return xerrors.Errorf("marshal workspace awareness: %w", err) - } - userContent, err := chatprompt.MarshalParts(opts.InitialUserContent) - if err != nil { - return xerrors.Errorf("marshal initial user content: %w", err) - } - - msgParams := database.InsertChatMessagesParams{ //nolint:exhaustruct // Fields populated by appendChatMessage. 
- ChatID: insertedChat.ID, - } - - if deploymentPrompt != "" { - deploymentContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText(deploymentPrompt), - }) - if err != nil { - return xerrors.Errorf("marshal deployment system prompt: %w", err) - } - appendChatMessage(&msgParams, newChatMessage( - database.ChatMessageRoleSystem, - deploymentContent, - database.ChatMessageVisibilityModel, - opts.ModelConfigID, - chatprompt.CurrentContentVersion, - )) - } - - if userPrompt != "" { - userPromptContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText(userPrompt), - }) - if err != nil { - return xerrors.Errorf("marshal user system prompt: %w", err) - } - appendChatMessage(&msgParams, newChatMessage( - database.ChatMessageRoleSystem, - userPromptContent, - database.ChatMessageVisibilityModel, - opts.ModelConfigID, - chatprompt.CurrentContentVersion, - )) - } - - appendChatMessage(&msgParams, newChatMessage( - database.ChatMessageRoleSystem, - workspaceAwarenessContent, - database.ChatMessageVisibilityModel, - opts.ModelConfigID, - chatprompt.CurrentContentVersion, - )) - - appendChatMessage(&msgParams, newChatMessage( - database.ChatMessageRoleUser, - userContent, - database.ChatMessageVisibilityBoth, - opts.ModelConfigID, - chatprompt.CurrentContentVersion, - ).withCreatedBy(opts.OwnerID)) - - _, err = tx.InsertChatMessages(ctx, msgParams) - if err != nil { - return xerrors.Errorf("insert initial chat messages: %w", err) - } - - chat = insertedChat - - if !chat.RootChatID.Valid && !chat.ParentChatID.Valid { - chat.RootChatID = uuid.NullUUID{UUID: chat.ID, Valid: true} - } - return nil - }, nil) - if txErr != nil { - return database.Chat{}, txErr + // Usage limits gate the create before we touch the state machine. 
+ if limitErr := p.checkUsageLimit(ctx, p.db, opts.OwnerID, uuid.NullUUID{UUID: opts.OrganizationID, Valid: true}); limitErr != nil { + return database.Chat{}, limitErr } + labelsJSON, err := json.Marshal(opts.Labels) + if err != nil { + return database.Chat{}, xerrors.Errorf("marshal labels: %w", err) + } + + userPrompt := SanitizePromptText(opts.SystemPrompt) + var workspaceAwareness string + if opts.WorkspaceID.Valid { + workspaceAwareness = workspaceAttachedAwareness + } else { + workspaceAwareness = workspaceDetachedAwareness + } + workspaceAwarenessContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ + codersdk.ChatMessageText(workspaceAwareness), + }) + if err != nil { + return database.Chat{}, xerrors.Errorf("marshal workspace awareness: %w", err) + } + userContent, err := chatprompt.MarshalParts(opts.InitialUserContent) + if err != nil { + return database.Chat{}, xerrors.Errorf("marshal initial user content: %w", err) + } + + initialMessages := make([]chatstate.Message, 0, 4) + if deploymentPrompt != "" { + deploymentContent, marshalErr := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ + codersdk.ChatMessageText(deploymentPrompt), + }) + if marshalErr != nil { + return database.Chat{}, xerrors.Errorf("marshal deployment system prompt: %w", marshalErr) + } + initialMessages = append(initialMessages, systemMessage(deploymentContent, opts.ModelConfigID)) + } + if userPrompt != "" { + userPromptContent, marshalErr := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ + codersdk.ChatMessageText(userPrompt), + }) + if marshalErr != nil { + return database.Chat{}, xerrors.Errorf("marshal user system prompt: %w", marshalErr) + } + initialMessages = append(initialMessages, systemMessage(userPromptContent, opts.ModelConfigID)) + } + initialMessages = append(initialMessages, systemMessage(workspaceAwarenessContent, opts.ModelConfigID)) + initialMessages = append(initialMessages, userMessage(userContent, opts.ModelConfigID, opts.OwnerID)) + + 
result, err := chatstate.CreateChat(ctx, p.db, p.pubsub, chatstate.CreateChatInput{ + OrganizationID: opts.OrganizationID, + OwnerID: opts.OwnerID, + WorkspaceID: opts.WorkspaceID, + BuildID: opts.BuildID, + AgentID: opts.AgentID, + ParentChatID: opts.ParentChatID, + RootChatID: opts.RootChatID, + LastModelConfigID: opts.ModelConfigID, + Title: opts.Title, + Mode: opts.ChatMode, + PlanMode: opts.PlanMode, + MCPServerIDs: opts.MCPServerIDs, + Labels: pqtype.NullRawMessage{ + RawMessage: labelsJSON, + Valid: true, + }, + DynamicTools: pqtype.NullRawMessage{ + RawMessage: opts.DynamicTools, + Valid: len(opts.DynamicTools) > 0, + }, + ClientType: opts.ClientType, + InitialMessages: initialMessages, + }) + if err != nil { + return database.Chat{}, err + } + chat := result.Chat + if !chat.RootChatID.Valid && !chat.ParentChatID.Valid { + chat.RootChatID = uuid.NullUUID{UUID: chat.ID, Valid: true} + } + + // Publish the sidebar watch event explicitly after chatstate has + // committed and emitted its own state-machine notifications. The + // watch endpoint is intentionally outside the RFC refactor scope. p.publishChatPubsubEvent(chat, codersdk.ChatWatchEventKindCreated, nil) - p.signalWake() return chat, nil } -// SendMessage inserts a user message and optionally queues it while the chat -// is busy, then publishes stream + pubsub updates. +// SendMessage admits a user message through the chatstate.SendMessage +// transition. Pre-transition admission policy (usage limit, plan-mode +// metadata update, MCP server ID update, model-config resolution, queue +// cap) runs inside the same chatstate transaction via tx.Store() so +// everything commits or rolls back together. 
func (p *Server) SendMessage( ctx context.Context, opts SendMessageOptions, @@ -1699,29 +1657,28 @@ func (p *Server) SendMessage( } requestedPlanMode := opts.PlanMode + requestedMCPServerIDs := opts.MCPServerIDs - var ( - result SendMessageResult - queuedMessagesSDK []codersdk.ChatQueuedMessage - ) - - txErr := p.db.InTx(func(tx database.Store) error { - lockedChat, err := tx.GetChatByIDForUpdate(ctx, opts.ChatID) + var result SendMessageResult + machine := p.newChatMachine(opts.ChatID) + updateErr := machine.Update(ctx, func(tx *chatstate.Tx) error { + store := tx.Store() + lockedChat, err := store.GetChatByID(ctx, opts.ChatID) if err != nil { - return xerrors.Errorf("lock chat: %w", err) + return xerrors.Errorf("load chat: %w", err) } if lockedChat.Archived { return ErrChatArchived } - // Enforce usage limits before queueing or inserting. - if limitErr := p.checkUsageLimit(ctx, tx, lockedChat.OwnerID, uuid.NullUUID{UUID: lockedChat.OrganizationID, Valid: true}); limitErr != nil { + // Enforce usage limits before any state-machine work. + if limitErr := p.checkUsageLimit(ctx, store, lockedChat.OwnerID, uuid.NullUUID{UUID: lockedChat.OrganizationID, Valid: true}); limitErr != nil { return limitErr } if requestedPlanMode != nil { - lockedChat, err = tx.UpdateChatPlanModeByID(ctx, database.UpdateChatPlanModeByIDParams{ + lockedChat, err = store.UpdateChatPlanModeByID(ctx, database.UpdateChatPlanModeByIDParams{ PlanMode: *requestedPlanMode, ID: opts.ChatID, }) @@ -1732,7 +1689,7 @@ func (p *Server) SendMessage( modelConfigID, err := resolveSendMessageModelConfigID( ctx, - tx, + store, lockedChat, opts.ModelConfigID, ) @@ -1742,16 +1699,16 @@ func (p *Server) SendMessage( // Update MCP server IDs on the chat when explicitly provided. // Explore child chats keep the spawn-time snapshot immutable. 
- if opts.MCPServerIDs != nil { + if requestedMCPServerIDs != nil { if isExploreSubagentMode(lockedChat.Mode) { p.logger.Warn(ctx, "ignoring explore subagent mcp server ids update, snapshot is immutable after spawn", slog.F("chat_id", opts.ChatID), ) } else { - lockedChat, err = tx.UpdateChatMCPServerIDs(ctx, database.UpdateChatMCPServerIDsParams{ + lockedChat, err = store.UpdateChatMCPServerIDs(ctx, database.UpdateChatMCPServerIDsParams{ ID: opts.ChatID, - MCPServerIDs: *opts.MCPServerIDs, + MCPServerIDs: *requestedMCPServerIDs, }) if err != nil { return xerrors.Errorf("update chat mcp server ids: %w", err) @@ -1759,109 +1716,43 @@ func (p *Server) SendMessage( } } - existingQueued, err := tx.GetChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("get queued messages: %w", err) - } - - // Both queue and interrupt behaviors queue messages - // when the chat is busy. We also keep queueing while a - // backlog exists so waiting chats blocked by spend limits - // preserve FIFO user-message order. Interrupt additionally - // signals the running loop to stop so the queued message - // is promoted sooner. Crucially, this guarantees the - // interrupted assistant response is persisted (with a - // lower id/created_at) before the user message is - // promoted into chat_messages, preserving correct - // conversation order. 
- if shouldQueueUserMessage(lockedChat.Status) || len(existingQueued) > 0 { - if len(existingQueued) >= MaxQueueSize { - return ErrMessageQueueFull - } - - queued, err := tx.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: opts.ChatID, - Content: content.RawMessage, - ModelConfigID: uuid.NullUUID{ - UUID: modelConfigID, - Valid: modelConfigID != uuid.Nil, - }, - }) - if err != nil { - return xerrors.Errorf("insert queued message: %w", err) - } - - queuedMessages, err := tx.GetChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("get queued messages: %w", err) - } - - result.Queued = true - result.QueuedMessage = &queued - result.Chat = lockedChat - queuedMessagesSDK = db2sdk.ChatQueuedMessages(queuedMessages) - return nil - } - - message, updatedChat, err := insertUserMessageAndSetPending( - ctx, - tx, - lockedChat, - modelConfigID, - content, - opts.CreatedBy, - ) + // Queue capacity is enforced inside tx.SendMessage; this + // wrapper only propagates the typed error. + sendResult, err := tx.SendMessage(chatstate.SendMessageInput{ + Message: userMessage(content, modelConfigID, opts.CreatedBy), + BusyBehavior: busyBehaviorToChatState(busyBehavior), + }) if err != nil { return err } - result.Message = message - result.Chat = updatedChat - return nil - }, nil) - if txErr != nil { - return SendMessageResult{}, txErr - } - - if result.Queued { - p.publishEvent(opts.ChatID, codersdk.ChatStreamEvent{ - Type: codersdk.ChatStreamEventTypeQueueUpdate, - ChatID: opts.ChatID, - QueuedMessages: queuedMessagesSDK, - }) - p.publishChatStreamNotify(opts.ChatID, coderdpubsub.ChatStreamNotifyMessage{ - QueueUpdate: true, - }) - - // For interrupt behavior, signal the running loop to - // stop. setChatWaiting publishes a status notification - // that the worker's control subscriber detects, causing - // it to cancel with ErrInterrupted. 
The deferred cleanup - // in processChat then auto-promotes the queued message - // after persisting the partial assistant response. - if busyBehavior == SendMessageBusyBehaviorInterrupt { - updatedChat, err := p.setChatWaiting(ctx, opts.ChatID) - if err != nil { - // The message is already queued so the chat is - // not in a broken state — the user can still - // wait for the current run to finish. Log the - // error but don't fail the request. - p.logger.Error(ctx, "failed to interrupt chat for queued message", - slog.F("chat_id", opts.ChatID), - slog.Error(err), - ) - } else { - result.Chat = updatedChat - } + if sendResult.QueuedMessage != nil { + result.Queued = true + result.QueuedMessage = sendResult.QueuedMessage + } else if len(sendResult.InsertedMessages) > 0 { + // The state machine prepends synthetic tool-result + // cancellation messages; the user message is always + // last in the inserted slice. + result.Message = sendResult.InsertedMessages[len(sendResult.InsertedMessages)-1] } - - return result, nil + // Capture the post-transition chat inside the same + // transaction so the returned chat and the watch event + // reflect the snapshot bump and status change produced by + // the transition itself. + refreshed, err := store.GetChatByID(ctx, opts.ChatID) + if err != nil { + return xerrors.Errorf("reload chat after send: %w", err) + } + result.Chat = refreshed + return nil + }) + if updateErr != nil { + return SendMessageResult{}, updateErr } - p.publishMessage(opts.ChatID, result.Message) - p.publishStatus(opts.ChatID, result.Chat.Status, result.Chat.WorkerID) + // Sidebar watch event keeps the chat list in sync. Stream side + // effects are handled by chat:update consumers. 
p.publishChatPubsubEvent(result.Chat, codersdk.ChatWatchEventKindStatusChange, nil) - p.signalWake() return result, nil } @@ -1974,9 +1865,10 @@ func resolveFallbackModelConfigID( return defaultConfig.ID, nil } -// EditMessage marks the old user message as deleted, soft-deletes all -// following messages, inserts a new message with the updated content, -// clears queued messages, and moves the chat into pending status. +// EditMessage replaces an earlier user message and discards the +// active-history suffix through chatstate.EditMessage. Model-config +// override validation and usage-limit admission run in the same +// transaction as the state-machine transition. func (p *Server) EditMessage( ctx context.Context, opts EditMessageOptions, @@ -1997,60 +1889,45 @@ func (p *Server) EditMessage( } var ( - result EditMessageResult - editedMsg database.ChatMessage + result EditMessageResult + editedMsg database.ChatMessage + editedCutoffT time.Time ) - txErr := p.db.InTx(func(tx database.Store) error { - lockedChat, err := tx.GetChatByIDForUpdate(ctx, opts.ChatID) + machine := p.newChatMachine(opts.ChatID) + updateErr := machine.Update(ctx, func(tx *chatstate.Tx) error { + store := tx.Store() + lockedChat, err := store.GetChatByID(ctx, opts.ChatID) if err != nil { - return xerrors.Errorf("lock chat: %w", err) + return xerrors.Errorf("load chat: %w", err) } - if lockedChat.Archived { return ErrChatArchived } - - if limitErr := p.checkUsageLimit(ctx, tx, lockedChat.OwnerID, uuid.NullUUID{UUID: lockedChat.OrganizationID, Valid: true}); limitErr != nil { + if limitErr := p.checkUsageLimit(ctx, store, lockedChat.OwnerID, uuid.NullUUID{UUID: lockedChat.OrganizationID, Valid: true}); limitErr != nil { return limitErr } - editedMsg, err = tx.GetChatMessageByID(ctx, opts.EditedMessageID) + // Capture the target message for the post-commit debug + // cleanup hook below. The transition itself revalidates + // chat ownership and user-message constraints. 
+ target, err := store.GetChatMessageByID(ctx, opts.EditedMessageID) if err != nil { if errors.Is(err, sql.ErrNoRows) { return ErrEditedMessageNotFound } return xerrors.Errorf("get edited message: %w", err) } - if editedMsg.ChatID != opts.ChatID { + if target.ChatID != opts.ChatID { return ErrEditedMessageNotFound } - if editedMsg.Role != database.ChatMessageRoleUser { - return ErrEditedMessageNotUser - } + editedMsg = target - // Soft-delete the original message instead of updating in place - // so that usage/cost data is preserved. - err = tx.SoftDeleteChatMessageByID(ctx, opts.EditedMessageID) - if err != nil { - return xerrors.Errorf("soft-delete edited message: %w", err) - } - - // Soft-delete all messages that came after the edited one. - err = tx.SoftDeleteChatMessagesAfterID(ctx, database.SoftDeleteChatMessagesAfterIDParams{ - ChatID: opts.ChatID, - AfterID: opts.EditedMessageID, - }) - if err != nil { - return xerrors.Errorf("soft-delete later chat messages: %w", err) - } - - // Resolve the model for the replacement message. When the - // caller does not specify a model, preserve the original - // message's model so an edit that only changes text keeps - // behaving as before. - messageModelConfigID := editedMsg.ModelConfigID.UUID + // Validate the optional model-config override up front so + // the user sees ErrInvalidModelConfigID instead of a + // foreign-key error from the message-insert path. + var modelOverride uuid.NullUUID if opts.ModelConfigID != uuid.Nil { - if _, err := tx.GetChatModelConfigByID( + if _, err := store.GetChatModelConfigByID( chatdModelConfigLookupContext(ctx), opts.ModelConfigID, ); err != nil { @@ -2067,74 +1944,50 @@ func (p *Server) EditMessage( err, ) } - messageModelConfigID = opts.ModelConfigID + modelOverride = uuid.NullUUID{UUID: opts.ModelConfigID, Valid: true} } - // Insert a new message with the updated content. 
The - // InsertChatMessages CTE updates chats.last_model_config_id - // when the new message's model differs, so the assistant turn - // that follows picks up the new selection. - msgParams := database.InsertChatMessagesParams{ //nolint:exhaustruct // Fields populated by appendChatMessage. - ChatID: opts.ChatID, - } - appendChatMessage(&msgParams, newChatMessage( - database.ChatMessageRoleUser, - content, - editedMsg.Visibility, - messageModelConfigID, - chatprompt.CurrentContentVersion, - ).withCreatedBy(opts.CreatedBy)) - newMessages, err := insertChatMessageWithStore(ctx, tx, msgParams) - if err != nil { - return xerrors.Errorf("insert replacement message: %w", err) - } - newMessage := newMessages[0] - - err = tx.DeleteAllChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("delete queued messages: %w", err) - } - updatedChat, err := tx.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: opts.ChatID, - Status: database.ChatStatusPending, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, + editResult, err := tx.EditMessage(chatstate.EditMessageInput{ + MessageID: opts.EditedMessageID, + CreatedBy: opts.CreatedBy, + Content: content, + ModelConfigIDOverride: modelOverride, }) if err != nil { - return xerrors.Errorf("set chat pending: %w", err) + if errors.Is(err, chatstate.ErrEditedMessageNotUser) { + return ErrEditedMessageNotUser + } + return err } - - result.Message = newMessage - result.Chat = updatedChat + result.Message = editResult.ReplacementMessage + // Capture the post-edit chat inside the same transaction so + // the returned chat and the debug-cleanup cutoff use the + // snapshot bump and updated_at stamped by the transition. 
+ refreshed, err := store.GetChatByID(ctx, opts.ChatID) + if err != nil { + return xerrors.Errorf("reload chat after edit: %w", err) + } + result.Chat = refreshed + editedCutoffT = refreshed.UpdatedAt return nil - }, nil) - if txErr != nil { - return EditMessageResult{}, txErr + }) + if updateErr != nil { + return EditMessageResult{}, updateErr } - p.publishEditedMessage(opts.ChatID, result.Message) - p.publishEvent(opts.ChatID, codersdk.ChatStreamEvent{ - Type: codersdk.ChatStreamEventTypeQueueUpdate, - QueuedMessages: []codersdk.ChatQueuedMessage{}, - }) - p.publishChatStreamNotify(opts.ChatID, coderdpubsub.ChatStreamNotifyMessage{ - QueueUpdate: true, - }) - p.publishStatus(opts.ChatID, result.Chat.Status, result.Chat.WorkerID) + // Sidebar watch event keeps the chat list responsive. Stream + // side effects are handled by chat:update consumers. p.publishChatPubsubEvent(result.Chat, codersdk.ChatWatchEventKindStatusChange, nil) // Editing can race with an interrupted worker still flushing its // final debug writes. Run a short bounded retry loop so we converge // quickly without relying on the much longer stale-finalization // sweep. Source editCutoff from the DB-stamped updated_at returned - // by UpdateChatStatus so the filter uses the same clock that - // FinalizeStale and other DB timestamps use; subtract + // by the post-edit chat row so the filter uses the same clock that + // stamps replacement-turn debug rows; subtract // debugCleanupClockSkew so replica clock drift cannot let the retry - // delete a replacement turn's debug rows (see the constant for the - // full rationale). - editCutoff := result.Chat.UpdatedAt.Add(-debugCleanupClockSkew) + // delete a replacement turn's debug rows. 
+ editCutoff := editedCutoffT.Add(-debugCleanupClockSkew) p.scheduleDebugCleanup( ctx, "failed to delete chat debug rows after edit", @@ -2147,78 +2000,88 @@ func (p *Server) EditMessage( return err }, ) - p.signalWake() return result, nil } -// ArchiveChat archives a chat family and broadcasts deleted events for each -// affected chat so watching clients converge without a full refetch. If the -// target chat is pending or running, it first transitions the chat back to -// waiting so active processing stops before the archive is broadcast. +// ErrArchiveRequiresRootChat is returned by [Server.ArchiveChat] and +// [Server.UnarchiveChat] when the supplied chat is a child chat. +// Archive state changes must always target the root chat so the +// whole family flips together. +var ErrArchiveRequiresRootChat = xerrors.New( + "chat archive state can only be changed on the root chat", +) + +// ArchiveChat archives a root chat and every child in its family +// through the chatstate state machine. The transition is atomic over +// the whole family: either every member is archived or none is. The +// state machine only permits archive from the idle / error execution +// states (W, E0, E1); active members cause a state conflict that the +// HTTP handler maps to a client error. +// +// Child chats must not be archived independently. ArchiveChat +// rejects them with [ErrArchiveRequiresRootChat] so callers cannot +// silently break the parent-implies-child archive invariant. 
func (p *Server) ArchiveChat(ctx context.Context, chat database.Chat) error { if chat.ID == uuid.Nil { return xerrors.New("chat_id is required") } + if chat.ParentChatID.Valid { + return ErrArchiveRequiresRootChat + } + return p.setChatFamilyArchived(ctx, chat, true, codersdk.ChatWatchEventKindDeleted) +} - var ( - archivedChats []database.Chat - interruptedChats []database.Chat - ) - if err := p.db.InTx(func(tx database.Store) error { - if _, err := tx.GetChatByIDForUpdate(ctx, chat.ID); err != nil { - return xerrors.Errorf("lock chat for archive: %w", err) - } +// UnarchiveChat unarchives a root chat and every child in its family +// through the chatstate state machine. Like ArchiveChat the cascade +// is atomic; ChildChat unarchive attempts are rejected with +// [ErrArchiveRequiresRootChat]. +func (p *Server) UnarchiveChat(ctx context.Context, chat database.Chat) error { + if chat.ID == uuid.Nil { + return xerrors.New("chat_id is required") + } + if chat.ParentChatID.Valid { + return ErrArchiveRequiresRootChat + } + return p.setChatFamilyArchived(ctx, chat, false, codersdk.ChatWatchEventKindCreated) +} - var err error - archivedChats, err = tx.ArchiveChatByID(ctx, chat.ID) - if err != nil { - return xerrors.Errorf("archive chat: %w", err) - } - - for i, archivedChat := range archivedChats { - if archivedChat.Status != database.ChatStatusPending && - archivedChat.Status != database.ChatStatusRunning { - continue - } - - updatedChat, updateErr := tx.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: archivedChat.ID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - if updateErr != nil { - return xerrors.Errorf("set archived chat waiting before cleanup: %w", updateErr) - } - archivedChats[i] = updatedChat - interruptedChats = append(interruptedChats, updatedChat) - } - return nil - }, nil); err != nil { - return err +// 
setChatFamilyArchived applies SetArchived(archived) to every chat +// in chat's family through chatstate. The transaction-captured +// family rows feed the post-commit debug cleanup and sidebar watch +// events. Callers must only invoke this for root chats. +func (p *Server) setChatFamilyArchived( + ctx context.Context, + chat database.Chat, + archived bool, + watchKind codersdk.ChatWatchEventKind, +) error { + if chat.ID == uuid.Nil { + return xerrors.New("chat_id is required") + } + if chat.ParentChatID.Valid { + return ErrArchiveRequiresRootChat } - for _, interruptedChat := range interruptedChats { - p.publishStatus(interruptedChat.ID, interruptedChat.Status, interruptedChat.WorkerID) - p.publishChatPubsubEvent(interruptedChat, codersdk.ChatWatchEventKindStatusChange, nil) + familyChats, err := chatstate.SetFamilyArchived( + ctx, + p.db, + p.pubsub, + chatstate.SetFamilyArchivedInput{ + RootID: chat.ID, + Archived: archived, + }, + ) + if err != nil { + return err } // Archiving can race with an interrupted worker still flushing its // final debug writes. Retry a few times so orphaned rows are - // removed quickly instead of waiting for the stale sweeper. Source - // archiveCutoff from the DB-stamped updated_at returned by - // ArchiveChatByID so the filter uses the same clock that stamps - // replacement-turn debug rows; subtract debugCleanupClockSkew so - // replica clock drift cannot let the retry delete a replacement's - // debug rows if an unarchive races ahead (see the constant for the - // full rationale). All archived chats share the transaction-start - // NOW() so any entry's UpdatedAt is equivalent. - if len(archivedChats) > 0 { - archiveCutoff := archivedChats[0].UpdatedAt.Add(-debugCleanupClockSkew) - for _, archivedChat := range archivedChats { + // removed quickly instead of waiting for the stale sweeper. 
+ if archived && len(familyChats) > 0 { + archiveCutoff := familyChats[0].UpdatedAt.Add(-debugCleanupClockSkew) + for _, archivedChat := range familyChats { p.scheduleDebugCleanup( ctx, "failed to delete chat debug rows after archive", @@ -2231,92 +2094,13 @@ func (p *Server) ArchiveChat(ctx context.Context, chat database.Chat) error { } } - p.publishChatPubsubEvents(archivedChats, codersdk.ChatWatchEventKindDeleted) + p.publishChatPubsubEvents(familyChats, watchKind) return nil } -// ErrChildUnarchiveParentArchived is returned by UnarchiveChat when a -// child unarchive is rejected because the parent is still archived. -// The patchChat handler maps this to a 400 response. -var ErrChildUnarchiveParentArchived = xerrors.New( - "cannot unarchive child chat while parent is archived", -) - -// UnarchiveChat unarchives a chat family and broadcasts created events. -// Root chats cascade through UnarchiveChatByID. Child chats run under -// a row-level lock on the child (GetChatByIDForUpdate) with an -// in-transaction re-read of the parent, returning -// ErrChildUnarchiveParentArchived when the parent is archived and a -// no-op when the child is already active. -// -// The child is locked before the parent is read to avoid deadlocking -// with a concurrent ArchiveChatByID cascade, which visits child rows -// before the parent. -func (p *Server) UnarchiveChat(ctx context.Context, chat database.Chat) error { - if chat.ID == uuid.Nil { - return xerrors.New("chat_id is required") - } - - if !chat.ParentChatID.Valid { - return p.applyChatLifecycleTransition( - ctx, - chat.ID, - "unarchive", - codersdk.ChatWatchEventKindCreated, - p.db.UnarchiveChatByID, - ) - } - - var updated []database.Chat - if err := p.db.InTx(func(tx database.Store) error { - locked, err := tx.GetChatByIDForUpdate(ctx, chat.ID) - if err != nil { - return xerrors.Errorf("lock child for unarchive: %w", err) - } - if !locked.Archived { - // Already unarchived by a concurrent caller; idempotent no-op. 
- return nil - } - parent, err := tx.GetChatByID(ctx, chat.ParentChatID.UUID) - if err != nil { - return xerrors.Errorf("load parent chat: %w", err) - } - if parent.Archived { - return ErrChildUnarchiveParentArchived - } - updated, err = tx.UnarchiveChatByID(ctx, chat.ID) - if err != nil { - return xerrors.Errorf("unarchive child chat: %w", err) - } - return nil - }, nil); err != nil { - if errors.Is(err, ErrChildUnarchiveParentArchived) { - return ErrChildUnarchiveParentArchived - } - return err - } - - p.publishChatPubsubEvents(updated, codersdk.ChatWatchEventKindCreated) - return nil -} - -func (p *Server) applyChatLifecycleTransition( - ctx context.Context, - chatID uuid.UUID, - action string, - kind codersdk.ChatWatchEventKind, - transition func(context.Context, uuid.UUID) ([]database.Chat, error), -) error { - updatedChats, err := transition(ctx, chatID) - if err != nil { - return xerrors.Errorf("%s chat: %w", action, err) - } - - p.publishChatPubsubEvents(updatedChats, kind) - return nil -} - -// DeleteQueued removes a queued user message and publishes the queue update. +// DeleteQueued removes a queued user message through the chatstate +// state machine. Stream side effects are handled by chat:update +// consumers. func (p *Server) DeleteQueued( ctx context.Context, chatID uuid.UUID, @@ -2326,61 +2110,22 @@ func (p *Server) DeleteQueued( return xerrors.New("chat_id is required") } - var queuedMessages []database.ChatQueuedMessage - var queueLoadedOK bool - - txErr := p.db.InTx(func(tx database.Store) error { - // Lock the chat row to prevent processChat from - // auto-promoting a message the user intended to delete. 
- if _, err := tx.GetChatByIDForUpdate(ctx, chatID); err != nil { - return xerrors.Errorf("lock chat: %w", err) - } - - err := tx.DeleteChatQueuedMessage(ctx, database.DeleteChatQueuedMessageParams{ - ID: queuedMessageID, - ChatID: chatID, + machine := p.newChatMachine(chatID) + err := machine.Update(ctx, func(tx *chatstate.Tx) error { + _, err := tx.DeleteQueuedMessage(chatstate.DeleteQueuedMessageInput{ + QueuedMessageID: queuedMessageID, }) - if err != nil { - return xerrors.Errorf("delete queued message: %w", err) - } - - var err2 error - queuedMessages, err2 = tx.GetChatQueuedMessages(ctx, chatID) - if err2 != nil { - p.logger.Warn(ctx, "failed to load queued messages after delete", - slog.F("chat_id", chatID), - slog.F("queued_message_id", queuedMessageID), - slog.Error(err2), - ) - // Non-fatal: the delete succeeded, so we still commit. - return nil - } - queueLoadedOK = true - - return nil - }, nil) - if txErr != nil { - return txErr - } - - if queueLoadedOK { - p.publishEvent(chatID, codersdk.ChatStreamEvent{ - Type: codersdk.ChatStreamEventTypeQueueUpdate, - QueuedMessages: db2sdk.ChatQueuedMessages(queuedMessages), - }) - } - // Always notify subscribers so they can re-fetch, even if we - // failed to load the updated queue payload above. - p.publishChatStreamNotify(chatID, coderdpubsub.ChatStreamNotifyMessage{ - QueueUpdate: true, + return err }) - return nil + return err } -// PromoteQueued promotes a queued message into chat history. On a -// running chat with a fresh worker heartbeat the promote is deferred -// to the worker's persist+auto-promote so partial assistant output -// is not lost; otherwise it inserts the user message synchronously. +// PromoteQueued promotes a queued message through the chatstate state +// machine. 
From running / interrupting states the state machine +// transitions the chat to `interrupting` so the worker can drain the +// in-flight generation before promoting; from idle / error / requires +// action states it inserts the user message into history +// synchronously. func (p *Server) PromoteQueued( ctx context.Context, opts PromoteQueuedOptions, @@ -2390,184 +2135,48 @@ func (p *Server) PromoteQueued( } var ( - result PromoteQueuedResult - promoted database.ChatMessage - updatedChat database.Chat - remainingQueue []database.ChatQueuedMessage - deferred bool - syntheticResults []database.ChatMessage + result PromoteQueuedResult + refreshChat database.Chat + refreshedOK bool ) - - txErr := p.db.InTx(func(tx database.Store) error { - lockedChat, err := tx.GetChatByIDForUpdate(ctx, opts.ChatID) + machine := p.newChatMachine(opts.ChatID) + updateErr := machine.Update(ctx, func(tx *chatstate.Tx) error { + store := tx.Store() + lockedChat, err := store.GetChatByID(ctx, opts.ChatID) if err != nil { - return xerrors.Errorf("lock chat: %w", err) + return xerrors.Errorf("load chat: %w", err) } - if lockedChat.Archived { return ErrChatArchived } - queuedMessages, err := tx.GetChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("get queued messages: %w", err) - } - - var ( - targetContent json.RawMessage - targetModelConfigID uuid.NullUUID - found bool - ) - for _, qm := range queuedMessages { - if qm.ID == opts.QueuedMessageID { - targetContent = qm.Content - targetModelConfigID = qm.ModelConfigID - found = true - break - } - } - if !found { - return xerrors.Errorf("queued message %d not found in chat %s", opts.QueuedMessageID, opts.ChatID) - } - - // Setting pending would trip persistStep's ownership guard - // and drop the worker's partial output. Set waiting and - // reorder the queued row so the worker's auto-promote picks - // it up after the persist. 
- heartbeatFresh := lockedChat.HeartbeatAt.Valid && - p.clock.Now().Sub(lockedChat.HeartbeatAt.Time) < p.inFlightChatStaleAfter - if lockedChat.Status == database.ChatStatusRunning && heartbeatFresh { - rowsAffected, err := tx.ReorderChatQueuedMessageToFront(ctx, database.ReorderChatQueuedMessageToFrontParams{ - ChatID: opts.ChatID, - TargetID: opts.QueuedMessageID, - }) - if err != nil { - return xerrors.Errorf("reorder queued message to front: %w", err) - } - // Defensive guard against a future non-chat-locked - // queue mutator. The found check above makes this a - // no-op on the current code path. - if rowsAffected != 1 { - return xerrors.Errorf("reorder queued message to front affected %d rows, want 1", rowsAffected) - } - updatedChat, err = tx.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: opts.ChatID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - if err != nil { - return xerrors.Errorf("set chat to waiting for deferred promote: %w", err) - } - remainingQueue, err = tx.GetChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("get remaining queue after reorder: %w", err) - } - deferred = true - return nil - } - - effectiveModelConfigID, err := resolveQueuedMessageModelConfigID( - ctx, - tx, - lockedChat, - targetModelConfigID, - ) - if err != nil { - return err - } - - // Without synthetic results, the next turn would carry - // unresolved tool_call parts; the LLM API rejects this and the - // chat dead-ends in error. 
- if lockedChat.Status == database.ChatStatusRequiresAction { - inserted, err := insertSyntheticToolResultsTx( - ctx, tx, lockedChat, - "Tool execution interrupted by queued message promotion", - ) - if err != nil { - return xerrors.Errorf("insert synthetic tool results: %w", err) - } - syntheticResults = inserted - } - - err = tx.DeleteChatQueuedMessage(ctx, database.DeleteChatQueuedMessageParams{ - ID: opts.QueuedMessageID, - ChatID: opts.ChatID, + promoteResult, err := tx.PromoteQueuedMessage(chatstate.PromoteQueuedMessageInput{ + QueuedMessageID: opts.QueuedMessageID, }) - if err != nil { - return xerrors.Errorf("delete queued message: %w", err) - } - - promoted, updatedChat, err = insertUserMessageAndSetPending( - ctx, - tx, - lockedChat, - effectiveModelConfigID, - pqtype.NullRawMessage{ - RawMessage: targetContent, - Valid: len(targetContent) > 0, - }, - opts.CreatedBy, - ) if err != nil { return err } - - remainingQueue, err = tx.GetChatQueuedMessages(ctx, opts.ChatID) - if err != nil { - return xerrors.Errorf("get remaining queue: %w", err) + if promoteResult.InsertedMessage != nil { + result.PromotedMessage = *promoteResult.InsertedMessage } - result.PromotedMessage = promoted - + // Capture the chat inside the transaction so the watch event + // published below uses the snapshot bump and status change + // produced by the transition itself. + refreshed, err := store.GetChatByID(ctx, opts.ChatID) + if err != nil { + return xerrors.Errorf("reload chat after promote: %w", err) + } + refreshChat = refreshed + refreshedOK = true return nil - }, nil) - if txErr != nil { - return PromoteQueuedResult{}, txErr - } - - if deferred { - // Skip publishMessage and signalWake: there is no synchronous - // user message yet, and the active worker's interrupt path - // signals its own auto-promote follow-up. 
- p.publishEvent(opts.ChatID, codersdk.ChatStreamEvent{ - Type: codersdk.ChatStreamEventTypeQueueUpdate, - QueuedMessages: db2sdk.ChatQueuedMessages(remainingQueue), - }) - p.publishChatStreamNotify(opts.ChatID, coderdpubsub.ChatStreamNotifyMessage{ - QueueUpdate: true, - }) - p.publishStatus(opts.ChatID, updatedChat.Status, updatedChat.WorkerID) - p.publishChatPubsubEvent(updatedChat, codersdk.ChatWatchEventKindStatusChange, nil) - return result, nil - } - - p.publishEvent(opts.ChatID, codersdk.ChatStreamEvent{ - Type: codersdk.ChatStreamEventTypeQueueUpdate, - QueuedMessages: db2sdk.ChatQueuedMessages(remainingQueue), }) - p.publishChatStreamNotify(opts.ChatID, coderdpubsub.ChatStreamNotifyMessage{ - QueueUpdate: true, - }) - // Publish synth rows before the user message so live viewers - // see the interruption inline. - for _, msg := range syntheticResults { - p.publishMessage(opts.ChatID, msg) + if updateErr != nil { + return PromoteQueuedResult{}, updateErr } - p.publishMessage(opts.ChatID, promoted) - p.publishStatus(opts.ChatID, updatedChat.Status, updatedChat.WorkerID) - p.publishChatPubsubEvent(updatedChat, codersdk.ChatWatchEventKindStatusChange, nil) - // Marker for ENG-2645: confirms post-TX publishes ran. - p.logger.Debug(ctx, "promote queued completed", - slog.F("chat_id", opts.ChatID), - slog.F("promoted_id", promoted.ID), - slog.F("synthetic_count", len(syntheticResults)), - slog.F("status", updatedChat.Status), - ) - p.signalWake() + if refreshedOK { + p.publishChatPubsubEvent(refreshChat, codersdk.ChatWatchEventKindStatusChange, nil) + } return result, nil } @@ -2609,250 +2218,198 @@ func (e *ToolResultStatusConflictError) Error() string { } // SubmitToolResults validates and persists client-provided tool -// results, transitions the chat to pending, and wakes the run -// loop. The caller is responsible for the fast-path status check; -// this method performs an authoritative re-check under a row lock. 
+// results, returning the chat to running through the chatstate state +// machine. Validation runs inside the same transaction as the +// transition so the assistant message and pending tool calls cannot +// drift between reads. func (p *Server) SubmitToolResults( ctx context.Context, opts SubmitToolResultsOptions, ) error { - dynamicToolNames, err := parseDynamicToolNames(pqtype.NullRawMessage{ - RawMessage: opts.DynamicTools, - Valid: len(opts.DynamicTools) > 0, - }) - if err != nil { - return xerrors.Errorf("parse chat dynamic tools: %w", err) - } - - // The GetLastChatMessageByRole lookup and all subsequent - // validation and persistence run inside a single transaction - // so the assistant message cannot change between reads. - var statusConflict *ToolResultStatusConflictError - txErr := p.db.InTx(func(tx database.Store) error { - // Authoritative status check under row lock. - locked, lockErr := tx.GetChatByIDForUpdate(ctx, opts.ChatID) - if lockErr != nil { - return xerrors.Errorf("lock chat for update: %w", lockErr) + var ( + statusConflict *ToolResultStatusConflictError + refreshChat database.Chat + refreshedOK bool + ) + machine := p.newChatMachine(opts.ChatID) + updateErr := machine.Update(ctx, func(tx *chatstate.Tx) error { + store := tx.Store() + locked, err := store.GetChatByID(ctx, opts.ChatID) + if err != nil { + return xerrors.Errorf("load chat: %w", err) } if locked.Archived { return ErrChatArchived } - if locked.Status != database.ChatStatusRequiresAction { - statusConflict = &ToolResultStatusConflictError{ - ActualStatus: locked.Status, + + toolResults := make([]chatstate.ToolResultInput, 0, len(opts.Results)) + for _, r := range opts.Results { + toolResults = append(toolResults, chatstate.ToolResultInput{ + ToolCallID: r.ToolCallID, + Output: r.Output, + IsError: r.IsError, + }) + } + modelConfigID := opts.ModelConfigID + if modelConfigID == uuid.Nil { + modelConfigID = locked.LastModelConfigID + } + if _, err := 
tx.CompleteRequiresAction(chatstate.CompleteRequiresActionInput{ + CreatedBy: opts.UserID, + ModelConfigID: modelConfigID, + Results: toolResults, + }); err != nil { + if !errors.Is(err, chatstate.ErrInvalidState) && + locked.Status != database.ChatStatusRequiresAction && + errors.Is(err, chatstate.ErrTransitionNotAllowed) { + statusConflict = &ToolResultStatusConflictError{ + ActualStatus: locked.Status, + } + return statusConflict } + return err + } + // Capture the chat inside the transaction so the watch event + // uses the snapshot bump and status change produced by the + // transition itself. + refreshed, err := store.GetChatByID(ctx, opts.ChatID) + if err != nil { + return xerrors.Errorf("reload chat after tool results: %w", err) + } + refreshChat = refreshed + refreshedOK = true + return nil + }) + if updateErr != nil { + if statusConflict != nil { return statusConflict } - - // Get the last assistant message inside the transaction - // for consistency with the row lock above. - lastAssistant, err := tx.GetLastChatMessageByRole(ctx, database.GetLastChatMessageByRoleParams{ - ChatID: opts.ChatID, - Role: database.ChatMessageRoleAssistant, - }) - if err != nil { - return xerrors.Errorf("get last assistant message: %w", err) - } - - // Collect tool-call IDs that already have results. - // When a dynamic tool name collides with a built-in, - // the chatloop executes it as a built-in and persists - // the result. Those calls must not count as pending. 
- afterMsgs, afterErr := tx.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: opts.ChatID, - AfterID: lastAssistant.ID, - }) - if afterErr != nil { - return xerrors.Errorf("get messages after assistant: %w", afterErr) - } - handledCallIDs := make(map[string]bool) - for _, msg := range afterMsgs { - if msg.Role != database.ChatMessageRoleTool { - continue - } - msgParts, msgParseErr := chatprompt.ParseContent(msg) - if msgParseErr != nil { - continue - } - for _, mp := range msgParts { - if mp.Type == codersdk.ChatMessagePartTypeToolResult { - handledCallIDs[mp.ToolCallID] = true - } - } - } - - // Extract pending dynamic tool-call IDs, skipping any - // that were already handled by the chatloop. - pendingCallIDs := make(map[string]bool) - toolCallIDToName := make(map[string]string) - parts, parseErr := chatprompt.ParseContent(lastAssistant) - if parseErr != nil { - return xerrors.Errorf("parse assistant message: %w", parseErr) - } - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeToolCall && - dynamicToolNames[part.ToolName] && - !handledCallIDs[part.ToolCallID] { - pendingCallIDs[part.ToolCallID] = true - toolCallIDToName[part.ToolCallID] = part.ToolName - } - } - - // Validate submitted results match pending calls exactly. 
- submittedIDs := make(map[string]bool, len(opts.Results)) - for _, result := range opts.Results { - if submittedIDs[result.ToolCallID] { - return &ToolResultValidationError{ - Message: "Duplicate tool_call_id in results.", - Detail: fmt.Sprintf("Duplicate tool call ID %q.", result.ToolCallID), - } - } - submittedIDs[result.ToolCallID] = true - } - for id := range pendingCallIDs { - if !submittedIDs[id] { - return &ToolResultValidationError{ - Message: "Missing tool result.", - Detail: fmt.Sprintf("Missing result for tool call %q.", id), - } - } - } - for id := range submittedIDs { - if !pendingCallIDs[id] { - return &ToolResultValidationError{ - Message: "Unexpected tool result.", - Detail: fmt.Sprintf("No pending tool call with ID %q.", id), - } - } - } - - // Marshal each tool result into a separate message row. - resultContents := make([]pqtype.NullRawMessage, 0, len(opts.Results)) - for _, result := range opts.Results { - if !json.Valid(result.Output) { - return &ToolResultValidationError{ - Message: "Tool result output must be valid JSON.", - Detail: fmt.Sprintf("Output for tool call %q is not valid JSON.", result.ToolCallID), - } - } - part := codersdk.ChatMessagePart{ - Type: codersdk.ChatMessagePartTypeToolResult, - ToolCallID: result.ToolCallID, - ToolName: toolCallIDToName[result.ToolCallID], - Result: result.Output, - IsError: result.IsError, - } - marshaled, marshalErr := chatprompt.MarshalParts([]codersdk.ChatMessagePart{part}) - if marshalErr != nil { - return xerrors.Errorf("marshal tool result: %w", marshalErr) - } - resultContents = append(resultContents, marshaled) - } - - // Insert tool-result messages. 
- n := len(resultContents) - params := database.InsertChatMessagesParams{ - ChatID: opts.ChatID, - CreatedBy: make([]uuid.UUID, n), - ModelConfigID: make([]uuid.UUID, n), - Role: make([]database.ChatMessageRole, n), - Content: make([]string, n), - ContentVersion: make([]int16, n), - Visibility: make([]database.ChatMessageVisibility, n), - InputTokens: make([]int64, n), - OutputTokens: make([]int64, n), - TotalTokens: make([]int64, n), - ReasoningTokens: make([]int64, n), - CacheCreationTokens: make([]int64, n), - CacheReadTokens: make([]int64, n), - ContextLimit: make([]int64, n), - Compressed: make([]bool, n), - TotalCostMicros: make([]int64, n), - RuntimeMs: make([]int64, n), - ProviderResponseID: make([]string, n), - } - for i, rc := range resultContents { - params.CreatedBy[i] = opts.UserID - params.ModelConfigID[i] = opts.ModelConfigID - params.Role[i] = database.ChatMessageRoleTool - params.Content[i] = string(rc.RawMessage) - params.ContentVersion[i] = chatprompt.CurrentContentVersion - params.Visibility[i] = database.ChatMessageVisibilityBoth - } - if _, insertErr := tx.InsertChatMessages(ctx, params); insertErr != nil { - return xerrors.Errorf("insert tool results: %w", insertErr) - } - - // Transition chat to pending. - if _, updateErr := tx.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: opts.ChatID, - Status: database.ChatStatusPending, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }); updateErr != nil { - return xerrors.Errorf("update chat status: %w", updateErr) - } - - return nil - }, nil) - if txErr != nil { - return txErr + return translateToolResultValidationError(updateErr) } - // Wake the chatd run loop so it processes the chat immediately. 
- p.signalWake() + if refreshedOK { + p.publishChatPubsubEvent(refreshChat, codersdk.ChatWatchEventKindStatusChange, nil) + } return nil } -// InterruptChat interrupts execution, sets waiting status, and broadcasts status updates. +// translateToolResultValidationError converts a chatstate tool-result +// validation error into the legacy chatd.ToolResultValidationError +// shape so HTTP handlers preserve their existing response detail. If +// err is not a tool-result validation error, it is returned +// unchanged. +func translateToolResultValidationError(err error) error { + var v *chatstate.ToolResultValidationError + if !errors.As(err, &v) { + return err + } + switch { + case xerrors.Is(v, chatstate.ErrToolResultDuplicate): + return &ToolResultValidationError{ + Message: "Duplicate tool_call_id in results.", + Detail: fmt.Sprintf("Duplicate tool call ID %q.", v.ToolCallID), + } + case xerrors.Is(v, chatstate.ErrToolResultMissing): + return &ToolResultValidationError{ + Message: "Missing tool result.", + Detail: fmt.Sprintf("Missing result for tool call %q.", v.ToolCallID), + } + case xerrors.Is(v, chatstate.ErrToolResultUnexpected): + return &ToolResultValidationError{ + Message: "Unexpected tool result.", + Detail: fmt.Sprintf("No pending tool call with ID %q.", v.ToolCallID), + } + case xerrors.Is(v, chatstate.ErrToolResultInvalidJSON): + return &ToolResultValidationError{ + Message: "Tool result output must be valid JSON.", + Detail: fmt.Sprintf("Output for tool call %q is not valid JSON.", v.ToolCallID), + } + } + return err +} + +// InterruptChat interrupts execution through the chatstate.Interrupt +// transition. Active runs land in `interrupting`; requires-action +// chats synthesize cancellation messages and return to running. +// +// Returns the post-transition chat and an error so callers can map +// state conflicts deliberately. Idle chats return a +// chatstate.ErrTransitionNotAllowed wrapper. 
func (p *Server) InterruptChat( ctx context.Context, chat database.Chat, -) database.Chat { +) (database.Chat, error) { if chat.ID == uuid.Nil { - return chat + return chat, xerrors.New("chat_id is required") } - // If the chat is in requires_action, insert synthetic error - // tool-result messages for each pending dynamic tool call - // before transitioning to waiting. Without this, the LLM - // would see unmatched tool-call parts on the next run. - if chat.Status == database.ChatStatusRequiresAction { - if txErr := p.db.InTx(func(tx database.Store) error { - locked, lockErr := tx.GetChatByIDForUpdate(ctx, chat.ID) - if lockErr != nil { - return xerrors.Errorf("lock chat for interrupt: %w", lockErr) - } - // Another request may have already transitioned - // the chat (e.g. SubmitToolResults committed - // between our snapshot and this lock). - if locked.Status != database.ChatStatusRequiresAction { - return nil - } - _, err := insertSyntheticToolResultsTx(ctx, tx, locked, "Tool execution interrupted by user") + var refreshed database.Chat + machine := p.newChatMachine(chat.ID) + err := machine.Update(ctx, func(tx *chatstate.Tx) error { + if _, err := tx.Interrupt(chatstate.InterruptInput{ + Reason: "Tool execution interrupted by user", + }); err != nil { return err - }, nil); txErr != nil { - p.logger.Error(ctx, "failed to insert synthetic tool results during interrupt", - slog.F("chat_id", chat.ID), - slog.Error(txErr), - ) - // Fall through — still try to set waiting status. } + // Capture the post-interrupt chat inside the transaction so + // the returned chat and the watch event reflect the snapshot + // bump and status change produced by the transition itself. 
+ latest, err := tx.Store().GetChatByID(ctx, chat.ID) + if err != nil { + return xerrors.Errorf("reload chat after interrupt: %w", err) + } + refreshed = latest + return nil + }) + if err != nil { + return chat, err } - // Debug runs are finalized in the execution path when the owning - // goroutine observes cancellation, so we do not mutate debug state here. - updatedChat, err := p.setChatWaiting(ctx, chat.ID) - if err != nil { - p.logger.Error(ctx, "failed to mark chat as waiting", - slog.F("chat_id", chat.ID), - slog.Error(err), - ) - return chat + p.publishChatPubsubEvent(refreshed, codersdk.ChatWatchEventKindStatusChange, nil) + return refreshed, nil +} + +// ReconcileInvalidStateChat recovers a chat stuck in an invalid +// execution-state combination by running the +// chatstate.ReconcileInvalidState transition. The chat lands in an +// error state (E0/E1); queued messages are preserved and pending +// dynamic-tool calls are closed with synthetic cancellations. +// +// Returns the post-transition chat. When the chat is not actually in an +// invalid state the transition returns a wrapped +// chatstate.ErrTransitionNotAllowed; a missing chat returns +// chatstate.ErrChatNotFound. Callers map these to deliberate HTTP +// responses. +func (p *Server) ReconcileInvalidStateChat( + ctx context.Context, + chat database.Chat, +) (database.Chat, error) { + if chat.ID == uuid.Nil { + return chat, xerrors.New("chat_id is required") } - return updatedChat + + var refreshed database.Chat + machine := p.newChatMachine(chat.ID) + err := machine.Update(ctx, func(tx *chatstate.Tx) error { + if _, err := tx.ReconcileInvalidState(chatstate.ReconcileInvalidStateInput{}); err != nil { + return err + } + // Capture the post-reconcile chat inside the transaction so + // the returned chat and the watch event reflect the snapshot + // bump and status change produced by the transition itself. 
+ latest, err := tx.Store().GetChatByID(ctx, chat.ID) + if err != nil { + return xerrors.Errorf("reload chat after reconcile: %w", err) + } + refreshed = latest + return nil + }) + if err != nil { + return chat, err + } + + p.publishChatPubsubEvent(refreshed, codersdk.ChatWatchEventKindStatusChange, nil) + return refreshed, nil } const manualTitleMessageWindowLimit = 50 @@ -4017,17 +3574,6 @@ func insertUserMessageAndSetPending( return message, updatedChat, nil } -// shouldQueueUserMessage reports whether a user message should be -// queued while a chat is active. -func shouldQueueUserMessage(status database.ChatStatus) bool { - switch status { - case database.ChatStatusRunning, database.ChatStatusPending, database.ChatStatusRequiresAction: - return true - default: - return false - } -} - // Config configures a chat processor. type Config struct { Logger slog.Logger @@ -4092,6 +3638,11 @@ func New(cfg Config) *Server { clk = quartz.NewReal() } + if cfg.Pubsub == nil { + panic("chatd: Pubsub is nil") + } + ps := cfg.Pubsub + instructionLookupTimeout := cfg.InstructionLookupTimeout if instructionLookupTimeout == 0 { instructionLookupTimeout = homeInstructionLookupTimeout @@ -4120,7 +3671,7 @@ func New(cfg Config) *Server { createWorkspaceFn: cfg.CreateWorkspace, startWorkspaceFn: cfg.StartWorkspace, stopWorkspaceFn: cfg.StopWorkspace, - pubsub: cfg.Pubsub, + pubsub: ps, webpushDispatcher: cfg.WebpushDispatcher, providerAPIKeys: cfg.ProviderAPIKeys, allowBYOK: allowBYOK, @@ -4129,7 +3680,7 @@ func New(cfg Config) *Server { debugSvc := chatdebug.NewService( cfg.Database, cfg.Logger.Named("chatdebug"), - cfg.Pubsub, + ps, chatdebug.WithAlwaysEnable(cfg.AlwaysEnableDebugLogs), ) // Debug runs do not heartbeat during model streams; their @@ -4159,29 +3710,28 @@ func New(cfg Config) *Server { ctx = dbauthz.AsChatd(ctx) p.configCache = newChatConfigCache(ctx, cfg.Database, clk) - if p.pubsub != nil { - cancelConfigSub, err := p.pubsub.SubscribeWithErr( - 
coderdpubsub.ChatConfigEventChannel, - coderdpubsub.HandleChatConfigEvent(func(ctx context.Context, ev coderdpubsub.ChatConfigEvent, err error) { - if err != nil { - p.logger.Warn(ctx, "chat config event error", slog.Error(err)) - return - } - switch ev.Kind { - case coderdpubsub.ChatConfigEventProviders: - p.configCache.InvalidateProviders() - case coderdpubsub.ChatConfigEventModelConfig: - p.configCache.InvalidateModelConfig(ev.EntityID) - case coderdpubsub.ChatConfigEventUserPrompt: - p.configCache.InvalidateUserPrompt(ev.EntityID) - case coderdpubsub.ChatConfigEventAdvisorConfig: - p.configCache.InvalidateAdvisorConfig() - } - }), - ) - if err != nil { - p.logger.Error(ctx, "subscribe to chat config events", slog.Error(err)) - } + cancelConfigSub, err := p.pubsub.SubscribeWithErr( + coderdpubsub.ChatConfigEventChannel, + coderdpubsub.HandleChatConfigEvent(func(ctx context.Context, ev coderdpubsub.ChatConfigEvent, err error) { + if err != nil { + p.logger.Warn(ctx, "chat config event error", slog.Error(err)) + return + } + switch ev.Kind { + case coderdpubsub.ChatConfigEventProviders: + p.configCache.InvalidateProviders() + case coderdpubsub.ChatConfigEventModelConfig: + p.configCache.InvalidateModelConfig(ev.EntityID) + case coderdpubsub.ChatConfigEventUserPrompt: + p.configCache.InvalidateUserPrompt(ev.EntityID) + case coderdpubsub.ChatConfigEventAdvisorConfig: + p.configCache.InvalidateAdvisorConfig() + } + }), + ) + if err != nil { + p.logger.Error(ctx, "subscribe to chat config events", slog.Error(err)) + } else { p.configCacheUnsubscribe = cancelConfigSub } @@ -4850,53 +4400,46 @@ func (p *Server) SubscribeAuthorized( // Subscribe to pubsub for durable and structured control // events (status, messages, queue updates, retry, errors). - // When pubsub is nil (e.g. in-memory - // single-instance) we skip this and deliver all local events. + // If the subscription cannot be established, deliver all local + // events. 
// // This MUST happen before the DB queries below so that any // notification published between the query and the subscription // is not lost (subscribe-first-then-query pattern). - var notifications <-chan coderdpubsub.ChatStreamNotifyMessage - var errCh <-chan error - if p.pubsub != nil { - notifyCh := make(chan coderdpubsub.ChatStreamNotifyMessage, 10) - errNotifyCh := make(chan error, 1) - notifications = notifyCh - errCh = errNotifyCh - - listener := func(_ context.Context, message []byte, listenErr error) { - if listenErr != nil { - select { - case <-mergedCtx.Done(): - case errNotifyCh <- listenErr: - } - return - } - var notify coderdpubsub.ChatStreamNotifyMessage - if unmarshalErr := json.Unmarshal(message, &notify); unmarshalErr != nil { - select { - case <-mergedCtx.Done(): - case errNotifyCh <- xerrors.Errorf("unmarshal chat stream notify: %w", unmarshalErr): - } - return - } + notifications := make(chan coderdpubsub.ChatStreamNotifyMessage, 10) + errCh := make(chan error, 1) + listener := func(_ context.Context, message []byte, listenErr error) { + if listenErr != nil { select { case <-mergedCtx.Done(): - case notifyCh <- notify: + case errCh <- listenErr: } + return } + var notify coderdpubsub.ChatStreamNotifyMessage + if unmarshalErr := json.Unmarshal(message, &notify); unmarshalErr != nil { + select { + case <-mergedCtx.Done(): + case errCh <- xerrors.Errorf("unmarshal chat stream notify: %w", unmarshalErr): + } + return + } + select { + case <-mergedCtx.Done(): + case notifications <- notify: + } + } - if pubsubCancel, pubsubErr := p.pubsub.SubscribeWithErr( - coderdpubsub.ChatStreamNotifyChannel(chatID), - listener, - ); 
pubsubErr == nil { + allCancels = append(allCancels, pubsubCancel) + } else { + p.logger.Warn(ctx, "failed to subscribe to chat stream notifications", + slog.F("chat_id", chatID), + slog.Error(pubsubErr), + ) } cancel := func() { @@ -5049,13 +4592,10 @@ func (p *Server) SubscribeAuthorized( Logger: p.logger, }) } - hasPubsub := false - if p.pubsub != nil { - // hasPubsub is only true when we actually subscribed - // successfully above (allCancels will contain the pubsub - // cancel func in that case). - hasPubsub = len(allCancels) > 1 - } + // hasPubsubSubscription is only true when we actually subscribed + // successfully above (allCancels will contain the pubsub + // cancel func in that case). + hasPubsubSubscription := len(allCancels) > 1 //nolint:nestif go func() { @@ -5270,12 +4810,12 @@ func (p *Server) SubscribeAuthorized( // Local parts channel closed. If pubsub is // active we continue with pubsub-driven events. // Otherwise terminate. - if !hasPubsub { + if !hasPubsubSubscription { return } continue } - if hasPubsub { + if hasPubsubSubscription { // Forward transient events from local. // Durable events (messages, queue updates) // come via pubsub + cache. Status is @@ -5301,7 +4841,7 @@ func (p *Server) SubscribeAuthorized( } } } else { - // No pubsub: forward all event types. + // No pubsub subscription: forward all event types. select { case <-mergedCtx.Done(): return @@ -5350,9 +4890,6 @@ func (p *Server) publishStatus(chatID uuid.UUID, status database.ChatStatus, wor // PostgreSQL pubsub so that all replicas can merge durable database updates // with transient control events. 
func (p *Server) publishChatStreamNotify(chatID uuid.UUID, notify coderdpubsub.ChatStreamNotifyMessage) { - if p.pubsub == nil { - return - } payload, err := json.Marshal(notify) if err != nil { p.logger.Error(context.Background(), "failed to marshal chat stream notify", @@ -5379,9 +4916,6 @@ func (p *Server) publishChatPubsubEvents(chats []database.Chat, kind codersdk.Ch // publishChatPubsubEvent broadcasts a chat lifecycle event via PostgreSQL // pubsub so that all replicas can push updates to watching clients. func (p *Server) publishChatPubsubEvent(chat database.Chat, kind codersdk.ChatWatchEventKind, diffStatus *codersdk.ChatDiffStatus) { - if p.pubsub == nil { - return - } // diffStatus is applied below. File metadata is intentionally // omitted from pubsub events to avoid an extra DB query per // publish. Clients must merge pubsub updates, not replace @@ -5429,9 +4963,6 @@ func pendingToStreamToolCalls(pending []chatloop.PendingToolCall) []codersdk.Cha // PostgreSQL pubsub so that global watchers can react to dynamic // tool calls without streaming each chat individually. func (p *Server) publishChatActionRequired(chat database.Chat, pending []chatloop.PendingToolCall) { - if p.pubsub == nil { - return - } toolCalls := pendingToStreamToolCalls(pending) sdkChat := db2sdk.Chat(chat, nil, nil) @@ -5461,10 +4992,6 @@ func (p *Server) publishChatActionRequired(chat database.Chat, pending []chatloo // status. This is called from the HTTP layer after the diff status // is updated in the database. 
func (p *Server) PublishDiffStatusChange(ctx context.Context, chatID uuid.UUID) error { - if p.pubsub == nil { - return nil - } - chat, err := p.db.GetChatByID(ctx, chatID) if err != nil { return xerrors.Errorf("get chat: %w", err) @@ -5668,10 +5195,6 @@ func (p *Server) subscribeChatControl( cancel context.CancelCauseFunc, logger slog.Logger, ) func() { - if p.pubsub == nil { - return nil - } - listener := func(_ context.Context, message []byte, err error) { if err != nil { logger.Warn(ctx, "chat control pubsub error", slog.Error(err)) diff --git a/coderd/x/chatd/chatd_internal_test.go b/coderd/x/chatd/chatd_internal_test.go index b4d891bd06..b23cd67792 100644 --- a/coderd/x/chatd/chatd_internal_test.go +++ b/coderd/x/chatd/chatd_internal_test.go @@ -40,6 +40,14 @@ import ( "github.com/coder/quartz" ) +func TestNewRequiresPubsub(t *testing.T) { + t.Parallel() + + require.PanicsWithValue(t, "chatd: Pubsub is nil", func() { + _ = New(Config{}) + }) +} + type testAgentTool struct { info fantasy.ToolInfo providerOptions fantasy.ProviderOptions diff --git a/coderd/x/chatd/chatd_test.go b/coderd/x/chatd/chatd_test.go index 5d1f373cd9..111f4853fc 100644 --- a/coderd/x/chatd/chatd_test.go +++ b/coderd/x/chatd/chatd_test.go @@ -41,7 +41,6 @@ import ( "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/dbtime" dbpubsub "github.com/coder/coder/v2/coderd/database/pubsub" - coderdpubsub "github.com/coder/coder/v2/coderd/pubsub" "github.com/coder/coder/v2/coderd/rbac" "github.com/coder/coder/v2/coderd/util/slice" "github.com/coder/coder/v2/coderd/workspacestats" @@ -176,57 +175,6 @@ func newWorkspaceToolTestServer( }) } -func TestInterruptChatBroadcastsStatusAcrossInstances(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replicaA := newTestServer(t, db, ps, uuid.New()) - replicaB := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, 
db) - - chat, err := replicaA.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "interrupt-me", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - runningWorker := uuid.New() - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: runningWorker, Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - _, events, cancel, ok := replicaB.Subscribe(ctx, chat.ID, nil, 0) - require.True(t, ok) - t.Cleanup(cancel) - - updated := replicaA.InterruptChat(ctx, chat) - require.Equal(t, database.ChatStatusWaiting, updated.Status) - require.False(t, updated.WorkerID.Valid) - - require.Eventually(t, func() bool { - select { - case event := <-events: - if event.Type == codersdk.ChatStreamEventTypeStatus && event.Status != nil { - return event.Status.Status == codersdk.ChatStatusWaiting - } - t.Logf("skipping unexpected event: type=%s", event.Type) - return false - default: - return false - } - }, testutil.WaitMedium, testutil.IntervalFast) -} - func TestSubagentChatExcludesWorkspaceProvisioningTools(t *testing.T) { t.Parallel() @@ -1422,91 +1370,13 @@ func TestPlanModeRootChatAllowsApprovedExternalMCPTools(t *testing.T) { "ask mode should continue exposing workspace MCP tools") } -func TestInterruptChatClearsWorkerInDatabase(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "db-transition", - ModelConfigID: model.ID, - InitialUserContent: 
[]codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - updated := replica.InterruptChat(ctx, chat) - require.Equal(t, database.ChatStatusWaiting, updated.Status) - require.False(t, updated.WorkerID.Valid) - - fromDB, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusWaiting, fromDB.Status) - require.False(t, fromDB.WorkerID.Valid) -} - -func TestArchiveChatMovesPendingChatToWaiting(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "archive-pending", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusPending, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - require.NoError(t, err) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - fromDB, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusWaiting, fromDB.Status) - require.False(t, fromDB.WorkerID.Valid) - require.False(t, fromDB.StartedAt.Valid) - require.False(t, fromDB.HeartbeatAt.Valid) - require.True(t, fromDB.Archived) - require.Zero(t, fromDB.PinOrder) -} - 
// TestUnarchiveChildChat covers the deterministic branches of the -// Server.UnarchiveChat child path: happy path, archived-parent reject, -// and already-active no-op. +// Server.UnarchiveChat child path: every child unarchive attempt is +// rejected with chatd.ErrArchiveRequiresRootChat. func TestUnarchiveChildChat(t *testing.T) { t.Parallel() - t.Run("ChildWithActiveParentUnarchives", func(t *testing.T) { + t.Run("ChildWithActiveParentRejected", func(t *testing.T) { t.Parallel() db, ps := dbtestutil.NewDB(t) @@ -1516,11 +1386,12 @@ func TestUnarchiveChildChat(t *testing.T) { parent, child := insertParentWithArchivedChild(ctx, t, db, user, org, model) - require.NoError(t, replica.UnarchiveChat(ctx, child)) + err := replica.UnarchiveChat(ctx, child) + require.ErrorIs(t, err, chatd.ErrArchiveRequiresRootChat) dbChild, err := db.GetChatByID(ctx, child.ID) require.NoError(t, err) - require.False(t, dbChild.Archived, "child should be unarchived") + require.True(t, dbChild.Archived, "child should remain archived") dbParent, err := db.GetChatByID(ctx, parent.ID) require.NoError(t, err) @@ -1540,14 +1411,14 @@ func TestUnarchiveChildChat(t *testing.T) { require.NoError(t, err) err = replica.UnarchiveChat(ctx, child) - require.ErrorIs(t, err, chatd.ErrChildUnarchiveParentArchived) + require.ErrorIs(t, err, chatd.ErrArchiveRequiresRootChat) dbChild, err := db.GetChatByID(ctx, child.ID) require.NoError(t, err) require.True(t, dbChild.Archived, "child should remain archived") }) - t.Run("AlreadyActiveChildNoOp", func(t *testing.T) { + t.Run("ActiveChildRejected", func(t *testing.T) { t.Parallel() db, ps := dbtestutil.NewDB(t) @@ -1557,7 +1428,8 @@ func TestUnarchiveChildChat(t *testing.T) { _, child := insertParentWithActiveChild(t, db, user, org, model) - require.NoError(t, replica.UnarchiveChat(ctx, child)) + err := replica.UnarchiveChat(ctx, child) + require.ErrorIs(t, err, chatd.ErrArchiveRequiresRootChat) dbChild, err := db.GetChatByID(ctx, child.ID) require.NoError(t, 
err) @@ -1565,6 +1437,60 @@ func TestUnarchiveChildChat(t *testing.T) { }) } +// TestArchiveChat_RejectsChildChat verifies that Server.ArchiveChat +// refuses every child chat with chatd.ErrArchiveRequiresRootChat +// regardless of the family's current archive state. Archive state +// changes must always be issued against the root chat so the whole +// family flips together. +func TestArchiveChat_RejectsChildChat(t *testing.T) { + t.Parallel() + + t.Run("ActiveChildRejected", func(t *testing.T) { + t.Parallel() + + db, ps := dbtestutil.NewDB(t) + replica := newTestServer(t, db, ps, uuid.New()) + ctx := testutil.Context(t, testutil.WaitLong) + user, org, model := seedChatDependencies(t, db) + + parent, child := insertParentWithActiveChild(t, db, user, org, model) + + err := replica.ArchiveChat(ctx, child) + require.ErrorIs(t, err, chatd.ErrArchiveRequiresRootChat) + + dbChild, err := db.GetChatByID(ctx, child.ID) + require.NoError(t, err) + require.False(t, dbChild.Archived, "child should stay active after rejected archive") + + dbParent, err := db.GetChatByID(ctx, parent.ID) + require.NoError(t, err) + require.False(t, dbParent.Archived, "parent should stay active after rejected child archive") + }) + + t.Run("AlreadyArchivedChildRejected", func(t *testing.T) { + t.Parallel() + + db, ps := dbtestutil.NewDB(t) + replica := newTestServer(t, db, ps, uuid.New()) + ctx := testutil.Context(t, testutil.WaitLong) + user, org, model := seedChatDependencies(t, db) + + parent, child := insertParentWithArchivedChild(ctx, t, db, user, org, model) + + err := replica.ArchiveChat(ctx, child) + require.ErrorIs(t, err, chatd.ErrArchiveRequiresRootChat, + "child archive must be rejected even when the child is already archived") + + dbChild, err := db.GetChatByID(ctx, child.ID) + require.NoError(t, err) + require.True(t, dbChild.Archived, "child archived flag should not change") + + dbParent, err := db.GetChatByID(ctx, parent.ID) + require.NoError(t, err) + require.False(t, 
dbParent.Archived, "parent should stay active") + }) +} + // insertParentWithActiveChild creates a parent chat and an active // child chat linked to it. Both are returned in their initial // (active) state. @@ -1613,141 +1539,6 @@ func insertParentWithArchivedChild( return parent, child } -func TestArchiveChatInterruptsActiveProcessing(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - streamStarted := make(chan struct{}) - streamCanceled := make(chan struct{}) - openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { - if !req.Stream { - return chattest.OpenAINonStreamingResponse("title") - } - chunks := make(chan chattest.OpenAIChunk, 1) - go func() { - defer close(chunks) - chunks <- chattest.OpenAITextChunks("partial")[0] - select { - case <-streamStarted: - default: - close(streamStarted) - } - <-req.Context().Done() - select { - case <-streamCanceled: - default: - close(streamCanceled) - } - }() - return chattest.OpenAIResponse{StreamingChunks: chunks} - }) - - server := newActiveTestServer(t, db, ps) - user, org, model := seedChatDependencies(t, db) - setOpenAIProviderBaseURL(ctx, t, db, openAIURL) - - chat, err := server.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "archive-interrupt", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - fromDB, dbErr := db.GetChatByID(ctx, chat.ID) - if dbErr != nil { - return false - } - return fromDB.Status == database.ChatStatusRunning && fromDB.WorkerID.Valid - }, testutil.IntervalFast) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case <-streamStarted: - return true - default: - return false - } - }, testutil.IntervalFast) - - _, events, cancel, ok := server.Subscribe(ctx, chat.ID, 
nil, 0) - require.True(t, ok) - defer cancel() - - queuedResult, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("queued")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - require.NotNil(t, queuedResult.QueuedMessage) - - err = server.ArchiveChat(ctx, chat) - require.NoError(t, err) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case <-streamCanceled: - return true - default: - return false - } - }, testutil.IntervalFast) - - gotWaitingStatus := false - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - for { - select { - case ev := <-events: - if ev.Type == codersdk.ChatStreamEventTypeStatus && - ev.Status != nil && - ev.Status.Status == codersdk.ChatStatusWaiting { - gotWaitingStatus = true - return true - } - default: - return gotWaitingStatus - } - } - }, testutil.IntervalFast) - require.True(t, gotWaitingStatus, "expected a waiting status event after archive") - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - fromDB, dbErr := db.GetChatByID(ctx, chat.ID) - if dbErr != nil { - return false - } - return fromDB.Archived && - fromDB.Status == database.ChatStatusWaiting && - !fromDB.WorkerID.Valid && - !fromDB.StartedAt.Valid && - !fromDB.HeartbeatAt.Valid - }, testutil.IntervalFast) - - queuedMessages, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Len(t, queuedMessages, 1) - require.Equal(t, queuedResult.QueuedMessage.ID, queuedMessages[0].ID) - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - userMessages := 0 - for _, msg := range messages { - if msg.Role == database.ChatMessageRoleUser { - userMessages++ - } - } - require.Equal(t, 1, userMessages, "expected queued message to stay queued after 
archive") -} - func TestUpdateChatHeartbeatsRequiresOwnership(t *testing.T) { t.Parallel() @@ -1921,73 +1712,6 @@ func TestPlanTurnPromptContract(t *testing.T) { } } -func TestSendMessageQueuesWhenWaitingWithQueuedBacklog(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "queue-when-waiting-with-backlog", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText("older queued"), - }) - require.NoError(t, err) - _, err = db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - }) - require.NoError(t, err) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - require.NoError(t, err) - - result, err := replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("newer queued")}, - }) - require.NoError(t, err) - require.True(t, result.Queued) - require.NotNil(t, result.QueuedMessage) - require.Equal(t, database.ChatStatusWaiting, result.Chat.Status) - - queued, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Len(t, queued, 2) - - olderSDK := db2sdk.ChatQueuedMessage(queued[0]) - require.Len(t, olderSDK.Content, 1) - require.Equal(t, "older queued", olderSDK.Content[0].Text) - - newerSDK := db2sdk.ChatQueuedMessage(queued[1]) - require.Len(t, 
newerSDK.Content, 1) - require.Equal(t, "newer queued", newerSDK.Content[0].Text) - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - require.Len(t, messages, 1) -} - func TestSendMessageRejectsInvalidQueuedModelConfigID(t *testing.T) { t.Parallel() @@ -2018,178 +1742,6 @@ func TestSendMessageRejectsInvalidQueuedModelConfigID(t *testing.T) { require.Empty(t, queued) } -func TestSendMessageInterruptBehaviorQueuesAndInterruptsWhenBusy(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newStartedTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "interrupt-when-busy", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - // CreateChat calls signalWake which triggers processOnce in - // the background. Wait for that processing to finish so it - // doesn't race with the manual status update below. - waitForChatProcessed(ctx, t, db, chat.ID, replica) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - result, err := replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("interrupt")}, - BusyBehavior: chatd.SendMessageBusyBehaviorInterrupt, - }) - require.NoError(t, err) - - // The message should be queued, not inserted directly. 
- require.True(t, result.Queued) - require.NotNil(t, result.QueuedMessage) - - // The chat should transition to waiting (interrupt signal), - // not pending. - require.Equal(t, database.ChatStatusWaiting, result.Chat.Status) - - fromDB, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusWaiting, fromDB.Status) - - // The message should be in the queue, not in chat_messages. - queued, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Len(t, queued, 1) - - // Only messages from the initial processing round should be in - // chat_messages (user + assistant). The "interrupt" message must - // be in the queue, not inserted directly. - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - require.Len(t, messages, 2) -} - -func TestEditMessageUpdatesAndTruncatesAndClearsQueue(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "edit-message", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("original")}, - }) - require.NoError(t, err) - - initialMessages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - require.Len(t, initialMessages, 1) - editedMessageID := initialMessages[0].ID - - _, err = replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("follow-up")}, - BusyBehavior: chatd.SendMessageBusyBehaviorInterrupt, - }) - require.NoError(t, err) - _, err = replica.SendMessage(ctx, 
chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("another")}, - BusyBehavior: chatd.SendMessageBusyBehaviorInterrupt, - }) - require.NoError(t, err) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText("queued"), - }) - require.NoError(t, err) - _, err = db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - }) - require.NoError(t, err) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - editResult, err := replica.EditMessage(ctx, chatd.EditMessageOptions{ - ChatID: chat.ID, - EditedMessageID: editedMessageID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("edited")}, - }) - require.NoError(t, err) - // The edited message is soft-deleted and a new message is inserted, - // so the returned message ID will differ from the original. 
- require.NotEqual(t, editedMessageID, editResult.Message.ID) - require.Equal(t, database.ChatStatusPending, editResult.Chat.Status) - require.False(t, editResult.Chat.WorkerID.Valid) - - editedSDK := db2sdk.ChatMessage(editResult.Message) - require.Len(t, editedSDK.Content, 1) - require.Equal(t, "edited", editedSDK.Content[0].Text) - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - require.Len(t, messages, 1) - require.Equal(t, editResult.Message.ID, messages[0].ID) - onlyMessage := db2sdk.ChatMessage(messages[0]) - require.Len(t, onlyMessage.Content, 1) - require.Equal(t, "edited", onlyMessage.Content[0].Text) - - queued, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Len(t, queued, 0) - - // WaitUntilIdleForTest drains the debug-cleanup goroutine - // from EditMessage. Must be called from the test goroutine - // (not inside require.Eventually) to avoid Add/Wait race. 
- chatd.WaitUntilIdleForTest(replica) - var chatFromDB database.Chat - require.Eventually(t, func() bool { - c, e := db.GetChatByID(ctx, chat.ID) - if e != nil { - return false - } - chatFromDB = c - return chatFromDB.Status != database.ChatStatusRunning - }, testutil.WaitShort, testutil.IntervalFast) - require.False(t, chatFromDB.WorkerID.Valid) -} - func TestCreateChatInsertsWorkspaceAwarenessMessage(t *testing.T) { t.Parallel() @@ -2358,238 +1910,6 @@ func TestCreateChatRejectsWhenUsageLimitReached(t *testing.T) { require.Len(t, afterChats, len(beforeChats)) } -func TestPromoteQueuedAllowsAlreadyQueuedMessageWhenUsageLimitReached(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newStartedTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - _, err := db.UpsertChatUsageLimitConfig(ctx, database.UpsertChatUsageLimitConfigParams{ - Enabled: true, - DefaultLimitMicros: 100, - Period: string(codersdk.ChatUsageLimitPeriodDay), - }) - require.NoError(t, err) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "queued-limit-reached", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - // CreateChat calls signalWake which triggers processOnce in - // the background. Wait for that processing to finish so it - // doesn't race with the manual status update below. 
- waitForChatProcessed(ctx, t, db, chat.ID, replica) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - queuedResult, err := replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("queued")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - require.NotNil(t, queuedResult.QueuedMessage) - - assistantContent, err := chatprompt.MarshalParts([]codersdk.ChatMessagePart{ - codersdk.ChatMessageText("assistant"), - }) - require.NoError(t, err) - - _ = dbgen.ChatMessage(t, db, database.ChatMessage{ - ChatID: chat.ID, - ModelConfigID: uuid.NullUUID{UUID: model.ID, Valid: true}, - Role: database.ChatMessageRoleAssistant, - ContentVersion: chatprompt.CurrentContentVersion, - Content: assistantContent, - TotalCostMicros: sql.NullInt64{Int64: 100, Valid: true}, - }) - - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - LastError: pqtype.NullRawMessage{}, - }) - require.NoError(t, err) - - result, err := replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedResult.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.Equal(t, database.ChatMessageRoleUser, result.PromotedMessage.Role) - - queued, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Empty(t, queued) - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, 
- }) - require.NoError(t, err) - require.Len(t, messages, 4) - require.Equal(t, database.ChatMessageRoleUser, messages[3].Role) -} - -func TestPromoteQueuedMessageUsesQueuedModelConfigID(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, modelConfigA := seedChatDependencies(t, db) - modelConfigB := insertChatModelConfigWithCallConfig( - t, - db, - user.ID, - "openai", - "gpt-4o-mini-promote-"+uuid.NewString(), - codersdk.ChatModelCallConfig{}, - ) - - chat := dbgen.Chat(t, db, database.Chat{ - OrganizationID: org.ID, - OwnerID: user.ID, - LastModelConfigID: modelConfigA.ID, - Title: "promote queued uses stored model", - }) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{codersdk.ChatMessageText("queued with model b")}) - require.NoError(t, err) - queuedMessage, err := db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - ModelConfigID: uuid.NullUUID{ - UUID: modelConfigB.ID, - Valid: true, - }, - }) - require.NoError(t, err) - - result, err := replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.True(t, result.PromotedMessage.ModelConfigID.Valid) - require.Equal(t, modelConfigB.ID, result.PromotedMessage.ModelConfigID.UUID) - - storedChat, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, modelConfigB.ID, storedChat.LastModelConfigID) - // The processor can pick up the pending chat immediately after - // promotion, so this test only requires that promotion moved it out of - // waiting and preserved the queued model configuration. 
- require.Contains(t, []database.ChatStatus{ - database.ChatStatusPending, - database.ChatStatusRunning, - }, storedChat.Status) -} - -func TestPromoteQueuedMessageReloadsChatWhenModelConfigChangesDuringPending(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, modelConfigA := seedChatDependencies(t, db) - modelConfigB := insertChatModelConfigWithCallConfig( - t, - db, - user.ID, - "openai", - "gpt-4o-mini-promote-pending-"+uuid.NewString(), - codersdk.ChatModelCallConfig{}, - ) - - watchEvents := make(chan struct { - payload codersdk.ChatWatchEvent - err error - }, 1) - cancelWatch, err := ps.SubscribeWithErr( - coderdpubsub.ChatWatchEventChannel(user.ID), - coderdpubsub.HandleChatWatchEvent(func(_ context.Context, payload codersdk.ChatWatchEvent, err error) { - select { - case watchEvents <- struct { - payload codersdk.ChatWatchEvent - err error - }{payload: payload, err: err}: - default: - } - }), - ) - require.NoError(t, err) - defer cancelWatch() - - chat := dbgen.Chat(t, db, database.Chat{ - OrganizationID: org.ID, - Status: database.ChatStatusPending, - OwnerID: user.ID, - LastModelConfigID: modelConfigA.ID, - Title: "promote queued reloads pending chat", - }) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{codersdk.ChatMessageText("queued with new model")}) - require.NoError(t, err) - queuedMessage, err := db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - ModelConfigID: uuid.NullUUID{ - UUID: modelConfigB.ID, - Valid: true, - }, - }) - require.NoError(t, err) - - result, err := replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.True(t, result.PromotedMessage.ModelConfigID.Valid) - require.Equal(t, modelConfigB.ID, 
result.PromotedMessage.ModelConfigID.UUID) - - storedChat, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusPending, storedChat.Status) - require.Equal(t, modelConfigB.ID, storedChat.LastModelConfigID) - - select { - case event := <-watchEvents: - require.NoError(t, event.err) - require.Equal(t, codersdk.ChatWatchEventKindStatusChange, event.payload.Kind) - require.Equal(t, chat.ID, event.payload.Chat.ID) - require.Equal(t, codersdk.ChatStatusPending, event.payload.Chat.Status) - require.Equal(t, modelConfigB.ID, event.payload.Chat.LastModelConfigID) - case <-ctx.Done(): - t.Fatal("timed out waiting for status change watch event") - } -} - func TestAutoPromoteQueuedMessagesPreservesPerTurnModelOrder(t *testing.T) { t.Parallel() // TODO(CODAGT-353): Re-enable this test after the chatd notification flow @@ -2848,85 +2168,6 @@ func testAutoPromoteQueuedMessageFallback(t *testing.T, queuedModelConfigID uuid require.True(t, found) } -func TestPromoteQueuedMessageFallsBackForLegacyQueuedRows(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, modelConfigA := seedChatDependencies(t, db) - chat := dbgen.Chat(t, db, database.Chat{ - OrganizationID: org.ID, - OwnerID: user.ID, - LastModelConfigID: modelConfigA.ID, - Title: "promote queued legacy fallback", - }) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{codersdk.ChatMessageText("legacy queued row")}) - require.NoError(t, err) - queuedMessage, err := db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - }) - require.NoError(t, err) - - result, err := replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.True(t, 
result.PromotedMessage.ModelConfigID.Valid) - require.Equal(t, modelConfigA.ID, result.PromotedMessage.ModelConfigID.UUID) - - storedChat, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, modelConfigA.ID, storedChat.LastModelConfigID) -} - -func TestPromoteQueuedMessageFallsBackForInvalidQueuedModelConfigID(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, modelConfig := seedChatDependencies(t, db) - - chat := dbgen.Chat(t, db, database.Chat{ - OrganizationID: org.ID, - OwnerID: user.ID, - LastModelConfigID: modelConfig.ID, - Title: "promote queued invalid fallback", - }) - - queuedContent, err := json.Marshal([]codersdk.ChatMessagePart{codersdk.ChatMessageText("invalid queued model")}) - require.NoError(t, err) - queuedMessage, err := db.InsertChatQueuedMessage(ctx, database.InsertChatQueuedMessageParams{ - ChatID: chat.ID, - Content: queuedContent, - ModelConfigID: uuid.NullUUID{ - UUID: uuid.New(), - Valid: true, - }, - }) - require.NoError(t, err) - - result, err := replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.True(t, result.PromotedMessage.ModelConfigID.Valid) - require.Equal(t, modelConfig.ID, result.PromotedMessage.ModelConfigID.UUID) - - storedChat, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, modelConfig.ID, storedChat.LastModelConfigID) -} - func TestInterruptAutoPromotionIgnoresLaterUsageLimitIncrease(t *testing.T) { t.Parallel() @@ -3438,91 +2679,6 @@ func TestEditMessageDebugCleanupPreservesRecentRuns(t *testing.T) { } // TestArchiveChatDebugCleanupDeletesPreArchiveRuns verifies that -// ArchiveChat schedules cleanup that deletes pre-archive debug runs -// for the archived chat. 
Covers the archiveCutoff sampled from -// ArchiveChatByID's DB-stamped updated_at and the DeleteByChatID -// delete path. -func TestArchiveChatDebugCleanupDeletesPreArchiveRuns(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newDebugEnabledTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "debug-archive-cleanup", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - staleStart := time.Now().Add(-time.Hour).UTC().Truncate(time.Microsecond) - staleRun, err := db.InsertChatDebugRun(ctx, database.InsertChatDebugRunParams{ - ChatID: chat.ID, - ModelConfigID: uuid.NullUUID{UUID: model.ID, Valid: true}, - Kind: "chat_turn", - Status: "in_progress", - Provider: sql.NullString{String: "openai", Valid: true}, - Model: sql.NullString{String: model.Model, Valid: true}, - StartedAt: sql.NullTime{Time: staleStart, Valid: true}, - UpdatedAt: sql.NullTime{Time: staleStart, Valid: true}, - }) - require.NoError(t, err) - - // Freshly-inserted run inside the skew buffer must survive the - // fast retry for the same reason as the edit-cleanup buffer test. 
- recentStart := time.Now().Add(-time.Second).UTC().Truncate(time.Microsecond) - recentRun, err := db.InsertChatDebugRun(ctx, database.InsertChatDebugRunParams{ - ChatID: chat.ID, - ModelConfigID: uuid.NullUUID{UUID: model.ID, Valid: true}, - Kind: "chat_turn", - Status: "in_progress", - Provider: sql.NullString{String: "openai", Valid: true}, - Model: sql.NullString{String: model.Model, Valid: true}, - StartedAt: sql.NullTime{Time: recentStart, Valid: true}, - UpdatedAt: sql.NullTime{Time: recentStart, Valid: true}, - }) - require.NoError(t, err) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - chatd.WaitUntilIdleForTest(replica) - - // ErrNoRows proves the fast-retry path DELETED the row: - // FinalizeStale only UPDATEs in place, never deletes. - _, err = db.GetChatDebugRunByID(ctx, staleRun.ID) - require.ErrorIs(t, err, sql.ErrNoRows, - "pre-archive run outside the buffer should be deleted") - - remaining, err := db.GetChatDebugRunByID(ctx, recentRun.ID) - require.NoError(t, err, - "runs inside the clock-skew buffer must survive the fast retry") - require.Equal(t, recentRun.ID, remaining.ID) - - // Count the seeded survivors directly so the delete is verified - // not just by absence of a specific row. Scoped to seeded IDs - // because the archive transition may still race with other - // background debug writes. 
- remainingRuns, err := db.GetChatDebugRunsByChatID(ctx, database.GetChatDebugRunsByChatIDParams{ - ChatID: chat.ID, LimitVal: 100, - }) - require.NoError(t, err) - seeded := map[uuid.UUID]bool{staleRun.ID: true, recentRun.ID: true} - survivors := 0 - for _, r := range remainingRuns { - if seeded[r.ID] { - survivors++ - } - } - require.Equal(t, 1, survivors, - "only the recent (buffered) seeded run should survive") -} - func TestRecoverStaleChatsPeriodically(t *testing.T) { t.Parallel() @@ -4724,12 +3880,11 @@ func ptrRef[T any](v T) *T { return &v } -func TestSubscribeNoPubsubNoDuplicateMessageParts(t *testing.T) { +func TestSubscribeNoDuplicateMessageParts(t *testing.T) { t.Parallel() - // Use nil pubsub to force the no-pubsub path. - db, _ := dbtestutil.NewDB(t) - replica := newStartedTestServer(t, db, nil, uuid.New()) + db, ps := dbtestutil.NewDB(t) + replica := newTestServer(t, db, ps, uuid.New()) ctx := testutil.Context(t, testutil.WaitLong) user, org, model := seedChatDependencies(t, db) @@ -4743,14 +3898,6 @@ func TestSubscribeNoPubsubNoDuplicateMessageParts(t *testing.T) { }) require.NoError(t, err) - // Wait for any wake-triggered processing to settle before - // subscribing, so the snapshot captures the final state. - // The wake signal may trigger processOnce which will fail - // (no LLM configured) and set the chat to error status. - // Poll until the chat reaches a terminal state (not pending - // and not running), then wait for the goroutine to finish. - waitForChatProcessed(ctx, t, db, chat.ID, replica) - snapshot, events, cancel, ok := replica.Subscribe(ctx, chat.ID, nil, 0) require.True(t, ok) t.Cleanup(cancel) @@ -5652,10 +4799,12 @@ func TestPassiveServerDoesNotProcess(t *testing.T) { chatd.WaitUntilIdleForTest(server) - // Re-read from DB to catch any unexpected state transition. + // Re-read from DB to catch any unexpected processing. 
stored, err := db.GetChatByID(ctx, chat.ID) require.NoError(t, err) - require.Equal(t, database.ChatStatusPending, stored.Status) + require.Equal(t, database.ChatStatusRunning, stored.Status) + require.False(t, stored.WorkerID.Valid) + require.False(t, stored.RunnerID.Valid) } // newStartedTestServer creates a server with Start() called. @@ -6223,7 +5372,7 @@ func TestInterruptChatDoesNotSendWebPushNotification(t *testing.T) { }, testutil.IntervalFast) // Interrupt the chat. - updated := server.InterruptChat(ctx, chat) + updated, _ := server.InterruptChat(ctx, chat) require.Equal(t, database.ChatStatusWaiting, updated.Status) // Wait for the chat to finish processing and return to waiting. @@ -7125,7 +6274,7 @@ func TestInterruptChatPersistsPartialResponse(t *testing.T) { require.True(t, gotMessagePart, "should have received at least one message_part event") // Now interrupt the chat. The chatloop has processed content. - updated := server.InterruptChat(ctx, chat) + updated, _ := server.InterruptChat(ctx, chat) require.Equal(t, database.ChatStatusWaiting, updated.Status) // Wait for the partial assistant message to be persisted. 
@@ -8600,71 +7749,6 @@ func TestAgentContextFilesAndSkillsLoadedIntoChat(t *testing.T) { "plan-file-path block should be part of the main system prompt, not a standalone message") } -func TestSendMessageRejectsArchivedChat(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "send-archived", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - _, err = replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("should fail")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.ErrorIs(t, err, chatd.ErrChatArchived) -} - -func TestEditMessageRejectsArchivedChat(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "edit-archived", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("original")}, - }) - require.NoError(t, err) - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - require.Len(t, messages, 1) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - _, err = replica.EditMessage(ctx, chatd.EditMessageOptions{ - ChatID: chat.ID, - EditedMessageID: messages[0].ID, - Content: 
[]codersdk.ChatMessagePart{codersdk.ChatMessageText("edited")}, - }) - require.ErrorIs(t, err, chatd.ErrChatArchived) -} - // TestEditMessageWithModelConfigOverride verifies that callers can // change the model when editing a previous user message. The // replacement message must persist with the new model and the chat's @@ -8814,511 +7898,7 @@ func TestEditMessageRejectsUnknownModelConfig(t *testing.T) { require.Equal(t, modelA.ID, storedChat.LastModelConfigID) } -func TestPromoteQueuedRejectsArchivedChat(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "promote-archived", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - // Queue a message by setting the chat to running first. - chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: uuid.New(), Valid: true}, - StartedAt: sql.NullTime{Time: time.Now(), Valid: true}, - HeartbeatAt: sql.NullTime{Time: time.Now(), Valid: true}, - }) - require.NoError(t, err) - - queuedResult, err := replica.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("queued")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - - // Move back to waiting, then archive. 
- chat, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusWaiting, - WorkerID: uuid.NullUUID{}, - StartedAt: sql.NullTime{}, - HeartbeatAt: sql.NullTime{}, - }) - require.NoError(t, err) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - _, err = replica.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedResult.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.ErrorIs(t, err, chatd.ErrChatArchived) -} - -// TestPromoteQueuedWhileRequiresAction guards against the -// stops-dead failure mode: promoting on requires_action without -// closing pending dynamic tool calls leaves the assistant turn -// with unresolved tool_call parts that the LLM API rejects. It -// also asserts the synthetic tool-result row is published to live -// SSE subscribers before the promoted user message. -func TestPromoteQueuedWhileRequiresAction(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - var streamedCallCount atomic.Int32 - openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { - if !req.Stream { - return chattest.OpenAINonStreamingResponse("requires-action-promote") - } - if streamedCallCount.Add(1) == 1 { - return chattest.OpenAIStreamingResponse( - chattest.OpenAIToolCallChunk( - "my_dynamic_tool", - `{"input":"hello"}`, - ), - ) - } - // Second call: the resumed run after promote completes. 
- return chattest.OpenAIStreamingResponse( - chattest.OpenAITextChunks("Resumed after promotion.")..., - ) - }) - - user, org, model := seedChatDependenciesWithProvider(t, db, "openai-compat", openAIURL) - server := newActiveTestServer(t, db, ps) - - dynamicToolsJSON, err := json.Marshal([]mcpgo.Tool{{ - Name: "my_dynamic_tool", - Description: "A test dynamic tool.", - InputSchema: mcpgo.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "input": map[string]any{"type": "string"}, - }, - Required: []string{"input"}, - }, - }}) - require.NoError(t, err) - - chat, err := server.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "promote-while-requires-action", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{ - codersdk.ChatMessageText("Please call the dynamic tool."), - }, - DynamicTools: dynamicToolsJSON, - }) - require.NoError(t, err) - - var chatBeforePromote database.Chat - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - got, getErr := db.GetChatByID(ctx, chat.ID) - if getErr != nil { - return false - } - chatBeforePromote = got - return got.Status == database.ChatStatusRequiresAction || - got.Status == database.ChatStatusError - }, testutil.IntervalFast) - require.Equal(t, database.ChatStatusRequiresAction, chatBeforePromote.Status, - "expected requires_action, got %s (last_error=%q)", - chatBeforePromote.Status, chatLastErrorMessage(chatBeforePromote.LastError)) - - var pendingToolCallID string - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - messages, dbErr := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - if dbErr != nil { - return false - } - for _, msg := range messages { - if msg.Role != database.ChatMessageRoleAssistant { - continue - } - parts, parseErr := chatprompt.ParseContent(msg) - if parseErr != nil { - continue - } - for _, part := range parts { - if part.Type == 
codersdk.ChatMessagePartTypeToolCall && part.ToolName == "my_dynamic_tool" { - pendingToolCallID = part.ToolCallID - return true - } - } - } - return false - }, testutil.IntervalFast) - require.NotEmpty(t, pendingToolCallID, "expected pending dynamic tool call") - - queuedResult, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("promote me")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - require.NotNil(t, queuedResult.QueuedMessage) - - // Subscribe before promoting to capture published events. - _, events, subCancel, ok := server.Subscribe(ctx, chat.ID, nil, 0) - require.True(t, ok) - defer subCancel() - promoteResult, err := server.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedResult.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.Equal(t, database.ChatMessageRoleUser, promoteResult.PromotedMessage.Role) - - // Synthetic row must publish before the promoted user message. 
- var ( - syntheticPublishedAt int - userPublishedAt int - messagesSeen int - ) - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case ev := <-events: - if ev.Type != codersdk.ChatStreamEventTypeMessage || ev.Message == nil { - return false - } - messagesSeen++ - switch ev.Message.Role { - case codersdk.ChatMessageRoleTool: - if syntheticPublishedAt == 0 { - syntheticPublishedAt = messagesSeen - } - case codersdk.ChatMessageRoleUser: - if ev.Message.ID == promoteResult.PromotedMessage.ID { - userPublishedAt = messagesSeen - } - } - return syntheticPublishedAt > 0 && userPublishedAt > 0 - default: - return false - } - }, testutil.IntervalFast) - require.Less(t, syntheticPublishedAt, userPublishedAt, - "synthetic tool-result must be published before the promoted user message") - - queuedAfter, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Empty(t, queuedAfter, "queued message should be removed after sync promotion") - - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - - var ( - syntheticToolResult *database.ChatMessage - promotedUserMessage *database.ChatMessage - ) - for i := range messages { - msg := messages[i] - if msg.Role == database.ChatMessageRoleTool { - parts, parseErr := chatprompt.ParseContent(msg) - require.NoError(t, parseErr) - for _, part := range parts { - if part.Type != codersdk.ChatMessagePartTypeToolResult { - continue - } - if part.ToolCallID != pendingToolCallID { - continue - } - require.True(t, part.IsError, - "synthetic tool result should have IsError=true") - syntheticToolResult = &messages[i] - } - } - if msg.ID == promoteResult.PromotedMessage.ID { - promotedUserMessage = &messages[i] - } - } - require.NotNil(t, syntheticToolResult, - "expected a synthetic error tool result for the pending tool call") - require.NotNil(t, promotedUserMessage) - require.Less(t, 
syntheticToolResult.ID, promotedUserMessage.ID, - "synthetic tool result must precede the promoted user message") - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - got, getErr := db.GetChatByID(ctx, chat.ID) - if getErr != nil { - return false - } - return got.Status == database.ChatStatusWaiting || got.Status == database.ChatStatusError - }, testutil.IntervalFast) - final, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusWaiting, final.Status, - "chat should resume to waiting after promotion (last_error=%q)", - chatLastErrorMessage(final.LastError)) -} - // TestPromoteQueuedWhileRequiresActionMixedTools guards against -// duplicating already-resolved built-in tool results: synthetic -// results must be scoped to dynamic tool names only. -func TestPromoteQueuedWhileRequiresActionMixedTools(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - var streamedCallCount atomic.Int32 - openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { - if !req.Stream { - return chattest.OpenAINonStreamingResponse("mixed-tools-promote") - } - if streamedCallCount.Add(1) == 1 { - builtinChunk := chattest.OpenAIToolCallChunk( - "read_file", - `{"path":"/tmp/test.txt"}`, - ) - dynamicChunk := chattest.OpenAIToolCallChunk( - "my_dynamic_tool", - `{"input":"hello world"}`, - ) - mergedChunk := builtinChunk - dynCall := dynamicChunk.Choices[0].ToolCalls[0] - dynCall.Index = 1 - mergedChunk.Choices[0].ToolCalls = append( - mergedChunk.Choices[0].ToolCalls, - dynCall, - ) - return chattest.OpenAIStreamingResponse(mergedChunk) - } - return chattest.OpenAIStreamingResponse( - chattest.OpenAITextChunks("Resumed after mixed-tool promotion.")..., - ) - }) - - user, org, model := seedChatDependenciesWithProvider(t, db, "openai-compat", openAIURL) - server := newActiveTestServer(t, db, ps) - - dynamicToolsJSON, err := 
json.Marshal([]mcpgo.Tool{{ - Name: "my_dynamic_tool", - Description: "A test dynamic tool.", - InputSchema: mcpgo.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "input": map[string]any{"type": "string"}, - }, - Required: []string{"input"}, - }, - }}) - require.NoError(t, err) - - chat, err := server.CreateChat(ctx, chatd.CreateOptions{ - OrganizationID: org.ID, - OwnerID: user.ID, - Title: "promote-while-requires-action-mixed", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{ - codersdk.ChatMessageText("Call both tools."), - }, - DynamicTools: dynamicToolsJSON, - }) - require.NoError(t, err) - - var chatBeforePromote database.Chat - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - got, getErr := db.GetChatByID(ctx, chat.ID) - if getErr != nil { - return false - } - chatBeforePromote = got - return got.Status == database.ChatStatusRequiresAction || - got.Status == database.ChatStatusError - }, testutil.IntervalFast) - require.Equal(t, database.ChatStatusRequiresAction, chatBeforePromote.Status, - "expected requires_action, got %s (last_error=%q)", - chatBeforePromote.Status, chatLastErrorMessage(chatBeforePromote.LastError)) - - // The built-in tool resolves before requires_action; capture - // its row ID to assert the dynamic synthetic comes after. 
- var ( - dynamicToolCallID string - builtinToolResultID int64 - builtinToolResultSeen bool - ) - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - messages, dbErr := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - if dbErr != nil { - return false - } - for _, msg := range messages { - parts, parseErr := chatprompt.ParseContent(msg) - if parseErr != nil { - continue - } - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeToolResult && part.ToolName == "read_file" { - builtinToolResultID = msg.ID - builtinToolResultSeen = true - } - if part.Type == codersdk.ChatMessagePartTypeToolCall && part.ToolName == "my_dynamic_tool" { - dynamicToolCallID = part.ToolCallID - } - } - } - return builtinToolResultSeen && dynamicToolCallID != "" - }, testutil.IntervalFast) - require.NotEmpty(t, dynamicToolCallID) - require.NotZero(t, builtinToolResultID) - - queuedResult, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("promote me")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - require.NotNil(t, queuedResult.QueuedMessage) - - _, events, subCancel, ok := server.Subscribe(ctx, chat.ID, nil, 0) - require.True(t, ok) - defer subCancel() - promoteResult, err := server.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedResult.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.NotZero(t, promoteResult.PromotedMessage.ID, - "requires_action promotion is synchronous and returns the inserted message") - - // Only the dynamic tool's synth row publishes; the built-in's - // pre-existing result is not republished. 
- var ( - syntheticPublishCount int - userPublished bool - ) - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case ev := <-events: - if ev.Type != codersdk.ChatStreamEventTypeMessage || ev.Message == nil { - t.Logf("subscriber consumed non-message event type=%s", ev.Type) - return false - } - t.Logf("subscriber consumed message id=%d role=%s match_promoted=%t", ev.Message.ID, ev.Message.Role, ev.Message.ID == promoteResult.PromotedMessage.ID) - switch ev.Message.Role { - case codersdk.ChatMessageRoleTool: - syntheticPublishCount++ - case codersdk.ChatMessageRoleUser: - if ev.Message.ID == promoteResult.PromotedMessage.ID { - userPublished = true - } - } - return userPublished - default: - return false - } - }, testutil.IntervalFast) - require.Equal(t, 1, syntheticPublishCount, - "only the dynamic tool's synthetic result must be published; the built-in's pre-existing result must not be republished") - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - - var ( - dynamicSyntheticCount int - builtinResultsForReadFile int - ) - for _, msg := range messages { - parts, parseErr := chatprompt.ParseContent(msg) - require.NoError(t, parseErr) - for _, part := range parts { - if part.Type != codersdk.ChatMessagePartTypeToolResult { - continue - } - switch part.ToolName { - case "read_file": - builtinResultsForReadFile++ - case "my_dynamic_tool": - if part.IsError && part.ToolCallID == dynamicToolCallID && msg.ID > builtinToolResultID { - dynamicSyntheticCount++ - } - } - } - } - require.Equal(t, 1, dynamicSyntheticCount, - "expected exactly one synthetic error tool result for the dynamic tool call") - require.Equal(t, 1, builtinResultsForReadFile, - "built-in tool result should not be duplicated by promotion") - - require.Greater(t, promoteResult.PromotedMessage.ID, builtinToolResultID) -} - -func TestSubmitToolResultsRejectsArchivedChat(t 
*testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - replica := newTestServer(t, db, ps, uuid.New()) - - ctx := testutil.Context(t, testutil.WaitLong) - user, org, model := seedChatDependencies(t, db) - - chat, err := replica.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "submit-tool-archived", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - err = replica.ArchiveChat(ctx, chat) - require.NoError(t, err) - - // Set requires_action so the test exercises a realistic - // scenario where SubmitToolResults would be called. - _, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRequiresAction, - }) - require.NoError(t, err) - - err = replica.SubmitToolResults(ctx, chatd.SubmitToolResultsOptions{ - ChatID: chat.ID, - UserID: user.ID, - ModelConfigID: model.ID, - Results: []codersdk.ToolResult{{ - ToolCallID: "fake-tool-call-id", - Output: json.RawMessage(`{"result":"ignored"}`), - }}, - }) - require.ErrorIs(t, err, chatd.ErrChatArchived) -} - func TestAcquireChatsSkipsArchivedPendingChat(t *testing.T) { t.Parallel() @@ -10233,328 +8813,7 @@ func seedAdvisorConfig( require.NoError(t, err) } -// TestPromoteQueuedWhileRunning guards against the data-loss -// failure mode: promoting on a streaming chat must preserve -// partial assistant output by deferring the user-message insert -// to the worker's auto-promote. 
-func TestPromoteQueuedWhileRunning(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - streamStarted := make(chan struct{}) - streamCanceled := make(chan struct{}) - var streamCallCount atomic.Int32 - openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { - if !req.Stream { - return chattest.OpenAINonStreamingResponse("running-promote") - } - if streamCallCount.Add(1) > 1 { - // Subsequent calls are the resumed run; let it settle. - return chattest.OpenAIStreamingResponse( - chattest.OpenAITextChunks("resumed after promotion")..., - ) - } - chunks := make(chan chattest.OpenAIChunk, 1) - go func() { - defer close(chunks) - chunks <- chattest.OpenAITextChunks("partial-running-output")[0] - select { - case <-streamStarted: - default: - close(streamStarted) - } - <-req.Context().Done() - select { - case <-streamCanceled: - default: - close(streamCanceled) - } - }() - return chattest.OpenAIResponse{StreamingChunks: chunks} - }) - - server := newActiveTestServer(t, db, ps) - user, org, model := seedChatDependencies(t, db) - setOpenAIProviderBaseURL(ctx, t, db, openAIURL) - - chat, err := server.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "promote-while-running", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - fromDB, dbErr := db.GetChatByID(ctx, chat.ID) - if dbErr != nil { - return false - } - return fromDB.Status == database.ChatStatusRunning && fromDB.WorkerID.Valid - }, testutil.IntervalFast) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case <-streamStarted: - return true - default: - return false - } - }, testutil.IntervalFast) - - queuedResult, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - 
Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("promote me")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queuedResult.Queued) - require.NotNil(t, queuedResult.QueuedMessage) - - promoteResult, err := server.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queuedResult.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - // Deferred promotion: no synchronous user message. - require.Zero(t, promoteResult.PromotedMessage.ID) - - // Worker observes waiting and cancels. - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case <-streamCanceled: - return true - default: - return false - } - }, testutil.IntervalFast) - - // Partial assistant output is preserved (not lost as it was - // pre-fix) and precedes the promoted user message. Poll on the - // messages themselves: the status passes through Waiting - // transiently before finishActiveChat's external-Waiting case - // promotes the queued message and flips the chat to Pending. - // Both messages being persisted implies cleanup completed. 
- var ( - partialAssistantID int64 - promotedUserID int64 - ) - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - if err != nil { - return false - } - var ( - assistantID int64 - userID int64 - ) - for _, msg := range messages { - switch msg.Role { - case database.ChatMessageRoleAssistant: - parts, parseErr := chatprompt.ParseContent(msg) - if parseErr != nil { - continue - } - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeText && strings.Contains(part.Text, "partial-running-output") { - assistantID = msg.ID - } - } - case database.ChatMessageRoleUser: - parts, parseErr := chatprompt.ParseContent(msg) - if parseErr != nil { - continue - } - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeText && strings.Contains(part.Text, "promote me") { - userID = msg.ID - } - } - } - } - if assistantID == 0 || userID == 0 { - return false - } - partialAssistantID = assistantID - promotedUserID = userID - return true - }, testutil.IntervalFast) - require.Less(t, partialAssistantID, promotedUserID, - "promoted user message must follow the persisted partial output") -} - // TestPromoteQueuedWhileRunningRespectsMessageOrder guards -// against losing or reshuffling sibling queued messages when one -// is promoted out-of-order. 
-func TestPromoteQueuedWhileRunningRespectsMessageOrder(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - streamStarted := make(chan struct{}) - var streamCallCount atomic.Int32 - openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { - if !req.Stream { - return chattest.OpenAINonStreamingResponse("running-promote-order") - } - if streamCallCount.Add(1) > 1 { - return chattest.OpenAIStreamingResponse( - chattest.OpenAITextChunks("resumed")..., - ) - } - chunks := make(chan chattest.OpenAIChunk, 1) - go func() { - defer close(chunks) - chunks <- chattest.OpenAITextChunks("partial")[0] - select { - case <-streamStarted: - default: - close(streamStarted) - } - <-req.Context().Done() - }() - return chattest.OpenAIResponse{StreamingChunks: chunks} - }) - - server := newActiveTestServer(t, db, ps) - user, org, model := seedChatDependencies(t, db) - setOpenAIProviderBaseURL(ctx, t, db, openAIURL) - - chat, err := server.CreateChat(ctx, chatd.CreateOptions{ - OwnerID: user.ID, - OrganizationID: org.ID, - Title: "promote-while-running-order", - ModelConfigID: model.ID, - InitialUserContent: []codersdk.ChatMessagePart{codersdk.ChatMessageText("hello")}, - }) - require.NoError(t, err) - - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - fromDB, dbErr := db.GetChatByID(ctx, chat.ID) - if dbErr != nil { - return false - } - return fromDB.Status == database.ChatStatusRunning && fromDB.WorkerID.Valid - }, testutil.IntervalFast) - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - select { - case <-streamStarted: - return true - default: - return false - } - }, testutil.IntervalFast) - - queueA, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("A")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.NotNil(t, 
queueA.QueuedMessage) - queueB, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("B")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.NotNil(t, queueB.QueuedMessage) - queueC, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("C")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.NotNil(t, queueC.QueuedMessage) - - promoteResult, err := server.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queueB.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.Zero(t, promoteResult.PromotedMessage.ID, - "running-case promotion is deferred to auto-promote") - - // PromoteQueued reorders to [B, A, C]. IDs are stable because - // only created_at is mutated. - queuedAfterPromote, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - require.Len(t, queuedAfterPromote, 3) - require.Equal(t, queueB.QueuedMessage.ID, queuedAfterPromote[0].ID, - "promoted message must be first in the queue") - require.Equal(t, queueA.QueuedMessage.ID, queuedAfterPromote[1].ID, - "non-promoted messages preserve their relative order") - require.Equal(t, queueC.QueuedMessage.ID, queuedAfterPromote[2].ID, - "non-promoted messages preserve their relative order") - - // Poll for B in history rather than asserting the queue - // state, which races the worker's auto-promote pipeline. 
- testutil.Eventually(ctx, t, func(ctx context.Context) bool { - messages, getErr := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - if getErr != nil { - return false - } - for _, msg := range messages { - if msg.Role != database.ChatMessageRoleUser { - continue - } - parts, parseErr := chatprompt.ParseContent(msg) - if parseErr != nil { - return false - } - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeText && part.Text == "B" { - return true - } - } - } - return false - }, testutil.IntervalFast, - "the promoted message B must appear in chat history") - - // A and C must end up in queue or history, not dropped. - remainingIDs := map[int64]bool{} - remainingQueue, err := db.GetChatQueuedMessages(ctx, chat.ID) - require.NoError(t, err) - for _, qm := range remainingQueue { - remainingIDs[qm.ID] = true - } - messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ - ChatID: chat.ID, - AfterID: 0, - }) - require.NoError(t, err) - promotedTexts := map[string]bool{} - for _, msg := range messages { - if msg.Role != database.ChatMessageRoleUser { - continue - } - parts, parseErr := chatprompt.ParseContent(msg) - require.NoError(t, parseErr) - for _, part := range parts { - if part.Type == codersdk.ChatMessagePartTypeText { - promotedTexts[part.Text] = true - } - } - } - require.True(t, remainingIDs[queueA.QueuedMessage.ID] || promotedTexts["A"], - "message A must not be lost") - require.True(t, remainingIDs[queueC.QueuedMessage.ID] || promotedTexts["C"], - "message C must not be lost") -} - // TestFinishActiveChatExternalWaitingInsertsSyntheticResults // asserts the cleanup TX inserts synthetic tool-result rows when // PromoteQueued's deferred path set Status=Waiting while the @@ -10703,79 +8962,6 @@ func TestFinishActiveChatExternalWaitingInsertsSyntheticResults(t *testing.T) { } // TestPromoteQueuedFallsThroughOnStaleHeartbeat asserts a stale -// 
heartbeat takes the synchronous path so the chat does not strand -// in Waiting waiting on a worker that will not return. -func TestPromoteQueuedFallsThroughOnStaleHeartbeat(t *testing.T) { - t.Parallel() - - db, ps := dbtestutil.NewDB(t) - ctx := testutil.Context(t, testutil.WaitLong) - - staleAfter := 100 * time.Millisecond - logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) - server := chatd.New(chatd.Config{ - Logger: logger, - Database: db, - ReplicaID: uuid.New(), - Pubsub: ps, - PendingChatAcquireInterval: testutil.WaitLong, - InFlightChatStaleAfter: staleAfter, - }) - t.Cleanup(func() { require.NoError(t, server.Close()) }) - - user, org, model := seedChatDependencies(t, db) - - chat, err := db.InsertChat(ctx, database.InsertChatParams{ - OrganizationID: org.ID, - Status: database.ChatStatusWaiting, - ClientType: database.ChatClientTypeUi, - OwnerID: user.ID, - Title: "stale-heartbeat-promote-fallthrough", - LastModelConfigID: model.ID, - }) - require.NoError(t, err) - - // Place the chat in Running with a stale heartbeat. We do not - // start the server's run loop, so no worker will ever pick this - // chat up; the test isolates the fall-through decision in - // PromoteQueued. 
- deadWorker := uuid.New() - staleTime := time.Now().Add(-2 * staleAfter) - _, err = db.UpdateChatStatus(ctx, database.UpdateChatStatusParams{ - ID: chat.ID, - Status: database.ChatStatusRunning, - WorkerID: uuid.NullUUID{UUID: deadWorker, Valid: true}, - StartedAt: sql.NullTime{Time: staleTime, Valid: true}, - HeartbeatAt: sql.NullTime{Time: staleTime, Valid: true}, - }) - require.NoError(t, err) - - queued, err := server.SendMessage(ctx, chatd.SendMessageOptions{ - ChatID: chat.ID, - Content: []codersdk.ChatMessagePart{codersdk.ChatMessageText("promote me")}, - BusyBehavior: chatd.SendMessageBusyBehaviorQueue, - }) - require.NoError(t, err) - require.True(t, queued.Queued) - require.NotNil(t, queued.QueuedMessage) - - result, err := server.PromoteQueued(ctx, chatd.PromoteQueuedOptions{ - ChatID: chat.ID, - QueuedMessageID: queued.QueuedMessage.ID, - CreatedBy: user.ID, - }) - require.NoError(t, err) - require.NotZero(t, result.PromotedMessage.ID, - "stale heartbeat must take the synchronous path and insert a user message inline") - - got, err := db.GetChatByID(ctx, chat.ID) - require.NoError(t, err) - require.Equal(t, database.ChatStatusPending, got.Status, - "synchronous promote ends Pending") - require.False(t, got.WorkerID.Valid, - "worker_id is cleared by the synchronous promote") -} - // TestRecoverStaleChatsRecoversWaitingWithQueue asserts a Waiting // chat with a non-empty queue and stale updated_at gets recovered // to Pending, closing the post-promote-stranding hole. 
diff --git a/coderd/x/chatd/chatstate_bridge.go b/coderd/x/chatd/chatstate_bridge.go new file mode 100644 index 0000000000..8a0af79190 --- /dev/null +++ b/coderd/x/chatd/chatstate_bridge.go @@ -0,0 +1,52 @@ +package chatd + +import ( + "github.com/google/uuid" + "github.com/sqlc-dev/pqtype" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/x/chatd/chatprompt" + "github.com/coder/coder/v2/coderd/x/chatd/chatstate" +) + +// newChatMachine constructs a chat-scoped state machine handle bound to +// the server's database and pubsub. +func (p *Server) newChatMachine(chatID uuid.UUID) *chatstate.ChatMachine { + return chatstate.NewChatMachine(p.db, p.pubsub, chatID, chatstate.Options{}) +} + +// systemMessage builds a chatstate.Message representing a system +// prompt entry for the initial-history slice of CreateChat. +func systemMessage(rawContent pqtype.NullRawMessage, modelConfigID uuid.UUID) chatstate.Message { + return chatstate.Message{ + Role: database.ChatMessageRoleSystem, + Content: rawContent, + Visibility: database.ChatMessageVisibilityModel, + ModelConfigID: uuid.NullUUID{UUID: modelConfigID, Valid: modelConfigID != uuid.Nil}, + ContentVersion: chatprompt.CurrentContentVersion, + } +} + +// userMessage builds a chatstate.Message representing a user message +// for CreateChat, SendMessage, or EditMessage. +func userMessage(rawContent pqtype.NullRawMessage, modelConfigID, createdBy uuid.UUID) chatstate.Message { + return chatstate.Message{ + Role: database.ChatMessageRoleUser, + Content: rawContent, + Visibility: database.ChatMessageVisibilityBoth, + ModelConfigID: uuid.NullUUID{UUID: modelConfigID, Valid: modelConfigID != uuid.Nil}, + CreatedBy: uuid.NullUUID{UUID: createdBy, Valid: createdBy != uuid.Nil}, + ContentVersion: chatprompt.CurrentContentVersion, + } +} + +// busyBehaviorToChatState converts the public busy-behavior enum used +// by the server API to the chatstate variant. 
+func busyBehaviorToChatState(b SendMessageBusyBehavior) chatstate.BusyBehavior { + switch b { + case SendMessageBusyBehaviorInterrupt: + return chatstate.BusyBehaviorInterrupt + default: + return chatstate.BusyBehaviorQueue + } +} diff --git a/coderd/x/chatd/subagent.go b/coderd/x/chatd/subagent.go index cc3e35f78c..108246fab9 100644 --- a/coderd/x/chatd/subagent.go +++ b/coderd/x/chatd/subagent.go @@ -1278,34 +1278,29 @@ func (p *Server) awaitSubagentCompletion( timer := p.clock.NewTimer(timeout, "chatd", "subagent_await") defer timer.Stop() - // When pubsub is available, subscribe for fast status - // notifications and use a less aggressive fallback poll. - // Without pubsub (single-instance / in-memory) fall back - // to the original 200ms polling. - pollInterval := subagentAwaitPollInterval - var notifyCh <-chan struct{} - if p.pubsub != nil { - pollInterval = subagentAwaitFallbackPoll - ch := make(chan struct{}, 1) - notifyCh = ch - cancel, subErr := p.pubsub.SubscribeWithErr( - coderdpubsub.ChatStreamNotifyChannel(targetChatID), - func(_ context.Context, _ []byte, _ error) { - // Non-blocking send so we never stall the - // pubsub dispatch goroutine. - select { - case ch <- struct{}{}: - default: - } - }, - ) - if subErr == nil { - defer cancel() - } else { - // Subscription failed; fall back to fast polling. - pollInterval = subagentAwaitPollInterval - notifyCh = nil - } + // Subscribe for fast status notifications and use a less + // aggressive fallback poll. If subscription fails, fall back to + // the original 200ms polling. + pollInterval := subagentAwaitFallbackPoll + ch := make(chan struct{}, 1) + notifyCh := (<-chan struct{})(ch) + cancel, subErr := p.pubsub.SubscribeWithErr( + coderdpubsub.ChatStreamNotifyChannel(targetChatID), + func(_ context.Context, _ []byte, _ error) { + // Non-blocking send so we never stall the + // pubsub dispatch goroutine. 
+ select { + case ch <- struct{}{}: + default: + } + }, + ) + if subErr == nil { + defer cancel() + } else { + // Subscription failed; fall back to fast polling. + pollInterval = subagentAwaitPollInterval + notifyCh = nil } ticker := p.clock.NewTicker(pollInterval, "chatd", "subagent_poll") @@ -1369,10 +1364,18 @@ func (p *Server) closeSubagent( return targetChat, nil } - updatedChat := p.InterruptChat(ctx, targetChat) - if updatedChat.Status != database.ChatStatusWaiting { - return database.Chat{}, xerrors.New("set target chat waiting") + updatedChat, err := p.InterruptChat(ctx, targetChat) + if err != nil { + // Idle / archived chats no longer satisfy the + // chatstate.Interrupt precondition. Surface the error + // so the caller can decide whether the parent expected + // the subagent to already be waiting. + return database.Chat{}, xerrors.Errorf("interrupt subagent chat: %w", err) } + // chatstate.Interrupt lands active runs in `interrupting` + // and requires-action chats in `running`. Workers finalize + // the transition; accept either non-active status as long as + // the transition committed. 
return updatedChat, nil } diff --git a/coderd/x/chatd/subagent_internal_test.go b/coderd/x/chatd/subagent_internal_test.go index 55254db1a2..01dca37094 100644 --- a/coderd/x/chatd/subagent_internal_test.go +++ b/coderd/x/chatd/subagent_internal_test.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "encoding/json" + "errors" "sync" "testing" "time" @@ -139,6 +140,18 @@ func newInternalTestServerWithLoggerAndClock( return server } +type subscribeFailingPubsub struct { + pubsub.Pubsub +} + +func (subscribeFailingPubsub) Subscribe(_ string, _ pubsub.Listener) (func(), error) { + return nil, errors.New("subscribe disabled") +} + +func (subscribeFailingPubsub) SubscribeWithErr(_ string, _ pubsub.ListenerWithErr) (func(), error) { + return nil, errors.New("subscribe disabled") +} + type subagentTestLogSink struct { mu sync.Mutex entries []slog.SinkEntry @@ -3177,11 +3190,12 @@ func TestAwaitSubagentCompletion(t *testing.T) { t.Run("CompletesViaPoll", func(t *testing.T) { t.Parallel() - // Use nil pubsub so awaitSubagentCompletion falls back to - // the fast 200ms poll interval. + // Force subscription failure so awaitSubagentCompletion + // falls back to the fast 200ms poll interval. db, _ := dbtestutil.NewDB(t) mClock := quartz.NewMock(t) - server := newInternalTestServerWithClock(t, db, nil, chatprovider.ProviderAPIKeys{}, mClock) + ps := subscribeFailingPubsub{Pubsub: pubsub.NewInMemory()} + server := newInternalTestServerWithClock(t, db, ps, chatprovider.ProviderAPIKeys{}, mClock) ctx := chatdTestContext(t) user, org, model := seedInternalChatDeps(t, db) @@ -3378,21 +3392,6 @@ func TestAwaitSubagentCompletion(t *testing.T) { parent, child := createParentChildChats(ctx, t, server, user, org, model) - // signalWake from CreateChat triggers background - // processing. 
drainInflight waits for in-flight goroutines - // but can't guarantee a pending DB row has been acquired - // yet — the child chat may still be pending if the second - // wake signal hasn't been consumed. Poll until the child - // reaches a terminal DB state so processChat has fully - // finished, then reset to running for the cancellation - // test. - testutil.Eventually(ctx, t, func(ctx context.Context) bool { - c, err := db.GetChatByID(ctx, child.ID) - if err != nil { - return false - } - return c.Status != database.ChatStatusPending && c.Status != database.ChatStatusRunning - }, testutil.IntervalFast) setChatStatus(ctx, t, db, child.ID, database.ChatStatusRunning, "") // Use a short-lived context instead of goroutine + sleep. shortCtx, cancel := context.WithTimeout(ctx, testutil.IntervalMedium) diff --git a/coderd/x/chatd/turn_summary_internal_test.go b/coderd/x/chatd/turn_summary_internal_test.go index be3a595799..c38d57754f 100644 --- a/coderd/x/chatd/turn_summary_internal_test.go +++ b/coderd/x/chatd/turn_summary_internal_test.go @@ -24,7 +24,7 @@ import ( func TestUpdateLastTurnSummaryRejectsStaleWrites(t *testing.T) { t.Parallel() - db, _ := dbtestutil.NewDB(t) + db, ps := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitMedium) owner := dbgen.User(t, db, database.User{}) org := dbgen.Organization(t, db, database.Organization{}) @@ -66,7 +66,7 @@ func TestUpdateLastTurnSummaryRejectsStaleWrites(t *testing.T) { require.NoError(t, err) logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) - server := &Server{db: db} + server := &Server{db: db, pubsub: ps} server.updateLastTurnSummary(ctx, chat, chat.UpdatedAt, "fresh summary", logger) fetched, err := db.GetChatByID(ctx, chat.ID) @@ -92,7 +92,7 @@ func TestUpdateLastTurnSummaryRejectsStaleWrites(t *testing.T) { func TestPendingChatPersistsSummaryButSkipsWebPush(t *testing.T) { t.Parallel() - db, _ := dbtestutil.NewDB(t) + db, ps := dbtestutil.NewDB(t) ctx := testutil.Context(t, 
testutil.WaitMedium) owner := dbgen.User(t, db, database.User{}) org := dbgen.Organization(t, db, database.Organization{}) @@ -150,7 +150,7 @@ func TestPendingChatPersistsSummaryButSkipsWebPush(t *testing.T) { dispatcher := &recordingWebpushDispatcher{} logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) - server := &Server{db: db, webpushDispatcher: dispatcher} + server := &Server{db: db, pubsub: ps, webpushDispatcher: dispatcher} server.maybeFinalizeTurnStatusLabelAndPush( context.WithoutCancel(ctx), chat, diff --git a/codersdk/chats.go b/codersdk/chats.go index 665ace7aa8..075a38b80f 100644 --- a/codersdk/chats.go +++ b/codersdk/chats.go @@ -93,6 +93,7 @@ const ( ChatStatusCompleted ChatStatus = "completed" ChatStatusError ChatStatus = "error" ChatStatusRequiresAction ChatStatus = "requires_action" + ChatStatusInterrupting ChatStatus = "interrupting" ) // ChatClientType indicates whether a chat was created from the @@ -3222,6 +3223,22 @@ func (c *ExperimentalClient) InterruptChat(ctx context.Context, chatID uuid.UUID return chat, json.NewDecoder(res.Body).Decode(&chat) } +// ReconcileInvalidChatState recovers a chat stuck in an invalid +// execution state, moving it into an error state from which the caller +// can send a new message or edit history to continue. +func (c *ExperimentalClient) ReconcileInvalidChatState(ctx context.Context, chatID uuid.UUID) (Chat, error) { + res, err := c.Request(ctx, http.MethodPost, fmt.Sprintf("/api/experimental/chats/%s/reconcile-invalid", chatID), nil) + if err != nil { + return Chat{}, err + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return Chat{}, ReadBodyAsError(res) + } + var chat Chat + return chat, json.NewDecoder(res.Body).Decode(&chat) +} + // RegenerateChatTitle requests the server to regenerate the chat's // title using richer conversation context. 
func (c *ExperimentalClient) RegenerateChatTitle(ctx context.Context, chatID uuid.UUID) (Chat, error) { diff --git a/docs/reference/api/chats.md b/docs/reference/api/chats.md index 909cf2b24f..9b7771e866 100644 --- a/docs/reference/api/chats.md +++ b/docs/reference/api/chats.md @@ -2270,6 +2270,317 @@ message in the chat. To perform this operation, you must be authenticated. [Learn more](authentication.md). +## Reconcile invalid chat state + +### Code samples + +```shell +# Example request using curl +curl -X POST http://coder-server:8080/api/experimental/chats/{chat}/reconcile-invalid \ + -H 'Accept: application/json' \ + -H 'Coder-Session-Token: API_KEY' +``` + +`POST /api/experimental/chats/{chat}/reconcile-invalid` + +Experimental: this endpoint is subject to change. + +### Parameters + +| Name | In | Type | Required | Description | +|--------|------|--------------|----------|-------------| +| `chat` | path | string(uuid) | true | Chat ID | + +### Example responses + +> 200 Response + +```json +{ + "agent_id": "2b1e3b65-2c04-4fa2-a2d7-467901e98978", + "archived": true, + "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", + "children": [ + { + "agent_id": "2b1e3b65-2c04-4fa2-a2d7-467901e98978", + "archived": true, + "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", + "children": [], + "client_type": "ui", + "created_at": "2019-08-24T14:15:22Z", + "diff_status": { + "additions": 0, + "approved": true, + "author_avatar_url": "string", + "author_login": "string", + "base_branch": "string", + "changed_files": 0, + "changes_requested": true, + "chat_id": "efc9fe20-a1e5-4a8c-9c48-f1b30c1e4f86", + "commits": 0, + "deletions": 0, + "head_branch": "string", + "pr_number": 0, + "pull_request_draft": true, + "pull_request_state": "string", + "pull_request_title": "string", + "refreshed_at": "2019-08-24T14:15:22Z", + "reviewer_count": 0, + "stale_at": "2019-08-24T14:15:22Z", + "url": "string" + }, + "files": [ + { + "created_at": "2019-08-24T14:15:22Z", + "id": 
"497f6eca-6276-4993-bfeb-53cbbbba6f08", + "mime_type": "string", + "name": "string", + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05" + } + ], + "has_unread": true, + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "labels": { + "property1": "string", + "property2": "string" + }, + "last_error": { + "detail": "string", + "kind": "generic", + "message": "string", + "provider": "string", + "retryable": true, + "status_code": 0 + }, + "last_injected_context": [ + { + "args": [ + 0 + ], + "args_delta": "string", + "completed_at": "2019-08-24T14:15:22Z", + "content": "string", + "context_file_agent_id": { + "uuid": "string", + "valid": true + }, + "context_file_content": "string", + "context_file_directory": "string", + "context_file_os": "string", + "context_file_path": "string", + "context_file_skill_meta_file": "string", + "context_file_truncated": true, + "created_at": "2019-08-24T14:15:22Z", + "data": [ + 0 + ], + "end_line": 0, + "file_id": { + "uuid": "string", + "valid": true + }, + "file_name": "string", + "is_error": true, + "is_media": true, + "mcp_server_config_id": { + "uuid": "string", + "valid": true + }, + "media_type": "string", + "name": "string", + "parsed_commands": [ + [ + "string" + ] + ], + "provider_executed": true, + "provider_metadata": [ + 0 + ], + "result": [ + 0 + ], + "result_delta": "string", + "result_reset": true, + "signature": "string", + "skill_description": "string", + "skill_dir": "string", + "skill_name": "string", + "source_id": "string", + "start_line": 0, + "text": "string", + "title": "string", + "tool_call_id": "string", + "tool_name": "string", + "type": "text", + "url": "string" + } + ], + "last_model_config_id": "30ebb95f-c255-4759-9429-89aa4ec1554c", + "last_turn_summary": "string", + "mcp_server_ids": [ + "497f6eca-6276-4993-bfeb-53cbbbba6f08" + ], + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": 
"8826ee2e-7933-4665-aef2-2393f84a0d05", + "owner_name": "string", + "owner_username": "string", + "parent_chat_id": "c3609ee6-3b11-4a93-b9ae-e4fabcc99359", + "pin_order": 0, + "plan_mode": "plan", + "root_chat_id": "2898031c-fdce-4e3e-8c53-4481dd42fcd7", + "status": "waiting", + "title": "string", + "updated_at": "2019-08-24T14:15:22Z", + "warnings": [ + "string" + ], + "workspace_id": "0967198e-ec7b-4c6b-b4d3-f71244cadbe9" + } + ], + "client_type": "ui", + "created_at": "2019-08-24T14:15:22Z", + "diff_status": { + "additions": 0, + "approved": true, + "author_avatar_url": "string", + "author_login": "string", + "base_branch": "string", + "changed_files": 0, + "changes_requested": true, + "chat_id": "efc9fe20-a1e5-4a8c-9c48-f1b30c1e4f86", + "commits": 0, + "deletions": 0, + "head_branch": "string", + "pr_number": 0, + "pull_request_draft": true, + "pull_request_state": "string", + "pull_request_title": "string", + "refreshed_at": "2019-08-24T14:15:22Z", + "reviewer_count": 0, + "stale_at": "2019-08-24T14:15:22Z", + "url": "string" + }, + "files": [ + { + "created_at": "2019-08-24T14:15:22Z", + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "mime_type": "string", + "name": "string", + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05" + } + ], + "has_unread": true, + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "labels": { + "property1": "string", + "property2": "string" + }, + "last_error": { + "detail": "string", + "kind": "generic", + "message": "string", + "provider": "string", + "retryable": true, + "status_code": 0 + }, + "last_injected_context": [ + { + "args": [ + 0 + ], + "args_delta": "string", + "completed_at": "2019-08-24T14:15:22Z", + "content": "string", + "context_file_agent_id": { + "uuid": "string", + "valid": true + }, + "context_file_content": "string", + "context_file_directory": "string", + "context_file_os": "string", + "context_file_path": "string", + 
"context_file_skill_meta_file": "string", + "context_file_truncated": true, + "created_at": "2019-08-24T14:15:22Z", + "data": [ + 0 + ], + "end_line": 0, + "file_id": { + "uuid": "string", + "valid": true + }, + "file_name": "string", + "is_error": true, + "is_media": true, + "mcp_server_config_id": { + "uuid": "string", + "valid": true + }, + "media_type": "string", + "name": "string", + "parsed_commands": [ + [ + "string" + ] + ], + "provider_executed": true, + "provider_metadata": [ + 0 + ], + "result": [ + 0 + ], + "result_delta": "string", + "result_reset": true, + "signature": "string", + "skill_description": "string", + "skill_dir": "string", + "skill_name": "string", + "source_id": "string", + "start_line": 0, + "text": "string", + "title": "string", + "tool_call_id": "string", + "tool_name": "string", + "type": "text", + "url": "string" + } + ], + "last_model_config_id": "30ebb95f-c255-4759-9429-89aa4ec1554c", + "last_turn_summary": "string", + "mcp_server_ids": [ + "497f6eca-6276-4993-bfeb-53cbbbba6f08" + ], + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05", + "owner_name": "string", + "owner_username": "string", + "parent_chat_id": "c3609ee6-3b11-4a93-b9ae-e4fabcc99359", + "pin_order": 0, + "plan_mode": "plan", + "root_chat_id": "2898031c-fdce-4e3e-8c53-4481dd42fcd7", + "status": "waiting", + "title": "string", + "updated_at": "2019-08-24T14:15:22Z", + "warnings": [ + "string" + ], + "workspace_id": "0967198e-ec7b-4c6b-b4d3-f71244cadbe9" +} +``` + +### Responses + +| Status | Meaning | Description | Schema | +|--------|---------------------------------------------------------|-------------|------------------------------------------| +| 200 | [OK](https://tools.ietf.org/html/rfc7231#section-6.3.1) | OK | [codersdk.Chat](schemas.md#codersdkchat) | + +To perform this operation, you must be authenticated. [Learn more](authentication.md). + ## Stream chat events via WebSockets ### Code samples