mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
2bdacae5f5
## Summary

Adds automatic retry with exponential backoff for transient LLM errors during chat streaming and title generation. Inspired by [coder/mux](https://github.com/coder/mux)'s retry mechanism.

## Key Behaviors

- **Infinite retries** with exponential backoff: 1s → 2s → 4s → ... → 60s cap
- **Deterministic delays** (no jitter)
- **Error classification**: retryable (429, 5xx, overloaded, rate limit, network errors) vs non-retryable (auth, quota, context exceeded, model not found, canceled)
- **Retry status published to SSE stream** so frontend can show "Retrying in Xs..." UI
- **Title generation** retries silently (best-effort, nil onRetry callback)

## New Package: `coderd/chatd/chatretry/`

| File | Purpose |
|------|---------|
| `classify.go` | `IsRetryable(err)` and `StatusCodeRetryable(code)` |
| `backoff.go` | `Delay(attempt)` — exponential doubling with 60s cap |
| `retry.go` | `Retry(ctx, fn, onRetry)` — infinite loop with context-aware timer |

## Test Helpers: `coderd/chatd/chattest/errors.go`

Anthropic and OpenAI error response builders for use in chattest providers:

- `AnthropicErrorResponse()`, `AnthropicOverloadedResponse()`, `AnthropicRateLimitResponse()`
- `OpenAIErrorResponse()`, `OpenAIRateLimitResponse()`, `OpenAIServerErrorResponse()`

## SDK Changes: `codersdk/chats.go`

- New `ChatStreamEventType: "retry"`
- New `ChatStreamRetry` struct with `Attempt`, `DelayMs`, `Error`, `RetryingAt` fields
- TypeScript types auto-generated

## Changed Files

- `coderd/chatd/chatloop/chatloop.go` — wraps `agent.Stream()` in `chatretry.Retry()`
- `coderd/chatd/chatd.go` — publishes retry events to SSE stream with logging
- `coderd/chatd/title.go` — wraps `model.Generate()` in silent retry
- `coderd/chatd/chattest/anthropic.go` / `openai.go` — error injection support

## Tests

42 tests covering classification (33), backoff (9), and retry scenarios (8).
225 lines
5.3 KiB
Go
225 lines
5.3 KiB
Go
package chatd
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/coderd/chatd/chatprompt"
|
|
"github.com/coder/coder/v2/coderd/chatd/chatretry"
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
coderdpubsub "github.com/coder/coder/v2/coderd/pubsub"
|
|
)
|
|
|
|
// titleGenerationPrompt is the system prompt sent to the model when
// asking it to produce a title from the user's first message. The
// limits stated here are instructions to the model only; the output is
// separately truncated to 80 runes by normalizeTitleOutput.
const titleGenerationPrompt = "Generate a concise title (max 8 words, under 128 characters) " +
	"for the user's first message. " +
	"Return plain text only — no quotes, no emoji, no markdown, no special characters."
|
|
|
|
// maybeGenerateChatTitle generates an AI title for the chat when
|
|
// appropriate (first user message, no assistant reply yet, and the
|
|
// current title is either empty or still the fallback truncation).
|
|
// It is a best-effort operation that logs and swallows errors.
|
|
func (p *Server) maybeGenerateChatTitle(
|
|
ctx context.Context,
|
|
chat database.Chat,
|
|
messages []database.ChatMessage,
|
|
model fantasy.LanguageModel,
|
|
logger slog.Logger,
|
|
) {
|
|
input, ok := titleInput(chat, messages)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
titleCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
title, err := generateTitle(titleCtx, model, input)
|
|
if err != nil {
|
|
logger.Debug(ctx, "failed to generate chat title",
|
|
slog.F("chat_id", chat.ID),
|
|
slog.Error(err),
|
|
)
|
|
return
|
|
}
|
|
if title == "" || title == chat.Title {
|
|
return
|
|
}
|
|
|
|
_, err = p.db.UpdateChatByID(ctx, database.UpdateChatByIDParams{
|
|
ID: chat.ID,
|
|
Title: title,
|
|
})
|
|
if err != nil {
|
|
logger.Warn(ctx, "failed to update generated chat title",
|
|
slog.F("chat_id", chat.ID),
|
|
slog.Error(err),
|
|
)
|
|
return
|
|
}
|
|
chat.Title = title
|
|
p.publishChatPubsubEvent(chat, coderdpubsub.ChatEventKindTitleChange)
|
|
}
|
|
|
|
// generateTitle calls the model with a title-generation system prompt
|
|
// and returns the normalized result. It retries transient LLM errors
|
|
// (rate limits, overloaded, etc.) with exponential backoff.
|
|
func generateTitle(
|
|
ctx context.Context,
|
|
model fantasy.LanguageModel,
|
|
input string,
|
|
) (string, error) {
|
|
prompt := []fantasy.Message{
|
|
{
|
|
Role: fantasy.MessageRoleSystem,
|
|
Content: []fantasy.MessagePart{
|
|
fantasy.TextPart{Text: titleGenerationPrompt},
|
|
},
|
|
},
|
|
{
|
|
Role: fantasy.MessageRoleUser,
|
|
Content: []fantasy.MessagePart{
|
|
fantasy.TextPart{Text: input},
|
|
},
|
|
},
|
|
}
|
|
toolChoice := fantasy.ToolChoiceNone
|
|
|
|
var response *fantasy.Response
|
|
err := chatretry.Retry(ctx, func(retryCtx context.Context) error {
|
|
var genErr error
|
|
response, genErr = model.Generate(retryCtx, fantasy.Call{
|
|
Prompt: prompt,
|
|
ToolChoice: &toolChoice,
|
|
})
|
|
return genErr
|
|
}, nil)
|
|
if err != nil {
|
|
return "", xerrors.Errorf("generate title text: %w", err)
|
|
}
|
|
|
|
title := normalizeTitleOutput(contentBlocksToText(response.Content))
|
|
if title == "" {
|
|
return "", xerrors.New("generated title was empty")
|
|
}
|
|
return title, nil
|
|
}
|
|
|
|
// titleInput returns the first user message text and whether title
|
|
// generation should proceed. It returns false when the chat already
|
|
// has assistant/tool replies, has more than one visible user message,
|
|
// or the current title doesn't look like a candidate for replacement.
|
|
func titleInput(
|
|
chat database.Chat,
|
|
messages []database.ChatMessage,
|
|
) (string, bool) {
|
|
userCount := 0
|
|
firstUserText := ""
|
|
|
|
for _, message := range messages {
|
|
if message.Visibility == database.ChatMessageVisibilityModel {
|
|
continue
|
|
}
|
|
|
|
switch message.Role {
|
|
case string(fantasy.MessageRoleAssistant), string(fantasy.MessageRoleTool):
|
|
return "", false
|
|
case string(fantasy.MessageRoleUser):
|
|
userCount++
|
|
if firstUserText == "" {
|
|
parsed, err := chatprompt.ParseContent(
|
|
string(fantasy.MessageRoleUser), message.Content,
|
|
)
|
|
if err != nil {
|
|
return "", false
|
|
}
|
|
firstUserText = strings.TrimSpace(
|
|
contentBlocksToText(parsed),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
if userCount != 1 || firstUserText == "" {
|
|
return "", false
|
|
}
|
|
|
|
currentTitle := strings.TrimSpace(chat.Title)
|
|
if currentTitle == "" {
|
|
return firstUserText, true
|
|
}
|
|
|
|
if currentTitle != fallbackChatTitle(firstUserText) {
|
|
return "", false
|
|
}
|
|
|
|
return firstUserText, true
|
|
}
|
|
|
|
func normalizeTitleOutput(title string) string {
|
|
title = strings.TrimSpace(title)
|
|
if title == "" {
|
|
return ""
|
|
}
|
|
|
|
title = strings.Trim(title, "\"'`")
|
|
title = strings.Join(strings.Fields(title), " ")
|
|
return truncateRunes(title, 80)
|
|
}
|
|
|
|
func fallbackChatTitle(message string) string {
|
|
const maxWords = 6
|
|
const maxRunes = 80
|
|
|
|
words := strings.Fields(message)
|
|
if len(words) == 0 {
|
|
return "New Chat"
|
|
}
|
|
|
|
truncated := false
|
|
if len(words) > maxWords {
|
|
words = words[:maxWords]
|
|
truncated = true
|
|
}
|
|
|
|
title := strings.Join(words, " ")
|
|
if truncated {
|
|
title += "…"
|
|
}
|
|
|
|
return truncateRunes(title, maxRunes)
|
|
}
|
|
|
|
// contentBlocksToText concatenates the text parts of content blocks
|
|
// into a single space-separated string.
|
|
func contentBlocksToText(content []fantasy.Content) string {
|
|
parts := make([]string, 0, len(content))
|
|
for _, block := range content {
|
|
textBlock, ok := fantasy.AsContentType[fantasy.TextContent](block)
|
|
if !ok {
|
|
continue
|
|
}
|
|
text := strings.TrimSpace(textBlock.Text)
|
|
if text == "" {
|
|
continue
|
|
}
|
|
parts = append(parts, text)
|
|
}
|
|
return strings.Join(parts, " ")
|
|
}
|
|
|
|
// truncateRunes limits value to at most maxLen runes (not bytes). A
// non-positive maxLen yields the empty string. When value already fits
// it is returned unchanged; otherwise the first maxLen runes are
// re-encoded into a new string.
func truncateRunes(value string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	// A string never has more runes than bytes, so this settles the
	// common short case without decoding anything.
	if len(value) <= maxLen {
		return value
	}

	kept := make([]rune, 0, maxLen)
	for _, r := range value {
		if len(kept) == maxLen {
			// At least one more rune exists beyond the limit.
			return string(kept)
		}
		kept = append(kept, r)
	}
	// The loop ended without exceeding the limit.
	return value
}
|