Files
coder/coderd/chatd/title.go
T
Kyle Carberry 2bdacae5f5 feat(chatd): add LLM stream retry with exponential backoff (#22418)
## Summary

Adds automatic retry with exponential backoff for transient LLM errors
during chat streaming and title generation. Inspired by
[coder/mux](https://github.com/coder/mux)'s retry mechanism.

## Key Behaviors

- **Infinite retries** with exponential backoff: 1s → 2s → 4s → ... →
60s cap
- **Deterministic delays** (no jitter)
- **Error classification**: retryable (429, 5xx, overloaded, rate limit,
network errors) vs non-retryable (auth, quota, context exceeded, model
not found, canceled)
- **Retry status published to SSE stream** so frontend can show
"Retrying in Xs..." UI
- **Title generation** retries silently (best-effort, nil onRetry
callback)

## New Package: `coderd/chatd/chatretry/`

| File | Purpose |
|------|---------|
| `classify.go` | `IsRetryable(err)` and `StatusCodeRetryable(code)` |
| `backoff.go` | `Delay(attempt)` — exponential doubling with 60s cap |
| `retry.go` | `Retry(ctx, fn, onRetry)` — infinite loop with
context-aware timer |

## Test Helpers: `coderd/chatd/chattest/errors.go`

Anthropic and OpenAI error response builders for use in chattest
providers:
- `AnthropicErrorResponse()`, `AnthropicOverloadedResponse()`,
`AnthropicRateLimitResponse()`
- `OpenAIErrorResponse()`, `OpenAIRateLimitResponse()`,
`OpenAIServerErrorResponse()`

## SDK Changes: `codersdk/chats.go`

- New `ChatStreamEventType: "retry"`
- New `ChatStreamRetry` struct with `Attempt`, `DelayMs`, `Error`,
`RetryingAt` fields
- TypeScript types auto-generated

## Changed Files

- `coderd/chatd/chatloop/chatloop.go` — wraps `agent.Stream()` in
`chatretry.Retry()`
- `coderd/chatd/chatd.go` — publishes retry events to SSE stream with
logging
- `coderd/chatd/title.go` — wraps `model.Generate()` in silent retry
- `coderd/chatd/chattest/anthropic.go` / `openai.go` — error injection
support

## Tests

42 tests covering classification (33), backoff (9), and retry scenarios
(8).
2026-02-27 18:34:33 -05:00

225 lines
5.3 KiB
Go

package chatd
import (
"context"
"strings"
"time"
"charm.land/fantasy"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/chatd/chatprompt"
"github.com/coder/coder/v2/coderd/chatd/chatretry"
"github.com/coder/coder/v2/coderd/database"
coderdpubsub "github.com/coder/coder/v2/coderd/pubsub"
)
// titleGenerationPrompt is the system prompt generateTitle sends to the
// model, ahead of the user's first message, when asking for a chat title.
// NOTE(review): the prompt asks for "under 128 characters", while
// normalizeTitleOutput truncates the result to 80 runes — confirm which
// limit is the intended one.
const titleGenerationPrompt = "Generate a concise title (max 8 words, under 128 characters) for " +
"the user's first message. Return plain text only — no quotes, no emoji, " +
"no markdown, no special characters."
// maybeGenerateChatTitle replaces the chat's title with a model-generated
// one when titleInput reports that the chat is eligible. The model call
// is bounded by a 10 second timeout. This is best-effort: every failure
// is logged and swallowed, and nothing is returned to the caller. When
// the title is successfully persisted, a title-change pubsub event is
// published for the chat.
func (p *Server) maybeGenerateChatTitle(
	ctx context.Context,
	chat database.Chat,
	messages []database.ChatMessage,
	model fantasy.LanguageModel,
	logger slog.Logger,
) {
	firstMessage, eligible := titleInput(chat, messages)
	if !eligible {
		return
	}

	// Only the model call runs under the timeout; logging and the
	// database update below keep using the caller's context.
	genCtx, stop := context.WithTimeout(ctx, 10*time.Second)
	defer stop()

	generated, genErr := generateTitle(genCtx, model, firstMessage)
	switch {
	case genErr != nil:
		logger.Debug(ctx, "failed to generate chat title",
			slog.F("chat_id", chat.ID),
			slog.Error(genErr),
		)
		return
	case generated == "" || generated == chat.Title:
		// Nothing new to persist.
		return
	}

	params := database.UpdateChatByIDParams{
		ID:    chat.ID,
		Title: generated,
	}
	if _, updateErr := p.db.UpdateChatByID(ctx, params); updateErr != nil {
		logger.Warn(ctx, "failed to update generated chat title",
			slog.F("chat_id", chat.ID),
			slog.Error(updateErr),
		)
		return
	}

	// chat is a local copy; updating it lets the published event carry
	// the new title.
	chat.Title = generated
	p.publishChatPubsubEvent(chat, coderdpubsub.ChatEventKindTitleChange)
}
// generateTitle asks the model for a title describing input. It sends
// titleGenerationPrompt as the system message and input as the user
// message, with tool use disabled. The call is wrapped in
// chatretry.Retry with a nil retry callback, so transient LLM errors
// (rate limits, overloaded, etc.) are retried with exponential backoff
// until ctx is done. The response text is passed through
// normalizeTitleOutput; an empty result is reported as an error.
func generateTitle(
	ctx context.Context,
	model fantasy.LanguageModel,
	input string,
) (string, error) {
	systemMessage := fantasy.Message{
		Role: fantasy.MessageRoleSystem,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: titleGenerationPrompt},
		},
	}
	userMessage := fantasy.Message{
		Role: fantasy.MessageRoleUser,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: input},
		},
	}
	conversation := []fantasy.Message{systemMessage, userMessage}
	noTools := fantasy.ToolChoiceNone

	// result is written by every attempt; only the value from the final
	// (successful) attempt is read below.
	var result *fantasy.Response
	attempt := func(attemptCtx context.Context) error {
		generated, callErr := model.Generate(attemptCtx, fantasy.Call{
			Prompt:     conversation,
			ToolChoice: &noTools,
		})
		result = generated
		return callErr
	}
	if retryErr := chatretry.Retry(ctx, attempt, nil); retryErr != nil {
		return "", xerrors.Errorf("generate title text: %w", retryErr)
	}

	if cleaned := normalizeTitleOutput(contentBlocksToText(result.Content)); cleaned != "" {
		return cleaned, nil
	}
	return "", xerrors.New("generated title was empty")
}
// titleInput decides whether a chat is eligible for title generation
// and, if so, returns the trimmed text of its first user message.
//
// Messages whose visibility is ChatMessageVisibilityModel are ignored.
// Among the remaining messages the chat is rejected when:
//   - any assistant or tool message is present,
//   - the first user message cannot be parsed,
//   - there is not exactly one user message, or its text is empty,
//   - the current title is non-empty and differs from the fallback
//     title derived from that message.
func titleInput(
	chat database.Chat,
	messages []database.ChatMessage,
) (string, bool) {
	var (
		visibleUserMessages int
		seedText            string
	)
	for _, msg := range messages {
		if msg.Visibility == database.ChatMessageVisibilityModel {
			continue
		}

		role := msg.Role
		if role == string(fantasy.MessageRoleAssistant) || role == string(fantasy.MessageRoleTool) {
			// The conversation has already progressed past the first turn.
			return "", false
		}
		if role != string(fantasy.MessageRoleUser) {
			continue
		}

		visibleUserMessages++
		if seedText != "" {
			continue
		}
		blocks, err := chatprompt.ParseContent(
			string(fantasy.MessageRoleUser), msg.Content,
		)
		if err != nil {
			return "", false
		}
		seedText = strings.TrimSpace(contentBlocksToText(blocks))
	}

	if seedText == "" || visibleUserMessages != 1 {
		return "", false
	}

	// A non-empty title is only replaced while it is still the fallback
	// derived from the first message.
	if existing := strings.TrimSpace(chat.Title); existing != "" && existing != fallbackChatTitle(seedText) {
		return "", false
	}
	return seedText, true
}
// normalizeTitleOutput cleans up raw model output for use as a chat
// title. It trims surrounding whitespace, strips leading and trailing
// quote characters (", ' and `), collapses every run of whitespace into
// a single space, and limits the result to 80 runes. It returns "" when
// nothing usable remains.
func normalizeTitleOutput(title string) string {
	title = strings.TrimSpace(title)
	if title == "" {
		return ""
	}
	title = strings.Trim(title, "\"'`")
	title = strings.Join(strings.Fields(title), " ")
	// Cutting at the rune limit can land right after a word separator;
	// trim again so the title never ends in a dangling space.
	return strings.TrimSpace(truncateRunes(title, 80))
}
// fallbackChatTitle derives a placeholder title from a message: its
// first six whitespace-separated words joined by single spaces, with
// "…" appended when words were dropped, limited to 80 runes. A message
// with no words yields "New Chat".
//
// The result never carries trailing whitespace. titleInput compares the
// whitespace-trimmed stored title against this value, so a title cut
// right after a word separator would otherwise never match and the chat
// would be skipped for title generation.
func fallbackChatTitle(message string) string {
	const maxWords = 6
	const maxRunes = 80

	words := strings.Fields(message)
	if len(words) == 0 {
		return "New Chat"
	}

	truncated := len(words) > maxWords
	if truncated {
		words = words[:maxWords]
	}
	title := strings.Join(words, " ")
	if truncated {
		title += "…"
	}
	// The rune cut can land immediately after a separating space.
	return strings.TrimSpace(truncateRunes(title, maxRunes))
}
// contentBlocksToText extracts the text content blocks from content,
// trims each one, drops those that are empty after trimming, and joins
// the remainder with single spaces. Non-text blocks are ignored.
func contentBlocksToText(content []fantasy.Content) string {
	var joined strings.Builder
	for _, item := range content {
		textItem, isText := fantasy.AsContentType[fantasy.TextContent](item)
		if !isText {
			continue
		}
		trimmed := strings.TrimSpace(textItem.Text)
		if trimmed == "" {
			continue
		}
		// Every written piece is non-empty, so a non-zero length means
		// at least one piece precedes this one and needs a separator.
		if joined.Len() > 0 {
			joined.WriteByte(' ')
		}
		joined.WriteString(trimmed)
	}
	return joined.String()
}
// truncateRunes returns value limited to at most maxLen runes (not
// bytes), so multi-byte characters are never split. A non-positive
// maxLen yields the empty string; a value that already fits is returned
// unchanged.
func truncateRunes(value string, maxLen int) string {
	if maxLen < 1 {
		return ""
	}
	if decoded := []rune(value); len(decoded) > maxLen {
		return string(decoded[:maxLen])
	}
	return value
}