Files
coder/coderd/x/chatd/quickgen.go
T
Michael Suchacz 033ed0bb82 feat: add admin-configurable chat title generation model (#24838)
Adds an admin-configurable deployment-wide setting that controls which
model is used for chat title generation. Admins can pick any enabled
chat model config from the Agents settings page, or leave the setting
unset to keep the existing fast-models-then-chat-model fallback
algorithm.

When a model is selected, both automatic and manual title generation use
only that model, with no silent fallback. When the configured model is
disabled, missing credentials, or otherwise unusable, automatic title
generation skips entirely (best-effort) and manual title regeneration
returns a clear error, so admins notice the misconfiguration instead of
silently routing title traffic through another provider.

## Surface

- New deployment-wide setting stored as a `site_configs` row
(`agents_chat_title_generation_model_override`).
- New experimental endpoint `GET/PUT
/api/experimental/chats/config/model-override/{context}`.
- Frontend: title generation now appears as a third dropdown on the
Agents admin settings page alongside the existing general and explore
context overrides.

## DRY refactors folded in

Title generation is integrated as a third value of the existing
`ChatModelOverrideContext` type alongside `general` and `explore`,
sharing the parameterized HTTP route, SDK methods, generated types, and
frontend API plumbing rather than introducing a parallel surface. The
`Agent` prefix was dropped from the type and route since title
generation is not a delegated agent.

The chatd model-override resolver is also shared.
`resolveConfiguredModelOverride` now takes a `failureMode` parameter:

- Subagent overrides use soft failure: misconfigured overrides are
logged and the parent model is used.
- Title generation uses hard failure: misconfigured overrides return an
explicit error so manual title regeneration surfaces the
misconfiguration and automatic title generation skips instead of
silently falling back.

> Mux is acting on Mike's behalf.
2026-05-04 13:13:00 +02:00

943 lines
25 KiB
Go

package chatd
import (
"context"
"errors"
"fmt"
"net/http"
"slices"
"strings"
"time"
"charm.land/fantasy"
"charm.land/fantasy/object"
fantasyanthropic "charm.land/fantasy/providers/anthropic"
fantasyazure "charm.land/fantasy/providers/azure"
fantasybedrock "charm.land/fantasy/providers/bedrock"
fantasygoogle "charm.land/fantasy/providers/google"
fantasyopenai "charm.land/fantasy/providers/openai"
fantasyopenrouter "charm.land/fantasy/providers/openrouter"
fantasyvercel "charm.land/fantasy/providers/vercel"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/x/chatd/chatdebug"
"github.com/coder/coder/v2/coderd/x/chatd/chatprompt"
"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
"github.com/coder/coder/v2/coderd/x/chatd/chatretry"
"github.com/coder/coder/v2/codersdk"
)
const titleGenerationPrompt = "Write a short title for the user's message. " +
"Populate the title field with the result. " +
"Return only the title text in 2-8 words. " +
"Do not answer the user or describe the title-writing task. " +
"Preserve specific identifiers such as PR numbers, repo names, file paths, function names, and error messages. " +
"If the message is short or vague, stay close to the user's wording instead of inventing context. " +
"Sentence case. No quotes, emoji, markdown, or trailing punctuation."
const (
// maxConversationContextRunes caps the conversation sample in manual
// title prompts to avoid exceeding model context windows.
maxConversationContextRunes = 6000
// maxLatestUserMessageRunes caps the latest user message excerpt.
maxLatestUserMessageRunes = 1000
// recentTurnWindow is the number of most recent turns included
// alongside the first user turn in manual title context.
recentTurnWindow = 3
)
// preferredTitleModels are lightweight models used for title
// generation, one per provider type. Each entry uses the
// cheapest/fastest small model for that provider as identified
// by the charmbracelet/catwalk model catalog. Providers that
// aren't configured (no API key) are silently skipped.
var preferredTitleModels = []struct {
provider string
model string
}{
{fantasyanthropic.Name, "claude-haiku-4-5"},
{fantasyopenai.Name, "gpt-4o-mini"},
{fantasygoogle.Name, "gemini-2.5-flash"},
{fantasyazure.Name, "gpt-4o-mini"},
{fantasybedrock.Name, "anthropic.claude-haiku-4-5-20251001-v1:0"},
{fantasyopenrouter.Name, "anthropic/claude-3.5-haiku"},
{fantasyvercel.Name, "anthropic/claude-haiku-4.5"},
}
type shortTextCandidate struct {
provider string
model string
lm fantasy.LanguageModel
}
func selectPreferredConfiguredShortTextModelConfig(
configs []database.ChatModelConfig,
) (database.ChatModelConfig, bool) {
for _, preferred := range preferredTitleModels {
for _, config := range configs {
if chatprovider.NormalizeProvider(config.Provider) != preferred.provider {
continue
}
if !strings.EqualFold(strings.TrimSpace(config.Model), preferred.model) {
continue
}
return config, true
}
}
return database.ChatModelConfig{}, false
}
func normalizeShortTextOutput(text string) string {
text = strings.TrimSpace(text)
if text == "" {
return ""
}
text = strings.Trim(text, "\"'`")
return strings.Join(strings.Fields(text), " ")
}
type generatedTitle struct {
Title string `json:"title" description:"Short descriptive chat title"`
}
// maybeGenerateChatTitle generates an AI title for the chat when
// appropriate (first user message, no assistant reply yet, and the
// current title is either empty or still the fallback truncation).
// It uses the configured title generation model override when set.
// Otherwise, it tries cheap, fast models first and falls back to the
// user's chat model. It is a best-effort operation that logs and
// swallows errors.
func (p *Server) maybeGenerateChatTitle(
ctx context.Context,
chat database.Chat,
messages []database.ChatMessage,
fallbackProvider string,
fallbackModelName string,
fallbackModel fantasy.LanguageModel,
keys chatprovider.ProviderAPIKeys,
generatedTitle *generatedChatTitle,
logger slog.Logger,
debugSvc *chatdebug.Service,
) {
input, ok := titleInput(chat, messages)
if !ok {
return
}
debugEnabled := debugSvc != nil && debugSvc.IsEnabled(ctx, chat.ID, chat.OwnerID)
titleCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
overrideConfig, overrideModel, overrideSet, overrideErr := p.resolveTitleGenerationModelOverride(
titleCtx,
chat,
keys,
)
if overrideErr != nil {
if overrideSet {
logger.Warn(ctx, "title generation model override unavailable, skipping title generation",
slog.F("chat_id", chat.ID),
slog.F("override_context", titleGenerationOverrideContext),
slog.Error(overrideErr),
)
return
}
logger.Debug(ctx, "failed to resolve title generation model override",
slog.F("chat_id", chat.ID),
slog.F("override_context", titleGenerationOverrideContext),
slog.Error(overrideErr),
)
}
var candidates []shortTextCandidate
if overrideSet {
candidates = []shortTextCandidate{{
provider: overrideConfig.Provider,
model: overrideConfig.Model,
lm: overrideModel,
}}
} else {
// Build candidate list: preferred lightweight models first,
// then the user's chat model as last resort.
candidates = make([]shortTextCandidate, 0, len(preferredTitleModels)+1)
for _, c := range preferredTitleModels {
m, err := chatprovider.ModelFromConfig(
c.provider, c.model, keys, chatprovider.UserAgent(),
chatprovider.CoderHeaders(chat),
nil,
)
if err == nil {
candidates = append(candidates, shortTextCandidate{
provider: c.provider,
model: c.model,
lm: m,
})
}
}
candidates = append(candidates, shortTextCandidate{
provider: fallbackProvider,
model: fallbackModelName,
lm: fallbackModel,
})
}
var historyTipMessageID int64
if len(messages) > 0 {
historyTipMessageID = messages[len(messages)-1].ID
}
var triggerMessageID int64
for _, message := range messages {
if message.Visibility == database.ChatMessageVisibilityModel {
continue
}
if message.Role == database.ChatMessageRoleUser {
triggerMessageID = message.ID
break
}
}
seedSummary := chatdebug.SeedSummary(
chatdebug.TruncateLabel(input, chatdebug.MaxLabelLength),
)
var lastErr error
for _, candidate := range candidates {
candidateCtx := titleCtx
candidateModel := candidate.lm
finishDebugRun := func(error) {}
if debugEnabled {
candidateCtx, candidateModel, finishDebugRun = prepareQuickgenDebugCandidate(
titleCtx,
chat,
keys,
debugSvc,
candidate,
chatdebug.KindTitleGeneration,
triggerMessageID,
historyTipMessageID,
seedSummary,
logger,
)
}
title, err := generateTitle(candidateCtx, candidateModel, input)
finishDebugRun(err)
if err != nil {
lastErr = err
if overrideSet {
logger.Warn(ctx, "title model candidate failed",
slog.F("chat_id", chat.ID),
slog.F("override_context", titleGenerationOverrideContext),
slog.F("provider", candidate.provider),
slog.F("model", candidate.model),
slog.Error(err),
)
} else {
logger.Debug(ctx, "title model candidate failed",
slog.F("chat_id", chat.ID),
slog.Error(err),
)
}
continue
}
if title == "" || title == chat.Title {
return
}
_, err = p.db.UpdateChatByID(ctx, database.UpdateChatByIDParams{
ID: chat.ID,
Title: title,
})
if err != nil {
logger.Warn(ctx, "failed to update generated chat title",
slog.F("chat_id", chat.ID),
slog.Error(err),
)
return
}
chat.Title = title
generatedTitle.Store(title)
p.publishChatPubsubEvent(chat, codersdk.ChatWatchEventKindTitleChange, nil)
return
}
if lastErr != nil {
if overrideSet {
logger.Warn(ctx, "all title model candidates failed",
slog.F("chat_id", chat.ID),
slog.F("override_context", titleGenerationOverrideContext),
slog.Error(lastErr),
)
} else {
logger.Debug(ctx, "all title model candidates failed",
slog.F("chat_id", chat.ID),
slog.Error(lastErr),
)
}
}
}
func newQuickgenDebugModel(
chat database.Chat,
keys chatprovider.ProviderAPIKeys,
debugSvc *chatdebug.Service,
provider string,
model string,
) (fantasy.LanguageModel, error) {
httpClient := &http.Client{Transport: &chatdebug.RecordingTransport{}}
debugModel, err := chatprovider.ModelFromConfig(
provider,
model,
keys,
chatprovider.UserAgent(),
chatprovider.CoderHeaders(chat),
httpClient,
)
if err != nil {
return nil, err
}
if debugModel == nil {
return nil, xerrors.Errorf(
"create model for %s/%s returned nil",
provider,
model,
)
}
return chatdebug.WrapModel(debugModel, debugSvc, chatdebug.RecorderOptions{
ChatID: chat.ID,
OwnerID: chat.OwnerID,
Provider: provider,
Model: model,
}), nil
}
func prepareQuickgenDebugCandidate(
ctx context.Context,
chat database.Chat,
keys chatprovider.ProviderAPIKeys,
debugSvc *chatdebug.Service,
candidate shortTextCandidate,
kind chatdebug.RunKind,
triggerMessageID int64,
historyTipMessageID int64,
seedSummary map[string]any,
logger slog.Logger,
) (context.Context, fantasy.LanguageModel, func(error)) {
finishDebugRun := func(error) {}
if debugSvc == nil {
return ctx, candidate.lm, finishDebugRun
}
debugModel, err := newQuickgenDebugModel(
chat,
keys,
debugSvc,
candidate.provider,
candidate.model,
)
if err != nil {
logger.Warn(ctx, "failed to build short-text debug model",
slog.F("chat_id", chat.ID),
slog.F("run_kind", kind),
slog.F("provider", candidate.provider),
slog.F("model", candidate.model),
slog.Error(err),
)
return ctx, candidate.lm, finishDebugRun
}
// Debug instrumentation must not eat into the quickgen budget
// (30s titleCtx / summaryCtx on the caller). Detach and bound
// the insert so a slow DB can't delay title generation or push
// summaries, matching prepareManualTitleDebugRun,
// prepareChatTurnDebugRun, and startCompactionDebugRun.
createRunCtx, createRunCancel := context.WithTimeout(
context.WithoutCancel(ctx), debugCreateRunTimeout,
)
run, err := debugSvc.CreateRun(createRunCtx, chatdebug.CreateRunParams{
ChatID: chat.ID,
TriggerMessageID: triggerMessageID,
HistoryTipMessageID: historyTipMessageID,
Kind: kind,
Status: chatdebug.StatusInProgress,
Provider: candidate.provider,
Model: candidate.model,
Summary: seedSummary,
})
createRunCancel()
if err != nil {
logger.Warn(ctx, "failed to create short-text debug run",
slog.F("chat_id", chat.ID),
slog.F("run_kind", kind),
slog.F("provider", candidate.provider),
slog.F("model", candidate.model),
slog.Error(err),
)
return ctx, candidate.lm, finishDebugRun
}
runCtx := chatdebug.ContextWithRun(
ctx,
&chatdebug.RunContext{
RunID: run.ID,
ChatID: chat.ID,
TriggerMessageID: triggerMessageID,
HistoryTipMessageID: historyTipMessageID,
Kind: kind,
Provider: candidate.provider,
Model: candidate.model,
},
)
finishDebugRun = func(runErr error) {
if finalizeErr := debugSvc.FinalizeRun(ctx, chatdebug.FinalizeRunParams{
RunID: run.ID,
ChatID: chat.ID,
Status: chatdebug.ClassifyError(runErr),
SeedSummary: seedSummary,
Timeout: 10 * time.Second,
}); finalizeErr != nil {
logger.Warn(ctx, "failed to finalize short-text debug run",
slog.F("chat_id", chat.ID),
slog.F("run_kind", kind),
slog.F("run_id", run.ID),
slog.Error(finalizeErr),
)
}
}
return runCtx, debugModel, finishDebugRun
}
// generateTitle calls the model with a title-generation system prompt
// and returns the normalized result. It retries transient LLM errors
// (rate limits, overloaded, etc.) with exponential backoff.
func generateTitle(
ctx context.Context,
model fantasy.LanguageModel,
input string,
) (string, error) {
title, err := generateStructuredTitle(ctx, model, titleGenerationPrompt, input)
if err != nil {
return "", err
}
return title, nil
}
func generateStructuredTitle(
ctx context.Context,
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, error) {
title, _, err := generateStructuredTitleWithUsage(
ctx,
model,
systemPrompt,
userInput,
)
if err != nil {
return "", err
}
return title, nil
}
func generateStructuredTitleWithUsage(
ctx context.Context,
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, fantasy.Usage, error) {
userInput = strings.TrimSpace(userInput)
if userInput == "" {
return "", fantasy.Usage{}, xerrors.New("title input was empty")
}
prompt := fantasy.Prompt{
{
Role: fantasy.MessageRoleSystem,
Content: []fantasy.MessagePart{
fantasy.TextPart{Text: systemPrompt},
},
},
{
Role: fantasy.MessageRoleUser,
Content: []fantasy.MessagePart{
fantasy.TextPart{Text: userInput},
},
},
}
var maxOutputTokens int64 = 256
var result *fantasy.ObjectResult[generatedTitle]
err := chatretry.Retry(ctx, func(retryCtx context.Context) error {
var genErr error
result, genErr = object.Generate[generatedTitle](retryCtx, model, fantasy.ObjectCall{
Prompt: prompt,
SchemaName: "propose_title",
SchemaDescription: "Propose a short chat title.",
MaxOutputTokens: &maxOutputTokens,
})
return genErr
}, nil)
if err != nil {
var usage fantasy.Usage
var noObjErr *fantasy.NoObjectGeneratedError
if errors.As(err, &noObjErr) {
usage = noObjErr.Usage
}
return "", usage, xerrors.Errorf("generate structured title: %w", err)
}
title := normalizeTitleOutput(result.Object.Title)
if err := validateGeneratedTitle(title); err != nil {
return "", result.Usage, err
}
return title, result.Usage, nil
}
func validateGeneratedTitle(title string) error {
if title == "" {
return xerrors.New("generated title was empty")
}
if len(strings.Fields(title)) > 8 {
return xerrors.New("generated title exceeded 8 words")
}
return nil
}
// titleInput returns the first user message text and whether title
// generation should proceed. It returns false when the chat already
// has assistant/tool replies, has more than one visible user message,
// or the current title doesn't look like a candidate for replacement.
func titleInput(
chat database.Chat,
messages []database.ChatMessage,
) (string, bool) {
userCount := 0
firstUserText := ""
for _, message := range messages {
if message.Visibility == database.ChatMessageVisibilityModel {
continue
}
switch message.Role {
case database.ChatMessageRoleAssistant, database.ChatMessageRoleTool:
return "", false
case database.ChatMessageRoleUser:
userCount++
if firstUserText == "" {
parsed, err := chatprompt.ParseContent(message)
if err != nil {
return "", false
}
firstUserText = strings.TrimSpace(
contentBlocksToText(parsed),
)
}
}
}
if userCount != 1 || firstUserText == "" {
return "", false
}
currentTitle := strings.TrimSpace(chat.Title)
if currentTitle == "" {
return firstUserText, true
}
if currentTitle != fallbackChatTitle(firstUserText) {
return "", false
}
return firstUserText, true
}
func normalizeTitleOutput(title string) string {
title = normalizeShortTextOutput(title)
if title == "" {
return ""
}
return truncateRunes(title, 80)
}
func fallbackChatTitle(message string) string {
const maxWords = 6
const maxRunes = 80
words := strings.Fields(message)
if len(words) == 0 {
return "New Chat"
}
truncated := false
if len(words) > maxWords {
words = words[:maxWords]
truncated = true
}
title := strings.Join(words, " ")
if truncated {
return truncateRunes(title, maxRunes-1) + "…"
}
return truncateRunes(title, maxRunes)
}
// contentBlocksToText concatenates the text parts of SDK chat
// message parts into a single space-separated string.
func contentBlocksToText(parts []codersdk.ChatMessagePart) string {
texts := make([]string, 0, len(parts))
for _, part := range parts {
if part.Type != codersdk.ChatMessagePartTypeText {
continue
}
text := strings.TrimSpace(part.Text)
if text == "" {
continue
}
texts = append(texts, text)
}
return strings.Join(texts, " ")
}
func truncateRunes(value string, maxLen int) string {
if maxLen <= 0 {
return ""
}
runes := []rune(value)
if len(runes) <= maxLen {
return value
}
return string(runes[:maxLen])
}
// Manual title regeneration is user-initiated and can use richer
// conversation context than the automatic first-message title path
// above. These helpers keep the manual prompt-building logic private
// while reusing the shared title-generation utilities in this file.
type manualTitleTurn struct {
role string
text string
}
func extractManualTitleTurns(messages []database.ChatMessage) []manualTitleTurn {
turns := make([]manualTitleTurn, 0, len(messages))
for _, message := range messages {
if message.Visibility == database.ChatMessageVisibilityModel {
continue
}
role := ""
switch message.Role {
case database.ChatMessageRoleUser:
role = string(database.ChatMessageRoleUser)
case database.ChatMessageRoleAssistant:
role = string(database.ChatMessageRoleAssistant)
default:
continue
}
parts, err := chatprompt.ParseContent(message)
if err != nil {
continue
}
text := strings.TrimSpace(contentBlocksToText(parts))
if text == "" {
continue
}
turns = append(turns, manualTitleTurn{
role: role,
text: text,
})
}
return turns
}
func selectManualTitleTurnIndexes(turns []manualTitleTurn) []int {
firstUserIndex := slices.IndexFunc(turns, func(turn manualTitleTurn) bool {
return turn.role == string(database.ChatMessageRoleUser)
})
if firstUserIndex == -1 {
return nil
}
windowStart := max(0, len(turns)-recentTurnWindow)
selected := make([]int, 0, recentTurnWindow+1)
if firstUserIndex < windowStart {
selected = append(selected, firstUserIndex)
}
for i := windowStart; i < len(turns); i++ {
selected = append(selected, i)
}
return selected
}
func buildManualTitleContext(
turns []manualTitleTurn,
selected []int,
) (conversationBlock string, latestUserMsg string) {
userCount := 0
for _, turn := range turns {
if turn.role != string(database.ChatMessageRoleUser) {
continue
}
userCount++
latestUserMsg = turn.text
}
latestUserMsg = truncateRunes(latestUserMsg, maxLatestUserMessageRunes)
if userCount <= 1 || len(selected) == 0 {
return "", latestUserMsg
}
lines := make([]string, 0, len(selected)+1)
for i, idx := range selected {
if i == 1 {
if gap := idx - selected[i-1] - 1; gap > 0 {
lines = append(lines, fmt.Sprintf("[... %d earlier turns omitted ...]", gap))
}
}
lines = append(lines, fmt.Sprintf("[%s]: %s", turns[idx].role, turns[idx].text))
}
conversationBlock = strings.Join(lines, "\n")
conversationBlock = truncateRunes(conversationBlock, maxConversationContextRunes)
return conversationBlock, latestUserMsg
}
func renderManualTitlePrompt(
conversationBlock string,
firstUserText string,
latestUserMsg string,
) string {
var prompt strings.Builder
write := func(value string) {
_, _ = prompt.WriteString(value)
}
write("Write a short title for this AI coding conversation.\n")
write("Populate the title field with the result.\n\n")
write("Primary user objective:\n<primary_objective>\n")
write(firstUserText)
write("\n</primary_objective>")
if conversationBlock != "" {
write("\n\nConversation sample:\n<conversation_sample>\n")
write(conversationBlock)
write("\n</conversation_sample>")
}
if strings.TrimSpace(latestUserMsg) != strings.TrimSpace(truncateRunes(firstUserText, maxLatestUserMessageRunes)) {
write("\n\nThe user's most recent message:\n<latest_message>\n")
write(latestUserMsg)
write("\n</latest_message>\n")
write("Note: Weight the overall conversation arc more heavily than just the latest message.")
}
write("\n\nRequirements:\n")
write("- Return only the title text in 2-8 words.\n")
write("- Populate the title field only.\n")
write("- Do not answer the user or describe the title-writing task.\n")
write("- Preserve specific identifiers (PR numbers, repo names, file paths, function names, error messages).\n")
write("- If the conversation is short or vague, stay close to the user's wording.\n")
write("- Sentence case. No quotes, emoji, markdown, or trailing punctuation.\n")
return prompt.String()
}
func generateManualTitle(
ctx context.Context,
messages []database.ChatMessage,
fallbackModel fantasy.LanguageModel,
) (string, fantasy.Usage, error) {
turns := extractManualTitleTurns(messages)
selected := selectManualTitleTurnIndexes(turns)
firstUserIndex := slices.IndexFunc(turns, func(turn manualTitleTurn) bool {
return turn.role == string(database.ChatMessageRoleUser)
})
if firstUserIndex == -1 {
return "", fantasy.Usage{}, nil
}
firstUserText := truncateRunes(turns[firstUserIndex].text, maxLatestUserMessageRunes)
conversationBlock, latestUserMsg := buildManualTitleContext(turns, selected)
systemPrompt := renderManualTitlePrompt(
conversationBlock,
firstUserText,
latestUserMsg,
)
titleCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
userInput := strings.TrimSpace(latestUserMsg)
if userInput == "" {
userInput = strings.TrimSpace(firstUserText)
}
title, usage, err := generateStructuredTitleWithUsage(
titleCtx,
fallbackModel,
systemPrompt,
userInput,
)
if err != nil {
return "", usage, err
}
return title, usage, nil
}
const pushSummaryPrompt = "You are a notification assistant. Given a chat title " +
"and the agent's last message, write a single short sentence (under 100 characters) " +
"summarizing what the agent did. This will be shown as a push notification body. " +
"Return plain text only — no quotes, no emoji, no markdown."
// generatePushSummary calls a cheap model to produce a short push
// notification body from the chat title and the last assistant
// message text. It follows the same candidate-selection strategy
// as title generation: try preferred lightweight models first, then
// fall back to the provided model. Returns "" on any failure.
func generatePushSummary(
ctx context.Context,
chat database.Chat,
assistantText string,
fallbackProvider string,
fallbackModelName string,
fallbackModel fantasy.LanguageModel,
keys chatprovider.ProviderAPIKeys,
logger slog.Logger,
debugSvc *chatdebug.Service,
triggerMessageID int64,
historyTipMessageID int64,
) string {
debugEnabled := debugSvc != nil && debugSvc.IsEnabled(ctx, chat.ID, chat.OwnerID)
summaryCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
assistantText = truncateRunes(assistantText, maxConversationContextRunes)
input := "Chat title: " + chat.Title + "\n\nAgent's last message:\n" + assistantText
candidates := make([]shortTextCandidate, 0, len(preferredTitleModels)+1)
for _, c := range preferredTitleModels {
m, err := chatprovider.ModelFromConfig(
c.provider, c.model, keys, chatprovider.UserAgent(),
chatprovider.CoderHeaders(chat),
nil,
)
if err == nil {
candidates = append(candidates, shortTextCandidate{
provider: c.provider,
model: c.model,
lm: m,
})
}
}
candidates = append(candidates, shortTextCandidate{
provider: fallbackProvider,
model: fallbackModelName,
lm: fallbackModel,
})
pushSeedSummary := chatdebug.SeedSummary("Push summary")
for _, candidate := range candidates {
candidateCtx := summaryCtx
candidateModel := candidate.lm
finishDebugRun := func(error) {}
if debugEnabled {
candidateCtx, candidateModel, finishDebugRun = prepareQuickgenDebugCandidate(
summaryCtx,
chat,
keys,
debugSvc,
candidate,
chatdebug.KindQuickgen,
triggerMessageID,
historyTipMessageID,
pushSeedSummary,
logger,
)
}
summary, err := generateShortText(
candidateCtx,
candidateModel,
pushSummaryPrompt,
input,
)
finishDebugRun(err)
if err != nil {
logger.Debug(ctx, "push summary model candidate failed",
slog.Error(err),
)
continue
}
if summary != "" {
return summary
}
}
return ""
}
// generateShortText calls a model with a system prompt and user
// input, returning a cleaned-up short text response. It reuses the
// same retry logic as title generation. Retries can therefore
// produce multiple debug steps for a single quickgen run.
func generateShortText(
ctx context.Context,
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, error) {
prompt := []fantasy.Message{
{
Role: fantasy.MessageRoleSystem,
Content: []fantasy.MessagePart{
fantasy.TextPart{Text: systemPrompt},
},
},
{
Role: fantasy.MessageRoleUser,
Content: []fantasy.MessagePart{
fantasy.TextPart{Text: userInput},
},
},
}
var maxOutputTokens int64 = 256
var response *fantasy.Response
err := chatretry.Retry(ctx, func(retryCtx context.Context) error {
var genErr error
response, genErr = model.Generate(retryCtx, fantasy.Call{
Prompt: prompt,
MaxOutputTokens: &maxOutputTokens,
})
return genErr
}, nil)
if err != nil {
return "", xerrors.Errorf("generate short text: %w", err)
}
responseParts := make([]codersdk.ChatMessagePart, 0, len(response.Content))
for _, block := range response.Content {
if p := chatprompt.PartFromContent(block); p.Type != "" {
responseParts = append(responseParts, p)
}
}
text := normalizeShortTextOutput(contentBlocksToText(responseParts))
return text, nil
}