mirror of
https://github.com/coder/coder.git
synced 2026-06-04 21:48:22 +00:00
53bfbf7c03
## Problem

The summarization prompt explicitly tells the model to **"Omit pleasantries and next-step suggestions"** and the summary prefix frames the compacted context as passive history: `Summary of earlier chat context:`. After compaction mid-task, the model reads a factual recap with no forward momentum, loses its direction, and either stops or asks the user what to do.

## Research

I compared our compaction prompt against several other agents:

| Agent | Key Pattern |
|---|---|
| **Codex** | Prompt says *"Include what remains to be done (clear next steps)"*. Prefix: *"Another language model started to solve this problem..."* |
| **Mux** | Includes *"Current state of the work (what's done, what's in progress)"* + appends the user's follow-up intent |
| **Continue** | *"Make sure it is clear what the current stream of work was at the very end prior to compaction so that you can continue exactly where you left off"* |
| **Copilot Chat** | Dedicated sections for *Active Work State*, *Recent Operations*, *Pre-Summary State*, and a *Continuation Plan* with explicit next actions |

**Every other major agent explicitly preserves forward intent and in-progress state.** Coder was the only one telling the model to omit next steps.

## Changes

**Summary prompt:**
- Removes `Omit next-step suggestions`
- Adds structured `Include:` list with explicit items for in-progress work, remaining work, and the specific action being performed when compaction fired
- Frames the operation as `context compaction` (matching Codex's framing)

**Summary prefix:**
- Old: `Summary of earlier chat context:`
- New: `The following is a summary of the earlier conversation. The assistant was actively working when the context was compacted. Continue the work described below:`

The prefix is the first thing the model reads post-compaction — framing it as an active handoff with an explicit "Continue" directive primes the model to resume work rather than wait.
333 lines
9.7 KiB
Go
333 lines
9.7 KiB
Go
package chatloop
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"strings"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/codersdk"
|
|
)
|
|
|
|
// Defaults and bounds that normalizedCompactionConfig applies to
// CompactionOptions.
const (
	// defaultCompactionThresholdPercent replaces a configured threshold
	// that falls outside the accepted range.
	defaultCompactionThresholdPercent int32 = 70

	// minCompactionThresholdPercent and maxCompactionThresholdPercent
	// bound the accepted threshold values. A threshold equal to the
	// maximum disables compaction.
	minCompactionThresholdPercent int32 = 0
	maxCompactionThresholdPercent int32 = 100

	// defaultCompactionSummaryPrompt is the user message appended to the
	// conversation when asking the model for a summary. It asks for the
	// state of the work (done, in progress, remaining) and the action
	// that was under way, so the summary can be used to resume the task.
	defaultCompactionSummaryPrompt = "You are performing a context compaction. " +
		"Summarize the conversation so a new assistant can seamlessly continue " +
		"the work in progress.\n\n" +
		"Include:\n" +
		"- The user's overall goal and current task\n" +
		"- Key decisions made and their rationale\n" +
		"- Concrete technical details: file paths, function names, commands, " +
		"APIs, and configurations\n" +
		"- Errors encountered and how they were resolved\n" +
		"- Current state of the work: what is DONE, what is IN PROGRESS, and " +
		"what REMAINS to be done\n" +
		"- The specific action the assistant was performing or about to perform " +
		"when this summary was triggered\n\n" +
		"Be dense and factual. Every sentence should convey essential context " +
		"for continuation. Do not include pleasantries or conversational filler."

	// defaultCompactionSystemSummaryPrefix is placed in front of the
	// generated summary. It frames the summary as a handoff of work in
	// progress and ends with an explicit instruction to continue.
	defaultCompactionSystemSummaryPrefix = "The following is a summary of the " +
		"earlier conversation. The assistant was actively working when the " +
		"context was compacted. Continue the work described below:"

	// defaultCompactionTimeout bounds summary generation when no
	// positive timeout is configured.
	defaultCompactionTimeout = 90 * time.Second
)
|
|
|
|
type CompactionOptions struct {
|
|
ThresholdPercent int32
|
|
ContextLimit int64
|
|
SummaryPrompt string
|
|
SystemSummaryPrefix string
|
|
Timeout time.Duration
|
|
Persist func(context.Context, CompactionResult) error
|
|
|
|
// ToolCallID and ToolName identify the synthetic tool call
|
|
// used to represent compaction in the message stream.
|
|
ToolCallID string
|
|
ToolName string
|
|
|
|
// PublishMessagePart publishes streaming parts to connected
|
|
// clients so they see "Summarizing..." / "Summarized" UI
|
|
// transitions during compaction.
|
|
PublishMessagePart func(fantasy.MessageRole, codersdk.ChatMessagePart)
|
|
|
|
OnError func(error)
|
|
}
|
|
|
|
// CompactionResult is the outcome of a compaction run, as handed to
// the Persist callback.
type CompactionResult struct {
	// SystemSummary is the summary prefix and the generated summary,
	// separated by a blank line.
	SystemSummary string

	// SummaryReport is the generated summary text without the prefix.
	SummaryReport string

	// ThresholdPercent is the threshold that was in effect for this
	// run.
	ThresholdPercent int32

	// UsagePercent is the context usage, as a percentage of
	// ContextLimit, that triggered compaction.
	UsagePercent float64

	// ContextTokens is the context token count taken from the step's
	// usage report.
	ContextTokens int64

	// ContextLimit is the context window size, in tokens, that the
	// usage was measured against.
	ContextLimit int64
}
|
|
|
|
// tryCompact checks whether context usage exceeds the compaction
|
|
// threshold and, if so, generates and persists a summary. Returns
|
|
// (true, nil) when compaction was performed, (false, nil) when not
|
|
// needed, and (false, err) on failure.
|
|
func tryCompact(
|
|
ctx context.Context,
|
|
model fantasy.LanguageModel,
|
|
compaction *CompactionOptions,
|
|
contextLimitFallback int64,
|
|
stepUsage fantasy.Usage,
|
|
stepMetadata fantasy.ProviderMetadata,
|
|
allMessages []fantasy.Message,
|
|
) (bool, error) {
|
|
config, ok := normalizedCompactionConfig(compaction)
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
|
|
contextTokens := contextTokensFromUsage(stepUsage)
|
|
if contextTokens <= 0 {
|
|
return false, nil
|
|
}
|
|
|
|
metadataLimit := extractContextLimit(stepMetadata)
|
|
contextLimit := resolveContextLimit(
|
|
metadataLimit.Int64,
|
|
config.ContextLimit,
|
|
contextLimitFallback,
|
|
)
|
|
|
|
usagePercent, compact := shouldCompact(
|
|
contextTokens, contextLimit, config.ThresholdPercent,
|
|
)
|
|
if !compact {
|
|
return false, nil
|
|
}
|
|
|
|
// Publish the "Summarizing..." tool-call indicator so
|
|
// connected clients see activity during summary generation.
|
|
if config.PublishMessagePart != nil && config.ToolCallID != "" {
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleAssistant,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolCall,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
},
|
|
)
|
|
}
|
|
|
|
summary, err := generateCompactionSummary(
|
|
ctx, model, allMessages, config,
|
|
)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if summary == "" {
|
|
// Publish a tool-result error so connected clients
|
|
// see the compaction failure.
|
|
publishCompactionError(config, "compaction produced an empty summary")
|
|
return false, xerrors.New("compaction produced an empty summary")
|
|
}
|
|
|
|
systemSummary := strings.TrimSpace(
|
|
config.SystemSummaryPrefix + "\n\n" + summary,
|
|
)
|
|
|
|
persistCtx := context.WithoutCancel(ctx)
|
|
err = config.Persist(persistCtx, CompactionResult{
|
|
SystemSummary: systemSummary,
|
|
SummaryReport: summary,
|
|
ThresholdPercent: config.ThresholdPercent,
|
|
UsagePercent: usagePercent,
|
|
ContextTokens: contextTokens,
|
|
ContextLimit: contextLimit,
|
|
})
|
|
if err != nil {
|
|
publishCompactionError(config, "failed to persist compaction result")
|
|
return false, xerrors.Errorf("persist compaction: %w", err)
|
|
}
|
|
|
|
// Publish the "Summarized" tool-result part so the client
|
|
// transitions from the in-progress indicator to the final
|
|
// state.
|
|
if config.PublishMessagePart != nil && config.ToolCallID != "" {
|
|
resultJSON, _ := json.Marshal(map[string]any{
|
|
"summary": summary,
|
|
"source": "automatic",
|
|
"threshold_percent": config.ThresholdPercent,
|
|
"usage_percent": usagePercent,
|
|
"context_tokens": contextTokens,
|
|
"context_limit_tokens": contextLimit,
|
|
})
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleTool,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolResult,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
Result: resultJSON,
|
|
},
|
|
)
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// publishCompactionError sends a tool-result error part so
|
|
// connected clients see that compaction failed.
|
|
func publishCompactionError(config CompactionOptions, msg string) {
|
|
if config.PublishMessagePart == nil || config.ToolCallID == "" {
|
|
return
|
|
}
|
|
errJSON, _ := json.Marshal(map[string]any{
|
|
"error": msg,
|
|
})
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleTool,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolResult,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
Result: errJSON,
|
|
IsError: true,
|
|
},
|
|
)
|
|
}
|
|
|
|
// normalizedCompactionConfig returns a copy of the compaction options
|
|
// with defaults applied. The bool is false when compaction is
|
|
// disabled (nil options, missing Persist callback, or threshold at
|
|
// 100%).
|
|
func normalizedCompactionConfig(opts *CompactionOptions) (CompactionOptions, bool) {
|
|
if opts == nil {
|
|
return CompactionOptions{}, false
|
|
}
|
|
|
|
config := *opts
|
|
if config.Persist == nil {
|
|
return CompactionOptions{}, false
|
|
}
|
|
if strings.TrimSpace(config.SummaryPrompt) == "" {
|
|
config.SummaryPrompt = defaultCompactionSummaryPrompt
|
|
}
|
|
if strings.TrimSpace(config.SystemSummaryPrefix) == "" {
|
|
config.SystemSummaryPrefix = defaultCompactionSystemSummaryPrefix
|
|
}
|
|
if config.Timeout <= 0 {
|
|
config.Timeout = defaultCompactionTimeout
|
|
}
|
|
if config.ThresholdPercent < minCompactionThresholdPercent ||
|
|
config.ThresholdPercent > maxCompactionThresholdPercent {
|
|
config.ThresholdPercent = defaultCompactionThresholdPercent
|
|
}
|
|
if config.ThresholdPercent == maxCompactionThresholdPercent {
|
|
return CompactionOptions{}, false
|
|
}
|
|
|
|
return config, true
|
|
}
|
|
|
|
// contextTokensFromUsage returns the total context token count from
|
|
// a step's usage report. It sums input, cache-read, and
|
|
// cache-creation tokens when available, falling back to TotalTokens
|
|
// if none of the granular fields are set.
|
|
func contextTokensFromUsage(usage fantasy.Usage) int64 {
|
|
total := int64(0)
|
|
hasContextTokens := false
|
|
|
|
if usage.InputTokens > 0 {
|
|
total += usage.InputTokens
|
|
hasContextTokens = true
|
|
}
|
|
if usage.CacheReadTokens > 0 {
|
|
total += usage.CacheReadTokens
|
|
hasContextTokens = true
|
|
}
|
|
if usage.CacheCreationTokens > 0 {
|
|
total += usage.CacheCreationTokens
|
|
hasContextTokens = true
|
|
}
|
|
if !hasContextTokens && usage.TotalTokens > 0 {
|
|
total = usage.TotalTokens
|
|
}
|
|
|
|
return total
|
|
}
|
|
|
|
// resolveContextLimit chooses the context limit to use. Candidates
// are considered in priority order (provider metadata, configured
// limit, fallback) and the first positive one wins. The result is 0
// when no candidate is positive.
func resolveContextLimit(metadataLimit, configLimit, fallback int64) int64 {
	for _, candidate := range [...]int64{metadataLimit, configLimit, fallback} {
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
|
|
|
// shouldCompact computes context usage as a percentage of
// contextLimit and reports whether that usage meets or exceeds
// thresholdPercent. A non-positive contextLimit yields (0, false).
func shouldCompact(contextTokens, contextLimit int64, thresholdPercent int32) (float64, bool) {
	if contextLimit <= 0 {
		return 0, false
	}

	ratio := float64(contextTokens) / float64(contextLimit)
	usagePercent := ratio * 100
	reached := usagePercent >= float64(thresholdPercent)
	return usagePercent, reached
}
|
|
|
|
// generateCompactionSummary asks the model to summarize the
|
|
// conversation so far. The provided messages should contain the
|
|
// complete history (system prompt, user/assistant turns, tool
|
|
// results). A final user message with the summary prompt is appended
|
|
// before calling the model.
|
|
func generateCompactionSummary(
|
|
ctx context.Context,
|
|
model fantasy.LanguageModel,
|
|
messages []fantasy.Message,
|
|
options CompactionOptions,
|
|
) (string, error) {
|
|
summaryPrompt := make([]fantasy.Message, 0, len(messages)+1)
|
|
summaryPrompt = append(summaryPrompt, messages...)
|
|
summaryPrompt = append(summaryPrompt, fantasy.Message{
|
|
Role: fantasy.MessageRoleUser,
|
|
Content: []fantasy.MessagePart{
|
|
fantasy.TextPart{Text: options.SummaryPrompt},
|
|
},
|
|
})
|
|
toolChoice := fantasy.ToolChoiceNone
|
|
|
|
summaryCtx, cancel := context.WithTimeout(ctx, options.Timeout)
|
|
defer cancel()
|
|
|
|
response, err := model.Generate(summaryCtx, fantasy.Call{
|
|
Prompt: summaryPrompt,
|
|
ToolChoice: &toolChoice,
|
|
})
|
|
if err != nil {
|
|
return "", xerrors.Errorf("generate summary text: %w", err)
|
|
}
|
|
|
|
parts := make([]string, 0, len(response.Content))
|
|
for _, block := range response.Content {
|
|
textBlock, ok := fantasy.AsContentType[fantasy.TextContent](block)
|
|
if !ok {
|
|
continue
|
|
}
|
|
text := strings.TrimSpace(textBlock.Text)
|
|
if text == "" {
|
|
continue
|
|
}
|
|
parts = append(parts, text)
|
|
}
|
|
return strings.TrimSpace(strings.Join(parts, " ")), nil
|
|
}
|