Files
coder/coderd/chatd/chatloop/compaction.go
T
Kyle Carberry ddfe630757 refactor(chatd): replace fantasy.Agent with custom agent loop (#22507)
## Summary

Replaces fantasy's `Agent` abstraction with a direct step loop calling
`LanguageModel.Stream()`. Fantasy is retained as the provider
abstraction layer (streaming parsers, types, tool schema) but we no
longer use `fantasy.Agent`, `AgentStreamCall`, `AgentResult`, or
`StepResult`.

## Problems solved

| Problem | Before | After |
|---|---|---|
| **Sentinel prompt hack** | fantasy.Agent requires non-empty Prompt →
UUID sentinel generated and stripped in PrepareStep | Messages passed
directly to `model.Stream()` |
| **Discarded PersistStep errors** | `_ = opts.OnStepFinish(result)`
silently swallows errors | Errors propagate directly from
`PersistStep()` |
| **Shadow draft state** | ~160 LOC tracking content in parallel because
fantasy doesn't expose in-progress content on interruption |
`stepResult` owns content directly; `flushActiveState()` is trivial |
| **Nested retry layers** | fantasy's 2-attempt retry nested inside
chatretry's indefinite retry | Single `chatretry.Retry` layer |
| **Callback-mediated compaction** | Mutex + boolean flag + coordination
between OnStepFinish/PrepareStep callbacks | Inline `if` statement
between steps |
| **Duplicate compaction paths** | `compactStep()` + `maybeCompact()`
sharing ~80% logic | Single `tryCompact()` function |

## Changes

### `coderd/chatd/chatloop/chatloop.go` — Rewritten
- **Removed**: `fantasy.NewAgent()`, `AgentStreamCall`, sentinel prompt,
shadow draft state (~160 LOC of closures), `compactedMu`/`compacted`
flag, `PrepareStepResult`
- **Added**: `stepResult` struct, `processStepStream()` (stream
consumer), `executeTools()` (sequential tool execution),
`flushActiveState()` (interrupt handling), `buildToolDefinitions()`,
`toResponseMessages()`
- **Changed**: `Run()` return type from `(*fantasy.AgentResult, error)`
to `error` (callers already discarded the result)
- **Preserved**: Anthropic prompt caching, reasoning title extraction,
`extractContextLimit()`, `ErrInterrupted` semantics

### `coderd/chatd/chatloop/compaction.go` — Simplified
- Merged `compactStep()` + `maybeCompact()` → single `tryCompact()`
- Removed `[]StepResult` parameter from `generateCompactionSummary()`
(caller provides complete message list)
- Kept helper functions: `normalizedCompactionConfig`,
`contextTokensFromUsage`, `resolveContextLimit`, `shouldCompact`

### `coderd/chatd/chatd.go` — Caller updates
- Removed `AgentStreamCall` construction
- Changed `_, err = chatloop.Run(...)` to `err = chatloop.Run(...)`
- Model parameters moved from `AgentStreamCall` fields to `RunOptions`
fields

### Tests — 4 new tests
- `MidLoopCompactionReloadsMessages` — compaction fires mid-loop,
messages reloaded
- `PostRunCompactionSkippedAfterMidLoop` — no double compaction
- `MultiStepToolExecution` — tools execute between steps, results feed
next step
- `PersistStepErrorPropagates` — persistence errors propagate (was
silently discarded)
2026-03-02 18:51:57 -05:00

319 lines
9.1 KiB
Go

package chatloop
import (
"context"
"encoding/json"
"strings"
"time"
"charm.land/fantasy"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/codersdk"
)
const (
defaultCompactionThresholdPercent = int32(70)
minCompactionThresholdPercent = int32(0)
maxCompactionThresholdPercent = int32(100)
defaultCompactionSummaryPrompt = "Summarize the current chat so a " +
"new assistant can continue seamlessly. Include the user's goals, " +
"decisions made, concrete technical details (files, commands, APIs), " +
"errors encountered and fixes, and open questions. Be dense and factual. " +
"Omit pleasantries and next-step suggestions."
defaultCompactionSystemSummaryPrefix = "Summary of earlier chat context:"
defaultCompactionTimeout = 90 * time.Second
)
type CompactionOptions struct {
ThresholdPercent int32
ContextLimit int64
SummaryPrompt string
SystemSummaryPrefix string
Timeout time.Duration
Persist func(context.Context, CompactionResult) error
// ToolCallID and ToolName identify the synthetic tool call
// used to represent compaction in the message stream.
ToolCallID string
ToolName string
// PublishMessagePart publishes streaming parts to connected
// clients so they see "Summarizing..." / "Summarized" UI
// transitions during compaction.
PublishMessagePart func(fantasy.MessageRole, codersdk.ChatMessagePart)
OnError func(error)
}
type CompactionResult struct {
SystemSummary string
SummaryReport string
ThresholdPercent int32
UsagePercent float64
ContextTokens int64
ContextLimit int64
}
// tryCompact checks whether context usage exceeds the compaction
// threshold and, if so, generates and persists a summary. Returns
// (true, nil) when compaction was performed, (false, nil) when not
// needed, and (false, err) on failure.
func tryCompact(
ctx context.Context,
model fantasy.LanguageModel,
compaction *CompactionOptions,
contextLimitFallback int64,
stepUsage fantasy.Usage,
stepMetadata fantasy.ProviderMetadata,
allMessages []fantasy.Message,
) (bool, error) {
config, ok := normalizedCompactionConfig(compaction)
if !ok {
return false, nil
}
contextTokens := contextTokensFromUsage(stepUsage)
if contextTokens <= 0 {
return false, nil
}
metadataLimit := extractContextLimit(stepMetadata)
contextLimit := resolveContextLimit(
metadataLimit.Int64,
config.ContextLimit,
contextLimitFallback,
)
usagePercent, compact := shouldCompact(
contextTokens, contextLimit, config.ThresholdPercent,
)
if !compact {
return false, nil
}
// Publish the "Summarizing..." tool-call indicator so
// connected clients see activity during summary generation.
if config.PublishMessagePart != nil && config.ToolCallID != "" {
config.PublishMessagePart(
fantasy.MessageRoleAssistant,
codersdk.ChatMessagePart{
Type: codersdk.ChatMessagePartTypeToolCall,
ToolCallID: config.ToolCallID,
ToolName: config.ToolName,
},
)
}
summary, err := generateCompactionSummary(
ctx, model, allMessages, config,
)
if err != nil {
return false, err
}
if summary == "" {
// Publish a tool-result error so connected clients
// see the compaction failure.
publishCompactionError(config, "compaction produced an empty summary")
return false, xerrors.New("compaction produced an empty summary")
}
systemSummary := strings.TrimSpace(
config.SystemSummaryPrefix + "\n\n" + summary,
)
err = config.Persist(ctx, CompactionResult{
SystemSummary: systemSummary,
SummaryReport: summary,
ThresholdPercent: config.ThresholdPercent,
UsagePercent: usagePercent,
ContextTokens: contextTokens,
ContextLimit: contextLimit,
})
if err != nil {
publishCompactionError(config, "failed to persist compaction result")
return false, xerrors.Errorf("persist compaction: %w", err)
}
// Publish the "Summarized" tool-result part so the client
// transitions from the in-progress indicator to the final
// state.
if config.PublishMessagePart != nil && config.ToolCallID != "" {
resultJSON, _ := json.Marshal(map[string]any{
"summary": summary,
"source": "automatic",
"threshold_percent": config.ThresholdPercent,
"usage_percent": usagePercent,
"context_tokens": contextTokens,
"context_limit_tokens": contextLimit,
})
config.PublishMessagePart(
fantasy.MessageRoleTool,
codersdk.ChatMessagePart{
Type: codersdk.ChatMessagePartTypeToolResult,
ToolCallID: config.ToolCallID,
ToolName: config.ToolName,
Result: resultJSON,
},
)
}
return true, nil
}
// publishCompactionError sends a tool-result error part so
// connected clients see that compaction failed.
func publishCompactionError(config CompactionOptions, msg string) {
if config.PublishMessagePart == nil || config.ToolCallID == "" {
return
}
errJSON, _ := json.Marshal(map[string]any{
"error": msg,
})
config.PublishMessagePart(
fantasy.MessageRoleTool,
codersdk.ChatMessagePart{
Type: codersdk.ChatMessagePartTypeToolResult,
ToolCallID: config.ToolCallID,
ToolName: config.ToolName,
Result: errJSON,
IsError: true,
},
)
}
// normalizedCompactionConfig returns a copy of the compaction options
// with defaults applied. The bool is false when compaction is
// disabled (nil options, missing Persist callback, or threshold at
// 100%).
func normalizedCompactionConfig(opts *CompactionOptions) (CompactionOptions, bool) {
if opts == nil {
return CompactionOptions{}, false
}
config := *opts
if config.Persist == nil {
return CompactionOptions{}, false
}
if strings.TrimSpace(config.SummaryPrompt) == "" {
config.SummaryPrompt = defaultCompactionSummaryPrompt
}
if strings.TrimSpace(config.SystemSummaryPrefix) == "" {
config.SystemSummaryPrefix = defaultCompactionSystemSummaryPrefix
}
if config.Timeout <= 0 {
config.Timeout = defaultCompactionTimeout
}
if config.ThresholdPercent < minCompactionThresholdPercent ||
config.ThresholdPercent > maxCompactionThresholdPercent {
config.ThresholdPercent = defaultCompactionThresholdPercent
}
if config.ThresholdPercent == maxCompactionThresholdPercent {
return CompactionOptions{}, false
}
return config, true
}
// contextTokensFromUsage returns the total context token count from
// a step's usage report. It sums input, cache-read, and
// cache-creation tokens when available, falling back to TotalTokens
// if none of the granular fields are set.
func contextTokensFromUsage(usage fantasy.Usage) int64 {
total := int64(0)
hasContextTokens := false
if usage.InputTokens > 0 {
total += usage.InputTokens
hasContextTokens = true
}
if usage.CacheReadTokens > 0 {
total += usage.CacheReadTokens
hasContextTokens = true
}
if usage.CacheCreationTokens > 0 {
total += usage.CacheCreationTokens
hasContextTokens = true
}
if !hasContextTokens && usage.TotalTokens > 0 {
total = usage.TotalTokens
}
return total
}
// resolveContextLimit picks the first positive value from metadata,
// configured limit, and fallback — in that priority order. Returns
// 0 when none are positive.
func resolveContextLimit(metadataLimit, configLimit, fallback int64) int64 {
if metadataLimit > 0 {
return metadataLimit
}
if configLimit > 0 {
return configLimit
}
if fallback > 0 {
return fallback
}
return 0
}
// shouldCompact returns the usage percentage and whether it exceeds
// the threshold. Returns (0, false) when contextLimit is
// non-positive.
func shouldCompact(contextTokens, contextLimit int64, thresholdPercent int32) (float64, bool) {
if contextLimit <= 0 {
return 0, false
}
usagePercent := (float64(contextTokens) / float64(contextLimit)) * 100
return usagePercent, usagePercent >= float64(thresholdPercent)
}
// generateCompactionSummary asks the model to summarize the
// conversation so far. The provided messages should contain the
// complete history (system prompt, user/assistant turns, tool
// results). A final user message with the summary prompt is appended
// before calling the model.
func generateCompactionSummary(
ctx context.Context,
model fantasy.LanguageModel,
messages []fantasy.Message,
options CompactionOptions,
) (string, error) {
summaryPrompt := make([]fantasy.Message, 0, len(messages)+1)
summaryPrompt = append(summaryPrompt, messages...)
summaryPrompt = append(summaryPrompt, fantasy.Message{
Role: fantasy.MessageRoleUser,
Content: []fantasy.MessagePart{
fantasy.TextPart{Text: options.SummaryPrompt},
},
})
toolChoice := fantasy.ToolChoiceNone
summaryCtx, cancel := context.WithTimeout(ctx, options.Timeout)
defer cancel()
response, err := model.Generate(summaryCtx, fantasy.Call{
Prompt: summaryPrompt,
ToolChoice: &toolChoice,
})
if err != nil {
return "", xerrors.Errorf("generate summary text: %w", err)
}
parts := make([]string, 0, len(response.Content))
for _, block := range response.Content {
textBlock, ok := fantasy.AsContentType[fantasy.TextContent](block)
if !ok {
continue
}
text := strings.TrimSpace(textBlock.Text)
if text == "" {
continue
}
parts = append(parts, text)
}
return strings.TrimSpace(strings.Join(parts, " ")), nil
}