mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
ddfe630757
## Summary Replaces fantasy's `Agent` abstraction with a direct step loop calling `LanguageModel.Stream()`. Fantasy is retained as the provider abstraction layer (streaming parsers, types, tool schema) but we no longer use `fantasy.Agent`, `AgentStreamCall`, `AgentResult`, or `StepResult`. ## Problems solved | Problem | Before | After | |---|---|---| | **Sentinel prompt hack** | fantasy.Agent requires non-empty Prompt → UUID sentinel generated and stripped in PrepareStep | Messages passed directly to `model.Stream()` | | **Discarded PersistStep errors** | `_ = opts.OnStepFinish(result)` silently swallows errors | Errors propagate directly from `PersistStep()` | | **Shadow draft state** | ~160 LOC tracking content in parallel because fantasy doesn't expose in-progress content on interruption | `stepResult` owns content directly; `flushActiveState()` is trivial | | **Nested retry layers** | fantasy's 2-attempt retry nested inside chatretry's indefinite retry | Single `chatretry.Retry` layer | | **Callback-mediated compaction** | Mutex + boolean flag + coordination between OnStepFinish/PrepareStep callbacks | Inline `if` statement between steps | | **Duplicate compaction paths** | `compactStep()` + `maybeCompact()` sharing ~80% logic | Single `tryCompact()` function | ## Changes ### `coderd/chatd/chatloop/chatloop.go` — Rewritten - **Removed**: `fantasy.NewAgent()`, `AgentStreamCall`, sentinel prompt, shadow draft state (~160 LOC of closures), `compactedMu`/`compacted` flag, `PrepareStepResult` - **Added**: `stepResult` struct, `processStepStream()` (stream consumer), `executeTools()` (sequential tool execution), `flushActiveState()` (interrupt handling), `buildToolDefinitions()`, `toResponseMessages()` - **Changed**: `Run()` return type from `(*fantasy.AgentResult, error)` to `error` (callers already discarded the result) - **Preserved**: Anthropic prompt caching, reasoning title extraction, `extractContextLimit()`, `ErrInterrupted` semantics ### 
`coderd/chatd/chatloop/compaction.go` — Simplified - Merged `compactStep()` + `maybeCompact()` → single `tryCompact()` - Removed `[]StepResult` parameter from `generateCompactionSummary()` (caller provides complete message list) - Kept helper functions: `normalizedCompactionConfig`, `contextTokensFromUsage`, `resolveContextLimit`, `shouldCompact` ### `coderd/chatd/chatd.go` — Caller updates - Removed `AgentStreamCall` construction - Changed `_, err = chatloop.Run(...)` to `err = chatloop.Run(...)` - Model parameters moved from `AgentStreamCall` fields to `RunOptions` fields ### Tests — 4 new tests - `MidLoopCompactionReloadsMessages` — compaction fires mid-loop, messages reloaded - `PostRunCompactionSkippedAfterMidLoop` — no double compaction - `MultiStepToolExecution` — tools execute between steps, results feed next step - `PersistStepErrorPropagates` — persistence errors propagate (was silently discarded)
319 lines
9.1 KiB
Go
319 lines
9.1 KiB
Go
package chatloop
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"strings"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/codersdk"
|
|
)
|
|
|
|
// Defaults and bounds applied to CompactionOptions by
// normalizedCompactionConfig.
const (
	// defaultCompactionThresholdPercent replaces a configured
	// threshold that lies outside the min/max bounds below.
	defaultCompactionThresholdPercent int32 = 70

	// minCompactionThresholdPercent is the lowest accepted threshold.
	minCompactionThresholdPercent int32 = 0

	// maxCompactionThresholdPercent is the highest accepted
	// threshold; a threshold exactly equal to it disables compaction.
	maxCompactionThresholdPercent int32 = 100

	// defaultCompactionSummaryPrompt is used when no summary prompt
	// is configured.
	defaultCompactionSummaryPrompt = "Summarize the current chat so a new assistant can continue " +
		"seamlessly. Include the user's goals, decisions made, concrete " +
		"technical details (files, commands, APIs), errors encountered " +
		"and fixes, and open questions. Be dense and factual. Omit " +
		"pleasantries and next-step suggestions."

	// defaultCompactionSystemSummaryPrefix is used when no system
	// summary prefix is configured.
	defaultCompactionSystemSummaryPrefix = "Summary of earlier chat context:"

	// defaultCompactionTimeout is used when the configured timeout
	// is zero or negative.
	defaultCompactionTimeout = 90 * time.Second
)
|
|
|
|
type CompactionOptions struct {
|
|
ThresholdPercent int32
|
|
ContextLimit int64
|
|
SummaryPrompt string
|
|
SystemSummaryPrefix string
|
|
Timeout time.Duration
|
|
Persist func(context.Context, CompactionResult) error
|
|
|
|
// ToolCallID and ToolName identify the synthetic tool call
|
|
// used to represent compaction in the message stream.
|
|
ToolCallID string
|
|
ToolName string
|
|
|
|
// PublishMessagePart publishes streaming parts to connected
|
|
// clients so they see "Summarizing..." / "Summarized" UI
|
|
// transitions during compaction.
|
|
PublishMessagePart func(fantasy.MessageRole, codersdk.ChatMessagePart)
|
|
|
|
OnError func(error)
|
|
}
|
|
|
|
// CompactionResult describes one performed compaction. tryCompact
// builds it and hands it to CompactionOptions.Persist.
type CompactionResult struct {
	// SystemSummary is the configured prefix followed by a blank
	// line and the generated summary, with surrounding whitespace
	// trimmed.
	SystemSummary string

	// SummaryReport is the generated summary text on its own.
	SummaryReport string

	// ThresholdPercent is the normalized threshold that was in
	// effect when compaction triggered.
	ThresholdPercent int32

	// UsagePercent is ContextTokens expressed as a percentage of
	// ContextLimit.
	UsagePercent float64

	// ContextTokens is the context token count derived from the
	// step's usage report.
	ContextTokens int64

	// ContextLimit is the resolved context limit the usage was
	// measured against.
	ContextLimit int64
}
|
|
|
|
// tryCompact checks whether context usage exceeds the compaction
|
|
// threshold and, if so, generates and persists a summary. Returns
|
|
// (true, nil) when compaction was performed, (false, nil) when not
|
|
// needed, and (false, err) on failure.
|
|
func tryCompact(
|
|
ctx context.Context,
|
|
model fantasy.LanguageModel,
|
|
compaction *CompactionOptions,
|
|
contextLimitFallback int64,
|
|
stepUsage fantasy.Usage,
|
|
stepMetadata fantasy.ProviderMetadata,
|
|
allMessages []fantasy.Message,
|
|
) (bool, error) {
|
|
config, ok := normalizedCompactionConfig(compaction)
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
|
|
contextTokens := contextTokensFromUsage(stepUsage)
|
|
if contextTokens <= 0 {
|
|
return false, nil
|
|
}
|
|
|
|
metadataLimit := extractContextLimit(stepMetadata)
|
|
contextLimit := resolveContextLimit(
|
|
metadataLimit.Int64,
|
|
config.ContextLimit,
|
|
contextLimitFallback,
|
|
)
|
|
|
|
usagePercent, compact := shouldCompact(
|
|
contextTokens, contextLimit, config.ThresholdPercent,
|
|
)
|
|
if !compact {
|
|
return false, nil
|
|
}
|
|
|
|
// Publish the "Summarizing..." tool-call indicator so
|
|
// connected clients see activity during summary generation.
|
|
if config.PublishMessagePart != nil && config.ToolCallID != "" {
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleAssistant,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolCall,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
},
|
|
)
|
|
}
|
|
|
|
summary, err := generateCompactionSummary(
|
|
ctx, model, allMessages, config,
|
|
)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if summary == "" {
|
|
// Publish a tool-result error so connected clients
|
|
// see the compaction failure.
|
|
publishCompactionError(config, "compaction produced an empty summary")
|
|
return false, xerrors.New("compaction produced an empty summary")
|
|
}
|
|
|
|
systemSummary := strings.TrimSpace(
|
|
config.SystemSummaryPrefix + "\n\n" + summary,
|
|
)
|
|
|
|
err = config.Persist(ctx, CompactionResult{
|
|
SystemSummary: systemSummary,
|
|
SummaryReport: summary,
|
|
ThresholdPercent: config.ThresholdPercent,
|
|
UsagePercent: usagePercent,
|
|
ContextTokens: contextTokens,
|
|
ContextLimit: contextLimit,
|
|
})
|
|
if err != nil {
|
|
publishCompactionError(config, "failed to persist compaction result")
|
|
return false, xerrors.Errorf("persist compaction: %w", err)
|
|
}
|
|
|
|
// Publish the "Summarized" tool-result part so the client
|
|
// transitions from the in-progress indicator to the final
|
|
// state.
|
|
if config.PublishMessagePart != nil && config.ToolCallID != "" {
|
|
resultJSON, _ := json.Marshal(map[string]any{
|
|
"summary": summary,
|
|
"source": "automatic",
|
|
"threshold_percent": config.ThresholdPercent,
|
|
"usage_percent": usagePercent,
|
|
"context_tokens": contextTokens,
|
|
"context_limit_tokens": contextLimit,
|
|
})
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleTool,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolResult,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
Result: resultJSON,
|
|
},
|
|
)
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// publishCompactionError sends a tool-result error part so
|
|
// connected clients see that compaction failed.
|
|
func publishCompactionError(config CompactionOptions, msg string) {
|
|
if config.PublishMessagePart == nil || config.ToolCallID == "" {
|
|
return
|
|
}
|
|
errJSON, _ := json.Marshal(map[string]any{
|
|
"error": msg,
|
|
})
|
|
config.PublishMessagePart(
|
|
fantasy.MessageRoleTool,
|
|
codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolResult,
|
|
ToolCallID: config.ToolCallID,
|
|
ToolName: config.ToolName,
|
|
Result: errJSON,
|
|
IsError: true,
|
|
},
|
|
)
|
|
}
|
|
|
|
// normalizedCompactionConfig returns a copy of the compaction options
|
|
// with defaults applied. The bool is false when compaction is
|
|
// disabled (nil options, missing Persist callback, or threshold at
|
|
// 100%).
|
|
func normalizedCompactionConfig(opts *CompactionOptions) (CompactionOptions, bool) {
|
|
if opts == nil {
|
|
return CompactionOptions{}, false
|
|
}
|
|
|
|
config := *opts
|
|
if config.Persist == nil {
|
|
return CompactionOptions{}, false
|
|
}
|
|
if strings.TrimSpace(config.SummaryPrompt) == "" {
|
|
config.SummaryPrompt = defaultCompactionSummaryPrompt
|
|
}
|
|
if strings.TrimSpace(config.SystemSummaryPrefix) == "" {
|
|
config.SystemSummaryPrefix = defaultCompactionSystemSummaryPrefix
|
|
}
|
|
if config.Timeout <= 0 {
|
|
config.Timeout = defaultCompactionTimeout
|
|
}
|
|
if config.ThresholdPercent < minCompactionThresholdPercent ||
|
|
config.ThresholdPercent > maxCompactionThresholdPercent {
|
|
config.ThresholdPercent = defaultCompactionThresholdPercent
|
|
}
|
|
if config.ThresholdPercent == maxCompactionThresholdPercent {
|
|
return CompactionOptions{}, false
|
|
}
|
|
|
|
return config, true
|
|
}
|
|
|
|
// contextTokensFromUsage returns the total context token count from
|
|
// a step's usage report. It sums input, cache-read, and
|
|
// cache-creation tokens when available, falling back to TotalTokens
|
|
// if none of the granular fields are set.
|
|
func contextTokensFromUsage(usage fantasy.Usage) int64 {
|
|
total := int64(0)
|
|
hasContextTokens := false
|
|
|
|
if usage.InputTokens > 0 {
|
|
total += usage.InputTokens
|
|
hasContextTokens = true
|
|
}
|
|
if usage.CacheReadTokens > 0 {
|
|
total += usage.CacheReadTokens
|
|
hasContextTokens = true
|
|
}
|
|
if usage.CacheCreationTokens > 0 {
|
|
total += usage.CacheCreationTokens
|
|
hasContextTokens = true
|
|
}
|
|
if !hasContextTokens && usage.TotalTokens > 0 {
|
|
total = usage.TotalTokens
|
|
}
|
|
|
|
return total
|
|
}
|
|
|
|
// resolveContextLimit returns the first positive candidate, checking
// the metadata limit, then the configured limit, then the fallback.
// It returns 0 when no candidate is positive.
func resolveContextLimit(metadataLimit, configLimit, fallback int64) int64 {
	// Candidates are listed in priority order.
	for _, candidate := range [...]int64{metadataLimit, configLimit, fallback} {
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}
|
|
|
|
// shouldCompact computes contextTokens as a percentage of
// contextLimit and reports whether that percentage is at or above
// thresholdPercent. A zero or negative contextLimit yields
// (0, false).
func shouldCompact(contextTokens, contextLimit int64, thresholdPercent int32) (float64, bool) {
	if contextLimit > 0 {
		ratio := float64(contextTokens) / float64(contextLimit)
		percent := ratio * 100
		return percent, percent >= float64(thresholdPercent)
	}
	return 0, false
}
|
|
|
|
// generateCompactionSummary asks the model to summarize the
|
|
// conversation so far. The provided messages should contain the
|
|
// complete history (system prompt, user/assistant turns, tool
|
|
// results). A final user message with the summary prompt is appended
|
|
// before calling the model.
|
|
func generateCompactionSummary(
|
|
ctx context.Context,
|
|
model fantasy.LanguageModel,
|
|
messages []fantasy.Message,
|
|
options CompactionOptions,
|
|
) (string, error) {
|
|
summaryPrompt := make([]fantasy.Message, 0, len(messages)+1)
|
|
summaryPrompt = append(summaryPrompt, messages...)
|
|
summaryPrompt = append(summaryPrompt, fantasy.Message{
|
|
Role: fantasy.MessageRoleUser,
|
|
Content: []fantasy.MessagePart{
|
|
fantasy.TextPart{Text: options.SummaryPrompt},
|
|
},
|
|
})
|
|
toolChoice := fantasy.ToolChoiceNone
|
|
|
|
summaryCtx, cancel := context.WithTimeout(ctx, options.Timeout)
|
|
defer cancel()
|
|
|
|
response, err := model.Generate(summaryCtx, fantasy.Call{
|
|
Prompt: summaryPrompt,
|
|
ToolChoice: &toolChoice,
|
|
})
|
|
if err != nil {
|
|
return "", xerrors.Errorf("generate summary text: %w", err)
|
|
}
|
|
|
|
parts := make([]string, 0, len(response.Content))
|
|
for _, block := range response.Content {
|
|
textBlock, ok := fantasy.AsContentType[fantasy.TextContent](block)
|
|
if !ok {
|
|
continue
|
|
}
|
|
text := strings.TrimSpace(textBlock.Text)
|
|
if text == "" {
|
|
continue
|
|
}
|
|
parts = append(parts, text)
|
|
}
|
|
return strings.TrimSpace(strings.Join(parts, " ")), nil
|
|
}
|