coder/coderd/x/chatd/chatloop/compaction.go

package chatloop

import (
	"context"
	"encoding/json"
	"strings"
	"time"

	"charm.land/fantasy"
	"github.com/google/uuid"
	"golang.org/x/xerrors"

	"github.com/coder/coder/v2/coderd/x/chatd/chatdebug"
	"github.com/coder/coder/v2/codersdk"
)

const (
	// Bounds and default for CompactionOptions.ThresholdPercent. A
	// configured value outside [min, max] is replaced with the
	// default; a value equal to the max disables compaction entirely
	// (see normalizedCompactionConfig).
	defaultCompactionThresholdPercent = int32(70)
	minCompactionThresholdPercent     = int32(0)
	maxCompactionThresholdPercent     = int32(100)

	// compactionDebugCreateRunTimeout caps the compaction debug
	// CreateRun budget so a slow or locked DB cannot consume the
	// compaction's configured Timeout and cause model.Generate to
	// fail with deadline exceeded. Debug instrumentation is
	// best-effort; running without the debug row is preferable to
	// failing the compaction.
	compactionDebugCreateRunTimeout = 5 * time.Second

	// defaultCompactionSummaryPrompt is the instruction appended as
	// the final user message of the summary request when
	// CompactionOptions.SummaryPrompt is blank.
	defaultCompactionSummaryPrompt = "You are performing a context compaction. " +
		"Summarize the conversation so a new assistant can seamlessly " +
		"continue the work in progress.\n\n" +
		"Include:\n" +
		"- The user's overall goal and current task\n" +
		"- Key decisions made and their rationale\n" +
		"- Concrete technical details: file paths, function names, " +
		"commands, APIs, and configurations\n" +
		"- Errors encountered and how they were resolved. Keep error " +
		"notes specific: name the file, the error, and the fix. Do not " +
		"generalize from a specific failure to a blanket tool-avoidance " +
		"rule (e.g. \"tool X is unreliable\" or \"always use Y instead " +
		"of Z\")\n" +
		"- Current state of the work: what is DONE, what is IN PROGRESS, " +
		"and what REMAINS to be done\n" +
		"- The specific action the assistant was performing or about to " +
		"perform when this summary was triggered\n\n" +
		"Be dense and factual. Every sentence should convey essential " +
		"context for continuation. Do not include pleasantries or " +
		"conversational filler. For content that can be reproduced " +
		"(repo files, command output, API responses), reference how to " +
		"obtain it (file path, command, URL) rather than inlining the " +
		"full content. Include brief inline summaries when the content " +
		"itself would exceed a few lines."
	// defaultCompactionSystemSummaryPrefix is placed in front of the
	// generated summary (separated by a blank line) when
	// CompactionOptions.SystemSummaryPrefix is blank.
	defaultCompactionSystemSummaryPrefix = "The following is a summary of " +
		"the earlier conversation. The assistant was actively working when " +
		"the context was compacted. Continue the work described below:"
	// defaultCompactionTimeout bounds summary generation when
	// CompactionOptions.Timeout is zero or negative.
	defaultCompactionTimeout = 90 * time.Second
)

// CompactionOptions configures automatic context compaction.
//
// Field behavior as applied by this file:
//   - ThresholdPercent: usage percentage at or above which compaction
//     runs. Values outside [0, 100] fall back to the default; exactly
//     100 disables compaction.
//   - ContextLimit: configured context-window size in tokens, used
//     only when the step metadata does not report a positive limit.
//   - SummaryPrompt: text appended as the final user message of the
//     summary request; blank selects the default prompt.
//   - SystemSummaryPrefix: text placed before the generated summary;
//     blank selects the default prefix.
//   - Timeout: bound on summary generation; non-positive selects the
//     default timeout.
//   - Persist: stores the compaction result. Compaction is disabled
//     when it is nil. It is called with a context that is detached
//     from the caller's cancellation.
//   - DebugSvc, ChatID: when DebugSvc is nil or ChatID is uuid.Nil no
//     compaction debug run is recorded.
//   - HistoryTipMessageID: recorded on the debug run; when zero, the
//     parent run's value from the context is used instead.
//   - OnError: not invoked by the code in this file.
//     NOTE(review): presumably called by the chat loop when
//     compaction fails; confirm against the caller.
type CompactionOptions struct {
	ThresholdPercent    int32
	ContextLimit        int64
	SummaryPrompt       string
	SystemSummaryPrefix string
	Timeout             time.Duration
	Persist             func(context.Context, CompactionResult) error
	DebugSvc            *chatdebug.Service
	ChatID              uuid.UUID
	HistoryTipMessageID int64

	// ToolCallID and ToolName identify the synthetic tool call
	// used to represent compaction in the message stream. When
	// ToolCallID is empty no streaming parts are published.
	ToolCallID string
	ToolName   string

	// PublishMessagePart publishes streaming parts to connected
	// clients so they see "Summarizing..." / "Summarized" UI
	// transitions during compaction. May be nil, in which case
	// nothing is published.
	PublishMessagePart func(codersdk.ChatMessageRole, codersdk.ChatMessagePart)

	OnError func(error)
}

// CompactionResult is the outcome of a successful compaction, handed
// to CompactionOptions.Persist.
//
//   - SystemSummary: the system summary prefix, a blank line, and the
//     generated summary, with surrounding whitespace trimmed.
//   - SummaryReport: the generated summary on its own.
//   - ThresholdPercent: the normalized threshold that was applied.
//   - UsagePercent: ContextTokens / ContextLimit * 100 at trigger time.
//   - ContextTokens: context token count derived from the step usage.
//   - ContextLimit: the resolved context-window limit in tokens.
type CompactionResult struct {
	SystemSummary    string
	SummaryReport    string
	ThresholdPercent int32
	UsagePercent     float64
	ContextTokens    int64
	ContextLimit     int64
}

// tryCompact checks whether context usage has reached the compaction
// threshold and, if so, generates and persists a summary.
//
// It returns (true, nil) when compaction was performed, (false, nil)
// when compaction is disabled or not needed (no usable token count,
// no resolvable context limit, or usage below the threshold), and
// (false, err) on failure.
//
// When a publisher and tool call ID are configured, a synthetic tool
// call is published before summary generation starts. Every exit path
// after that point publishes a matching tool result (success or
// error) so connected clients are never left on the in-progress
// indicator.
func tryCompact(
	ctx context.Context,
	model fantasy.LanguageModel,
	compaction *CompactionOptions,
	contextLimitFallback int64,
	stepUsage fantasy.Usage,
	stepMetadata fantasy.ProviderMetadata,
	allMessages []fantasy.Message,
) (bool, error) {
	config, ok := normalizedCompactionConfig(compaction)
	if !ok {
		return false, nil
	}

	contextTokens := contextTokensFromUsage(stepUsage)
	if contextTokens <= 0 {
		return false, nil
	}

	// Limit priority: provider metadata, then configured limit, then
	// the caller-supplied fallback.
	metadataLimit := extractContextLimit(stepMetadata)
	contextLimit := resolveContextLimit(
		metadataLimit.Int64,
		config.ContextLimit,
		contextLimitFallback,
	)

	usagePercent, compact := shouldCompact(
		contextTokens, contextLimit, config.ThresholdPercent,
	)
	if !compact {
		return false, nil
	}

	// Publish the "Summarizing..." tool-call indicator so
	// connected clients see activity during summary generation.
	if config.PublishMessagePart != nil && config.ToolCallID != "" {
		config.PublishMessagePart(
			codersdk.ChatMessageRoleAssistant,
			codersdk.ChatMessageToolCall(config.ToolCallID, config.ToolName, nil),
		)
	}

	summary, err := generateCompactionSummary(
		ctx, model, allMessages, config,
	)
	if err != nil {
		// The in-progress indicator has already been published, so
		// close it out with a tool-result error just like the other
		// failure paths below.
		publishCompactionError(config, "failed to generate compaction summary")
		return false, err
	}
	if summary == "" {
		// Publish a tool-result error so connected clients
		// see the compaction failure.
		publishCompactionError(config, "compaction produced an empty summary")
		return false, xerrors.New("compaction produced an empty summary")
	}

	systemSummary := strings.TrimSpace(
		config.SystemSummaryPrefix + "\n\n" + summary,
	)

	// Persist with a context detached from the caller's cancellation
	// so a summary that was already generated can still be stored.
	persistCtx := context.WithoutCancel(ctx)
	err = config.Persist(persistCtx, CompactionResult{
		SystemSummary:    systemSummary,
		SummaryReport:    summary,
		ThresholdPercent: config.ThresholdPercent,
		UsagePercent:     usagePercent,
		ContextTokens:    contextTokens,
		ContextLimit:     contextLimit,
	})
	if err != nil {
		publishCompactionError(config, "failed to persist compaction result")
		return false, xerrors.Errorf("persist compaction: %w", err)
	}

	// Publish the "Summarized" tool-result part so the client
	// transitions from the in-progress indicator to the final
	// state.
	if config.PublishMessagePart != nil && config.ToolCallID != "" {
		// The payload holds only strings and finite numbers
		// (contextLimit is positive here), so Marshal cannot fail.
		resultJSON, _ := json.Marshal(map[string]any{
			"summary":              summary,
			"source":               "automatic",
			"threshold_percent":    config.ThresholdPercent,
			"usage_percent":        usagePercent,
			"context_tokens":       contextTokens,
			"context_limit_tokens": contextLimit,
		})
		config.PublishMessagePart(
			codersdk.ChatMessageRoleTool,
			codersdk.ChatMessageToolResult(config.ToolCallID, config.ToolName, resultJSON, false, false),
		)
	}

	return true, nil
}

// publishCompactionError reports a failed compaction to connected
// clients as an error tool result carrying {"error": msg}. It does
// nothing when no publisher or no tool call ID is configured.
func publishCompactionError(config CompactionOptions, msg string) {
	publish, callID := config.PublishMessagePart, config.ToolCallID
	if publish != nil && callID != "" {
		// A map with a single string value always marshals.
		payload, _ := json.Marshal(map[string]any{"error": msg})
		part := codersdk.ChatMessageToolResult(callID, config.ToolName, payload, true, false)
		publish(codersdk.ChatMessageRoleTool, part)
	}
}

// normalizedCompactionConfig copies opts and fills in defaults for a
// blank summary prompt, a blank system summary prefix, a non-positive
// timeout, and an out-of-range threshold. The returned bool is false,
// together with a zero CompactionOptions, when compaction is
// disabled: opts is nil, Persist is nil, or the threshold is 100%.
func normalizedCompactionConfig(opts *CompactionOptions) (CompactionOptions, bool) {
	var disabled CompactionOptions
	if opts == nil || opts.Persist == nil {
		return disabled, false
	}

	// Settle the threshold first; it is the only remaining setting
	// that can disable compaction.
	threshold := opts.ThresholdPercent
	outOfRange := threshold < minCompactionThresholdPercent ||
		threshold > maxCompactionThresholdPercent
	if outOfRange {
		threshold = defaultCompactionThresholdPercent
	}
	if threshold == maxCompactionThresholdPercent {
		return disabled, false
	}

	normalized := *opts
	normalized.ThresholdPercent = threshold
	if strings.TrimSpace(normalized.SummaryPrompt) == "" {
		normalized.SummaryPrompt = defaultCompactionSummaryPrompt
	}
	if strings.TrimSpace(normalized.SystemSummaryPrefix) == "" {
		normalized.SystemSummaryPrefix = defaultCompactionSystemSummaryPrefix
	}
	if normalized.Timeout <= 0 {
		normalized.Timeout = defaultCompactionTimeout
	}
	return normalized, true
}

// contextTokensFromUsage derives the context size, in tokens, from a
// step's usage report. Positive input, cache-read, and cache-creation
// counts are added together; if none of them is positive, a positive
// TotalTokens is used instead. The result is 0 when the report has no
// positive counts at all.
func contextTokensFromUsage(usage fantasy.Usage) int64 {
	granular := [...]int64{
		usage.InputTokens,
		usage.CacheReadTokens,
		usage.CacheCreationTokens,
	}

	var sum int64
	sawGranular := false
	for _, count := range granular {
		if count <= 0 {
			continue
		}
		sum += count
		sawGranular = true
	}

	if sawGranular || usage.TotalTokens <= 0 {
		return sum
	}
	return usage.TotalTokens
}

// resolveContextLimit returns the first positive candidate, checking
// the metadata limit, then the configured limit, then the fallback.
// It returns 0 when no candidate is positive.
func resolveContextLimit(metadataLimit, configLimit, fallback int64) int64 {
	for _, candidate := range [...]int64{metadataLimit, configLimit, fallback} {
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}

// shouldCompact computes context usage as a percentage of the limit
// and reports whether that usage is at or above thresholdPercent.
// A non-positive contextLimit yields (0, false).
func shouldCompact(contextTokens, contextLimit int64, thresholdPercent int32) (float64, bool) {
	if contextLimit > 0 {
		ratio := float64(contextTokens) / float64(contextLimit)
		usagePercent := ratio * 100
		reached := usagePercent >= float64(thresholdPercent)
		return usagePercent, reached
	}
	return 0, false
}

// startCompactionDebugRun records a compaction debug run as a child
// of the debug run found in ctx. It returns a context carrying the
// new run and a finalizer that must be called with the compaction's
// outcome.
//
// Debug instrumentation is best-effort. When no debug service or
// chat ID is configured, when ctx carries no parent run, or when
// creating the run fails, ctx is returned unchanged together with a
// no-op finalizer. Errors from finalizing are discarded.
func startCompactionDebugRun(
	ctx context.Context,
	options CompactionOptions,
) (context.Context, func(error)) {
	if options.DebugSvc == nil || options.ChatID == uuid.Nil {
		return ctx, func(error) {}
	}

	parentRun, ok := chatdebug.RunFromContext(ctx)
	if !ok {
		return ctx, func(error) {}
	}

	// An explicitly configured history tip wins; otherwise inherit
	// the parent run's.
	historyTipMessageID := options.HistoryTipMessageID
	if historyTipMessageID == 0 {
		historyTipMessageID = parentRun.HistoryTipMessageID
	}

	// Use a separate short-lived context for the debug insert so a
	// slow or locked DB cannot consume the compaction timeout budget
	// and turn debug slowness into a compaction failure via
	// model.Generate hitting a deadline exceeded. Detached from the
	// parent so cancellation of the compaction run still lets the
	// insert reach a terminal state, matching the best-effort
	// contract of debug instrumentation.
	createRunCtx, createRunCancel := context.WithTimeout(
		context.WithoutCancel(ctx), compactionDebugCreateRunTimeout,
	)
	run, err := options.DebugSvc.CreateRun(createRunCtx, chatdebug.CreateRunParams{
		ChatID:              options.ChatID,
		RootChatID:          parentRun.RootChatID,
		ParentChatID:        parentRun.ParentChatID,
		ModelConfigID:       parentRun.ModelConfigID,
		TriggerMessageID:    parentRun.TriggerMessageID,
		HistoryTipMessageID: historyTipMessageID,
		Kind:                chatdebug.KindCompaction,
		Status:              chatdebug.StatusInProgress,
		Provider:            parentRun.Provider,
		Model:               parentRun.Model,
	})
	createRunCancel()
	if err != nil {
		// Debug instrumentation must not surface as a compaction failure.
		return ctx, func(error) {}
	}

	compactionCtx := chatdebug.ContextWithRun(ctx, &chatdebug.RunContext{
		RunID:               run.ID,
		ChatID:              options.ChatID,
		RootChatID:          parentRun.RootChatID,
		ParentChatID:        parentRun.ParentChatID,
		ModelConfigID:       parentRun.ModelConfigID,
		TriggerMessageID:    parentRun.TriggerMessageID,
		HistoryTipMessageID: historyTipMessageID,
		Kind:                chatdebug.KindCompaction,
		Provider:            parentRun.Provider,
		Model:               parentRun.Model,
	})

	return compactionCtx, func(runErr error) {
		status := chatdebug.ClassifyError(runErr)
		if runErr != nil && xerrors.Is(runErr, ErrInterrupted) {
			status = chatdebug.StatusInterrupted
		}

		// compactionCtx inherits the compaction deadline and the
		// caller's cancellation. When the run ended because of a
		// timeout or an interruption that context is already done,
		// and finalizing with it may leave the run stuck in progress
		// in exactly the cases worth recording. Detach from
		// cancellation (context values are kept) and bound the write
		// with the same short budget used for the insert.
		finalizeCtx, finalizeCancel := context.WithTimeout(
			context.WithoutCancel(compactionCtx), compactionDebugCreateRunTimeout,
		)
		defer finalizeCancel()

		// Debug instrumentation must not surface as a compaction failure.
		_ = options.DebugSvc.FinalizeRun(finalizeCtx, chatdebug.FinalizeRunParams{
			RunID:  run.ID,
			ChatID: options.ChatID,
			Status: status,
		})
	}
}

// generateCompactionSummary requests a summary of the conversation
// from the model. messages should hold the full history (system
// prompt, user/assistant turns, tool results); it is copied and the
// configured summary prompt is appended as a final user message, so
// the caller's slice is not modified.
//
// The call is bounded by options.Timeout and is wrapped in a
// best-effort compaction debug run. The returned summary is the
// whitespace-trimmed text blocks of the response joined with single
// spaces; it may be empty. A model error is returned wrapped.
func generateCompactionSummary(
	ctx context.Context,
	model fantasy.LanguageModel,
	messages []fantasy.Message,
	options CompactionOptions,
) (summary string, err error) {
	request := fantasy.Message{
		Role: fantasy.MessageRoleUser,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: options.SummaryPrompt},
		},
	}
	prompt := append(make([]fantasy.Message, 0, len(messages)+1), messages...)
	prompt = append(prompt, request)

	timeoutCtx, stop := context.WithTimeout(ctx, options.Timeout)
	defer stop()

	runCtx, finalize := startCompactionDebugRun(timeoutCtx, options)
	defer func() {
		// Runs before stop(), and observes the named err result.
		// On a panic err is still nil, so finalizing with it would
		// record the crash as a success; report the panic to the
		// debug run instead and then re-raise the original value
		// for the caller's own recovery.
		recovered := recover()
		if recovered == nil {
			finalize(err)
			return
		}
		finalize(xerrors.Errorf("panic during compaction summary: %v", recovered))
		panic(recovered)
	}()

	noTools := fantasy.ToolChoiceNone
	response, err := model.Generate(runCtx, fantasy.Call{
		Prompt:     prompt,
		ToolChoice: &noTools,
	})
	if err != nil {
		return "", xerrors.Errorf("generate summary text: %w", err)
	}

	// Keep only non-blank text blocks; other content types are ignored.
	fragments := make([]string, 0, len(response.Content))
	for _, block := range response.Content {
		if textBlock, isText := fantasy.AsContentType[fantasy.TextContent](block); isText {
			if trimmed := strings.TrimSpace(textBlock.Text); trimmed != "" {
				fragments = append(fragments, trimmed)
			}
		}
	}
	return strings.TrimSpace(strings.Join(fragments, " ")), nil
}