mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
1013 lines
31 KiB
Go
1013 lines
31 KiB
Go
package chatloop //nolint:testpackage // Uses internal symbols.
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
"github.com/google/uuid"
|
|
"github.com/sqlc-dev/pqtype"
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/mock/gomock"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/database/dbmock"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chatdebug"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chattest"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
"github.com/coder/coder/v2/testutil"
|
|
)
|
|
|
|
func TestStartCompactionDebugRun_DoesNotReportDebugErrors(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
newParentContext := func(chatID uuid.UUID) context.Context {
|
|
return chatdebug.ContextWithRun(context.Background(), &chatdebug.RunContext{
|
|
RunID: uuid.New(),
|
|
ChatID: chatID,
|
|
RootChatID: uuid.New(),
|
|
ParentChatID: uuid.New(),
|
|
ModelConfigID: uuid.New(),
|
|
TriggerMessageID: 41,
|
|
HistoryTipMessageID: 42,
|
|
Kind: chatdebug.KindChatTurn,
|
|
Provider: "fake-provider",
|
|
Model: "fake-model",
|
|
})
|
|
}
|
|
|
|
t.Run("CreateRun", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
db := dbmock.NewMockStore(ctrl)
|
|
svc := chatdebug.NewService(db, testutil.Logger(t), nil)
|
|
chatID := uuid.New()
|
|
reportedErr := make(chan error, 1)
|
|
|
|
db.EXPECT().InsertChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.InsertChatDebugRunParams{}),
|
|
).Return(database.ChatDebugRun{}, xerrors.New("insert compaction debug run"))
|
|
|
|
ctx := newParentContext(chatID)
|
|
compactionCtx, finish := startCompactionDebugRun(ctx, CompactionOptions{
|
|
DebugSvc: svc,
|
|
ChatID: chatID,
|
|
OnError: func(err error) {
|
|
reportedErr <- err
|
|
},
|
|
})
|
|
require.Same(t, ctx, compactionCtx)
|
|
finish(nil)
|
|
select {
|
|
case err := <-reportedErr:
|
|
t.Fatalf("unexpected OnError callback: %v", err)
|
|
default:
|
|
}
|
|
})
|
|
|
|
t.Run("FinalizeRunAggregatesSummary", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
db := dbmock.NewMockStore(ctrl)
|
|
svc := chatdebug.NewService(db, testutil.Logger(t), nil)
|
|
chatID := uuid.New()
|
|
runID := uuid.New()
|
|
usageJSON, err := json.Marshal(fantasy.Usage{InputTokens: 7, OutputTokens: 3})
|
|
require.NoError(t, err)
|
|
attemptsJSON, err := json.Marshal([]chatdebug.Attempt{{
|
|
Status: "completed",
|
|
Method: "POST",
|
|
Path: "/v1/messages",
|
|
}})
|
|
require.NoError(t, err)
|
|
|
|
db.EXPECT().InsertChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.InsertChatDebugRunParams{}),
|
|
).Return(database.ChatDebugRun{ //nolint:exhaustruct // Test only needs IDs.
|
|
ID: runID,
|
|
ChatID: chatID,
|
|
}, nil)
|
|
db.EXPECT().GetChatDebugStepsByRunID(gomock.Any(), runID).Return([]database.ChatDebugStep{{
|
|
ID: uuid.New(),
|
|
RunID: runID,
|
|
ChatID: chatID,
|
|
Status: string(chatdebug.StatusCompleted),
|
|
Usage: pqtype.NullRawMessage{RawMessage: usageJSON, Valid: true},
|
|
Attempts: attemptsJSON,
|
|
}}, nil)
|
|
db.EXPECT().UpdateChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.UpdateChatDebugRunParams{}),
|
|
).DoAndReturn(func(_ context.Context, params database.UpdateChatDebugRunParams) (database.ChatDebugRun, error) {
|
|
require.Equal(t, chatID, params.ChatID)
|
|
require.Equal(t, runID, params.ID)
|
|
require.True(t, params.Summary.Valid)
|
|
require.JSONEq(t, `{"endpoint_label":"POST /v1/messages","step_count":1,"total_input_tokens":7,"total_output_tokens":3}`,
|
|
string(params.Summary.RawMessage))
|
|
return database.ChatDebugRun{ID: runID, ChatID: chatID}, nil
|
|
})
|
|
|
|
ctx := newParentContext(chatID)
|
|
compactionCtx, finish := startCompactionDebugRun(ctx, CompactionOptions{
|
|
DebugSvc: svc,
|
|
ChatID: chatID,
|
|
})
|
|
require.NotSame(t, ctx, compactionCtx)
|
|
finish(nil)
|
|
})
|
|
|
|
t.Run("FinalizeRun", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
db := dbmock.NewMockStore(ctrl)
|
|
svc := chatdebug.NewService(db, testutil.Logger(t), nil)
|
|
chatID := uuid.New()
|
|
reportedErr := make(chan error, 1)
|
|
runID := uuid.New()
|
|
|
|
db.EXPECT().InsertChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.InsertChatDebugRunParams{}),
|
|
).Return(database.ChatDebugRun{ //nolint:exhaustruct // Test only needs IDs.
|
|
ID: runID,
|
|
ChatID: chatID,
|
|
}, nil)
|
|
db.EXPECT().GetChatDebugStepsByRunID(gomock.Any(), runID).Return(nil, xerrors.New("aggregate compaction debug run"))
|
|
db.EXPECT().UpdateChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.UpdateChatDebugRunParams{}),
|
|
).Return(database.ChatDebugRun{}, xerrors.New("finalize compaction debug run"))
|
|
|
|
ctx := newParentContext(chatID)
|
|
compactionCtx, finish := startCompactionDebugRun(ctx, CompactionOptions{
|
|
DebugSvc: svc,
|
|
ChatID: chatID,
|
|
OnError: func(err error) {
|
|
reportedErr <- err
|
|
},
|
|
})
|
|
require.NotSame(t, ctx, compactionCtx)
|
|
finish(nil)
|
|
select {
|
|
case err := <-reportedErr:
|
|
t.Fatalf("unexpected OnError callback: %v", err)
|
|
default:
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestGenerateCompactionSummary_PanicFinalizesAsError verifies that a
|
|
// panic originating inside the model call during compaction is
|
|
// captured by the deferred debug-run finalizer so the run is recorded
|
|
// with StatusError rather than StatusCompleted. Without the recover
|
|
// hook the named `err` return is still nil when the defer fires and
|
|
// the row silently misclassifies the crash path.
|
|
func TestGenerateCompactionSummary_PanicFinalizesAsError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
db := dbmock.NewMockStore(ctrl)
|
|
svc := chatdebug.NewService(db, testutil.Logger(t), nil)
|
|
chatID := uuid.New()
|
|
runID := uuid.New()
|
|
|
|
status := make(chan string, 1)
|
|
|
|
db.EXPECT().InsertChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.InsertChatDebugRunParams{}),
|
|
).Return(database.ChatDebugRun{
|
|
ID: runID,
|
|
ChatID: chatID,
|
|
}, nil)
|
|
db.EXPECT().GetChatDebugStepsByRunID(gomock.Any(), runID).Return(nil, nil)
|
|
db.EXPECT().UpdateChatDebugRun(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(database.UpdateChatDebugRunParams{}),
|
|
).DoAndReturn(func(_ context.Context, params database.UpdateChatDebugRunParams) (database.ChatDebugRun, error) {
|
|
status <- params.Status.String
|
|
return database.ChatDebugRun{ID: runID, ChatID: chatID}, nil
|
|
})
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
panic("compaction model crash")
|
|
},
|
|
}
|
|
|
|
parentCtx := chatdebug.ContextWithRun(context.Background(), &chatdebug.RunContext{
|
|
RunID: uuid.New(),
|
|
ChatID: chatID,
|
|
ModelConfigID: uuid.New(),
|
|
TriggerMessageID: 1,
|
|
HistoryTipMessageID: 2,
|
|
Kind: chatdebug.KindChatTurn,
|
|
Provider: "fake",
|
|
Model: "fake-model",
|
|
})
|
|
|
|
require.PanicsWithValue(t, "compaction model crash", func() {
|
|
_, _ = generateCompactionSummary(parentCtx, model,
|
|
[]fantasy.Message{textMessage(fantasy.MessageRoleUser, "hello")},
|
|
CompactionOptions{
|
|
DebugSvc: svc,
|
|
ChatID: chatID,
|
|
SummaryPrompt: "summarize",
|
|
Timeout: time.Second,
|
|
})
|
|
})
|
|
|
|
select {
|
|
case s := <-status:
|
|
require.Equal(t, string(chatdebug.StatusError), s,
|
|
"panic path must finalize the debug run with StatusError")
|
|
case <-time.After(testutil.WaitShort):
|
|
t.Fatal("FinalizeRun never reached UpdateChatDebugRun on panic")
|
|
}
|
|
}
|
|
|
|
func TestRun_Compaction(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
t.Run("PersistsWhenThresholdReached", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
persistCompactionCalls := 0
|
|
var persistedCompaction CompactionResult
|
|
const summaryText = "summary text for compaction"
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "done"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
},
|
|
GenerateFn: func(_ context.Context, call fantasy.Call) (*fantasy.Response, error) {
|
|
require.NotEmpty(t, call.Prompt)
|
|
lastPrompt := call.Prompt[len(call.Prompt)-1]
|
|
require.Equal(t, fantasy.MessageRoleUser, lastPrompt.Role)
|
|
require.Len(t, lastPrompt.Content, 1)
|
|
|
|
instruction, ok := fantasy.AsMessagePart[fantasy.TextPart](lastPrompt.Content[0])
|
|
require.True(t, ok)
|
|
require.Equal(t, "summarize now", instruction.Text)
|
|
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 1,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, result CompactionResult) error {
|
|
persistCompactionCalls++
|
|
persistedCompaction = result
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
}, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
// Compaction fires twice: once inline when the threshold is
|
|
// reached on step 0 (the only step, since MaxSteps=1), and
|
|
// once from the post-run safety net during the re-entry
|
|
// iteration (where totalSteps already equals MaxSteps so the
|
|
// inner loop doesn't execute, but lastUsage still exceeds
|
|
// the threshold).
|
|
require.Equal(t, 2, persistCompactionCalls)
|
|
require.Contains(t, persistedCompaction.SystemSummary, summaryText)
|
|
require.Equal(t, summaryText, persistedCompaction.SummaryReport)
|
|
require.Equal(t, int64(80), persistedCompaction.ContextTokens)
|
|
require.Equal(t, int64(100), persistedCompaction.ContextLimit)
|
|
require.InDelta(t, 80.0, persistedCompaction.UsagePercent, 0.0001)
|
|
})
|
|
|
|
t.Run("PublishesPartsBeforeAndAfterPersist", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
const summaryText = "compaction summary for ordering test"
|
|
|
|
// Track the order of callbacks to verify the tool-call
|
|
// part publishes before Generate (summary generation)
|
|
// and the tool-result part publishes after Persist.
|
|
var callOrder []string
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "done"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
callOrder = append(callOrder, "generate")
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 1,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
ToolCallID: "test-tool-call-id",
|
|
ToolName: "chat_summarized",
|
|
PublishMessagePart: func(role codersdk.ChatMessageRole, part codersdk.ChatMessagePart) {
|
|
switch part.Type {
|
|
case codersdk.ChatMessagePartTypeToolCall:
|
|
callOrder = append(callOrder, "publish_tool_call")
|
|
case codersdk.ChatMessagePartTypeToolResult:
|
|
callOrder = append(callOrder, "publish_tool_result")
|
|
}
|
|
},
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
callOrder = append(callOrder, "persist")
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
}, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
// Compaction fires twice (see PersistsWhenThresholdReached
|
|
// for the full explanation). Each cycle follows the order:
|
|
// publish_tool_call → generate → persist → publish_tool_result.
|
|
require.Equal(t, []string{
|
|
"publish_tool_call",
|
|
"generate",
|
|
"persist",
|
|
"publish_tool_result",
|
|
"publish_tool_call",
|
|
"generate",
|
|
"persist",
|
|
"publish_tool_result",
|
|
}, callOrder)
|
|
})
|
|
|
|
t.Run("PublishNotCalledBelowThreshold", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
publishCalled := false
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 10,
|
|
},
|
|
},
|
|
}), nil
|
|
},
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 1,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
ToolCallID: "test-tool-call-id",
|
|
ToolName: "chat_summarized",
|
|
PublishMessagePart: func(_ codersdk.ChatMessageRole, _ codersdk.ChatMessagePart) {
|
|
publishCalled = true
|
|
},
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
return nil
|
|
},
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
require.False(t, publishCalled, "PublishMessagePart should not fire when usage is below threshold")
|
|
})
|
|
|
|
t.Run("MidLoopCompactionReloadsMessages", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var mu sync.Mutex
|
|
var streamCallCount int
|
|
persistCompactionCalls := 0
|
|
reloadCalls := 0
|
|
|
|
const summaryText = "compacted summary"
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
mu.Lock()
|
|
step := streamCallCount
|
|
streamCallCount++
|
|
mu.Unlock()
|
|
|
|
switch step {
|
|
case 0:
|
|
// Step 0: tool call with high usage (80/100 = 80% > 70%).
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeToolInputStart, ID: "tc-1", ToolCallName: "read_file"},
|
|
{Type: fantasy.StreamPartTypeToolInputDelta, ID: "tc-1", Delta: `{}`},
|
|
{Type: fantasy.StreamPartTypeToolInputEnd, ID: "tc-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeToolCall,
|
|
ID: "tc-1",
|
|
ToolCallName: "read_file",
|
|
ToolCallInput: `{}`,
|
|
},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonToolCalls,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
default:
|
|
// Step 1: text with low usage (30/100 = 30% < 70%).
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "done"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 30,
|
|
TotalTokens: 35,
|
|
},
|
|
},
|
|
}), nil
|
|
}
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
compactedMessages := []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleSystem, "compacted system"),
|
|
textMessage(fantasy.MessageRoleUser, "compacted user"),
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
Tools: []fantasy.AgentTool{
|
|
newNoopTool("read_file"),
|
|
},
|
|
MaxSteps: 5,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
persistCompactionCalls++
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
reloadCalls++
|
|
return compactedMessages, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// Compaction fired after step 0 (above threshold).
|
|
require.GreaterOrEqual(t, persistCompactionCalls, 1)
|
|
// ReloadMessages was called after mid-loop compaction.
|
|
require.GreaterOrEqual(t, reloadCalls, 1)
|
|
// Both steps ran (tool-call step + follow-up text step).
|
|
require.Equal(t, 2, streamCallCount)
|
|
})
|
|
|
|
t.Run("PostRunCompactionSkippedAfterMidLoop", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var mu sync.Mutex
|
|
var streamCallCount int
|
|
persistCompactionCalls := 0
|
|
|
|
const summaryText = "compacted summary for skip test"
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
mu.Lock()
|
|
step := streamCallCount
|
|
streamCallCount++
|
|
mu.Unlock()
|
|
|
|
switch step {
|
|
case 0:
|
|
// Step 0: tool call with high usage (80/100 = 80% > 70%).
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeToolInputStart, ID: "tc-1", ToolCallName: "read_file"},
|
|
{Type: fantasy.StreamPartTypeToolInputDelta, ID: "tc-1", Delta: `{}`},
|
|
{Type: fantasy.StreamPartTypeToolInputEnd, ID: "tc-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeToolCall,
|
|
ID: "tc-1",
|
|
ToolCallName: "read_file",
|
|
ToolCallInput: `{}`,
|
|
},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonToolCalls,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
default:
|
|
// Step 1: text with low usage (20/100 = 20% < 70%).
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "done"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 20,
|
|
TotalTokens: 25,
|
|
},
|
|
},
|
|
}), nil
|
|
}
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
compactedMessages := []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleSystem, "compacted system"),
|
|
textMessage(fantasy.MessageRoleUser, "compacted user"),
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
Tools: []fantasy.AgentTool{
|
|
newNoopTool("read_file"),
|
|
},
|
|
MaxSteps: 5,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
persistCompactionCalls++
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return compactedMessages, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// Only mid-loop compaction fires after step 0. The post-run
|
|
// safety net is skipped because alreadyCompacted is true.
|
|
require.Equal(t, 1, persistCompactionCalls)
|
|
})
|
|
|
|
t.Run("ErrorsAreReported", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
},
|
|
},
|
|
}), nil
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return nil, xerrors.New("generate failed")
|
|
},
|
|
}
|
|
|
|
compactionErr := xerrors.New("unset")
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 1,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
return nil
|
|
},
|
|
OnError: func(err error) {
|
|
compactionErr = err
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
}, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
require.Error(t, compactionErr)
|
|
require.ErrorContains(t, compactionErr, "generate summary text")
|
|
})
|
|
|
|
t.Run("PostRunCompactionReEntersStepLoop", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// When post-run compaction fires (no mid-loop compaction)
|
|
// and ReloadMessages is provided, Run should re-enter the
|
|
// step loop with the reloaded messages so the agent
|
|
// continues working.
|
|
|
|
var mu sync.Mutex
|
|
var streamCallCount int
|
|
persistCompactionCalls := 0
|
|
reloadCalls := 0
|
|
|
|
const summaryText = "post-run compacted summary"
|
|
|
|
compactedMessages := []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleSystem, "compacted system"),
|
|
textMessage(fantasy.MessageRoleUser, "compacted user"),
|
|
}
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
mu.Lock()
|
|
step := streamCallCount
|
|
streamCallCount++
|
|
mu.Unlock()
|
|
|
|
switch step {
|
|
case 0:
|
|
// First turn: text-only response with high usage.
|
|
// No tool calls, so shouldContinue = false and
|
|
// the inner step loop breaks. Compaction should
|
|
// fire, then the outer loop re-enters.
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "initial response"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
default:
|
|
// Second turn (after compaction re-entry):
|
|
// text-only with low usage — should finish.
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-2"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-2", Delta: "continued after compaction"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-2"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 20,
|
|
TotalTokens: 25,
|
|
},
|
|
},
|
|
}), nil
|
|
}
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 5,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
persistCompactionCalls++
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
reloadCalls++
|
|
return compactedMessages, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// Compaction fired on the final step of the first pass.
|
|
// The inline path fires (ReloadMessages is set) and then
|
|
// the outer loop re-enters. On the second pass the usage
|
|
// is below threshold so no further compaction occurs.
|
|
require.GreaterOrEqual(t, persistCompactionCalls, 1)
|
|
// ReloadMessages was called (inline + re-entry).
|
|
require.GreaterOrEqual(t, reloadCalls, 1)
|
|
// Two stream calls: one before compaction, one after re-entry.
|
|
require.Equal(t, 2, streamCallCount)
|
|
})
|
|
|
|
t.Run("PostRunCompactionReEntryIncludesUserSummary", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// After compaction the summary is stored as a user-role
|
|
// message. When the loop re-enters, the reloaded prompt
|
|
// must contain this user message so the LLM provider
|
|
// receives a valid prompt (providers like Anthropic
|
|
// require at least one non-system message).
|
|
|
|
var mu sync.Mutex
|
|
var streamCallCount int
|
|
var reEntryPrompt []fantasy.Message
|
|
persistCompactionCalls := 0
|
|
|
|
const summaryText = "post-run compacted summary"
|
|
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, call fantasy.Call) (fantasy.StreamResponse, error) {
|
|
mu.Lock()
|
|
step := streamCallCount
|
|
streamCallCount++
|
|
mu.Unlock()
|
|
|
|
switch step {
|
|
case 0:
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "initial response"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
default:
|
|
mu.Lock()
|
|
reEntryPrompt = append([]fantasy.Message(nil), call.Prompt...)
|
|
mu.Unlock()
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeTextStart, ID: "text-2"},
|
|
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-2", Delta: "continued"},
|
|
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-2"},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonStop,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 20,
|
|
TotalTokens: 25,
|
|
},
|
|
},
|
|
}), nil
|
|
}
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
// Simulate real post-compaction DB state: the summary is
|
|
// a user-role message (the only non-system content).
|
|
compactedMessages := []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleSystem, "system prompt"),
|
|
textMessage(fantasy.MessageRoleUser, "Summary of earlier chat context:\n\ncompacted summary"),
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 5,
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, _ CompactionResult) error {
|
|
persistCompactionCalls++
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return compactedMessages, nil
|
|
},
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
require.GreaterOrEqual(t, persistCompactionCalls, 1)
|
|
// Re-entry happened: stream was called at least twice.
|
|
require.Equal(t, 2, streamCallCount)
|
|
// The re-entry prompt must contain the user summary.
|
|
require.NotEmpty(t, reEntryPrompt)
|
|
hasUser := false
|
|
for _, msg := range reEntryPrompt {
|
|
if msg.Role == fantasy.MessageRoleUser {
|
|
hasUser = true
|
|
break
|
|
}
|
|
}
|
|
require.True(t, hasUser, "re-entry prompt must contain a user message (the compaction summary)")
|
|
})
|
|
|
|
t.Run("TriggersOnDynamicToolExit", func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var persistCompactionCalls int
|
|
const summaryText = "compaction summary for dynamic tool exit"
|
|
|
|
// The LLM calls a dynamic tool. Usage is above the
|
|
// compaction threshold so compaction should fire even
|
|
// though the chatloop exits via ErrDynamicToolCall.
|
|
model := &chattest.FakeModel{
|
|
ProviderName: "fake",
|
|
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
|
return streamFromParts([]fantasy.StreamPart{
|
|
{Type: fantasy.StreamPartTypeToolInputStart, ID: "tc-1", ToolCallName: "my_dynamic_tool"},
|
|
{Type: fantasy.StreamPartTypeToolInputDelta, ID: "tc-1", Delta: `{"query": "test"}`},
|
|
{Type: fantasy.StreamPartTypeToolInputEnd, ID: "tc-1"},
|
|
{
|
|
Type: fantasy.StreamPartTypeToolCall,
|
|
ID: "tc-1",
|
|
ToolCallName: "my_dynamic_tool",
|
|
ToolCallInput: `{"query": "test"}`,
|
|
},
|
|
{
|
|
Type: fantasy.StreamPartTypeFinish,
|
|
FinishReason: fantasy.FinishReasonToolCalls,
|
|
Usage: fantasy.Usage{
|
|
InputTokens: 80,
|
|
TotalTokens: 85,
|
|
},
|
|
},
|
|
}), nil
|
|
},
|
|
GenerateFn: func(_ context.Context, _ fantasy.Call) (*fantasy.Response, error) {
|
|
return &fantasy.Response{
|
|
Content: []fantasy.Content{
|
|
fantasy.TextContent{Text: summaryText},
|
|
},
|
|
}, nil
|
|
},
|
|
}
|
|
|
|
err := Run(context.Background(), RunOptions{
|
|
Model: model,
|
|
Messages: []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
},
|
|
MaxSteps: 5,
|
|
DynamicToolNames: map[string]bool{"my_dynamic_tool": true},
|
|
PersistStep: func(_ context.Context, _ PersistedStep) error {
|
|
return nil
|
|
},
|
|
ContextLimitFallback: 100,
|
|
Compaction: &CompactionOptions{
|
|
ThresholdPercent: 70,
|
|
SummaryPrompt: "summarize now",
|
|
Persist: func(_ context.Context, result CompactionResult) error {
|
|
persistCompactionCalls++
|
|
require.Contains(t, result.SystemSummary, summaryText)
|
|
return nil
|
|
},
|
|
},
|
|
ReloadMessages: func(_ context.Context) ([]fantasy.Message, error) {
|
|
return []fantasy.Message{
|
|
textMessage(fantasy.MessageRoleUser, "hello"),
|
|
}, nil
|
|
},
|
|
})
|
|
require.ErrorIs(t, err, ErrDynamicToolCall)
|
|
require.Equal(t, 1, persistCompactionCalls,
|
|
"compaction must fire before dynamic tool exit")
|
|
})
|
|
}
|