mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix(coderd/x/chatd/chatadvisor): truncate oversized advisor questions (#25489)
Advisor tool calls currently reject questions over 2000 runes, which can leave the parent model retrying the same invalid call. This documents the limit in the advisor tool schema and guidance, then truncates oversized questions rune-safely before building the nested advisor prompt. > Mux working on behalf of Mike.
This commit is contained in:
@@ -19,6 +19,7 @@ Use the built-in advisor tool when you need strategic guidance on planning
|
||||
ambiguity, architectural tradeoffs, debugging strategy, or repeated failures.
|
||||
The advisor sees recent conversation context, runs as a single-step nested model
|
||||
call with no tools, and returns concise guidance for the parent agent rather
|
||||
than the end user.
|
||||
than the end user. Provide a brief question, no more than 2000 runes. Summarize
|
||||
context instead of pasting long logs or transcripts.
|
||||
</advisor-guidance>`
|
||||
)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"charm.land/fantasy"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
stringutil "github.com/coder/coder/v2/coderd/util/strings"
|
||||
"github.com/coder/coder/v2/coderd/x/chatd/chatloop"
|
||||
"github.com/coder/coder/v2/coderd/x/chatd/chatretry"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
@@ -29,9 +30,11 @@ func (rt *Runtime) RunAdvisor(
|
||||
) (AdvisorResult, error) {
|
||||
// Model, MaxUsesPerRun, and MaxOutputTokens are validated by NewRuntime.
|
||||
// Runtime fields are unexported so callers cannot bypass that.
|
||||
if strings.TrimSpace(question) == "" {
|
||||
question = strings.TrimSpace(question)
|
||||
if question == "" {
|
||||
return AdvisorResult{}, xerrors.New("advisor question is required")
|
||||
}
|
||||
question = stringutil.Truncate(question, advisorQuestionMaxRunes)
|
||||
|
||||
if !rt.tryAcquire() {
|
||||
return AdvisorResult{
|
||||
|
||||
@@ -63,6 +63,37 @@ func TestAdvisorRunAdvice(t *testing.T) {
|
||||
require.Equal(t, question, singleText(t, capturedCall.Prompt[len(capturedCall.Prompt)-1]))
|
||||
}
|
||||
|
||||
func TestAdvisorRunTruncatesLongQuestion(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var capturedQuestion string
|
||||
runtime, err := chatadvisor.NewRuntime(chatadvisor.RuntimeConfig{
|
||||
Model: &chattest.FakeModel{
|
||||
ProviderName: "test-provider",
|
||||
ModelName: "test-model",
|
||||
StreamFn: func(_ context.Context, call fantasy.Call) (fantasy.StreamResponse, error) {
|
||||
require.NotEmpty(t, call.Prompt)
|
||||
capturedQuestion = singleText(t, call.Prompt[len(call.Prompt)-1])
|
||||
return streamFromParts([]fantasy.StreamPart{
|
||||
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
||||
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "Use the smaller diff."},
|
||||
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
||||
{Type: fantasy.StreamPartTypeFinish, FinishReason: fantasy.FinishReasonStop},
|
||||
}), nil
|
||||
},
|
||||
},
|
||||
MaxUsesPerRun: 1,
|
||||
MaxOutputTokens: 128,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
question := strings.Repeat("界", 2001)
|
||||
result, err := runtime.RunAdvisor(t.Context(), question, nil, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, chatadvisor.ResultTypeAdvice, result.Type)
|
||||
require.Equal(t, strings.Repeat("界", 2000), capturedQuestion)
|
||||
}
|
||||
|
||||
func TestAdvisorRunStreamsAdviceDeltas(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -3,9 +3,7 @@ package chatadvisor
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"charm.land/fantasy"
|
||||
)
|
||||
@@ -34,7 +32,7 @@ type ToolOptions struct {
|
||||
func Tool(opts ToolOptions) fantasy.AgentTool {
|
||||
return fantasy.NewAgentTool(
|
||||
ToolName,
|
||||
"Ask a separate advisor pass for strategic guidance about planning, architecture, tradeoffs, or debugging strategy. Provide a brief question. The advisor sees recent conversation context, runs without tools for a single step, and responds to the parent agent rather than the end user.",
|
||||
"Ask a separate advisor pass for strategic guidance about planning, architecture, tradeoffs, or debugging strategy. Provide a brief question of 2000 runes or fewer, summarizing context instead of pasting long logs or transcripts. The advisor sees recent conversation context, runs without tools for a single step, and responds to the parent agent rather than the end user.",
|
||||
func(ctx context.Context, args AdvisorArgs, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
|
||||
if opts.Runtime == nil {
|
||||
return fantasy.NewTextErrorResponse("advisor runtime is not configured"), nil
|
||||
@@ -47,11 +45,6 @@ func Tool(opts ToolOptions) fantasy.AgentTool {
|
||||
if question == "" {
|
||||
return fantasy.NewTextErrorResponse("question is required"), nil
|
||||
}
|
||||
if utf8.RuneCountInString(question) > advisorQuestionMaxRunes {
|
||||
return fantasy.NewTextErrorResponse(
|
||||
fmt.Sprintf("question must be %d runes or fewer", advisorQuestionMaxRunes),
|
||||
), nil
|
||||
}
|
||||
|
||||
var runOpts *RunAdvisorOptions
|
||||
if call.ID != "" && (opts.PublishAdviceDelta != nil || opts.PublishAdviceReset != nil) {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"charm.land/fantasy"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -193,19 +194,61 @@ func TestAdvisorToolRejectsEmptyQuestion(t *testing.T) {
|
||||
require.Contains(t, resp.Content, "question is required")
|
||||
}
|
||||
|
||||
func TestAdvisorToolRejectsLongQuestion(t *testing.T) {
|
||||
func TestAdvisorToolPassesNormalQuestion(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var capturedQuestion string
|
||||
tool := advisorToolCapturingQuestion(t, &capturedQuestion)
|
||||
|
||||
resp := runAdvisorTool(t, tool, chatadvisor.AdvisorArgs{Question: "What's safest?"})
|
||||
require.False(t, resp.IsError)
|
||||
require.Equal(t, "What's safest?", capturedQuestion)
|
||||
}
|
||||
|
||||
func TestAdvisorToolPreservesQuestionAtLimit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var capturedQuestion string
|
||||
tool := advisorToolCapturingQuestion(t, &capturedQuestion)
|
||||
question := strings.Repeat("界", 2000)
|
||||
|
||||
resp := runAdvisorTool(t, tool, chatadvisor.AdvisorArgs{Question: question})
|
||||
require.False(t, resp.IsError)
|
||||
require.Equal(t, 2000, utf8.RuneCountInString(capturedQuestion))
|
||||
require.Equal(t, question, capturedQuestion)
|
||||
}
|
||||
|
||||
func TestAdvisorToolTruncatesLongQuestion(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var capturedQuestion string
|
||||
tool := advisorToolCapturingQuestion(t, &capturedQuestion)
|
||||
longQuestion := strings.Repeat("界", 2001)
|
||||
|
||||
resp := runAdvisorTool(t, tool, chatadvisor.AdvisorArgs{Question: longQuestion})
|
||||
require.False(t, resp.IsError)
|
||||
require.True(t, utf8.ValidString(capturedQuestion))
|
||||
require.Equal(t, 2000, utf8.RuneCountInString(capturedQuestion))
|
||||
require.Equal(t, strings.Repeat("界", 2000), capturedQuestion)
|
||||
}
|
||||
|
||||
func TestAdvisorToolInfoDocumentsQuestionLimit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tool := chatadvisor.Tool(chatadvisor.ToolOptions{
|
||||
Runtime: mustAdvisorRuntime(t),
|
||||
GetConversationSnapshot: func() []fantasy.Message {
|
||||
return nil
|
||||
},
|
||||
Runtime: mustAdvisorRuntime(t),
|
||||
GetConversationSnapshot: func() []fantasy.Message { return nil },
|
||||
})
|
||||
|
||||
resp := runAdvisorTool(t, tool, chatadvisor.AdvisorArgs{Question: strings.Repeat("x", 2001)})
|
||||
require.True(t, resp.IsError)
|
||||
require.Contains(t, resp.Content, "2000 runes or fewer")
|
||||
info := tool.Info()
|
||||
require.Contains(t, info.Description, "2000 runes")
|
||||
require.Contains(t, chatadvisor.ParentGuidanceBlock, "2000 runes")
|
||||
|
||||
questionParam, ok := info.Parameters["question"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
description, ok := questionParam["description"].(string)
|
||||
require.True(t, ok)
|
||||
require.Contains(t, description, "2000 runes")
|
||||
}
|
||||
|
||||
func TestAdvisorToolRejectsMissingRuntime(t *testing.T) {
|
||||
@@ -366,6 +409,35 @@ func mustAdvisorRuntime(t *testing.T) *chatadvisor.Runtime {
|
||||
return runtime
|
||||
}
|
||||
|
||||
func advisorToolCapturingQuestion(t *testing.T, capturedQuestion *string) fantasy.AgentTool {
|
||||
t.Helper()
|
||||
|
||||
runtime, err := chatadvisor.NewRuntime(chatadvisor.RuntimeConfig{
|
||||
Model: &chattest.FakeModel{
|
||||
ProviderName: "test-provider",
|
||||
ModelName: "test-model",
|
||||
StreamFn: func(_ context.Context, call fantasy.Call) (fantasy.StreamResponse, error) {
|
||||
require.NotEmpty(t, call.Prompt)
|
||||
*capturedQuestion = singleText(t, call.Prompt[len(call.Prompt)-1])
|
||||
return streamFromParts([]fantasy.StreamPart{
|
||||
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
||||
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "captured advice"},
|
||||
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
||||
{Type: fantasy.StreamPartTypeFinish, FinishReason: fantasy.FinishReasonStop},
|
||||
}), nil
|
||||
},
|
||||
},
|
||||
MaxUsesPerRun: 1,
|
||||
MaxOutputTokens: 64,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
return chatadvisor.Tool(chatadvisor.ToolOptions{
|
||||
Runtime: runtime,
|
||||
GetConversationSnapshot: func() []fantasy.Message { return nil },
|
||||
})
|
||||
}
|
||||
|
||||
func runAdvisorTool(
|
||||
t *testing.T,
|
||||
tool fantasy.AgentTool,
|
||||
|
||||
@@ -15,7 +15,7 @@ const (
|
||||
|
||||
// AdvisorArgs contains the tool-visible advisor question.
|
||||
type AdvisorArgs struct {
|
||||
Question string `json:"question"`
|
||||
Question string `json:"question" description:"A brief question for the advisor. Must be 2000 runes or fewer. Summarize context instead of pasting long logs or transcripts."`
|
||||
}
|
||||
|
||||
// AdvisorResult is the structured result returned by the advisor runtime.
|
||||
|
||||
Reference in New Issue
Block a user