fix(coderd/x/chatd): retune subagent selection guidance (#25311)

> Mux working on behalf of Mike.

## Summary

- retune chatd subagent guidance to prefer `general` for substantial
delegated work, including read-only synthesis and planning support
- narrow `explore` guidance to repository-local code lookup and bounded
tracing
- add regression tests for planning, spawn tool, and Plan Mode guidance
text

## Tests

- `go test ./coderd/x/chatd -run
'Test(DefaultSystemPromptPlanningGuidance_SteersSubagentSelection|SpawnAgent_DescriptionSteersGeneralForSubstantialResearch|SpawnAgent_PlanModeDescriptionOmitsComputerUse|PlanningOverlaySubagentGuidance_UsesPlanModeSafeDescriptions|ExploreSubagentIsReadOnly)$'`
- `make lint`
- `make test TEST_PACKAGES=./coderd/x/chatd RUN=Guidance && make test
TEST_PACKAGES=./coderd/x/chatd RUN=Description`
- pre-commit hook during `git commit`
This commit is contained in:
Michael Suchacz
2026-05-13 23:10:21 +02:00
committed by GitHub
parent 341051ceee
commit d1a471e29e
2 changed files with 89 additions and 16 deletions
+46 -16
View File
@@ -21,10 +21,20 @@ const (
subagentTypeComputerUse = "computer_use"
defaultSystemPromptPlanningGuidance = "1. Use " + spawnAgentToolName +
" with type=\"" + subagentTypeExplore +
"\" and wait_agent to research the codebase and gather context as needed. " +
"Reserve type=\"" + subagentTypeGeneral +
"\" for writable delegated work."
" and wait_agent when delegation helps gather context. Prefer type=\"" +
subagentTypeGeneral +
"\" for substantial delegated research, analysis, reasoning, review, " +
"planning support, or implementation. Use type=\"" + subagentTypeGeneral +
"\" even for read-only work when the task is open-ended, multi-step, " +
"parallel, requires synthesis, or may later need edits. When planning, " +
"type=\"" + subagentTypeGeneral +
"\" remains non-mutating until implementation is approved. Use type=\"" +
subagentTypeExplore +
"\" only for narrow repository-local read-only code discovery or code " +
"tracing, such as locating files, callsites, or a bounded existing flow. " +
"Do not use type=\"" + subagentTypeExplore +
"\" for generic research, broad architecture analysis, planning synthesis, " +
"external or web research, parallel research, or tasks that may need edits."
)
type spawnAgentArgs struct {
@@ -44,7 +54,7 @@ func allSubagentDefinitions() []subagentDefinition {
return []subagentDefinition{
{
id: subagentTypeGeneral,
description: "delegated work that may inspect or modify workspace files",
description: "substantial delegated research, analysis, reasoning, review, planning support, and implementation",
buildOptions: func(ctx context.Context, p *Server, parent database.Chat, _ database.Chat, _ uuid.UUID, _ string) (childSubagentChatOptions, error) {
modelConfigID, err := p.resolveSubagentModelConfigID(
ctx,
@@ -63,7 +73,7 @@ func allSubagentDefinitions() []subagentDefinition {
},
{
id: subagentTypeExplore,
description: "read-only discovery, code tracing, and system understanding",
description: "narrow repository-local read-only code discovery and code tracing",
buildOptions: func(ctx context.Context, p *Server, _ database.Chat, turnParent database.Chat, currentModelConfigID uuid.UUID, _ string) (childSubagentChatOptions, error) {
modelConfigID, err := p.resolveSubagentModelConfigID(
ctx,
@@ -274,15 +284,31 @@ func buildSpawnAgentDescription(
"the right specialist. Available type values: " +
formatSubagentDefinitions(availableDefs) + ". Do not use this for " +
"simple or quick operations you can handle directly with execute, " +
"read_file, or write_file. Reserve writable subagents for tasks that " +
"require intellectual work such as code analysis, writing new code, or " +
"complex refactoring. Be careful when running parallel subagents: if " +
"two subagents modify the same files they will conflict with each " +
"other, so ensure parallel subagent tasks are independent. The child " +
"agent receives the same workspace tools but cannot spawn its own " +
"subagents. After spawning, use wait_agent to collect the result."
"read_file, or write_file. Prefer type=\"" + subagentTypeGeneral +
"\" for substantial delegated research, analysis, reasoning, review, " +
"planning support, or implementation, even when the child should only " +
"report findings. When using type=\"" + subagentTypeGeneral +
"\" for read-only work, explicitly instruct the child not to modify " +
"files and to return findings. Use type=\"" + subagentTypeExplore +
"\" only for narrow repository-local read-only code discovery or code " +
"tracing, such as locating files, callsites, or a bounded existing flow. " +
"Do not use type=\"" + subagentTypeExplore +
"\" for generic research, broad architecture analysis, planning " +
"synthesis, external or web research, parallel research, or tasks that " +
"may need edits. Be careful when running parallel subagents: if two " +
"subagents modify the same files they will conflict with each other, " +
"so ensure parallel subagent tasks are independent. The child agent " +
"receives the same workspace tools but cannot spawn its own subagents. " +
"After spawning, use wait_agent to collect the result."
if currentChat.PlanMode.Valid && currentChat.PlanMode.ChatPlanMode == database.ChatPlanModePlan {
description += " During plan mode, general and explore subagents may use shell commands for exploration, such as cloning repositories, searching code, and running inspection commands, but they must not implement changes or intentionally modify workspace files."
description += " During plan mode, type=\"" + subagentTypeGeneral +
"\" is for non-mutating substantial investigation and planning support, " +
"and type=\"" + subagentTypeExplore +
"\" is for narrow repository-local lookup or tracing. Both may use " +
"shell commands for exploration and inspection, but only type=\"" +
subagentTypeGeneral +
"\" should be used for cloning repositories or non-local investigation. " +
"They must not implement changes or intentionally modify workspace files."
}
return description
}
@@ -308,14 +334,18 @@ func formatSubagentDefinitionsWithDescriptionOverrides(
func planningOverlaySubagentGuidance() string {
planModeDescriptions := map[string]string{
subagentTypeGeneral: "delegated investigation, planning support, and non-mutating exploration",
subagentTypeGeneral: "non-mutating substantial investigation, analysis, and planning support",
subagentTypeExplore: "narrow repository-local codebase lookup and code tracing",
}
return "Use read_file, execute, process_output, list_templates, read_template, " +
spawnAgentToolName + ", and approved external MCP tools when available to gather context. " +
"Workspace MCP tools are not available in root plan mode, and side-effecting built-in tools such as process_list, process_signal, message_agent, close_agent, and computer-use actions remain unavailable. In Plan Mode, " +
spawnAgentToolName + " delegation is for investigation and planning " +
"support, not code writing or implementation. Allowed type " +
"support, not code writing or implementation. Use type=\"" + subagentTypeGeneral +
"\" for substantial investigation, reasoning, and planning support. " +
"Use type=\"" + subagentTypeExplore +
"\" only for narrow repository-local lookup or tracing. Allowed type " +
"values in Plan Mode: " +
formatSubagentDefinitionsWithDescriptionOverrides(
subagentDefinitionsByID(
+43
View File
@@ -1741,6 +1741,17 @@ func TestSpawnAgent_ExploreFallsBackWhenOverrideCredentialsAreUnavailable(t *tes
require.Equal(t, currentTurnModel.ID, childChat.LastModelConfigID)
}
func TestDefaultSystemPromptPlanningGuidance_SteersSubagentSelection(t *testing.T) {
t.Parallel()
require.Contains(t, defaultSystemPromptPlanningGuidance, `Prefer type="general" for substantial delegated research, analysis, reasoning, review, planning support, or implementation`)
require.Contains(t, defaultSystemPromptPlanningGuidance, `Use type="general" even for read-only work when the task is open-ended, multi-step, parallel, requires synthesis, or may later need edits`)
require.Contains(t, defaultSystemPromptPlanningGuidance, `Use type="explore" only for narrow repository-local read-only code discovery or code tracing`)
require.Contains(t, defaultSystemPromptPlanningGuidance, `Do not use type="explore" for generic research, broad architecture analysis, planning synthesis, external or web research, parallel research, or tasks that may need edits`)
require.NotContains(t, defaultSystemPromptPlanningGuidance, "research the codebase")
require.NotContains(t, defaultSystemPromptPlanningGuidance, "Reserve type=\"general\" for writable delegated work")
}
func TestSpawnAgent_DescriptionListsAllAvailableTypes(t *testing.T) {
t.Parallel()
@@ -1765,6 +1776,30 @@ func TestSpawnAgent_DescriptionListsAllAvailableTypes(t *testing.T) {
require.Contains(t, description, subagentTypeComputerUse)
}
// TestSpawnAgent_DescriptionSteersGeneralForSubstantialResearch builds the
// subagent tools for a freshly created parent chat and checks the
// description of the tool named spawnAgentToolName. The description must
// carry the selection guidance: prefer type="general" for substantial
// delegated work (including report-only work), and use type="explore" only
// for narrow repository-local discovery or tracing.
func TestSpawnAgent_DescriptionSteersGeneralForSubstantialResearch(t *testing.T) {
	t.Parallel()

	db, ps := dbtestutil.NewDB(t)
	server := newInternalTestServer(t, db, ps, chatprovider.ProviderAPIKeys{})
	ctx := chatdTestContext(t)
	user, org, model := seedInternalChatDeps(t, db)

	parent := createInternalParentChat(
		ctx,
		t,
		server,
		db,
		org.ID,
		user.ID,
		model.ID,
		"parent-description-selection-guidance",
	)

	// The tool set resolves the current chat lazily through this callback.
	currentChat := func() database.Chat { return parent }
	available := server.subagentTools(ctx, currentChat, parent.LastModelConfigID)

	spawnTool := findToolByName(available, spawnAgentToolName)
	require.NotNil(t, spawnTool, "spawn_agent tool must be present")

	description := spawnTool.Info().Description

	// Phrases the description must contain, checked in order.
	requiredPhrases := []string{
		`Prefer type="general" for substantial delegated research, analysis, reasoning, review, planning support, or implementation`,
		"even when the child should only report findings",
		`When using type="general" for read-only work, explicitly instruct the child not to modify files and to return findings`,
		`Use type="explore" only for narrow repository-local read-only code discovery or code tracing`,
		`Do not use type="explore" for generic research, broad architecture analysis, planning synthesis, external or web research, parallel research, or tasks that may need edits`,
	}
	for _, phrase := range requiredPhrases {
		require.Contains(t, description, phrase)
	}
}
func TestSpawnAgent_DescriptionIncludesComputerUseWithMissingProviderKey(t *testing.T) {
t.Parallel()
@@ -1820,6 +1855,10 @@ func TestSpawnAgent_PlanModeDescriptionOmitsComputerUse(t *testing.T) {
require.Contains(t, description, subagentTypeGeneral)
require.Contains(t, description, subagentTypeExplore)
require.NotContains(t, description, subagentTypeComputerUse)
require.Contains(t, description, `type="general" is for non-mutating substantial investigation and planning support`)
require.Contains(t, description, `type="explore" is for narrow repository-local lookup or tracing`)
require.Contains(t, description, `only type="general" should be used for cloning repositories or non-local investigation`)
require.NotContains(t, description, "Both may use shell commands for exploration, such as cloning repositories")
require.Contains(t, description, "must not implement changes or intentionally modify workspace files")
}
@@ -1864,6 +1903,10 @@ func TestPlanningOverlaySubagentGuidance_UsesPlanModeSafeDescriptions(t *testing
require.Contains(t, guidance, subagentTypeGeneral)
require.Contains(t, guidance, subagentTypeExplore)
require.Contains(t, guidance, `Use type="general" for substantial investigation, reasoning, and planning support`)
require.Contains(t, guidance, `Use type="explore" only for narrow repository-local lookup or tracing`)
require.Contains(t, guidance, "general (non-mutating substantial investigation, analysis, and planning support)")
require.Contains(t, guidance, "explore (narrow repository-local codebase lookup and code tracing)")
require.NotContains(t, guidance, subagentTypeComputerUse)
require.NotContains(t, guidance, "modify")
require.NotContains(t, guidance, "may inspect or modify workspace files")