Files
coder/coderd/x/chatd/chattool/computeruse_test.go
T
Ethan ef6969dd70 feat(coderd/x/chatd): agent-created file attachments in chat (#24280)
Agents can already see workspace files and take screenshots, but users could not download those artifacts from chat. This PR adds durable chat attachments to chatd. `attach_file`, explicit `computer` screenshot actions (not the automatic post-action screenshots), and `propose_plan` now fetch bytes over the agent connection, store them in `chat_files`, link them to the chat, and carry attachment metadata in tool responses so `buildAssistantPartsForPersist` can materialize ordinary `type:"file"` assistant parts that the chat file APIs serve.

The same storage helpers are reused for other artifact-producing paths. `wait_agent` recordings and thumbnails are stored as chat files and linked back to the parent chat, with best-effort relinking so parent chats retain those artifacts without leaving orphaned rows when chat-file caps reject links. `storeChatAttachment` wraps insert + link in one transaction, files are capped at 10 MB each and 20 per chat, and serving defaults to `Content-Disposition: attachment` with an explicit inline-safe allowlist.

This PR also consolidates chat-file media policy in `coderd/chatfiles`. Uploads and tool-generated attachments share byte-based MIME detection, SVG blocking, inline-safety rules, and compatible `text/plain` refinement for JSON, CSV, and Markdown. Prompt construction still only inlines synthetic pasted text for model consumption; assistant-created attachments are persisted for the user and intentionally not replayed into later LLM turns.

UI follow-up lives in #24281.

Relates to CODAGT-91
2026-04-20 18:04:35 +10:00

356 lines
13 KiB
Go

package chattool_test
import (
"bytes"
"context"
"encoding/base64"
"testing"
"charm.land/fantasy"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
"golang.org/x/xerrors"
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/codersdk/workspacesdk/agentconnmock"
"github.com/coder/quartz"
)
// TestComputerUseProviderTool checks that ComputerUseProviderTool yields a
// fantasy.ProviderDefinedTool named "computer" whose display size arguments
// carry the declared desktop geometry as int64 values.
func TestComputerUseProviderTool(t *testing.T) {
	t.Parallel()

	geo := workspacesdk.DefaultDesktopGeometry()
	wantWidth := int64(geo.DeclaredWidth)
	wantHeight := int64(geo.DeclaredHeight)

	definition := chattool.ComputerUseProviderTool(geo.DeclaredWidth, geo.DeclaredHeight)

	// The remaining assertions read fields of the concrete type, so stop
	// right away if the definition is anything else.
	providerTool, isProviderDefined := definition.(fantasy.ProviderDefinedTool)
	require.True(t, isProviderDefined, "ComputerUseProviderTool should return a ProviderDefinedTool")

	assert.Contains(t, providerTool.ID, "computer")
	assert.Equal(t, "computer", providerTool.Name)
	assert.Equal(t, wantWidth, providerTool.Args["display_width_px"])
	assert.Equal(t, wantHeight, providerTool.Args["display_height_px"])
}
// TestComputerUseTool_Run_Screenshot runs a "screenshot" action against a
// mocked agent connection, with nil passed in place of the attachment store
// callback, and checks that the tool answers with a non-error image/png
// response.
func TestComputerUseTool_Run_Screenshot(t *testing.T) {
	t.Parallel()

	// Base64 screenshot payload served by the mock. It is declared once, as
	// in the sibling screenshot tests, so the mocked response and the
	// assertion on resp.Data cannot drift apart.
	const screenshotPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg=="

	ctrl := gomock.NewController(t)
	mockConn := agentconnmock.NewMockAgentConn(ctrl)
	geometry := workspacesdk.DefaultDesktopGeometry()

	mockConn.EXPECT().ExecuteDesktopAction(
		gomock.Any(),
		gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
	).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
		// The request must carry the declared geometry as its scaled size.
		require.NotNil(t, action.ScaledWidth)
		require.NotNil(t, action.ScaledHeight)
		assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
		assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
		return workspacesdk.DesktopActionResponse{
			Output:           "screenshot",
			ScreenshotData:   screenshotPNG,
			ScreenshotWidth:  geometry.DeclaredWidth,
			ScreenshotHeight: geometry.DeclaredHeight,
		}, nil
	})

	tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
		return mockConn, nil
	}, nil, quartz.NewReal(), slogtest.Make(t, nil))

	call := fantasy.ToolCall{
		ID:    "test-1",
		Name:  "computer",
		Input: `{"action":"screenshot"}`,
	}

	resp, err := tool.Run(context.Background(), call)
	require.NoError(t, err)
	assert.Equal(t, "image", resp.Type)
	assert.Equal(t, "image/png", resp.MediaType)
	// The response data is expected to be the payload exactly as the mock
	// returned it, i.e. the base64 text itself.
	assert.Equal(t, []byte(screenshotPNG), resp.Data)
	assert.False(t, resp.IsError)
}
func TestComputerUseTool_Run_Screenshot_PersistsAttachment(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
mockConn := agentconnmock.NewMockAgentConn(ctrl)
geometry := workspacesdk.DefaultDesktopGeometry()
const screenshotPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg=="
mockConn.EXPECT().ExecuteDesktopAction(
gomock.Any(),
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
require.Equal(t, "screenshot", action.Action)
return workspacesdk.DesktopActionResponse{
Output: "screenshot",
ScreenshotData: screenshotPNG,
ScreenshotWidth: geometry.DeclaredWidth,
ScreenshotHeight: geometry.DeclaredHeight,
}, nil
})
var storedName string
var storedType string
var storedData []byte
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
return mockConn, nil
}, func(_ context.Context, name string, detectName string, data []byte) (chattool.AttachmentMetadata, error) {
storedName = name
require.Equal(t, name, detectName)
storedType = "image/png"
storedData = append([]byte(nil), data...)
return chattool.AttachmentMetadata{
FileID: uuid.MustParse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"),
MediaType: storedType,
Name: name,
}, nil
}, quartz.NewReal(), slogtest.Make(t, nil))
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "test-screenshot-persist", Name: "computer", Input: `{"action":"screenshot"}`,
})
require.NoError(t, err)
assert.Equal(t, "image", resp.Type)
assert.Equal(t, "image/png", resp.MediaType)
assert.Equal(t, []byte(screenshotPNG), resp.Data)
assert.Contains(t, storedName, "screenshot-")
assert.Equal(t, "image/png", storedType)
expectedPNG, decodeErr := base64.StdEncoding.DecodeString(screenshotPNG)
require.NoError(t, decodeErr)
require.Equal(t, expectedPNG, storedData)
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
require.NoError(t, err)
require.Len(t, attachments, 1)
assert.Equal(t, uuid.MustParse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"), attachments[0].FileID)
assert.Equal(t, "image/png", attachments[0].MediaType)
}
func TestComputerUseTool_Run_Screenshot_StoreErrorFallsBackToImage(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
mockConn := agentconnmock.NewMockAgentConn(ctrl)
geometry := workspacesdk.DefaultDesktopGeometry()
const screenshotPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg=="
mockConn.EXPECT().ExecuteDesktopAction(
gomock.Any(),
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
).Return(workspacesdk.DesktopActionResponse{
Output: "screenshot",
ScreenshotData: screenshotPNG,
ScreenshotWidth: geometry.DeclaredWidth,
ScreenshotHeight: geometry.DeclaredHeight,
}, nil)
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
return mockConn, nil
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
return chattool.AttachmentMetadata{}, xerrors.New("chat already has the maximum of 20 linked files")
}, quartz.NewReal(), slogtest.Make(t, nil))
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "test-screenshot-store-error", Name: "computer", Input: `{"action":"screenshot"}`,
})
require.NoError(t, err)
assert.Equal(t, "image", resp.Type)
assert.Equal(t, "image/png", resp.MediaType)
assert.False(t, resp.IsError)
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
require.NoError(t, err)
assert.Empty(t, attachments)
}
func TestComputerUseTool_Run_Screenshot_OversizedAttachmentFallsBackToImage(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
mockConn := agentconnmock.NewMockAgentConn(ctrl)
geometry := workspacesdk.DefaultDesktopGeometry()
oversizedScreenshot := base64.StdEncoding.EncodeToString(bytes.Repeat([]byte{0xAB}, 10<<20+1))
mockConn.EXPECT().ExecuteDesktopAction(
gomock.Any(),
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
).Return(workspacesdk.DesktopActionResponse{
Output: "screenshot",
ScreenshotData: oversizedScreenshot,
ScreenshotWidth: geometry.DeclaredWidth,
ScreenshotHeight: geometry.DeclaredHeight,
}, nil)
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
return mockConn, nil
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
t.Fatal("storeFile should not be called for oversized screenshots")
return chattool.AttachmentMetadata{}, nil
}, quartz.NewReal(), slogtest.Make(t, nil))
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "test-screenshot-oversized", Name: "computer", Input: `{"action":"screenshot"}`,
})
require.NoError(t, err)
assert.Equal(t, "image", resp.Type)
assert.Equal(t, "image/png", resp.MediaType)
assert.False(t, resp.IsError)
require.Len(t, resp.Data, len(oversizedScreenshot))
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
require.NoError(t, err)
assert.Empty(t, attachments)
}
// TestComputerUseTool_Run_LeftClick runs a "left_click" action at coordinate
// (100, 200). The mocked connection expects two desktop actions: the click,
// carrying the coordinate and declared geometry, and then a follow-up
// "screenshot" action. The tool must return that follow-up screenshot as a
// non-error image/png response, and the store callback must never be invoked,
// so no attachments may appear in the response metadata.
func TestComputerUseTool_Run_LeftClick(t *testing.T) {
	t.Parallel()

	ctrl := gomock.NewController(t)
	mockConn := agentconnmock.NewMockAgentConn(ctrl)
	geometry := workspacesdk.DefaultDesktopGeometry()
	followUpScreenshot := base64.StdEncoding.EncodeToString([]byte("after-click"))

	// First expected call: the click itself.
	mockConn.EXPECT().ExecuteDesktopAction(
		gomock.Any(),
		gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
	).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
		require.NotNil(t, action.Coordinate)
		assert.Equal(t, [2]int{100, 200}, *action.Coordinate)
		require.NotNil(t, action.ScaledWidth)
		require.NotNil(t, action.ScaledHeight)
		assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
		assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
		return workspacesdk.DesktopActionResponse{Output: "left_click performed"}, nil
	})

	// Second expected call: the follow-up screenshot.
	mockConn.EXPECT().ExecuteDesktopAction(
		gomock.Any(),
		gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
	).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
		assert.Equal(t, "screenshot", action.Action)
		require.NotNil(t, action.ScaledWidth)
		require.NotNil(t, action.ScaledHeight)
		assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
		assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
		return workspacesdk.DesktopActionResponse{
			Output:           "screenshot",
			ScreenshotData:   followUpScreenshot,
			ScreenshotWidth:  geometry.DeclaredWidth,
			ScreenshotHeight: geometry.DeclaredHeight,
		}, nil
	})

	tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
		return mockConn, nil
	}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
		// Follow-up screenshots must not be stored.
		t.Fatal("storeFile should not be called for left_click follow-up screenshots")
		return chattool.AttachmentMetadata{}, nil
	}, quartz.NewReal(), slogtest.Make(t, nil))

	call := fantasy.ToolCall{
		ID:    "test-2",
		Name:  "computer",
		Input: `{"action":"left_click","coordinate":[100,200]}`,
	}

	resp, err := tool.Run(context.Background(), call)
	require.NoError(t, err)
	assert.Equal(t, "image", resp.Type)
	// Same response checks as the sibling Wait and Screenshot tests, so a
	// click that comes back flagged as an error or with another media type
	// fails here too.
	assert.Equal(t, "image/png", resp.MediaType)
	assert.Equal(t, []byte(followUpScreenshot), resp.Data)
	assert.False(t, resp.IsError)

	attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
	require.NoError(t, err)
	assert.Empty(t, attachments)
}
func TestComputerUseTool_Run_Wait(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
mockConn := agentconnmock.NewMockAgentConn(ctrl)
geometry := workspacesdk.DefaultDesktopGeometry()
followUpScreenshot := base64.StdEncoding.EncodeToString([]byte("after-wait"))
mockConn.EXPECT().ExecuteDesktopAction(
gomock.Any(),
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
require.NotNil(t, action.ScaledWidth)
require.NotNil(t, action.ScaledHeight)
assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
return workspacesdk.DesktopActionResponse{
Output: "screenshot",
ScreenshotData: followUpScreenshot,
ScreenshotWidth: geometry.DeclaredWidth,
ScreenshotHeight: geometry.DeclaredHeight,
}, nil
})
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
return mockConn, nil
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
t.Fatal("storeFile should not be called for wait screenshots")
return chattool.AttachmentMetadata{}, nil
}, quartz.NewReal(), slogtest.Make(t, nil))
call := fantasy.ToolCall{
ID: "test-3",
Name: "computer",
Input: `{"action":"wait","duration":10}`,
}
resp, err := tool.Run(context.Background(), call)
require.NoError(t, err)
assert.Equal(t, "image", resp.Type)
assert.Equal(t, "image/png", resp.MediaType)
assert.Equal(t, []byte(followUpScreenshot), resp.Data)
assert.False(t, resp.IsError)
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
require.NoError(t, err)
assert.Empty(t, attachments)
}
// TestComputerUseTool_Run_ConnError checks that when the connection factory
// fails, Run returns a nil Go error together with an error response whose
// content includes the factory's error text.
func TestComputerUseTool_Run_ConnError(t *testing.T) {
	t.Parallel()

	geo := workspacesdk.DefaultDesktopGeometry()
	tool := chattool.NewComputerUseTool(
		geo.DeclaredWidth,
		geo.DeclaredHeight,
		// The workspace connection can never be obtained.
		func(_ context.Context) (workspacesdk.AgentConn, error) {
			return nil, xerrors.New("workspace not available")
		},
		nil,
		quartz.NewReal(),
		slogtest.Make(t, nil),
	)

	resp, err := tool.Run(context.Background(), fantasy.ToolCall{
		ID:    "test-4",
		Name:  "computer",
		Input: `{"action":"screenshot"}`,
	})

	// The failure is reported through the response, not the Go error.
	require.NoError(t, err)
	assert.True(t, resp.IsError)
	assert.Contains(t, resp.Content, "workspace not available")
}
// TestComputerUseTool_Run_InvalidInput feeds Run malformed JSON input and
// checks that it returns a nil Go error together with an error response whose
// content mentions "invalid computer use input".
func TestComputerUseTool_Run_InvalidInput(t *testing.T) {
	t.Parallel()

	geo := workspacesdk.DefaultDesktopGeometry()
	tool := chattool.NewComputerUseTool(
		geo.DeclaredWidth,
		geo.DeclaredHeight,
		// If the tool did ask for a connection, this error text would end
		// up in place of the expected message.
		func(_ context.Context) (workspacesdk.AgentConn, error) {
			return nil, xerrors.New("should not be called")
		},
		nil,
		quartz.NewReal(),
		slogtest.Make(t, nil),
	)

	resp, err := tool.Run(context.Background(), fantasy.ToolCall{
		ID:    "test-5",
		Name:  "computer",
		Input: `{invalid json`,
	})

	// The failure is reported through the response, not the Go error.
	require.NoError(t, err)
	assert.True(t, resp.IsError)
	assert.Contains(t, resp.Content, "invalid computer use input")
}