mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
a02339c66a
- **computeruse.go**: Decode base64 screenshot data before storing in
`ToolResponse.Data` (was casting base64 string to bytes without
decoding)
- **chatloop.go**: Re-encode `ToolResponse.Data` to base64 via
`base64.StdEncoding.EncodeToString` instead of `string()` cast
- **mcpclient.go**: UTF-8 validate all text from MCP responses in
`convertCallResult()` using `strings.ToValidUTF8`
- **chatprompt.go (persist)**: Defense-in-depth UTF-8 sanitization of
text and media Text fields before database storage
- **chatprompt.go (replay)**: Antivenom layer that validates base64 and
UTF-8 at read time, auto-healing already-poisoned chats without
requiring a migration
- Adds `TestToolResultAntivenom`: 4 subtests covering poisoned text, poisoned
media, valid media round-trip, and media with invalid UTF-8 text
- Adds `TestConvertCallResult_UTF8Sanitization`: 4 subtests covering invalid
UTF-8 in TextContent, EmbeddedResource, valid passthrough, and
multi-part
- Adds `TestComputerUseTool_Run_ScreenshotDataIsDecodedBinary`: Verifies no
double-encode in the computer-use path
- Updates existing computer-use tests for the new decoded-binary
contract
> 🤖
424 lines
16 KiB
Go
424 lines
16 KiB
Go
package chattool_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/base64"
|
|
"testing"
|
|
|
|
"charm.land/fantasy"
|
|
"github.com/google/uuid"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/mock/gomock"
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3/sloggers/slogtest"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk/agentconnmock"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
func TestComputerUseProviderTool(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
def := chattool.ComputerUseProviderTool(geometry.DeclaredWidth, geometry.DeclaredHeight)
|
|
pdt, ok := def.(fantasy.ProviderDefinedTool)
|
|
require.True(t, ok, "ComputerUseProviderTool should return a ProviderDefinedTool")
|
|
assert.Contains(t, pdt.ID, "computer")
|
|
assert.Equal(t, "computer", pdt.Name)
|
|
assert.Equal(t, int64(geometry.DeclaredWidth), pdt.Args["display_width_px"])
|
|
assert.Equal(t, int64(geometry.DeclaredHeight), pdt.Args["display_height_px"])
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Screenshot(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
|
|
require.NotNil(t, action.ScaledWidth)
|
|
require.NotNil(t, action.ScaledHeight)
|
|
assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
|
|
assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
|
|
return workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg==",
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil
|
|
})
|
|
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, nil, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-1",
|
|
Name: "computer",
|
|
Input: `{"action":"screenshot"}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
expectedBinary, decErr := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg==")
|
|
require.NoError(t, decErr)
|
|
assert.Equal(t, expectedBinary, resp.Data)
|
|
assert.False(t, resp.IsError)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Screenshot_PersistsAttachment(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
const screenshotPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg=="
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
|
|
require.Equal(t, "screenshot", action.Action)
|
|
return workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: screenshotPNG,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil
|
|
})
|
|
|
|
var storedName string
|
|
var storedType string
|
|
var storedData []byte
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, func(_ context.Context, name string, detectName string, data []byte) (chattool.AttachmentMetadata, error) {
|
|
storedName = name
|
|
require.Equal(t, name, detectName)
|
|
storedType = "image/png"
|
|
storedData = append([]byte(nil), data...)
|
|
return chattool.AttachmentMetadata{
|
|
FileID: uuid.MustParse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"),
|
|
MediaType: storedType,
|
|
Name: name,
|
|
}, nil
|
|
}, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
|
|
ID: "test-screenshot-persist", Name: "computer", Input: `{"action":"screenshot"}`,
|
|
})
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
expectedBinary, decErr := base64.StdEncoding.DecodeString(screenshotPNG)
|
|
require.NoError(t, decErr)
|
|
assert.Equal(t, expectedBinary, resp.Data)
|
|
assert.Contains(t, storedName, "screenshot-")
|
|
assert.Equal(t, "image/png", storedType)
|
|
expectedPNG, decodeErr := base64.StdEncoding.DecodeString(screenshotPNG)
|
|
require.NoError(t, decodeErr)
|
|
require.Equal(t, expectedPNG, storedData)
|
|
|
|
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
|
|
require.NoError(t, err)
|
|
require.Len(t, attachments, 1)
|
|
assert.Equal(t, uuid.MustParse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"), attachments[0].FileID)
|
|
assert.Equal(t, "image/png", attachments[0].MediaType)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Screenshot_StoreErrorFallsBackToImage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
const screenshotPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4n539HwAHFwLVF8kc1wAAAABJRU5ErkJggg=="
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: screenshotPNG,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
|
|
return chattool.AttachmentMetadata{}, xerrors.New("chat already has the maximum of 20 linked files")
|
|
}, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
|
|
ID: "test-screenshot-store-error", Name: "computer", Input: `{"action":"screenshot"}`,
|
|
})
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
assert.False(t, resp.IsError)
|
|
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
|
|
require.NoError(t, err)
|
|
assert.Empty(t, attachments)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Screenshot_OversizedAttachmentFallsBackToImage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
oversizedScreenshot := base64.StdEncoding.EncodeToString(bytes.Repeat([]byte{0xAB}, 10<<20+1))
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: oversizedScreenshot,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
|
|
t.Fatal("storeFile should not be called for oversized screenshots")
|
|
return chattool.AttachmentMetadata{}, nil
|
|
}, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
|
|
ID: "test-screenshot-oversized", Name: "computer", Input: `{"action":"screenshot"}`,
|
|
})
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
assert.False(t, resp.IsError)
|
|
expectedOversized, decErr := base64.StdEncoding.DecodeString(oversizedScreenshot)
|
|
require.NoError(t, decErr)
|
|
require.Len(t, resp.Data, len(expectedOversized))
|
|
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
|
|
require.NoError(t, err)
|
|
assert.Empty(t, attachments)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_LeftClick(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
followUpScreenshot := base64.StdEncoding.EncodeToString([]byte("after-click"))
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
|
|
require.NotNil(t, action.Coordinate)
|
|
assert.Equal(t, [2]int{100, 200}, *action.Coordinate)
|
|
require.NotNil(t, action.ScaledWidth)
|
|
require.NotNil(t, action.ScaledHeight)
|
|
assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
|
|
assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
|
|
return workspacesdk.DesktopActionResponse{Output: "left_click performed"}, nil
|
|
})
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
|
|
assert.Equal(t, "screenshot", action.Action)
|
|
require.NotNil(t, action.ScaledWidth)
|
|
require.NotNil(t, action.ScaledHeight)
|
|
assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
|
|
assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
|
|
return workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: followUpScreenshot,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil
|
|
})
|
|
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
|
|
t.Fatal("storeFile should not be called for left_click follow-up screenshots")
|
|
return chattool.AttachmentMetadata{}, nil
|
|
}, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-2",
|
|
Name: "computer",
|
|
Input: `{"action":"left_click","coordinate":[100,200]}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
expectedBinary, decErr := base64.StdEncoding.DecodeString(followUpScreenshot)
|
|
require.NoError(t, decErr)
|
|
assert.Equal(t, expectedBinary, resp.Data)
|
|
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
|
|
require.NoError(t, err)
|
|
assert.Empty(t, attachments)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Wait(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
followUpScreenshot := base64.StdEncoding.EncodeToString([]byte("after-wait"))
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).DoAndReturn(func(_ context.Context, action workspacesdk.DesktopAction) (workspacesdk.DesktopActionResponse, error) {
|
|
require.NotNil(t, action.ScaledWidth)
|
|
require.NotNil(t, action.ScaledHeight)
|
|
assert.Equal(t, geometry.DeclaredWidth, *action.ScaledWidth)
|
|
assert.Equal(t, geometry.DeclaredHeight, *action.ScaledHeight)
|
|
return workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: followUpScreenshot,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil
|
|
})
|
|
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, func(_ context.Context, _ string, _ string, _ []byte) (chattool.AttachmentMetadata, error) {
|
|
t.Fatal("storeFile should not be called for wait screenshots")
|
|
return chattool.AttachmentMetadata{}, nil
|
|
}, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-3",
|
|
Name: "computer",
|
|
Input: `{"action":"wait","duration":10}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
expectedBinary, decErr := base64.StdEncoding.DecodeString(followUpScreenshot)
|
|
require.NoError(t, decErr)
|
|
assert.Equal(t, expectedBinary, resp.Data)
|
|
assert.False(t, resp.IsError)
|
|
attachments, err := chattool.AttachmentsFromMetadata(resp.Metadata)
|
|
require.NoError(t, err)
|
|
assert.Empty(t, attachments)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_ScreenshotDataIsDecodedBinary(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
|
|
// A known base64 string (1x1 red PNG).
|
|
const screenshotBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8BQDwAEgAF/pooBPQAAAABJRU5ErkJggg=="
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.AssignableToTypeOf(workspacesdk.DesktopAction{}),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: screenshotBase64,
|
|
ScreenshotWidth: geometry.DeclaredWidth,
|
|
ScreenshotHeight: geometry.DeclaredHeight,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(
|
|
geometry.DeclaredWidth,
|
|
geometry.DeclaredHeight,
|
|
func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
},
|
|
nil,
|
|
quartz.NewReal(),
|
|
slogtest.Make(t, nil),
|
|
)
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-decode-1",
|
|
Name: "computer",
|
|
Input: `{"action":"screenshot"}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
|
|
// Data must contain decoded binary, not the base64 string
|
|
// reinterpreted as bytes.
|
|
expectedBinary, err := base64.StdEncoding.DecodeString(screenshotBase64)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, expectedBinary, resp.Data,
|
|
"ToolResponse.Data should contain decoded binary, not base64-as-bytes")
|
|
|
|
// Verify that re-encoding produces the original base64 string.
|
|
// This is the round-trip that the chat loop performs when
|
|
// building the API response.
|
|
reEncoded := base64.StdEncoding.EncodeToString(resp.Data)
|
|
assert.Equal(t, screenshotBase64, reEncoded,
|
|
"re-encoding Data should produce the original base64 string (no double-encode)")
|
|
}
|
|
|
|
func TestComputerUseTool_Run_ConnError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return nil, xerrors.New("workspace not available")
|
|
}, nil, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-4",
|
|
Name: "computer",
|
|
Input: `{"action":"screenshot"}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.True(t, resp.IsError)
|
|
assert.Contains(t, resp.Content, "workspace not available")
|
|
}
|
|
|
|
func TestComputerUseTool_Run_InvalidInput(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
geometry := workspacesdk.DefaultDesktopGeometry()
|
|
tool := chattool.NewComputerUseTool(geometry.DeclaredWidth, geometry.DeclaredHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return nil, xerrors.New("should not be called")
|
|
}, nil, quartz.NewReal(), slogtest.Make(t, nil))
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-5",
|
|
Name: "computer",
|
|
Input: `{invalid json`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.True(t, resp.IsError)
|
|
assert.Contains(t, resp.Content, "invalid computer use input")
|
|
}
|