mirror of
https://github.com/coder/coder.git
synced 2026-06-03 13:08:25 +00:00
84527390c6
Implement the backend for the desktop feature for agents. - Adds a new `/api/experimental/chats/$id/desktop` endpoint to coderd which exposes a VNC stream from a [portabledesktop](https://github.com/coder/portabledesktop) process running inside the workspace - Adds a new `spawn_computer_use_agent` tool to chatd, which spawns a subagent that has access to the `computer` tool which lets it interact with the `portabledesktop` process running inside the workspace - Adds the plumbing to make the above possible There's a follow up frontend PR here: https://github.com/coder/coder/pull/23006
187 lines
5.5 KiB
Go
187 lines
5.5 KiB
Go
package chattool_test
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"charm.land/fantasy"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/mock/gomock"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/chatd/chattool"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk/agentconnmock"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
func TestComputerUseTool_Info(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, nil, quartz.NewReal())
|
|
info := tool.Info()
|
|
assert.Equal(t, "computer", info.Name)
|
|
assert.NotEmpty(t, info.Description)
|
|
}
|
|
|
|
func TestComputerUseProviderTool(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
def := chattool.ComputerUseProviderTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight)
|
|
pdt, ok := def.(fantasy.ProviderDefinedTool)
|
|
require.True(t, ok, "ComputerUseProviderTool should return a ProviderDefinedTool")
|
|
assert.Contains(t, pdt.ID, "computer")
|
|
assert.Equal(t, "computer", pdt.Name)
|
|
// Verify display dimensions are passed through.
|
|
assert.Equal(t, int64(workspacesdk.DesktopDisplayWidth), pdt.Args["display_width_px"])
|
|
assert.Equal(t, int64(workspacesdk.DesktopDisplayHeight), pdt.Args["display_height_px"])
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Screenshot(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.Any(),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: "base64png",
|
|
ScreenshotWidth: 1024,
|
|
ScreenshotHeight: 768,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, quartz.NewReal())
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-1",
|
|
Name: "computer",
|
|
Input: `{"action":"screenshot"}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
assert.Equal(t, []byte("base64png"), resp.Data)
|
|
assert.False(t, resp.IsError)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_LeftClick(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
|
|
// Expect the action call first.
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.Any(),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "left_click performed",
|
|
}, nil)
|
|
|
|
// Then expect a screenshot (auto-screenshot after action).
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.Any(),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: "after-click",
|
|
ScreenshotWidth: 1024,
|
|
ScreenshotHeight: 768,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, quartz.NewReal())
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-2",
|
|
Name: "computer",
|
|
Input: `{"action":"left_click","coordinate":[100,200]}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, []byte("after-click"), resp.Data)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_Wait(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctrl := gomock.NewController(t)
|
|
mockConn := agentconnmock.NewMockAgentConn(ctrl)
|
|
// Expect a screenshot after the wait completes.
|
|
mockConn.EXPECT().ExecuteDesktopAction(
|
|
gomock.Any(),
|
|
gomock.Any(),
|
|
).Return(workspacesdk.DesktopActionResponse{
|
|
Output: "screenshot",
|
|
ScreenshotData: "after-wait",
|
|
ScreenshotWidth: 1024,
|
|
ScreenshotHeight: 768,
|
|
}, nil)
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return mockConn, nil
|
|
}, quartz.NewReal())
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-3",
|
|
Name: "computer",
|
|
Input: `{"action":"wait","duration":10}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "image", resp.Type)
|
|
assert.Equal(t, "image/png", resp.MediaType)
|
|
assert.Equal(t, []byte("after-wait"), resp.Data)
|
|
assert.False(t, resp.IsError)
|
|
}
|
|
|
|
func TestComputerUseTool_Run_ConnError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return nil, xerrors.New("workspace not available")
|
|
}, quartz.NewReal())
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-4",
|
|
Name: "computer",
|
|
Input: `{"action":"screenshot"}`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.True(t, resp.IsError)
|
|
assert.Contains(t, resp.Content, "workspace not available")
|
|
}
|
|
|
|
func TestComputerUseTool_Run_InvalidInput(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
tool := chattool.NewComputerUseTool(workspacesdk.DesktopDisplayWidth, workspacesdk.DesktopDisplayHeight, func(_ context.Context) (workspacesdk.AgentConn, error) {
|
|
return nil, xerrors.New("should not be called")
|
|
}, quartz.NewReal())
|
|
|
|
call := fantasy.ToolCall{
|
|
ID: "test-5",
|
|
Name: "computer",
|
|
Input: `{invalid json`,
|
|
}
|
|
|
|
resp, err := tool.Run(context.Background(), call)
|
|
require.NoError(t, err)
|
|
assert.True(t, resp.IsError)
|
|
assert.Contains(t, resp.Content, "invalid computer use input")
|
|
}
|