mirror of
https://github.com/coder/coder.git
synced 2026-06-03 13:08:25 +00:00
84527390c6
Implement the backend for the desktop feature for agents.
- Adds a new `/api/experimental/chats/$id/desktop` endpoint to coderd, which exposes a VNC stream from a [portabledesktop](https://github.com/coder/portabledesktop) process running inside the workspace.
- Adds a new `spawn_computer_use_agent` tool to chatd, which spawns a subagent that has access to the `computer` tool, letting it interact with the `portabledesktop` process running inside the workspace.
- Adds the plumbing to make the above possible.

There's a follow-up frontend PR here: https://github.com/coder/coder/pull/23006
468 lines
12 KiB
Go
468 lines
12 KiB
Go
package agentdesktop_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3/sloggers/slogtest"
|
|
"github.com/coder/coder/v2/agent/agentdesktop"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
// Ensure fakeDesktop satisfies the Desktop interface at compile time.
|
|
var _ agentdesktop.Desktop = (*fakeDesktop)(nil)
|
|
|
|
// fakeDesktop is a minimal Desktop implementation for unit tests.
|
|
type fakeDesktop struct {
|
|
startErr error
|
|
startCfg agentdesktop.DisplayConfig
|
|
vncConnErr error
|
|
screenshotErr error
|
|
screenshotRes agentdesktop.ScreenshotResult
|
|
closed bool
|
|
|
|
// Track calls for assertions.
|
|
lastMove [2]int
|
|
lastClick [3]int // x, y, button
|
|
lastScroll [4]int // x, y, dx, dy
|
|
lastKey string
|
|
lastTyped string
|
|
lastKeyDown string
|
|
lastKeyUp string
|
|
}
|
|
|
|
func (f *fakeDesktop) Start(context.Context) (agentdesktop.DisplayConfig, error) {
|
|
return f.startCfg, f.startErr
|
|
}
|
|
|
|
func (f *fakeDesktop) VNCConn(context.Context) (net.Conn, error) {
|
|
return nil, f.vncConnErr
|
|
}
|
|
|
|
func (f *fakeDesktop) Screenshot(_ context.Context, _ agentdesktop.ScreenshotOptions) (agentdesktop.ScreenshotResult, error) {
|
|
return f.screenshotRes, f.screenshotErr
|
|
}
|
|
|
|
func (f *fakeDesktop) Move(_ context.Context, x, y int) error {
|
|
f.lastMove = [2]int{x, y}
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeDesktop) Click(_ context.Context, x, y int, _ agentdesktop.MouseButton) error {
|
|
f.lastClick = [3]int{x, y, 1}
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeDesktop) DoubleClick(_ context.Context, x, y int, _ agentdesktop.MouseButton) error {
|
|
f.lastClick = [3]int{x, y, 2}
|
|
return nil
|
|
}
|
|
|
|
// ButtonDown is a no-op that always succeeds.
func (*fakeDesktop) ButtonDown(context.Context, agentdesktop.MouseButton) error {
	return nil
}
|
|
// ButtonUp is a no-op that always succeeds.
func (*fakeDesktop) ButtonUp(context.Context, agentdesktop.MouseButton) error {
	return nil
}
|
|
|
|
func (f *fakeDesktop) Scroll(_ context.Context, x, y, dx, dy int) error {
|
|
f.lastScroll = [4]int{x, y, dx, dy}
|
|
return nil
|
|
}
|
|
|
|
// Drag is a no-op that always succeeds.
func (*fakeDesktop) Drag(context.Context, int, int, int, int) error {
	return nil
}
|
|
|
|
func (f *fakeDesktop) KeyPress(_ context.Context, key string) error {
|
|
f.lastKey = key
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeDesktop) KeyDown(_ context.Context, key string) error {
|
|
f.lastKeyDown = key
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeDesktop) KeyUp(_ context.Context, key string) error {
|
|
f.lastKeyUp = key
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeDesktop) Type(_ context.Context, text string) error {
|
|
f.lastTyped = text
|
|
return nil
|
|
}
|
|
|
|
func (*fakeDesktop) CursorPosition(context.Context) (x int, y int, err error) {
|
|
return 10, 20, nil
|
|
}
|
|
|
|
func (f *fakeDesktop) Close() error {
|
|
f.closed = true
|
|
return nil
|
|
}
|
|
|
|
func TestHandleDesktopVNC_StartError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{startErr: xerrors.New("no desktop")}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodGet, "/vnc", nil)
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusInternalServerError, rr.Code)
|
|
|
|
var resp codersdk.Response
|
|
err := json.NewDecoder(rr.Body).Decode(&resp)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "Failed to start desktop session.", resp.Message)
|
|
}
|
|
|
|
func TestHandleAction_Screenshot(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: workspacesdk.DesktopDisplayWidth, Height: workspacesdk.DesktopDisplayHeight},
|
|
screenshotRes: agentdesktop.ScreenshotResult{Data: "base64data"},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
body := agentdesktop.DesktopAction{Action: "screenshot"}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
|
|
var result agentdesktop.DesktopActionResponse
|
|
err = json.NewDecoder(rr.Body).Decode(&result)
|
|
require.NoError(t, err)
|
|
// Dimensions come from DisplayConfig, not the screenshot CLI.
|
|
assert.Equal(t, "screenshot", result.Output)
|
|
assert.Equal(t, "base64data", result.ScreenshotData)
|
|
assert.Equal(t, workspacesdk.DesktopDisplayWidth, result.ScreenshotWidth)
|
|
assert.Equal(t, workspacesdk.DesktopDisplayHeight, result.ScreenshotHeight)
|
|
}
|
|
|
|
func TestHandleAction_LeftClick(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "left_click",
|
|
Coordinate: &[2]int{100, 200},
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
|
|
var resp agentdesktop.DesktopActionResponse
|
|
err = json.NewDecoder(rr.Body).Decode(&resp)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "left_click action performed", resp.Output)
|
|
assert.Equal(t, [3]int{100, 200, 1}, fake.lastClick)
|
|
}
|
|
|
|
func TestHandleAction_UnknownAction(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
body := agentdesktop.DesktopAction{Action: "explode"}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rr.Code)
|
|
}
|
|
|
|
func TestHandleAction_KeyAction(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
text := "Return"
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "key",
|
|
Text: &text,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
assert.Equal(t, "Return", fake.lastKey)
|
|
}
|
|
|
|
func TestHandleAction_TypeAction(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
text := "hello world"
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "type",
|
|
Text: &text,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
assert.Equal(t, "hello world", fake.lastTyped)
|
|
}
|
|
|
|
func TestHandleAction_HoldKey(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
mClk := quartz.NewMock(t)
|
|
trap := mClk.Trap().NewTimer("agentdesktop", "hold_key")
|
|
defer trap.Close()
|
|
api := agentdesktop.NewAPI(logger, fake, mClk)
|
|
defer api.Close()
|
|
|
|
text := "Shift_L"
|
|
dur := 100
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "hold_key",
|
|
Text: &text,
|
|
Duration: &dur,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
defer close(done)
|
|
handler.ServeHTTP(rr, req)
|
|
}()
|
|
|
|
// Wait for the timer to be created, then advance past it.
|
|
trap.MustWait(req.Context()).MustRelease(req.Context())
|
|
mClk.Advance(time.Duration(dur) * time.Millisecond).MustWait(req.Context())
|
|
|
|
<-done
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
|
|
var resp agentdesktop.DesktopActionResponse
|
|
err = json.NewDecoder(rr.Body).Decode(&resp)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "hold_key action performed", resp.Output)
|
|
assert.Equal(t, "Shift_L", fake.lastKeyDown)
|
|
assert.Equal(t, "Shift_L", fake.lastKeyUp)
|
|
}
|
|
|
|
func TestHandleAction_HoldKeyMissingText(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
body := agentdesktop.DesktopAction{Action: "hold_key"}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rr.Code)
|
|
|
|
var resp codersdk.Response
|
|
err = json.NewDecoder(rr.Body).Decode(&resp)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, "Missing \"text\" for hold_key action.", resp.Message)
|
|
}
|
|
|
|
func TestHandleAction_ScrollDown(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
dir := "down"
|
|
amount := 5
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "scroll",
|
|
Coordinate: &[2]int{500, 400},
|
|
ScrollDirection: &dir,
|
|
ScrollAmount: &amount,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
// dy should be positive 5 for "down".
|
|
assert.Equal(t, [4]int{500, 400, 0, 5}, fake.lastScroll)
|
|
}
|
|
|
|
func TestHandleAction_CoordinateScaling(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{
|
|
// Native display is 1920x1080.
|
|
startCfg: agentdesktop.DisplayConfig{Width: 1920, Height: 1080},
|
|
}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
defer api.Close()
|
|
|
|
// Model is working in a 1280x720 coordinate space.
|
|
sw := 1280
|
|
sh := 720
|
|
body := agentdesktop.DesktopAction{
|
|
Action: "mouse_move",
|
|
Coordinate: &[2]int{640, 360},
|
|
ScaledWidth: &sw,
|
|
ScaledHeight: &sh,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
require.NoError(t, err)
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/action", bytes.NewReader(b))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rr.Code)
|
|
// 640 in 1280-space → 960 in 1920-space (midpoint maps to
|
|
// midpoint).
|
|
assert.Equal(t, 960, fake.lastMove[0])
|
|
assert.Equal(t, 540, fake.lastMove[1])
|
|
}
|
|
|
|
func TestClose_DelegatesToDesktop(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
fake := &fakeDesktop{}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
|
|
err := api.Close()
|
|
require.NoError(t, err)
|
|
assert.True(t, fake.closed)
|
|
}
|
|
|
|
func TestClose_PreventsNewSessions(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := slogtest.Make(t, nil)
|
|
// After Close(), Start() will return an error because the
|
|
// underlying Desktop is closed.
|
|
fake := &fakeDesktop{}
|
|
api := agentdesktop.NewAPI(logger, fake, nil)
|
|
|
|
err := api.Close()
|
|
require.NoError(t, err)
|
|
|
|
// Simulate the closed desktop returning an error on Start().
|
|
fake.startErr = xerrors.New("desktop is closed")
|
|
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodGet, "/vnc", nil)
|
|
|
|
handler := api.Routes()
|
|
handler.ServeHTTP(rr, req)
|
|
|
|
assert.Equal(t, http.StatusInternalServerError, rr.Code)
|
|
}
|