mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
0bb09935bc
Adds a deployment-wide setting to select the computer-use provider (Anthropic or OpenAI) for AI agents, plus the OpenAI computer-use runner needed to honor that selection. The setting is stored in `site_configs` under `agents_computer_use_provider`, defaults to Anthropic when unset, and is exposed via experimental GET/PUT endpoints under `/api/experimental/chats/config/computer-use-provider`. The chatd computer-use tool now dispatches to either `runAnthropicComputerUse` or `runOpenAIComputerUse` based on the resolved provider, with provider-specific result metadata for OpenAI screenshots. Frontend adds a provider dropdown to the Agents Experiments settings page nested under the virtual desktop toggle, with disabled state handling while virtual desktop is off and skeleton loaders while config queries are in flight.

Hugo and Codex review follow-up:

- Uses shared provider validation and clearer computer-use constant names.
- Removes stale OpenAI pending-safety-checks commentary.
- Documents why provider result metadata is needed for OpenAI screenshots.
- Keeps the computer-use subagent visible when provider credentials are missing, then returns a clear spawn-time configuration error.
- Uses OpenAI's recommended 1600x900 screenshot geometry to preserve the native 16:9 aspect ratio.
- Moves OpenAI-specific computer-use helpers into `coderd/x/chatd/chatopenai/computeruse` after rebasing onto the provider package refactor in `main`.
- Converts OpenAI pixel scroll deltas to Coder desktop wheel-click amounts.
- Preserves OpenAI pointer modifiers with key down/up desktop actions and rejects unsupported non-left double-click buttons explicitly.
- Maps OpenAI back/forward side-button clicks to browser navigation key actions.
- Defaults omitted OpenAI click buttons to left-click.
- Retries mouse release cleanup if the final OpenAI drag release fails.
- Keeps computer-use subagent availability messages stable when provider config cannot be loaded, while logging the backend error.
- Releases remaining OpenAI modifier keys if a synthetic key-up cleanup action fails.
- Updates Storybook interaction stories so provider snapshots show the selected final provider.

> Mux updated this PR description on behalf of Mike.
168 lines
4.8 KiB
Go
168 lines
4.8 KiB
Go
package chatd
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"charm.land/fantasy"
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chatloop"
|
|
openaicomputeruse "github.com/coder/coder/v2/coderd/x/chatd/chatopenai/computeruse"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
// computerUseConfigContext lets internal and worker callers read
|
|
// deployment-wide chat settings when they lack an HTTP-derived actor. HTTP
|
|
// handlers always carry an actor, so the AsChatd fallback never elevates user
|
|
// contexts and this function is a no-op in that path. The setting it gates is
|
|
// global and readable by any authenticated actor, not a back-door.
|
|
func computerUseConfigContext(ctx context.Context) context.Context {
|
|
if _, ok := dbauthz.ActorFromContext(ctx); ok {
|
|
return ctx
|
|
}
|
|
//nolint:gocritic // Worker contexts may lack an actor.
|
|
return dbauthz.AsChatd(ctx)
|
|
}
|
|
|
|
func (p *Server) computerUseProviderAndModelFromConfig(
|
|
ctx context.Context,
|
|
) (provider, modelProvider, modelName string, err error) {
|
|
rawProvider, err := p.db.GetChatComputerUseProvider(
|
|
computerUseConfigContext(ctx),
|
|
)
|
|
if err != nil {
|
|
return "", "", "", xerrors.Errorf("get computer use provider: %w", err)
|
|
}
|
|
|
|
provider = strings.TrimSpace(rawProvider)
|
|
if provider == "" {
|
|
provider = chattool.ComputerUseProviderAnthropic
|
|
}
|
|
|
|
modelProvider, modelName, ok := chattool.DefaultComputerUseModel(provider)
|
|
if !ok {
|
|
return "", "", "", xerrors.Errorf(
|
|
"unknown computer-use provider %q configured in agents_computer_use_provider",
|
|
provider,
|
|
)
|
|
}
|
|
|
|
return provider, modelProvider, modelName, nil
|
|
}
|
|
|
|
func (p *Server) resolveComputerUseModel(
|
|
ctx context.Context,
|
|
chat database.Chat,
|
|
providerKeys chatprovider.ProviderAPIKeys,
|
|
computerUseProvider string,
|
|
computerUseModelProvider string,
|
|
computerUseModelName string,
|
|
) (
|
|
model fantasy.LanguageModel,
|
|
debugEnabled bool,
|
|
resolvedProvider string,
|
|
resolvedModel string,
|
|
err error,
|
|
) {
|
|
resolvedProvider, resolvedModel, err = chatprovider.ResolveModelWithProviderHint(
|
|
computerUseModelName,
|
|
computerUseModelProvider,
|
|
)
|
|
if err != nil {
|
|
return nil, false, "", "", xerrors.Errorf(
|
|
"resolve computer use model metadata for provider %q model %q: %w",
|
|
computerUseProvider,
|
|
computerUseModelName,
|
|
err,
|
|
)
|
|
}
|
|
|
|
model, debugEnabled, err = p.newDebugAwareModelFromConfig(
|
|
ctx,
|
|
chat,
|
|
computerUseModelProvider,
|
|
computerUseModelName,
|
|
providerKeys,
|
|
chatprovider.UserAgent(),
|
|
chatprovider.CoderHeaders(chat),
|
|
)
|
|
if err != nil {
|
|
return nil, false, "", "", xerrors.Errorf(
|
|
"resolve computer use model for provider %q model %q: %w",
|
|
computerUseProvider,
|
|
computerUseModelName,
|
|
err,
|
|
)
|
|
}
|
|
|
|
return model, debugEnabled, resolvedProvider, resolvedModel, nil
|
|
}
|
|
|
|
type computerUseProviderToolOptions struct {
|
|
provider string
|
|
isPlanModeTurn bool
|
|
isComputerUse bool
|
|
getWorkspaceConn func(context.Context) (workspacesdk.AgentConn, error)
|
|
storeFile chattool.StoreFileFunc
|
|
clock quartz.Clock
|
|
logger slog.Logger
|
|
}
|
|
|
|
func appendComputerUseProviderTool(
|
|
providerTools []chatloop.ProviderTool,
|
|
opts computerUseProviderToolOptions,
|
|
) ([]chatloop.ProviderTool, error) {
|
|
// This helper is called for every chat turn. Only chats created by the
|
|
// computer_use subagent definition have ChatModeComputerUse, which filters
|
|
// out root, general, and explore chats. Plan mode is separate from Mode, so
|
|
// planning turns stay gated even for computer-use chats.
|
|
if opts.isPlanModeTurn || !opts.isComputerUse {
|
|
return providerTools, nil
|
|
}
|
|
|
|
desktopGeometry := chattool.DefaultComputerUseDesktopGeometry(opts.provider)
|
|
definition, err := chattool.ComputerUseProviderTool(
|
|
opts.provider,
|
|
desktopGeometry.DeclaredWidth,
|
|
desktopGeometry.DeclaredHeight,
|
|
)
|
|
if err != nil {
|
|
return providerTools, xerrors.Errorf(
|
|
"build computer use provider tool for provider %q: %w",
|
|
opts.provider,
|
|
err,
|
|
)
|
|
}
|
|
|
|
clock := opts.clock
|
|
if clock == nil {
|
|
clock = quartz.NewReal()
|
|
}
|
|
providerTool := chatloop.ProviderTool{
|
|
Definition: definition,
|
|
Runner: chattool.NewComputerUseTool(
|
|
opts.provider,
|
|
desktopGeometry.DeclaredWidth,
|
|
desktopGeometry.DeclaredHeight,
|
|
opts.getWorkspaceConn,
|
|
opts.storeFile,
|
|
clock,
|
|
opts.logger,
|
|
),
|
|
}
|
|
if opts.provider == chattool.ComputerUseProviderOpenAI {
|
|
// OpenAI computer-use image results need detail metadata so the model receives
|
|
// the screenshot at original detail when the chat loop sends the tool result.
|
|
providerTool.ResultProviderMetadata = openaicomputeruse.ResultProviderMetadata
|
|
}
|
|
|
|
return append(providerTools, providerTool), nil
|
|
}
|