Files
coder/coderd/chatd/chattool/startworkspace.go
T
Kyle Carberry fc9e04da67 fix(chatd): handle soft-deleted workspaces in chattool start/create (#22997)
## Problem

Both `start_workspace` and `create_workspace` chattool tools failed to
handle soft-deleted workspaces correctly.

Coder uses soft-delete for workspaces (`deleted = true` on the row).
Both tools called `GetWorkspaceByID`, which queries
`workspaces_expanded` with **no** `deleted = false` filter — so it
returns the workspace row even when soft-deleted. The only deletion
check was for `sql.ErrNoRows`, which never fires because the row still
exists.

### `start_workspace` behavior (before fix)
1. Loads the soft-deleted workspace successfully
2. Finds the latest build (a delete transition)
3. Falls through to attempt to **start** the deleted workspace
4. Produces a confusing downstream error

### `create_workspace` behavior (before fix)
1. `checkExistingWorkspace` loads the soft-deleted workspace
2. If a delete build is **in-progress**: waits for it, then falsely
reports `already_exists` — blocks new workspace creation
3. If the delete build **succeeded**: accidentally allows creation
(because no agents are found), but via fragile logic rather than an
explicit check

## Fix

Add `ws.Deleted` checks immediately after `GetWorkspaceByID` succeeds in
both tools:

- **`startworkspace.go`**: Returns `"workspace was deleted; use
create_workspace to make a new one"`
- **`createworkspace.go`** (`checkExistingWorkspace`): Returns `(nil,
false, nil)` to allow new workspace creation

## Tests

- `TestStartWorkspace/DeletedWorkspace` — verifies `start_workspace`
returns deleted error and never calls `StartFn`
- `TestCheckExistingWorkspace_DeletedWorkspace` — verifies
`checkExistingWorkspace` allows creation for soft-deleted workspaces
2026-03-12 16:09:17 +00:00

176 lines
5.3 KiB
Go

package chattool
import (
"context"
"sync"
"charm.land/fantasy"
"github.com/google/uuid"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/codersdk"
)
// StartWorkspaceFn is the callback used by the start_workspace tool to
// kick off a workspace start. It receives the owner and workspace IDs
// together with the build request (the tool sends a "start"
// transition) and returns the resulting workspace build or an error.
type StartWorkspaceFn func(
	ctx context.Context,
	ownerID, workspaceID uuid.UUID,
	req codersdk.CreateWorkspaceBuildRequest,
) (codersdk.WorkspaceBuild, error)
// StartWorkspaceOptions holds the dependencies of the start_workspace
// tool returned by StartWorkspace.
type StartWorkspaceOptions struct {
	// DB is used to load the chat, its workspace, the latest build and
	// that build's provisioner job. The tool reports a configuration
	// error when it is nil.
	DB database.Store
	// OwnerID is passed to asOwner and to StartFn when starting the
	// workspace.
	OwnerID uuid.UUID
	// ChatID identifies the chat whose workspace is started. The tool
	// reports a configuration error when it is uuid.Nil.
	ChatID uuid.UUID
	// StartFn creates the start build. The tool reports an error when
	// it is nil.
	StartFn StartWorkspaceFn
	// AgentConnFn is forwarded to the agent readiness wait.
	AgentConnFn AgentConnFunc
	// WorkspaceMu, when non-nil, is held for the whole tool call to
	// serialize with create_workspace.
	WorkspaceMu *sync.Mutex
}
// StartWorkspace returns a tool that starts the stopped workspace
// associated with the current chat. The tool is idempotent: if the
// workspace is already running, or a start build is already in
// progress, it waits for the agent and reports success without
// creating another build.
//
// If a stop or delete build is in progress, the tool waits for it to
// finish and then either reports that the workspace was deleted or
// starts it again, rather than reporting a stopped workspace as
// started.
//
// Every failure is surfaced to the model as a text error response; the
// handler's Go error result is always nil.
func StartWorkspace(options StartWorkspaceOptions) fantasy.AgentTool {
	return fantasy.NewAgentTool(
		"start_workspace",
		"Start the chat's workspace if it is currently stopped. "+
			"This tool is idempotent — if the workspace is already "+
			"running, it returns immediately. Use create_workspace "+
			"first if no workspace exists yet.",
		func(ctx context.Context, _ struct{}, _ fantasy.ToolCall) (fantasy.ToolResponse, error) {
			if options.StartFn == nil {
				return fantasy.NewTextErrorResponse("workspace starter is not configured"), nil
			}

			// Serialize with create_workspace to prevent races. The
			// lock is held until the handler returns, including while
			// waiting on builds and the agent.
			if options.WorkspaceMu != nil {
				options.WorkspaceMu.Lock()
				defer options.WorkspaceMu.Unlock()
			}

			if options.DB == nil || options.ChatID == uuid.Nil {
				return fantasy.NewTextErrorResponse("start_workspace is not properly configured"), nil
			}

			chat, err := options.DB.GetChatByID(ctx, options.ChatID)
			if err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("load chat: %w", err).Error(),
				), nil
			}
			if !chat.WorkspaceID.Valid {
				return fantasy.NewTextErrorResponse(
					"chat has no workspace; use create_workspace first",
				), nil
			}

			ws, err := options.DB.GetWorkspaceByID(ctx, chat.WorkspaceID.UUID)
			if err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("load workspace: %w", err).Error(),
				), nil
			}
			// The lookup can return a soft-deleted row, so the Deleted
			// flag has to be checked explicitly.
			if ws.Deleted {
				return fantasy.NewTextErrorResponse(
					"workspace was deleted; use create_workspace to make a new one",
				), nil
			}

			build, err := options.DB.GetLatestWorkspaceBuildByWorkspaceID(ctx, ws.ID)
			if err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("get latest build: %w", err).Error(),
				), nil
			}

			job, err := options.DB.GetProvisionerJobByID(ctx, build.JobID)
			if err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("get provisioner job: %w", err).Error(),
				), nil
			}

			// If a build is already in progress, wait for it.
			switch job.JobStatus {
			case database.ProvisionerJobStatusPending,
				database.ProvisionerJobStatusRunning:
				if err := waitForBuild(ctx, options.DB, ws.ID); err != nil {
					return fantasy.NewTextErrorResponse(
						xerrors.Errorf("waiting for in-progress build: %w", err).Error(),
					), nil
				}
				// Only an in-progress start build leaves the workspace
				// running once it completes.
				if build.Transition == database.WorkspaceTransitionStart {
					return waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws)
				}
				// The build we waited on was a stop or delete, so the
				// workspace is not running. Reload the row: if the
				// build soft-deleted the workspace there is nothing
				// left to start.
				ws, err = options.DB.GetWorkspaceByID(ctx, ws.ID)
				if err != nil {
					return fantasy.NewTextErrorResponse(
						xerrors.Errorf("load workspace: %w", err).Error(),
					), nil
				}
				if ws.Deleted {
					return fantasy.NewTextErrorResponse(
						"workspace was deleted; use create_workspace to make a new one",
					), nil
				}
				// Otherwise fall out of the switch and start it.
			case database.ProvisionerJobStatusSucceeded:
				// If the latest successful build is a start
				// transition, the workspace should be running.
				if build.Transition == database.WorkspaceTransitionStart {
					return waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws)
				}
				// Otherwise the workspace is not running (soft-deleted
				// workspaces were rejected above) — proceed to start
				// it below.
			default:
				// Failed, canceled, etc — try starting anyway.
			}

			// Set up dbauthz context for the start call.
			ownerCtx, ownerErr := asOwner(ctx, options.DB, options.OwnerID)
			if ownerErr != nil {
				return fantasy.NewTextErrorResponse(ownerErr.Error()), nil
			}

			// The returned build is not needed; completion is observed
			// through waitForBuild below.
			_, err = options.StartFn(ownerCtx, options.OwnerID, ws.ID, codersdk.CreateWorkspaceBuildRequest{
				Transition: codersdk.WorkspaceTransitionStart,
			})
			if err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("start workspace: %w", err).Error(),
				), nil
			}

			if err := waitForBuild(ctx, options.DB, ws.ID); err != nil {
				return fantasy.NewTextErrorResponse(
					xerrors.Errorf("workspace start build failed: %w", err).Error(),
				), nil
			}

			return waitForAgentAndRespond(ctx, options.DB, options.AgentConnFn, ws)
		},
	)
}
// waitForAgentAndRespond builds the success response for a workspace
// that is considered started. It fetches the agents of the workspace's
// latest build and, when at least one exists, merges the result of
// waiting on the first agent into the response. A failed lookup is
// treated the same as an empty agent list: the response still reports
// success, with "agent_status" set to "no_agent". The returned error
// is always nil.
func waitForAgentAndRespond(
	ctx context.Context,
	db database.Store,
	agentConnFn AgentConnFunc,
	ws database.Workspace,
) (fantasy.ToolResponse, error) {
	payload := map[string]any{
		"started":        true,
		"workspace_name": ws.Name,
	}

	agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, ws.ID)
	if err == nil && len(agents) > 0 {
		// Only the first agent is waited on.
		for key, value := range waitForAgentReady(ctx, db, agents[0].ID, agentConnFn) {
			payload[key] = value
		}
		return toolResponse(payload), nil
	}

	// Workspace started but no agent could be found — still report
	// success so the model knows the workspace is up.
	payload["agent_status"] = "no_agent"
	return toolResponse(payload), nil
}