feat: support multiple agents with shared instance-identity auth (#24325)

> This PR was authored by Mux on behalf of Mike.

## Summary

Adds support for multiple peer root workspace agents sharing the same
`auth_instance_id`, so AWS, Azure, and GCP instance-identity auth can
issue the correct session token for a selected agent instead of assuming
a
single root agent per instance.

## Problem

When a Terraform template attaches two or more `coder_agent` resources
(with `auth = "aws-instance-identity"`) to a single compute instance,
every agent shares the same cloud instance ID. The existing singular
lookup picks whichever agent was created most recently, silently
ignoring
the others.

## Solution

Introduce an optional pre-auth agent selector (`CODER_AGENT_NAME`) and
make the server-side lookup ambiguity-aware.

**Database layer:**
- `GetWorkspaceAgentsByInstanceID` (`:many`): returns all matching root
  agents for an instance ID.
- `GetWorkspaceAgentByInstanceIDAndName` (`:one`): returns the named
root
  agent for disambiguation.

**SDK and CLI:**
- `agent_name` field added to AWS, Azure, and GCP request structs
  (`omitempty` for backward compatibility).
- `CODER_AGENT_NAME` env var and `--agent-name` flag wired into the
agent
  bootstrap before instance-identity auth runs.

**Server handler (`handleAuthInstanceID`):**
- When `agent_name` is present: direct lookup by (instance ID, name).
- When absent: legacy lookup, then resource-scoped ambiguity check.
  Returns 409 with available agent names if multiple root agents match.
- Whitespace-only names are trimmed and treated as unspecified.
- Sub-agents remain excluded (`parent_id IS NULL` filter).

**Verification template:**
- `examples/templates/aws-multi-agent/` provisions one EC2 instance with
  two agents (`main` and `dev`), both using instance-identity auth with
  `CODER_AGENT_NAME` set in the cloud-init user data.

## Backward compatibility

Existing single-agent deployments work unchanged. The `agent_name` field
is optional with `omitempty`, and the unnamed path preserves today's
behavior when only one root agent matches.
This commit is contained in:
Michael Suchacz
2026-04-16 13:59:09 +02:00
committed by GitHub
parent 1cf0354f72
commit e5707a13d6
29 changed files with 1563 additions and 286 deletions
+73 -57
View File
@@ -26566,63 +26566,6 @@ func (q *sqlQuerier) GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (W
return i, err
}
const getWorkspaceAgentByInstanceID = `-- name: GetWorkspaceAgentByInstanceID :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, expanded_directory, logs_length, logs_overflowed, started_at, ready_at, subsystems, display_apps, api_version, display_order, parent_id, api_key_scope, deleted
FROM
workspace_agents
WHERE
auth_instance_id = $1 :: TEXT
-- Filter out deleted sub agents.
AND deleted = FALSE
-- Filter out sub agents, they do not authenticate with auth_instance_id.
AND parent_id IS NULL
ORDER BY
created_at DESC
`
func (q *sqlQuerier) GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanceID string) (WorkspaceAgent, error) {
row := q.db.QueryRowContext(ctx, getWorkspaceAgentByInstanceID, authInstanceID)
var i WorkspaceAgent
err := row.Scan(
&i.ID,
&i.CreatedAt,
&i.UpdatedAt,
&i.Name,
&i.FirstConnectedAt,
&i.LastConnectedAt,
&i.DisconnectedAt,
&i.ResourceID,
&i.AuthToken,
&i.AuthInstanceID,
&i.Architecture,
&i.EnvironmentVariables,
&i.OperatingSystem,
&i.InstanceMetadata,
&i.ResourceMetadata,
&i.Directory,
&i.Version,
&i.LastConnectedReplicaID,
&i.ConnectionTimeoutSeconds,
&i.TroubleshootingURL,
&i.MOTDFile,
&i.LifecycleState,
&i.ExpandedDirectory,
&i.LogsLength,
&i.LogsOverflowed,
&i.StartedAt,
&i.ReadyAt,
pq.Array(&i.Subsystems),
pq.Array(&i.DisplayApps),
&i.APIVersion,
&i.DisplayOrder,
&i.ParentID,
&i.APIKeyScope,
&i.Deleted,
)
return i, err
}
const getWorkspaceAgentLifecycleStateByID = `-- name: GetWorkspaceAgentLifecycleStateByID :one
SELECT
lifecycle_state,
@@ -26836,6 +26779,79 @@ func (q *sqlQuerier) GetWorkspaceAgentScriptTimingsByBuildID(ctx context.Context
return items, nil
}
const getWorkspaceAgentsByInstanceID = `-- name: GetWorkspaceAgentsByInstanceID :many
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, expanded_directory, logs_length, logs_overflowed, started_at, ready_at, subsystems, display_apps, api_version, display_order, parent_id, api_key_scope, deleted
FROM
workspace_agents
WHERE
auth_instance_id = $1 :: TEXT
-- Filter out deleted agents.
AND deleted = FALSE
-- Filter out sub agents, they do not authenticate with auth_instance_id.
AND parent_id IS NULL
ORDER BY
created_at DESC
`
func (q *sqlQuerier) GetWorkspaceAgentsByInstanceID(ctx context.Context, authInstanceID string) ([]WorkspaceAgent, error) {
rows, err := q.db.QueryContext(ctx, getWorkspaceAgentsByInstanceID, authInstanceID)
if err != nil {
return nil, err
}
defer rows.Close()
var items []WorkspaceAgent
for rows.Next() {
var i WorkspaceAgent
if err := rows.Scan(
&i.ID,
&i.CreatedAt,
&i.UpdatedAt,
&i.Name,
&i.FirstConnectedAt,
&i.LastConnectedAt,
&i.DisconnectedAt,
&i.ResourceID,
&i.AuthToken,
&i.AuthInstanceID,
&i.Architecture,
&i.EnvironmentVariables,
&i.OperatingSystem,
&i.InstanceMetadata,
&i.ResourceMetadata,
&i.Directory,
&i.Version,
&i.LastConnectedReplicaID,
&i.ConnectionTimeoutSeconds,
&i.TroubleshootingURL,
&i.MOTDFile,
&i.LifecycleState,
&i.ExpandedDirectory,
&i.LogsLength,
&i.LogsOverflowed,
&i.StartedAt,
&i.ReadyAt,
pq.Array(&i.Subsystems),
pq.Array(&i.DisplayApps),
&i.APIVersion,
&i.DisplayOrder,
&i.ParentID,
&i.APIKeyScope,
&i.Deleted,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const getWorkspaceAgentsByParentID = `-- name: GetWorkspaceAgentsByParentID :many
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, expanded_directory, logs_length, logs_overflowed, started_at, ready_at, subsystems, display_apps, api_version, display_order, parent_id, api_key_scope, deleted