mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: support multiple agents with shared instance-identity auth (#24325)
> This PR was authored by Mux on behalf of Mike. ## Summary Adds support for multiple peer root workspace agents sharing the same `auth_instance_id`, so AWS, Azure, and GCP instance-identity auth can issue the correct session token for a selected agent instead of assuming a single root agent per instance. ## Problem When a Terraform template attaches two or more `coder_agent` resources (with `auth = "aws-instance-identity"`) to a single compute instance, every agent shares the same cloud instance ID. The existing singular lookup picks whichever agent was created most recently, silently ignoring the others. ## Solution Introduce an optional pre-auth agent selector (`CODER_AGENT_NAME`) and make the server-side lookup ambiguity-aware. **Database layer:** - `GetWorkspaceAgentsByInstanceID` (`:many`): returns all matching root agents for an instance ID. - `GetWorkspaceAgentByInstanceIDAndName` (`:one`): returns the named root agent for disambiguation. **SDK and CLI:** - `agent_name` field added to AWS, Azure, and GCP request structs (`omitempty` for backward compatibility). - `CODER_AGENT_NAME` env var and `--agent-name` flag wired into the agent bootstrap before instance-identity auth runs. **Server handler (`handleAuthInstanceID`):** - When `agent_name` is present: direct lookup by (instance ID, name). - When absent: legacy lookup, then resource-scoped ambiguity check. Returns 409 with available agent names if multiple root agents match. - Whitespace-only names are trimmed and treated as unspecified. - Sub-agents remain excluded (`parent_id IS NULL` filter). **Verification template:** - `examples/templates/aws-multi-agent/` provisions one EC2 instance with two agents (`main` and `dev`), both using instance-identity auth with `CODER_AGENT_NAME` set in the cloud-init user data. ## Backward compatibility Existing single-agent deployments work unchanged. The `agent_name` field is optional with `omitempty`, and the unnamed path preserves today's behavior when only one root agent matches.
This commit is contained in:
+119
-26
@@ -7184,38 +7184,55 @@ func TestGetWorkspaceAgentsByParentID(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetWorkspaceAgentByInstanceID(t *testing.T) {
|
||||
func setupWorkspaceAgentQueryResources(t *testing.T, db database.Store, count int) []database.WorkspaceResource {
|
||||
t.Helper()
|
||||
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
|
||||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||||
OrganizationID: org.ID,
|
||||
})
|
||||
|
||||
resources := make([]database.WorkspaceResource, 0, count)
|
||||
for i := 0; i < count; i++ {
|
||||
resources = append(resources, dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
|
||||
JobID: job.ID,
|
||||
}))
|
||||
}
|
||||
|
||||
return resources
|
||||
}
|
||||
|
||||
func markWorkspaceAgentDeleted(ctx context.Context, t *testing.T, sqlDB *sql.DB, agentID uuid.UUID) {
|
||||
t.Helper()
|
||||
|
||||
_, err := sqlDB.ExecContext(ctx, "UPDATE workspace_agents SET deleted = TRUE WHERE id = $1", agentID)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestGetWorkspaceAgentsByInstanceID(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Context: https://github.com/coder/coder/pull/22196
|
||||
t.Run("DoesNotReturnSubAgents", func(t *testing.T) {
|
||||
t.Run("ReturnsAllMatchingRootAgents", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Given: A parent workspace agent with an AuthInstanceID and a
|
||||
// sub-agent that shares the same AuthInstanceID.
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
|
||||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||||
OrganizationID: org.ID,
|
||||
})
|
||||
resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
|
||||
JobID: job.ID,
|
||||
})
|
||||
|
||||
resources := setupWorkspaceAgentQueryResources(t, db, 2)
|
||||
authInstanceID := fmt.Sprintf("instance-%s-%d", t.Name(), time.Now().UnixNano())
|
||||
parentAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resource.ID,
|
||||
olderCreatedAt := dbtime.Now().Add(-time.Hour)
|
||||
newerCreatedAt := dbtime.Now()
|
||||
|
||||
olderAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[0].ID,
|
||||
CreatedAt: olderCreatedAt,
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
// Create a sub-agent with the same AuthInstanceID (simulating
|
||||
// the old behavior before the fix).
|
||||
_ = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ParentID: uuid.NullUUID{UUID: parentAgent.ID, Valid: true},
|
||||
ResourceID: resource.ID,
|
||||
newerAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[1].ID,
|
||||
CreatedAt: newerCreatedAt,
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
@@ -7224,13 +7241,89 @@ func TestGetWorkspaceAgentByInstanceID(t *testing.T) {
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
// When: We look up the agent by instance ID.
|
||||
agent, err := db.GetWorkspaceAgentByInstanceID(ctx, authInstanceID)
|
||||
agents, err := db.GetWorkspaceAgentsByInstanceID(ctx, authInstanceID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, agents, 2)
|
||||
assert.Equal(t, []uuid.UUID{newerAgent.ID, olderAgent.ID}, []uuid.UUID{agents[0].ID, agents[1].ID})
|
||||
})
|
||||
|
||||
// Then: The result must be the parent agent, not the sub-agent.
|
||||
assert.Equal(t, parentAgent.ID, agent.ID, "instance ID lookup should return the parent agent, not a sub-agent")
|
||||
assert.False(t, agent.ParentID.Valid, "returned agent should not have a parent (should be the parent itself)")
|
||||
t.Run("ExcludesDeletedAndSubAgents", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
db, _, sqlDB := dbtestutil.NewDBWithSQLDB(t)
|
||||
resources := setupWorkspaceAgentQueryResources(t, db, 2)
|
||||
authInstanceID := fmt.Sprintf("instance-%s-%d", t.Name(), time.Now().UnixNano())
|
||||
baseCreatedAt := dbtime.Now()
|
||||
|
||||
rootAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[0].ID,
|
||||
CreatedAt: baseCreatedAt.Add(-time.Hour),
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
_ = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ParentID: uuid.NullUUID{UUID: rootAgent.ID, Valid: true},
|
||||
ResourceID: resources[0].ID,
|
||||
CreatedAt: baseCreatedAt,
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
deletedRootAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[1].ID,
|
||||
CreatedAt: baseCreatedAt.Add(time.Minute),
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
markWorkspaceAgentDeleted(ctx, t, sqlDB, deletedRootAgent.ID)
|
||||
|
||||
agents, err := db.GetWorkspaceAgentsByInstanceID(ctx, authInstanceID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, agents, 1)
|
||||
assert.Equal(t, rootAgent.ID, agents[0].ID)
|
||||
assert.False(t, agents[0].ParentID.Valid)
|
||||
})
|
||||
|
||||
t.Run("OrdersNewestFirst", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
resources := setupWorkspaceAgentQueryResources(t, db, 2)
|
||||
authInstanceID := fmt.Sprintf("instance-%s-%d", t.Name(), time.Now().UnixNano())
|
||||
olderCreatedAt := dbtime.Now().Add(-time.Hour)
|
||||
newerCreatedAt := dbtime.Now()
|
||||
|
||||
olderAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[0].ID,
|
||||
CreatedAt: olderCreatedAt,
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
newerAgent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resources[1].ID,
|
||||
CreatedAt: newerCreatedAt,
|
||||
AuthInstanceID: sql.NullString{
|
||||
String: authInstanceID,
|
||||
Valid: true,
|
||||
},
|
||||
})
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
agents, err := db.GetWorkspaceAgentsByInstanceID(ctx, authInstanceID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, agents, 2)
|
||||
assert.Equal(t, newerAgent.ID, agents[0].ID)
|
||||
assert.Equal(t, olderAgent.ID, agents[1].ID)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user