mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: support multiple agents with shared instance-identity auth (#24325)
> This PR was authored by Mux on behalf of Mike. ## Summary Adds support for multiple peer root workspace agents sharing the same `auth_instance_id`, so AWS, Azure, and GCP instance-identity auth can issue the correct session token for a selected agent instead of assuming a single root agent per instance. ## Problem When a Terraform template attaches two or more `coder_agent` resources (with `auth = "aws-instance-identity"`) to a single compute instance, every agent shares the same cloud instance ID. The existing singular lookup picks whichever agent was created most recently, silently ignoring the others. ## Solution Introduce an optional pre-auth agent selector (`CODER_AGENT_NAME`) and make the server-side lookup ambiguity-aware. **Database layer:** - `GetWorkspaceAgentsByInstanceID` (`:many`): returns all matching root agents for an instance ID. - `GetWorkspaceAgentByInstanceIDAndName` (`:one`): returns the named root agent for disambiguation. **SDK and CLI:** - `agent_name` field added to AWS, Azure, and GCP request structs (`omitempty` for backward compatibility). - `CODER_AGENT_NAME` env var and `--agent-name` flag wired into the agent bootstrap before instance-identity auth runs. **Server handler (`handleAuthInstanceID`):** - When `agent_name` is present: direct lookup by (instance ID, name). - When absent: legacy lookup, then resource-scoped ambiguity check. Returns 409 with available agent names if multiple root agents match. - Whitespace-only names are trimmed and treated as unspecified. - Sub-agents remain excluded (`parent_id IS NULL` filter). **Verification template:** - `examples/templates/aws-multi-agent/` provisions one EC2 instance with two agents (`main` and `dev`), both using instance-identity auth with `CODER_AGENT_NAME` set in the cloud-init user data. ## Backward compatibility Existing single-agent deployments work unchanged. 
The `agent_name` field is optional with `omitempty`, and the unnamed path preserves today's behavior when only one root agent matches.
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
---
|
||||
display_name: AWS EC2 Multi-Agent Instance Identity
|
||||
description: Verify AWS instance identity auth for two Coder agents on one EC2 instance
|
||||
icon: ../../../site/static/icon/aws.svg
|
||||
maintainer_github: coder
|
||||
verified: true
|
||||
tags: [vm, linux, aws, multi-agent, instance-identity]
|
||||
---
|
||||
|
||||
# AWS multi-agent instance identity verification
|
||||
|
||||
This template verifies the multi-agent instance-identity authentication flow on
|
||||
AWS. It provisions a single EC2 instance with two peer root workspace agents,
|
||||
`main` and `dev`, that both use AWS instance identity authentication.
|
||||
|
||||
The key behavior under test is `CODER_AGENT_NAME` disambiguation. Each agent
|
||||
starts on the same VM with the same EC2 instance identity, but sets a distinct
|
||||
`CODER_AGENT_NAME` so the Coder server can issue a separate session token for
|
||||
that specific agent.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- AWS credentials configured for Terraform, such as environment variables or an
|
||||
attached IAM role.
|
||||
- A Coder deployment that includes the multi-agent instance-auth changes from
|
||||
this branch.
|
||||
- No special Coder server configuration. AWS instance identity certificates are
|
||||
built in.
|
||||
|
||||
## What this template creates
|
||||
|
||||
- One VPC, subnet, internet gateway, route table, and route table association.
|
||||
- One security group that allows SSH from anywhere for test access.
|
||||
- One Ubuntu 24.04 EC2 instance.
|
||||
- Two Coder agents, `main` and `dev`, on that single EC2 instance.
|
||||
- Two agent startup flows that set `CODER_AGENT_NAME` before launching the
|
||||
corresponding agent init script.
|
||||
|
||||
## How to verify
|
||||
|
||||
```bash
|
||||
cd examples/templates/aws-multi-agent
|
||||
coder templates push verify-multi-agent
|
||||
|
||||
coder create test-multi-agent --template verify-multi-agent
|
||||
|
||||
coder list
|
||||
```
|
||||
|
||||
After the workspace starts, verify that both agents are connected in the Coder
|
||||
Dashboard for `test-multi-agent`. You can also connect to each agent directly:
|
||||
|
||||
```bash
|
||||
coder ssh test-multi-agent -a main true
|
||||
coder ssh test-multi-agent -a dev true
|
||||
```
|
||||
|
||||
## Expected behavior
|
||||
|
||||
- Both agents authenticate independently using AWS instance identity.
|
||||
- Each agent receives its own session token.
|
||||
- The workspace shows two connected agents in the Coder Dashboard.
|
||||
- If `CODER_AGENT_NAME` is omitted, the server should return `409 Conflict`
|
||||
because the shared instance identity is ambiguous.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- If one agent gets `409 Conflict`, `CODER_AGENT_NAME` is not being set
|
||||
correctly for that agent.
|
||||
- If both agents fail, instance identity authentication is not working. Check
|
||||
EC2 metadata service access from the instance.
|
||||
- Check cloud-init logs with `journalctl -u cloud-init`; output from the user-data script itself is written to `/var/log/cloud-init-output.log`.
|
||||
- Check agent logs at `/tmp/coder-agent-main.log` and
|
||||
`/tmp/coder-agent-dev.log`.
|
||||
|
||||
## Cleanup
|
||||
|
||||
```bash
|
||||
coder delete test-multi-agent
|
||||
coder templates delete verify-multi-agent
|
||||
```
|
||||
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
# Cloud-init user-data script, rendered by Terraform's templatefile().
# Starts two Coder agents, "main" and "dev", on the same instance under the
# configured Linux user. Each agent's output goes to its own file in /tmp.
set -euo pipefail

# Create the user if it doesn't exist.
if ! id -u "${linux_user}" >/dev/null 2>&1; then
  useradd -m -s /bin/bash "${linux_user}"
fi

# CODER_AGENT_NAME must be set on the far side of sudo. With the default
# env_reset policy, sudo drops variables inherited from the caller, so
# `CODER_AGENT_NAME=main sudo ...` would launch the agent without the
# selector. Running the init script through env(1) guarantees the variable
# is present in the agent's environment.

# Start main agent with disambiguation name.
sudo -u '${linux_user}' env CODER_AGENT_NAME=main sh -c '${main_init_script}' \
  >/tmp/coder-agent-main.log 2>&1 &

# Start dev agent with disambiguation name.
sudo -u '${linux_user}' env CODER_AGENT_NAME=dev sh -c '${dev_init_script}' \
  >/tmp/coder-agent-dev.log 2>&1 &

# Block until both background agent processes exit.
wait
|
||||
@@ -0,0 +1,340 @@
|
||||
# Providers this template depends on. No version constraints are declared.
terraform {
  required_providers {
    aws = {
      source = "hashicorp/aws"
    }
    cloudinit = {
      source = "hashicorp/cloudinit"
    }
    coder = {
      source = "coder/coder"
    }
  }
}
|
||||
|
||||
# Region list last updated 2023-03-14.
# Source command:
#   aws ec2 describe-regions | jq -r '[.Regions[].RegionName] | sort'
#
# Workspace parameter selecting the AWS region. It is immutable, so it cannot
# be changed after the workspace has been created. Options are listed in the
# order they are presented to the user.
data "coder_parameter" "region" {
  name         = "region"
  display_name = "Region"
  description  = "The region to deploy the workspace in."
  mutable      = false
  default      = "us-east-1"

  # Asia Pacific
  option {
    value = "ap-northeast-1"
    name  = "Asia Pacific (Tokyo)"
    icon  = "/emojis/1f1ef-1f1f5.png"
  }
  option {
    value = "ap-northeast-2"
    name  = "Asia Pacific (Seoul)"
    icon  = "/emojis/1f1f0-1f1f7.png"
  }
  option {
    value = "ap-northeast-3"
    name  = "Asia Pacific (Osaka)"
    icon  = "/emojis/1f1ef-1f1f5.png"
  }
  option {
    value = "ap-south-1"
    name  = "Asia Pacific (Mumbai)"
    icon  = "/emojis/1f1ee-1f1f3.png"
  }
  option {
    value = "ap-southeast-1"
    name  = "Asia Pacific (Singapore)"
    icon  = "/emojis/1f1f8-1f1ec.png"
  }
  option {
    value = "ap-southeast-2"
    name  = "Asia Pacific (Sydney)"
    icon  = "/emojis/1f1e6-1f1fa.png"
  }

  # Canada
  option {
    value = "ca-central-1"
    name  = "Canada (Central)"
    icon  = "/emojis/1f1e8-1f1e6.png"
  }

  # Europe
  option {
    value = "eu-central-1"
    name  = "EU (Frankfurt)"
    icon  = "/emojis/1f1ea-1f1fa.png"
  }
  option {
    value = "eu-north-1"
    name  = "EU (Stockholm)"
    icon  = "/emojis/1f1ea-1f1fa.png"
  }
  option {
    value = "eu-west-1"
    name  = "EU (Ireland)"
    icon  = "/emojis/1f1ea-1f1fa.png"
  }
  option {
    value = "eu-west-2"
    name  = "EU (London)"
    icon  = "/emojis/1f1ea-1f1fa.png"
  }
  option {
    value = "eu-west-3"
    name  = "EU (Paris)"
    icon  = "/emojis/1f1ea-1f1fa.png"
  }

  # South America
  option {
    value = "sa-east-1"
    name  = "South America (São Paulo)"
    icon  = "/emojis/1f1e7-1f1f7.png"
  }

  # United States
  option {
    value = "us-east-1"
    name  = "US East (N. Virginia)"
    icon  = "/emojis/1f1fa-1f1f8.png"
  }
  option {
    value = "us-east-2"
    name  = "US East (Ohio)"
    icon  = "/emojis/1f1fa-1f1f8.png"
  }
  option {
    value = "us-west-1"
    name  = "US West (N. California)"
    icon  = "/emojis/1f1fa-1f1f8.png"
  }
  option {
    value = "us-west-2"
    name  = "US West (Oregon)"
    icon  = "/emojis/1f1fa-1f1f8.png"
  }
}
|
||||
|
||||
# Workspace parameter selecting the EC2 instance size. It is immutable, and
# every option is a t3 instance type labelled with its vCPU and memory size.
data "coder_parameter" "instance_type" {
  name         = "instance_type"
  display_name = "Instance type"
  description  = "What instance type should your workspace use?"
  mutable      = false
  default      = "t3.micro"

  option {
    value = "t3.micro"
    name  = "2 vCPU, 1 GiB RAM"
  }
  option {
    value = "t3.small"
    name  = "2 vCPU, 2 GiB RAM"
  }
  option {
    value = "t3.medium"
    name  = "2 vCPU, 4 GiB RAM"
  }
  option {
    value = "t3.large"
    name  = "2 vCPU, 8 GiB RAM"
  }
  option {
    value = "t3.xlarge"
    name  = "4 vCPU, 16 GiB RAM"
  }
  option {
    value = "t3.2xlarge"
    name  = "8 vCPU, 32 GiB RAM"
  }
}
|
||||
|
||||
# The AWS provider operates in the region chosen through the "region"
# workspace parameter.
provider "aws" {
  region = data.coder_parameter.region.value
}
|
||||
|
||||
# Workspace being provisioned; read below for its name, start_count and
# transition.
data "coder_workspace" "me" {}

# Owner of the workspace; the owner name is used in resource Name tags.
data "coder_workspace_owner" "me" {}
|
||||
|
||||
# Looks up the most recent Ubuntu 24.04 (noble) amd64 server image matching
# the name pattern below, restricted to HVM virtualization.
data "aws_ami" "ubuntu" {
  owners      = ["099720109477"] # Canonical
  most_recent = true

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "name"
    values = ["ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-*"]
  }
}
|
||||
|
||||
# First of the two agents sharing the EC2 instance. It authenticates with AWS
# instance identity and exists only while the workspace is started
# (start_count is used as the resource count).
resource "coder_agent" "main" {
  count = data.coder_workspace.me.start_count

  auth = "aws-instance-identity"
  arch = "amd64"
  os   = "linux"

  # Prints which agent ran and the CODER_AGENT_NAME value visible to the
  # startup script.
  startup_script = <<-EOT
    #!/bin/bash
    set -e
    echo "Agent 'main' started successfully"
    echo "CODER_AGENT_NAME=$CODER_AGENT_NAME"
  EOT

  # Reports a fixed string naming this agent.
  metadata {
    display_name = "Agent Identity"
    key          = "agent-identity"
    script       = "echo main"
    interval     = 60
    timeout      = 5
  }
}
|
||||
|
||||
# Second of the two agents sharing the EC2 instance. It authenticates with AWS
# instance identity and exists only while the workspace is started
# (start_count is used as the resource count).
resource "coder_agent" "dev" {
  count = data.coder_workspace.me.start_count

  auth = "aws-instance-identity"
  arch = "amd64"
  os   = "linux"

  # Prints which agent ran and the CODER_AGENT_NAME value visible to the
  # startup script.
  startup_script = <<-EOT
    #!/bin/bash
    set -e
    echo "Agent 'dev' started successfully"
    echo "CODER_AGENT_NAME=$CODER_AGENT_NAME"
  EOT

  # Reports a fixed string naming this agent.
  metadata {
    display_name = "Agent Identity"
    key          = "agent-identity"
    script       = "echo dev"
    interval     = 60
    timeout      = 5
  }
}
|
||||
|
||||
# Values shared by the resources below.
locals {
  # Account that the user-data script creates and runs both agents as.
  linux_user = "coder"

  # Lower-cased workspace name, used in Name tags and the security group
  # name prefix.
  hostname = lower(data.coder_workspace.me.name)

  # Availability zone: the selected region with the suffix "a" appended.
  aws_availability_zone = format("%sa", data.coder_parameter.region.value)
}
|
||||
|
||||
# Renders the EC2 user data as a cloud-init multipart document containing a
# small cloud-config part and the shell script that launches both agents.
data "cloudinit_config" "user_data" {
  gzip          = false
  base64_encode = false

  boundary = "//"

  # cloud-init runs user-data scripts only once per instance by default.
  # This template stops and starts the same instance in place (see
  # aws_ec2_instance_state.dev), so without this override neither agent would
  # be launched again after a workspace stop/start. Running scripts-user on
  # every boot brings both agents back.
  part {
    filename     = "cloud-config.yaml"
    content_type = "text/cloud-config"

    content = <<-EOT
      #cloud-config
      cloud_final_modules:
        - [scripts-user, always]
    EOT
  }

  part {
    filename     = "userdata.sh"
    content_type = "text/x-shellscript"

    content = templatefile("${path.module}/cloud-init/userdata.sh.tftpl", {
      linux_user = local.linux_user

      # try() falls back to an empty script when the agent resources have
      # count = 0 and index [0] does not exist.
      main_init_script = try(coder_agent.main[0].init_script, "")
      dev_init_script  = try(coder_agent.dev[0].init_script, "")
    })
  }
}
|
||||
|
||||
# Dedicated VPC for the workspace, with DNS support and DNS hostnames enabled.
resource "aws_vpc" "workspace" {
  cidr_block = "10.0.0.0/16"

  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${local.hostname}"
  }
}
|
||||
|
||||
# Single subnet inside the workspace VPC, placed in the template's
# availability zone. Instances launched here receive a public IP.
resource "aws_subnet" "workspace" {
  vpc_id            = aws_vpc.workspace.id
  availability_zone = local.aws_availability_zone
  cidr_block        = "10.0.1.0/24"

  map_public_ip_on_launch = true

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${local.hostname}"
  }
}
|
||||
|
||||
# Internet gateway attached to the workspace VPC; the route table's default
# route points at it.
resource "aws_internet_gateway" "workspace" {
  vpc_id = aws_vpc.workspace.id

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${local.hostname}"
  }
}
|
||||
|
||||
# Route table for the workspace VPC with a default route (0.0.0.0/0) through
# the internet gateway.
resource "aws_route_table" "workspace" {
  vpc_id = aws_vpc.workspace.id

  route {
    gateway_id = aws_internet_gateway.workspace.id
    cidr_block = "0.0.0.0/0"
  }

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${local.hostname}"
  }
}
|
||||
|
||||
# Binds the workspace subnet to the route table above. The EC2 instance
# depends on this association explicitly.
resource "aws_route_table_association" "workspace" {
  route_table_id = aws_route_table.workspace.id
  subnet_id      = aws_subnet.workspace.id
}
|
||||
|
||||
# Security group for the workspace instance: inbound TCP 22 from any IPv4
# address, and unrestricted outbound traffic.
resource "aws_security_group" "workspace" {
  vpc_id      = aws_vpc.workspace.id
  name_prefix = "coder-${local.hostname}-"
  description = "Allow SSH access for testing."

  # Inbound SSH, open to 0.0.0.0/0.
  ingress {
    description = "SSH"
    protocol    = "tcp"
    from_port   = 22
    to_port     = 22
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Outbound: all protocols and ports to any IPv4 destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${local.hostname}"
  }
}
|
||||
|
||||
# The single EC2 instance that hosts both agents. Its user data is the
# rendered cloud-init document that launches them.
resource "aws_instance" "dev" {
  ami           = data.aws_ami.ubuntu.id
  instance_type = data.coder_parameter.instance_type.value

  availability_zone           = local.aws_availability_zone
  subnet_id                   = aws_subnet.workspace.id
  vpc_security_group_ids      = [aws_security_group.workspace.id]
  associate_public_ip_address = true

  user_data = data.cloudinit_config.user_data.rendered

  tags = {
    Name = "coder-${data.coder_workspace_owner.me.name}-${data.coder_workspace.me.name}"
    # Required if you are using our example policy, see template README
    # NOTE(review): this template's README does not include an example
    # policy - confirm which README this comment refers to.
    Coder_Provisioned = "true"
  }

  # Changes to the AMI lookup result are ignored for an existing instance.
  lifecycle {
    ignore_changes = [ami]
  }

  # Create the instance only after the subnet has been associated with the
  # route table.
  depends_on = [aws_route_table_association.workspace]
}
|
||||
|
||||
# Attaches region, instance type and AMI details to the EC2 instance resource
# as Coder metadata items.
resource "coder_metadata" "workspace_info" {
  resource_id = aws_instance.dev.id

  item {
    key   = "region"
    value = data.coder_parameter.region.value
  }

  item {
    key   = "instance type"
    value = aws_instance.dev.instance_type
  }

  item {
    key   = "ami"
    value = aws_instance.dev.ami
  }
}
|
||||
|
||||
# Drives the instance power state from the workspace transition: "running"
# on a start transition, "stopped" on any other.
resource "aws_ec2_instance_state" "dev" {
  instance_id = aws_instance.dev.id
  state       = data.coder_workspace.me.transition != "start" ? "stopped" : "running"
}
|
||||
Reference in New Issue
Block a user