mirror of
https://github.com/coder/registry.git
synced 2026-06-03 04:58:15 +00:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| db433e4d34 | |||
| 99f3524160 |
@@ -0,0 +1,73 @@
|
||||
---
|
||||
display_name: Claude Code self-hosted runner
|
||||
description: Run Anthropic's Claude Code self-hosted runner as a long-lived process inside a Coder workspace, with per-workspace scoped self-eviction so the prebuild reconciler keeps the pool warm.
|
||||
icon: ../../../../.icons/claude.svg
|
||||
verified: false
|
||||
tags: [ai, claude, claude-code, anthropic, runner]
|
||||
---
|
||||
|
||||
# Claude Code self-hosted runner
|
||||
|
||||
Drops Anthropic's [Claude Code self-hosted runner](https://docs.anthropic.com/en/docs/claude-code/self-hosted-runners) into any Coder template that has a `coder_agent` and a workspace image with the runner binary installed (`/usr/local/bin/claude self-hosted-runner` by default).
|
||||
|
||||
The module owns the runner script, the agent environment variables it needs, an optional bot-git askpass setup, and a host Docker socket gid fixup. The runner script writes a per-session wrapper that forces `--permission-mode bypassPermissions`, then spawns a detached supervisor that runs the runner in the foreground and POSTs a delete build to self-evict on drain. Agent metadata items (lock status, active sessions, runner ID, last poll) are emitted via the `agent_metadata` output for the parent to splat into a `dynamic "metadata"` block.
|
||||
|
||||
The parent template still owns the `coder_agent` itself, the per-workspace scope-restricted self-evict token (minted via the `Mastercard/restapi` provider against an admin bootstrap token), the prebuild preset, and the infra block (`docker_container`, `kubernetes_pod`, etc.).
|
||||
|
||||
> [!IMPORTANT]
|
||||
> This module is part of the [Claude Code self-hosted runners on Coder](https://coder.com/docs/ai-coder/claude-code-self-hosted-runners) recipe, which currently targets Anthropic's EAP build of the runner. Both the runner binary and the wire contract are still evolving; expect API drift until Anthropic ships GA.
|
||||
|
||||
## Usage
|
||||
|
||||
```tf
|
||||
module "claude_self_hosted_runner" {
|
||||
source = "registry.coder.com/coder-labs/claude-self-hosted-runner/coder"
|
||||
version = "1.0.0"
|
||||
|
||||
agent_id = coder_agent.main.id
|
||||
workspace_id = data.coder_workspace.me.id
|
||||
pool_secret = var.pool_secret
|
||||
self_evict_token = jsondecode(restapi_object.self_evict_token.api_response).key
|
||||
git_bot_token = var.git_bot_token
|
||||
capacity = tonumber(data.coder_parameter.capacity.value)
|
||||
}
|
||||
|
||||
resource "coder_agent" "main" {
|
||||
# ... arch, os, dir, startup_script_behavior, etc.
|
||||
|
||||
# Static metadata blocks coexist with the dynamic block below;
|
||||
# Terraform concatenates them on the same coder_agent.
|
||||
metadata {
|
||||
display_name = "CPU"
|
||||
key = "cpu"
|
||||
script = "top -bn1 | awk '/Cpu/ {print $2 \"%\"}'"
|
||||
interval = 10
|
||||
timeout = 5
|
||||
}
|
||||
|
||||
dynamic "metadata" {
|
||||
for_each = module.claude_self_hosted_runner.agent_metadata
|
||||
content {
|
||||
display_name = metadata.value.display_name
|
||||
key = metadata.value.key
|
||||
interval = metadata.value.interval
|
||||
timeout = metadata.value.timeout
|
||||
script = metadata.value.script
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## What the module does
|
||||
|
||||
- Writes `$HOME/.claude/wrapper.sh` at agent start. The wrapper appends `--permission-mode bypassPermissions` after `"$@"` so unattended sessions never stall on a tool-approval prompt; Claude Code's flag parser is last-occurrence-wins, so this overrides the server-supplied permission mode.
|
||||
- Sets up the runner's required environment (`CLAUDE_POOL_SECRET`, `CLAUDE_CAPACITY`, `GIT_BOT_TOKEN`, `CODER_SELF_TOKEN`, `CODER_WORKSPACE_ID`) via `coder_env` resources on the agent.
|
||||
- Spawns a `setsid nohup` supervisor that runs the runner in the foreground. When the runner exits on drain, the supervisor POSTs `/api/v2/workspaces/{id}/builds` with `{"transition":"delete"}` to self-evict, so Coder's prebuild reconciler can queue a replacement.
|
||||
- Wires up `GIT_ASKPASS` if `git_bot_token` is supplied so the runner's child claude can `git push` without baking credentials into the image.
|
||||
- If the parent template mounts the host Docker socket at `/var/run/docker.sock` and its gid does not match the in-container `docker` group, makes a best-effort `sudo chgrp` of the socket to that group so the workspace user can use Docker without sudo.
|
||||
|
||||
## Self-eviction security model
|
||||
|
||||
The `self_evict_token` input is minted by the parent template via the `Mastercard/restapi` provider at template build time, against an admin bootstrap token that lives in Terraform state and is never injected into the workspace. The minted token is scoped to `workspace:delete + workspace:read + template:read + user:read` and allow-listed to this single workspace's UUID. The only mutating action a leaked copy can perform is deleting this one workspace. No read of peer prebuilds, no SSH, no external auth, no git creds.
|
||||
|
||||
The supervisor uses raw `curl` against `/api/v2/workspaces/{id}/builds`, not the `coder delete` CLI. The CLI fetches workspace resources first, which fails against the scoped token whose allow-list intersection excludes peer workspaces.
|
||||
@@ -0,0 +1,185 @@
|
||||
# Module requirements: the Coder provider and the minimum Terraform core
# version this module is written against.
terraform {
  required_providers {
    coder = {
      version = ">= 2.13"
      source  = "coder/coder"
    }
  }

  required_version = ">= 1.5"
}
|
||||
|
||||
# Agent that every coder_env and coder_script in this module attaches to.
variable "agent_id" {
  description = "The ID of a Coder agent."
  type        = string
}
|
||||
|
||||
# Exported to the agent as CODER_WORKSPACE_ID.
variable "workspace_id" {
  description = "data.coder_workspace.me.id from the parent template. Used by the supervisor to self-evict via the workspace builds endpoint."
  type        = string
}
|
||||
|
||||
# Exported to the agent as CLAUDE_POOL_SECRET.
variable "pool_secret" {
  description = "Claude Code self-hosted runner pool secret (from claude.ai)."
  sensitive   = true
  type        = string
}
|
||||
|
||||
# Exported to the agent as CODER_SELF_TOKEN.
variable "self_evict_token" {
  description = "Per-workspace, scope-restricted Coder API token. Scope = workspace:delete + workspace:read + template:read + user:read, allow_list = this workspace's UUID. A leaked copy can only delete this one workspace. The parent template mints it via the Mastercard/restapi provider at build time."
  sensitive   = true
  type        = string
}
|
||||
|
||||
# Exported to the agent as GIT_BOT_TOKEN; the empty default leaves it unset
# in effect.
variable "git_bot_token" {
  description = "Optional git PAT for the bot identity. Wired through GIT_ASKPASS so the runner's child claude can push without baking credentials into the image."
  default     = ""
  sensitive   = true
  type        = string
}
|
||||
|
||||
# Session parallelism for the runner. Rendered with tostring() into the
# CLAUDE_CAPACITY env var, so it must be a whole number: `type = number`
# alone would also accept values such as 2.5.
variable "capacity" {
  type        = number
  description = "Maximum sessions the runner serves at once. The runner locks to one Anthropic user; this caps parallelism within that user's queue."
  default     = 4

  validation {
    # Range check plus an integrality check (floor(x) == x only for
    # whole numbers).
    condition     = var.capacity >= 1 && var.capacity <= 16 && floor(var.capacity) == var.capacity
    error_message = "capacity must be a whole number between 1 and 16."
  }
}
|
||||
|
||||
# Passed to scripts/run.sh as RUNNER_BINARY_PATH.
variable "runner_binary_path" {
  description = "Path to the `claude self-hosted-runner` binary inside the workspace."
  default     = "/usr/local/bin/claude"
  type        = string
}
|
||||
|
||||
# Passed to scripts/run.sh as CLAUDE_BINARY_PATH.
variable "claude_binary_path" {
  description = "Path to the Claude Code binary the wrapper execs for each session."
  default     = "/opt/claude/claude"
  type        = string
}
|
||||
|
||||
# NOTE(review): no resource or output visible in this module references
# var.order, so setting it currently appears to have no effect. Confirm
# whether it should be wired up or dropped.
variable "order" {
  description = "Order of the runner script in the agent UI."
  default     = null
  type        = number
}
|
||||
|
||||
# CLAUDE_POOL_SECRET: checked as mandatory by scripts/run.sh at agent start.
resource "coder_env" "pool_secret" {
  name     = "CLAUDE_POOL_SECRET"
  value    = var.pool_secret
  agent_id = var.agent_id
}
|
||||
|
||||
# CLAUDE_CAPACITY: env values are strings, hence the explicit tostring().
resource "coder_env" "capacity" {
  name     = "CLAUDE_CAPACITY"
  value    = tostring(var.capacity)
  agent_id = var.agent_id
}
|
||||
|
||||
# GIT_BOT_TOKEN: always created; an empty value makes scripts/run.sh skip
# the askpass setup.
resource "coder_env" "git_bot_token" {
  name     = "GIT_BOT_TOKEN"
  value    = var.git_bot_token
  agent_id = var.agent_id
}
|
||||
|
||||
# CODER_SELF_TOKEN: the token the supervisor sends when it self-evicts.
resource "coder_env" "self_token" {
  name     = "CODER_SELF_TOKEN"
  value    = var.self_evict_token
  agent_id = var.agent_id
}
|
||||
|
||||
# CODER_WORKSPACE_ID: the workspace the supervisor's delete build targets.
resource "coder_env" "workspace_id" {
  name     = "CODER_WORKSPACE_ID"
  value    = var.workspace_id
  agent_id = var.agent_id
}
|
||||
|
||||
# Start-up script that prepares the runner and launches its detached
# supervisor. scripts/run.sh is a Terraform template; only the two binary
# paths are substituted at render time, everything else is read from the
# agent environment when the script runs.
resource "coder_script" "claude_runner" {
  agent_id     = var.agent_id
  display_name = "Claude self-hosted runner"
  # Use the Claude icon so the script matches the module's own icon
  # (the README front matter points at claude.svg) instead of the
  # generic code icon.
  icon         = "/icon/claude.svg"
  run_on_start = true
  # The script only spawns a background supervisor, so logins need not
  # wait for it.
  start_blocks_login = false
  script = templatefile("${path.module}/scripts/run.sh", {
    CLAUDE_BINARY_PATH = var.claude_binary_path
    RUNNER_BINARY_PATH = var.runner_binary_path
  })
}
|
||||
|
||||
# Agent metadata items. The parent splats this list into a
# `dynamic "metadata"` block on its own `coder_agent` because nested
# blocks cannot be injected from a module. Every item below is scraped
# from the runner's local /healthz endpoint; this is the only window a
# Coder admin has into who the Anthropic pool has bound this workspace
# to (the runner does not expose the locked user's email over its
# local endpoints; that lives in claude.ai > Self-hosted runner pools).
#
# Each script must degrade gracefully while the runner is not yet
# listening: when curl fails, jq receives no input and prints nothing, so
# fallbacks have to live in the shell, not only in the jq filter.
output "agent_metadata" {
  description = "List of agent metadata items the parent template should splat into a `dynamic \"metadata\"` block on its coder_agent."
  value = [
    {
      display_name = "Lock status"
      key          = "0_lock_status"
      interval     = 10
      timeout      = 5
      # The runner does not expose its locked state via /metrics or
      # /healthz in the current BYOC build, so we infer it from
      # active_sessions and latch a sticky flag on disk: once a
      # session has landed, the runner is locked to that Anthropic
      # user for its entire lifetime per Anthropic's spec, even when
      # the active count drops back to zero between sessions.
      script = <<-EOT
        flag="$HOME/.claude/locked"
        active=$(curl -fsS http://127.0.0.1:8080/healthz 2>/dev/null \
          | jq -r '.active_sessions // 0')
        if [ "$${active:-0}" -gt 0 ] && [ ! -f "$flag" ]; then
          touch "$flag" 2>/dev/null || true
        fi
        if [ -f "$flag" ]; then
          printf 'locked'
        else
          printf 'unlocked'
        fi
      EOT
    },
    {
      display_name = "Active sessions"
      key          = "1_active_sessions"
      interval     = 5
      timeout      = 5
      # Prints "<active> / <capacity>", or "?" while /healthz is
      # unreachable or lacks the field.
      script = <<-EOT
        active=$(curl -fsS http://127.0.0.1:8080/healthz 2>/dev/null \
          | jq -r '.active_sessions // empty')
        if [ -z "$active" ]; then echo '?'; exit 0; fi
        printf '%s / %s' "$active" "$${CLAUDE_CAPACITY:-1}"
      EOT
    },
    {
      display_name = "Runner ID"
      key          = "2_runner_id"
      interval     = 30
      timeout      = 5
      # The "(starting)" placeholder is applied in the shell so it also
      # covers a failed curl (runner not listening yet), not just a
      # /healthz payload without runner_id.
      script = <<-EOT
        id=$(curl -fsS http://127.0.0.1:8080/healthz 2>/dev/null \
          | jq -r '.runner_id // empty')
        echo "$${id:-(starting)}"
      EOT
    },
    {
      display_name = "Last Anthropic poll"
      key          = "3_last_poll"
      interval     = 15
      timeout      = 5
      # Reports "ok" while the last poll is under 30 s old, "stale"
      # otherwise, and "?" while /healthz is unreachable.
      script = <<-EOT
        age=$(curl -fsS http://127.0.0.1:8080/healthz 2>/dev/null \
          | jq -r '.last_poll_age_ms // empty')
        if [ -z "$age" ]; then echo '?'; exit 0; fi
        if [ "$age" -lt 30000 ]; then
          printf 'ok (%sms ago)' "$age"
        else
          printf 'stale (%ss ago)' $((age/1000))
        fi
      EOT
    },
  ]
}
|
||||
@@ -0,0 +1,123 @@
|
||||
# Baseline plan: only the four required inputs are set, so every optional
# input takes its default.
run "plan_with_required_vars" {
  command = plan

  variables {
    self_evict_token = "test-self-token"
    pool_secret      = "test-pool-secret"
    workspace_id     = "test-workspace"
    agent_id         = "test-agent"
  }

  # The pool secret must reach the agent environment.
  assert {
    error_message = "pool_secret env should be set"
    condition     = length(resource.coder_env.pool_secret.value) > 0
  }

  # The default capacity is rendered as the string "4".
  assert {
    error_message = "default capacity should be 4"
    condition     = resource.coder_env.capacity.value == "4"
  }

  # The runner script keeps its display name.
  assert {
    error_message = "expected the runner coder_script display_name"
    condition     = resource.coder_script.claude_runner.display_name == "Claude self-hosted runner"
  }
}
|
||||
|
||||
# Overrides for capacity and both binary paths must flow through to the
# env var and the rendered script respectively.
run "custom_capacity_and_binary_paths" {
  command = plan

  variables {
    runner_binary_path = "/custom/runner"
    claude_binary_path = "/custom/claude"
    capacity           = 8
    self_evict_token   = "test-self-token"
    pool_secret        = "test-pool-secret"
    workspace_id       = "test-workspace"
    agent_id           = "test-agent"
  }

  assert {
    error_message = "capacity input should flow into CLAUDE_CAPACITY env"
    condition     = resource.coder_env.capacity.value == "8"
  }

  assert {
    error_message = "claude_binary_path should appear in the rendered script"
    condition     = strcontains(resource.coder_script.claude_runner.script, "/custom/claude")
  }

  assert {
    error_message = "runner_binary_path should appear in the rendered script"
    condition     = strcontains(resource.coder_script.claude_runner.script, "/custom/runner")
  }
}
|
||||
|
||||
# Omitting git_bot_token must still plan, with GIT_BOT_TOKEN set to "".
run "git_bot_token_optional" {
  command = plan

  variables {
    self_evict_token = "test-self-token"
    pool_secret      = "test-pool-secret"
    workspace_id     = "test-workspace"
    agent_id         = "test-agent"
  }

  assert {
    error_message = "git_bot_token should default to empty string"
    condition     = resource.coder_env.git_bot_token.value == ""
  }
}
|
||||
|
||||
# Lower bound: capacity = 0 must trip the variable's validation rule.
run "capacity_validation_rejects_zero" {
  command = plan

  variables {
    capacity         = 0
    self_evict_token = "test-self-token"
    pool_secret      = "test-pool-secret"
    workspace_id     = "test-workspace"
    agent_id         = "test-agent"
  }

  expect_failures = [var.capacity]
}
|
||||
|
||||
# Upper bound: capacity = 17 must trip the variable's validation rule.
run "capacity_validation_rejects_high" {
  command = plan

  variables {
    capacity         = 17
    self_evict_token = "test-self-token"
    pool_secret      = "test-pool-secret"
    workspace_id     = "test-workspace"
    agent_id         = "test-agent"
  }

  expect_failures = [var.capacity]
}
|
||||
|
||||
# Shape check on the agent_metadata output: four items, lock status first.
run "agent_metadata_output_has_four_items" {
  command = apply

  variables {
    self_evict_token = "test-self-token"
    pool_secret      = "test-pool-secret"
    workspace_id     = "test-workspace"
    agent_id         = "test-agent"
  }

  assert {
    error_message = "agent_metadata should expose four scraping items"
    condition     = length(output.agent_metadata) == 4
  }

  assert {
    error_message = "first metadata item should be lock_status"
    condition     = output.agent_metadata[0].key == "0_lock_status"
  }
}
|
||||
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
# Wires up everything the Claude Code self-hosted runner needs at agent
# start, then spawns a detached supervisor that runs the runner in the
# foreground and self-evicts the workspace once the runner exits (drain).
#
# This file is rendered by Terraform's templatefile(): the two *_BINARY_PATH
# assignments below are filled in at render time, and every doubled
# dollar-brace / percent-brace sequence is a template escape that renders
# as the single form.
#
# Runtime env (set by coder_env in main.tf):
#   CLAUDE_POOL_SECRET   Anthropic pool secret (mandatory).
#   CLAUDE_CAPACITY      Max parallel sessions per runner (default 1).
#   GIT_BOT_TOKEN        Optional bot PAT for GIT_ASKPASS.
#   CODER_SELF_TOKEN     Per-workspace scope-restricted Coder API token.
#   CODER_WORKSPACE_ID   This workspace's UUID, used by self-eviction.
#   CODER_AGENT_URL      Set by the Coder agent itself.

set -euo pipefail

CLAUDE_BINARY_PATH='${CLAUDE_BINARY_PATH}'
RUNNER_BINARY_PATH='${RUNNER_BINARY_PATH}'

if [ -z "$${CLAUDE_POOL_SECRET:-}" ]; then
  echo "CLAUDE_POOL_SECRET is empty. Set the pool_secret input on the module."
  exit 1
fi

install -d -m 0700 "$HOME/.claude"

# --- Bot git askpass ----------------------------------------------------
if [ -n "$${GIT_BOT_TOKEN:-}" ]; then
  install -d -m 0700 "$HOME/.git-creds"
  # The helper is left read-only (0500) below, so a copy surviving from a
  # previous agent start cannot be truncated in place; under `set -e` that
  # would abort the whole script. Remove it first, as is done for the
  # pool-secret file.
  rm -f "$HOME/.git-creds/askpass.sh"
  cat > "$HOME/.git-creds/askpass.sh" << 'ASK'
#!/bin/sh
printf '%s' "$GIT_BOT_TOKEN"
ASK
  chmod 0500 "$HOME/.git-creds/askpass.sh"
  git config --global core.askPass "$HOME/.git-creds/askpass.sh"
  git config --global credential.helper ''
fi

# --- Host Docker socket gid fixup --------------------------------------
# Best effort: a missing docker group or a failing sudo is ignored.
if [ -S /var/run/docker.sock ]; then
  sock_gid=$(stat -c %g /var/run/docker.sock)
  docker_gid=$(getent group docker | cut -d: -f3 || true)
  if [ -n "$${docker_gid:-}" ] && [ "$${sock_gid}" != "$${docker_gid}" ]; then
    sudo chgrp "$${docker_gid}" /var/run/docker.sock 2> /dev/null || true
  fi
fi

# --- Pool secret on disk -----------------------------------------------
# The file ends up 0400, so drop any previous copy before rewriting it.
POOL_SECRET_FILE="$HOME/.claude/pool-secret"
rm -f "$POOL_SECRET_FILE"
umask 077
printf '%s' "$${CLAUDE_POOL_SECRET}" > "$POOL_SECRET_FILE"
chmod 0400 "$POOL_SECRET_FILE"

# --- Wrapper script -----------------------------------------------------
# Runner execs this once per session, appending its server-computed
# flags. Claude Code's flag parser is last-occurrence-wins, so flags
# after "$@" win. Force --permission-mode bypassPermissions so
# unattended sessions never stall on a tool-approval prompt.
WRAPPER="$HOME/.claude/wrapper.sh"
{
  echo '#!/bin/bash'
  # %q shell-quotes the path so one containing spaces or shell
  # metacharacters survives being re-parsed when the wrapper runs.
  printf 'exec %q "$@" --permission-mode bypassPermissions\n' "$${CLAUDE_BINARY_PATH}"
} > "$WRAPPER"
chmod 0755 "$WRAPPER"

# --- Supervisor --------------------------------------------------------
# Runs the runner in the foreground; on runner exit POSTs a delete
# build to self-evict. Raw curl, not `coder delete`: the CLI fetches
# workspace resources first, which fails with the per-workspace
# scoped token whose allow-list excludes peer prebuilds.
#
# Unquoted heredoc: the outer shell expands only the (shell-quoted)
# runner binary path while writing the file. Every other dollar sign is
# backslash-escaped, so the supervisor reads its env vars
# (CODER_SELF_TOKEN, CODER_AGENT_URL, etc.) at runtime, when it's
# invoked under setsid.
SUPERVISOR="$HOME/.claude/supervisor.sh"
runner_quoted=$(printf '%q' "$${RUNNER_BINARY_PATH}")
cat > "$SUPERVISOR" << SUP
#!/usr/bin/env bash
set -uo pipefail
exec >>"\$HOME/.claude/supervisor.log" 2>&1
echo "[supervisor] start \$(date -Is)"

$${runner_quoted} self-hosted-runner \\
  --pool-secret-file "\$HOME/.claude/pool-secret" \\
  --capacity "\$${CLAUDE_CAPACITY:-1}" \\
  --log-file "\$HOME/.claude/runner.log" \\
  --exec-path "\$HOME/.claude/wrapper.sh"
echo "[supervisor] runner exited rc=\$? \$(date -Is)"

if [ -z "\$${CODER_SELF_TOKEN:-}" ]; then
  echo "[supervisor] CODER_SELF_TOKEN is empty; skipping self-eviction."
  exit 0
fi

# Keep the response body inside the 0700 ~/.claude directory rather than
# at a predictable path in world-writable /tmp.
evict_out="\$HOME/.claude/evict.out"
http_code=\$(curl -s -o "\$evict_out" -w "%%{http_code}" \\
  -X POST \\
  -H "Coder-Session-Token: \$CODER_SELF_TOKEN" \\
  -H "Content-Type: application/json" \\
  -d '{"transition":"delete"}' \\
  "\$CODER_AGENT_URL/api/v2/workspaces/\$CODER_WORKSPACE_ID/builds")
if [ "\$http_code" = "201" ]; then
  echo "[supervisor] self-eviction queued (HTTP 201)."
else
  echo "[supervisor] self-eviction failed (HTTP \$http_code): \$(head -c 300 "\$evict_out")"
fi
SUP
chmod 0700 "$SUPERVISOR"

# Detach with setsid + nohup. The supervisor reopens stdout/stderr to
# its own logfile; redirect all standard fds here to /dev/null so this
# script's exit doesn't drag the supervisor with it.
setsid nohup "$SUPERVISOR" < /dev/null > /dev/null 2>&1 &
disown

echo "Runner spawned as detached supervisor (pid=$!). See ~/.claude/supervisor.log."
|
||||
Reference in New Issue
Block a user