chore: enable agent socket by default (#22352)

relates to #21335

Enables the agent socket by default and updates docs to strike references to having to enable it.

The PRs in this stack change the MCP server that Tasks use for status updates so that it relies on the agent socket, rather than dialing Coderd directly with the agent token.

Disabling the agent socket by default was reasonable when it was only used for the experimental script ordering features, but now that we want to use it for Tasks, it should be enabled by default.
This commit is contained in:
Spike Curtis
2026-03-03 21:23:59 +04:00
committed by GitHub
parent 2ceac319b8
commit 56eb57caf4
12 changed files with 17 additions and 114 deletions
+1 -1
View File
@@ -410,7 +410,7 @@ func (a *agent) initSocketServer() {
agentsocket.WithPath(a.socketPath),
)
if err != nil {
a.logger.Warn(a.hardCtx, "failed to create socket server", slog.Error(err), slog.F("path", a.socketPath))
a.logger.Error(a.hardCtx, "failed to create socket server", slog.Error(err), slog.F("path", a.socketPath))
return
}
+1
View File
@@ -24,6 +24,7 @@ func New(t testing.TB, coderURL *url.URL, agentToken string, opts ...func(*agent
var o agent.Options
log := testutil.Logger(t).Named("agent")
o.Logger = log
o.SocketPath = testutil.AgentSocketPath(t)
for _, opt := range opts {
opt(&o)
+1 -1
View File
@@ -489,7 +489,7 @@ func workspaceAgent() *serpent.Command {
},
{
Flag: "socket-server-enabled",
Default: "false",
Default: "true",
Env: "CODER_AGENT_SOCKET_SERVER_ENABLED",
Description: "Enable the agent socket server.",
Value: serpent.BoolOf(&socketServerEnabled),
+4
View File
@@ -44,6 +44,7 @@ func TestWorkspaceAgent(t *testing.T) {
"--agent-token", r.AgentToken,
"--agent-url", client.URL.String(),
"--log-dir", logDir,
"--socket-path", testutil.AgentSocketPath(t),
)
clitest.Start(t, inv)
@@ -76,6 +77,7 @@ func TestWorkspaceAgent(t *testing.T) {
"--agent-token", r.AgentToken,
"--agent-url", client.URL.String(),
"--log-dir", logDir,
"--socket-path", testutil.AgentSocketPath(t),
)
// Set the subsystems for the agent.
inv.Environ.Set(agent.EnvAgentSubsystem, fmt.Sprintf("%s,%s", codersdk.AgentSubsystemExectrace, codersdk.AgentSubsystemEnvbox))
@@ -158,6 +160,7 @@ func TestWorkspaceAgent(t *testing.T) {
"--agent-header", "X-Testing=agent",
"--agent-header", "Cool-Header=Ethan was Here!",
"--agent-header-command", "printf X-Process-Testing=very-wow-"+coderURLEnv+"'\\r\\n'X-Process-Testing2=more-wow",
"--socket-path", testutil.AgentSocketPath(t),
)
clitest.Start(t, agentInv)
coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).
@@ -199,6 +202,7 @@ func TestWorkspaceAgent(t *testing.T) {
"--pprof-address", "",
"--prometheus-address", "",
"--debug-address", "",
"--socket-path", testutil.AgentSocketPath(t),
)
clitest.Start(t, inv)
+1 -1
View File
@@ -74,7 +74,7 @@ OPTIONS:
--socket-path string, $CODER_AGENT_SOCKET_PATH
Specify the path for the agent socket.
--socket-server-enabled bool, $CODER_AGENT_SOCKET_SERVER_ENABLED (default: false)
--socket-server-enabled bool, $CODER_AGENT_SOCKET_SERVER_ENABLED (default: true)
Enable the agent socket server.
--ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 72h)
@@ -151,7 +151,6 @@ resource "docker_container" "workspace" {
entrypoint = ["sh", "-c", coder_agent.main.init_script]
env = [
"CODER_AGENT_TOKEN=${coder_agent.main.token}",
"CODER_AGENT_SOCKET_SERVER_ENABLED=true"
]
}
@@ -205,7 +204,6 @@ resource "coder_script" "pip-install" {
A short summary of the changes:
- We've added `CODER_AGENT_SOCKET_SERVER_ENABLED=true` to the environment variables of the Docker container in which the Coder agent runs.
- We've broken the monolithic "setup" script into two separate scripts: one for the `apt` commands, and one for the `pip` commands.
- In each script, we've added a `coder exp sync start $SCRIPT_NAME` command to mark the startup script as started.
- We've also added an exit trap to ensure that we mark the startup scripts as completed. Without this, the `coder exp sync wait` command would eventually time out.
@@ -24,21 +24,7 @@ The goal of startup script coordination is to provide a single reliable source o
## Quick Start
To start using workspace startup coordination, follow these steps:
1. Set the environment variable `CODER_AGENT_SOCKET_SERVER_ENABLED=true` in your template to enable the agent socket server. The environment variable *must* be readable to the agent process. For example, in a template using the `kreuzwerker/docker` provider:
```terraform
resource "docker_container" "workspace" {
image = "codercom/enterprise-base:ubuntu"
env = [
"CODER_AGENT_TOKEN=${coder_agent.main.token}",
"CODER_AGENT_SOCKET_SERVER_ENABLED=true",
]
}
```
1. Add calls to `coder exp sync (start|complete)` in your startup scripts where required:
To start using workspace startup coordination, add calls to `coder exp sync (start|complete)` in your startup scripts where required:
```bash
trap 'coder exp sync complete my-script' EXIT
@@ -49,23 +49,7 @@ No dependencies found
## Common Issues
### Socket not enabled
If the Coder Agent Socket Server is not enabled, you will see an error message similar to the below when running `coder exp sync ping`:
```bash
error: connect to agent socket: connect to socket: dial unix /tmp/coder-agent.sock: connect: no such file or directory
```
Verify `CODER_AGENT_SOCKET_SERVER_ENABLED=true` is set in the Coder agent's environment:
```bash
tr '\0' '\n' < /proc/$(pidof -s coder)/environ | grep CODER_AGENT_SOCKET_SERVER_ENABLED
```
If the output of the above command is empty, review your template and ensure that the environment variable is set such that it is readable by the Coder agent process. Setting it on the `coder_agent` resource directly is **not** sufficient.
## Workspace startup script hangs
### Workspace startup script hangs
If the workspace startup scripts appear to 'hang', one or more of your startup scripts may be waiting for a dependency that never completes.
@@ -74,7 +58,7 @@ If the workspace startup scripts appear to 'hang', one or more of your startup s
* Review your template and verify that `coder exp sync complete <unit>` is called after the script completes e.g. with an exit trap.
* View the unit status using `coder exp sync status <unit>`.
## Workspace startup scripts fail
### Workspace startup scripts fail
If the workspace startup scripts fail:
@@ -85,7 +69,7 @@ If the workspace startup scripts fail:
command -v coder
```
## Cycle detected
### Cycle detected
If you see an error similar to the below in your startup script logs, you have defined a cyclic dependency:
@@ -22,78 +22,8 @@ task.
To use startup dependencies in your templates, you must:
- Enable the Coder Agent Socket Server.
- Modify your workspace startup scripts to run in parallel and declare dependencies as required using `coder exp sync`.
### Enable the Coder Agent Socket Server
The agent socket server provides the communication layer for startup
coordination. To enable it, set `CODER_AGENT_SOCKET_SERVER_ENABLED=true` in the environment in which the agent is running.
The exact method for doing this depends on your infrastructure platform:
<div class="tabs">
#### Docker / Podman
```hcl
resource "docker_container" "workspace" {
count = data.coder_workspace.me.start_count
image = "codercom/enterprise-base:ubuntu"
name = "coder-${data.coder_workspace_owner.me.name}-${lower(data.coder_workspace.me.name)}"
env = [
"CODER_AGENT_SOCKET_SERVER_ENABLED=true"
]
command = ["sh", "-c", coder_agent.main.init_script]
}
```
#### Kubernetes
```hcl
resource "kubernetes_pod" "main" {
count = data.coder_workspace.me.start_count
metadata {
name = "coder-${data.coder_workspace_owner.me.name}-${lower(data.coder_workspace.me.name)}"
namespace = var.workspaces_namespace
}
spec {
container {
name = "dev"
image = "codercom/enterprise-base:ubuntu"
command = ["sh", "-c", coder_agent.main.init_script]
env {
name = "CODER_AGENT_SOCKET_SERVER_ENABLED"
value = "true"
}
}
}
}
```
#### AWS EC2 / VMs
For virtual machines, pass the environment variable through cloud-init or your
provisioning system:
```hcl
locals {
agent_env = {
"CODER_AGENT_SOCKET_SERVER_ENABLED" = "true"
}
}
# In your cloud-init userdata template:
# %{ for key, value in local.agent_env ~}
# export ${key}="${value}"
# %{ endfor ~}
```
</div>
- Modify your workspace startup scripts to run in parallel
- Declare dependencies as required using `coder exp sync`
### Declare Dependencies in your Workspace Startup Scripts
-1
View File
@@ -765,7 +765,6 @@ resource "docker_container" "workspace" {
"CODER_PROC_OOM_SCORE=10",
"CODER_PROC_NICE_SCORE=1",
"CODER_AGENT_DEVCONTAINERS_ENABLE=1",
"CODER_AGENT_SOCKET_SERVER_ENABLED=true",
]
host {
host = "host.docker.internal"
@@ -214,6 +214,7 @@ func TestReinitializeAgent(t *testing.T) {
"--agent-token", agentToken.String(),
"--agent-url", client.URL.String(),
"--log-dir", logDir,
"--socket-path", testutil.AgentSocketPath(t),
)
clitest.Start(t, inv)
+2 -2
View File
@@ -22,7 +22,7 @@ import (
//
// On Linux, we also hit this limit on GitHub Actions runners where TMPDIR is
// set to a long path like /home/runner/work/_temp/go-tmp/.
func TempDirUnixSocket(t *testing.T) string {
func TempDirUnixSocket(t testing.TB) string {
t.Helper()
// Windows doesn't have the same unix socket path length limits,
// and callers of this function are generally gated to !windows.
@@ -41,7 +41,7 @@ func TempDirUnixSocket(t *testing.T) string {
return dir
}
func AgentSocketPath(t *testing.T) string {
func AgentSocketPath(t testing.TB) string {
if runtime.GOOS == "windows" {
return fmt.Sprintf(`\\.\pipe\com.coder.agentsocket_test.%s.%s`, t.Name(), rand.Text())
}