From 49a42eff5c6d5fe810ff3d660a0cdb1a61802005 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 13 Jan 2026 10:50:57 +0200 Subject: [PATCH] feat: make database connection pool size configurable (#21403) Closes https://github.com/coder/coder/issues/21360 A few considerations/notes: - I've kept the number of conns to 10 in all other places, except coderd - which uses the config value - I opted to also make idle conns configurable; the greater the delta between max open and max idle, the more connection churn - Postgres maintains a [_process_ per connection](https://www.postgresql.org/docs/current/connect-estab.html), contrary to what the comment said previously - Operators should be able to tune this, since process churn can negatively affect OS scheduling - I've set the value to `"auto"` by default so it's not another knob one _has to_ twiddle, and sets max idle = max conns / 3 --------- Signed-off-by: Danny Kopping --- cli/server.go | 67 +++++++--- cli/testdata/coder_server_--help.golden | 8 ++ cli/testdata/server-config.yaml.golden | 8 ++ coderd/apidoc/docs.go | 6 + coderd/apidoc/swagger.json | 6 + codersdk/deployment.go | 55 ++++++++ codersdk/deployment_test.go | 117 ++++++++++++++++++ docs/reference/api/general.md | 2 + docs/reference/api/schemas.md | 6 + docs/reference/cli/server.md | 22 ++++ docs/tutorials/best-practices/scale-coder.md | 37 ++++++ .../cli/testdata/coder_server_--help.golden | 8 ++ site/src/api/typesGenerated.ts | 9 ++ 13 files changed, 334 insertions(+), 17 deletions(-) diff --git a/cli/server.go b/cli/server.go index c17db2aa5f..7c47563a3d 100644 --- a/cli/server.go +++ b/cli/server.go @@ -747,7 +747,16 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. // "bare" read on this channel. 
var pubsubWatchdogTimeout <-chan struct{} - sqlDB, dbURL, err := getAndMigratePostgresDB(ctx, logger, vals.PostgresURL.String(), codersdk.PostgresAuth(vals.PostgresAuth), sqlDriver) + maxOpenConns := int(vals.PostgresConnMaxOpen.Value()) + maxIdleConns, err := codersdk.ComputeMaxIdleConns(maxOpenConns, vals.PostgresConnMaxIdle.Value()) + if err != nil { + return xerrors.Errorf("compute max idle connections: %w", err) + } + logger.Debug(ctx, "creating database connection pool", slog.F("max_open_conns", maxOpenConns), slog.F("max_idle_conns", maxIdleConns)) + sqlDB, dbURL, err := getAndMigratePostgresDB(ctx, logger, vals.PostgresURL.String(), codersdk.PostgresAuth(vals.PostgresAuth), sqlDriver, + WithMaxOpenConns(maxOpenConns), + WithMaxIdleConns(maxIdleConns), + ) if err != nil { return xerrors.Errorf("connect to postgres: %w", err) } @@ -2324,6 +2333,29 @@ func IsLocalhost(host string) bool { return host == "localhost" || host == "127.0.0.1" || host == "::1" } +// PostgresConnectOptions contains options for connecting to Postgres. +type PostgresConnectOptions struct { + MaxOpenConns int + MaxIdleConns int +} + +// PostgresConnectOption is a functional option for ConnectToPostgres. +type PostgresConnectOption func(*PostgresConnectOptions) + +// WithMaxOpenConns sets the maximum number of open connections to the database. +func WithMaxOpenConns(n int) PostgresConnectOption { + return func(o *PostgresConnectOptions) { + o.MaxOpenConns = n + } +} + +// WithMaxIdleConns sets the maximum number of idle connections in the pool. +func WithMaxIdleConns(n int) PostgresConnectOption { + return func(o *PostgresConnectOptions) { + o.MaxIdleConns = n + } +} + // ConnectToPostgres takes in the migration command to run on the database once // it connects. To avoid running migrations, pass in `nil` or a no-op function. 
// Regardless of the passed in migration function, if the database is not fully @@ -2331,7 +2363,15 @@ func IsLocalhost(host string) bool { // future or past migration version. // // If no error is returned, the database is fully migrated and up to date. -func ConnectToPostgres(ctx context.Context, logger slog.Logger, driver string, dbURL string, migrate func(db *sql.DB) error) (*sql.DB, error) { +func ConnectToPostgres(ctx context.Context, logger slog.Logger, driver string, dbURL string, migrate func(db *sql.DB) error, opts ...PostgresConnectOption) (*sql.DB, error) { + // Apply defaults. + options := PostgresConnectOptions{ + MaxOpenConns: 10, + MaxIdleConns: 3, + } + for _, opt := range opts { + opt(&options) + } logger.Debug(ctx, "connecting to postgresql") var err error @@ -2414,19 +2454,12 @@ func ConnectToPostgres(ctx context.Context, logger slog.Logger, driver string, d // cannot accept new connections, so we try to limit that here. // Requests will wait for a new connection instead of a hard error // if a limit is set. - sqlDB.SetMaxOpenConns(10) - // Allow a max of 3 idle connections at a time. Lower values end up - // creating a lot of connection churn. Since each connection uses about - // 10MB of memory, we're allocating 30MB to Postgres connections per - // replica, but is better than causing Postgres to spawn a thread 15-20 - // times/sec. PGBouncer's transaction pooling is not the greatest so - // it's not optimal for us to deploy. - // - // This was set to 10 before we started doing HA deployments, but 3 was - // later determined to be a better middle ground as to not use up all - // of PGs default connection limit while simultaneously avoiding a lot - // of connection churn. - sqlDB.SetMaxIdleConns(3) + sqlDB.SetMaxOpenConns(options.MaxOpenConns) + // Limit idle connections to reduce connection churn while keeping some + // connections ready for reuse. 
When a connection is returned to the pool + // but the idle pool is full, it's closed immediately - which can cause + // connection establishment overhead when load fluctuates. + sqlDB.SetMaxIdleConns(options.MaxIdleConns) dbNeedsClosing = false return sqlDB, nil @@ -2830,7 +2863,7 @@ func signalNotifyContext(ctx context.Context, inv *serpent.Invocation, sig ...os return inv.SignalNotifyContext(ctx, sig...) } -func getAndMigratePostgresDB(ctx context.Context, logger slog.Logger, postgresURL string, auth codersdk.PostgresAuth, sqlDriver string) (*sql.DB, string, error) { +func getAndMigratePostgresDB(ctx context.Context, logger slog.Logger, postgresURL string, auth codersdk.PostgresAuth, sqlDriver string, opts ...PostgresConnectOption) (*sql.DB, string, error) { dbURL, err := escapePostgresURLUserInfo(postgresURL) if err != nil { return nil, "", xerrors.Errorf("escaping postgres URL: %w", err) @@ -2843,7 +2876,7 @@ func getAndMigratePostgresDB(ctx context.Context, logger slog.Logger, postgresUR } } - sqlDB, err := ConnectToPostgres(ctx, logger, sqlDriver, dbURL, migrations.Up) + sqlDB, err := ConnectToPostgres(ctx, logger, sqlDriver, dbURL, migrations.Up, opts...) if err != nil { return nil, "", xerrors.Errorf("connect to postgres: %w", err) } diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 5486f51b0a..c9747c061c 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -65,6 +65,14 @@ OPTIONS: Type of auth to use when connecting to postgres. For AWS RDS, using IAM authentication (awsiamrds) is recommended. + --postgres-conn-max-idle string, $CODER_PG_CONN_MAX_IDLE (default: auto) + Maximum number of idle connections to the database. Set to "auto" (the + default) to use max open / 3. Value must be greater or equal to 0; 0 + means explicitly no idle connections. 
+ + --postgres-conn-max-open int, $CODER_PG_CONN_MAX_OPEN (default: 10) + Maximum number of open connections to the database. Defaults to 10. + --postgres-url string, $CODER_PG_CONNECTION_URL URL of a PostgreSQL database. If empty, PostgreSQL binaries will be downloaded from Maven (https://repo1.maven.org/maven2) and store all diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index deca918036..66a7f910c7 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -483,6 +483,14 @@ ephemeralDeployment: false # authentication (awsiamrds) is recommended. # (default: password, type: enum[password\|awsiamrds]) pgAuth: password +# Maximum number of open connections to the database. Defaults to 10. +# (default: 10, type: int) +pgConnMaxOpen: 10 +# Maximum number of idle connections to the database. Set to "auto" (the default) +# to use max open / 3. Value must be greater or equal to 0; 0 means explicitly no +# idle connections. +# (default: auto, type: string) +pgConnMaxIdle: auto # A URL to an external Terms of Service that must be accepted by users when # logging in. 
# (default: , type: string) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index f450fe05b0..c65d62fd0d 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -14380,6 +14380,12 @@ const docTemplate = `{ "pg_auth": { "type": "string" }, + "pg_conn_max_idle": { + "type": "string" + }, + "pg_conn_max_open": { + "type": "integer" + }, "pg_connection_url": { "type": "string" }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index fab51b33bf..a40981e9a7 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -12959,6 +12959,12 @@ "pg_auth": { "type": "string" }, + "pg_conn_max_idle": { + "type": "string" + }, + "pg_conn_max_open": { + "type": "integer" + }, "pg_connection_url": { "type": "string" }, diff --git a/codersdk/deployment.go b/codersdk/deployment.go index b77fd6bf18..f75c041fe0 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -442,6 +442,10 @@ var PostgresAuthDrivers = []string{ string(PostgresAuthAWSIAMRDS), } +// PostgresConnMaxIdleAuto is the value for auto-computing max idle connections +// based on max open connections. +const PostgresConnMaxIdleAuto = "auto" + // DeploymentValues is the central configuration values the coder server. 
type DeploymentValues struct { Verbose serpent.Bool `json:"verbose,omitempty"` @@ -462,6 +466,8 @@ type DeploymentValues struct { EphemeralDeployment serpent.Bool `json:"ephemeral_deployment,omitempty" typescript:",notnull"` PostgresURL serpent.String `json:"pg_connection_url,omitempty" typescript:",notnull"` PostgresAuth string `json:"pg_auth,omitempty" typescript:",notnull"` + PostgresConnMaxOpen serpent.Int64 `json:"pg_conn_max_open,omitempty" typescript:",notnull"` + PostgresConnMaxIdle serpent.String `json:"pg_conn_max_idle,omitempty" typescript:",notnull"` OAuth2 OAuth2Config `json:"oauth2,omitempty" typescript:",notnull"` OIDC OIDCConfig `json:"oidc,omitempty" typescript:",notnull"` Telemetry TelemetryConfig `json:"telemetry,omitempty" typescript:",notnull"` @@ -2623,6 +2629,30 @@ func (c *DeploymentValues) Options() serpent.OptionSet { Value: serpent.EnumOf(&c.PostgresAuth, PostgresAuthDrivers...), YAML: "pgAuth", }, + { + Name: "Postgres Connection Max Open", + Description: "Maximum number of open connections to the database. Defaults to 10.", + Flag: "postgres-conn-max-open", + Env: "CODER_PG_CONN_MAX_OPEN", + Default: "10", + Value: serpent.Validate(&c.PostgresConnMaxOpen, func(value *serpent.Int64) error { + if value.Value() <= 0 { + return xerrors.New("must be greater than zero") + } + return nil + }), + YAML: "pgConnMaxOpen", + }, + { + Name: "Postgres Connection Max Idle", + Description: "Maximum number of idle connections to the database. Set to \"auto\" (the default) to use max open / 3. 
" + + "Value must be greater or equal to 0; 0 means explicitly no idle connections.", + Flag: "postgres-conn-max-idle", + Env: "CODER_PG_CONN_MAX_IDLE", + Default: PostgresConnMaxIdleAuto, + Value: &c.PostgresConnMaxIdle, + YAML: "pgConnMaxIdle", + }, { Name: "Secure Auth Cookie", Description: "Controls if the 'Secure' property is set on browser session cookies.", @@ -4128,3 +4158,28 @@ func (c CryptoKey) CanVerify(now time.Time) bool { beforeDelete := c.DeletesAt.IsZero() || now.Before(c.DeletesAt) return hasSecret && beforeDelete } + +// ComputeMaxIdleConns calculates the effective maxIdleConns value. If +// configuredIdle is "auto", it returns maxOpen/3 with a minimum of 1. If +// configuredIdle exceeds maxOpen, it returns an error. +func ComputeMaxIdleConns(maxOpen int, configuredIdle string) (int, error) { + configuredIdle = strings.TrimSpace(configuredIdle) + if configuredIdle == PostgresConnMaxIdleAuto { + computed := maxOpen / 3 + if computed < 1 { + return 1, nil + } + return computed, nil + } + idle, err := strconv.Atoi(configuredIdle) + if err != nil { + return 0, xerrors.Errorf("invalid max idle connections %q: must be %q or >= 0", configuredIdle, PostgresConnMaxIdleAuto) + } + if idle < 0 { + return 0, xerrors.Errorf("max idle connections must be %q or >= 0", PostgresConnMaxIdleAuto) + } + if idle > maxOpen { + return 0, xerrors.Errorf("max idle connections (%d) cannot exceed max open connections (%d)", idle, maxOpen) + } + return idle, nil +} diff --git a/codersdk/deployment_test.go b/codersdk/deployment_test.go index bd3c597dc5..3590e5455c 100644 --- a/codersdk/deployment_test.go +++ b/codersdk/deployment_test.go @@ -765,3 +765,120 @@ func TestRetentionConfigParsing(t *testing.T) { }) } } + +func TestComputeMaxIdleConns(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + maxOpen int + configuredIdle string + expectedIdle int + expectError bool + errorContains string + }{ + { + name: "auto_default_10_open", + maxOpen: 10, + 
configuredIdle: "auto", + expectedIdle: 3, // 10/3 = 3 + }, + { + name: "auto_with_whitespace", + maxOpen: 10, + configuredIdle: " auto ", + expectedIdle: 3, // 10/3 = 3 + }, + { + name: "auto_30_open", + maxOpen: 30, + configuredIdle: "auto", + expectedIdle: 10, // 30/3 = 10 + }, + { + name: "auto_minimum_1", + maxOpen: 1, + configuredIdle: "auto", + expectedIdle: 1, // 1/3 = 0, but minimum is 1 + }, + { + name: "auto_minimum_2_open", + maxOpen: 2, + configuredIdle: "auto", + expectedIdle: 1, // 2/3 = 0, but minimum is 1 + }, + { + name: "auto_3_open", + maxOpen: 3, + configuredIdle: "auto", + expectedIdle: 1, // 3/3 = 1 + }, + { + name: "explicit_equal_to_max", + maxOpen: 10, + configuredIdle: "10", + expectedIdle: 10, + }, + { + name: "explicit_less_than_max", + maxOpen: 10, + configuredIdle: "5", + expectedIdle: 5, + }, + { + name: "explicit_with_whitespace", + maxOpen: 10, + configuredIdle: " 5 ", + expectedIdle: 5, + }, + { + name: "explicit_0", + maxOpen: 10, + configuredIdle: "0", + expectedIdle: 0, + }, + { + name: "error_exceeds_max", + maxOpen: 10, + configuredIdle: "15", + expectError: true, + errorContains: "cannot exceed", + }, + { + name: "error_exceeds_max_by_1", + maxOpen: 10, + configuredIdle: "11", + expectError: true, + errorContains: "cannot exceed", + }, + { + name: "error_invalid_string", + maxOpen: 10, + configuredIdle: "invalid", + expectError: true, + errorContains: "must be \"auto\" or >= 0", + }, + { + name: "error_negative", + maxOpen: 10, + configuredIdle: "-1", + expectError: true, + errorContains: "must be \"auto\" or >= 0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + result, err := codersdk.ComputeMaxIdleConns(tt.maxOpen, tt.configuredIdle) + if tt.expectError { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errorContains) + } else { + require.NoError(t, err) + require.Equal(t, tt.expectedIdle, result) + } + }) + } +} diff --git a/docs/reference/api/general.md 
b/docs/reference/api/general.md index 2358aaf842..fc2ca32f10 100644 --- a/docs/reference/api/general.md +++ b/docs/reference/api/general.md @@ -433,6 +433,8 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ "username_field": "string" }, "pg_auth": "string", + "pg_conn_max_idle": "string", + "pg_conn_max_open": 0, "pg_connection_url": "string", "pprof": { "address": { diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index dfbd084d64..ab4507cf3b 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -2893,6 +2893,8 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o "username_field": "string" }, "pg_auth": "string", + "pg_conn_max_idle": "string", + "pg_conn_max_open": 0, "pg_connection_url": "string", "pprof": { "address": { @@ -3432,6 +3434,8 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o "username_field": "string" }, "pg_auth": "string", + "pg_conn_max_idle": "string", + "pg_conn_max_open": 0, "pg_connection_url": "string", "pprof": { "address": { @@ -3622,6 +3626,8 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o | `oauth2` | [codersdk.OAuth2Config](#codersdkoauth2config) | false | | | | `oidc` | [codersdk.OIDCConfig](#codersdkoidcconfig) | false | | | | `pg_auth` | string | false | | | +| `pg_conn_max_idle` | string | false | | | +| `pg_conn_max_open` | integer | false | | | | `pg_connection_url` | string | false | | | | `pprof` | [codersdk.PprofConfig](#codersdkpprofconfig) | false | | | | `prometheus` | [codersdk.PrometheusConfig](#codersdkprometheusconfig) | false | | | diff --git a/docs/reference/cli/server.md b/docs/reference/cli/server.md index e41caf4af8..f93825c193 100644 --- a/docs/reference/cli/server.md +++ b/docs/reference/cli/server.md @@ -1015,6 +1015,28 @@ URL of a PostgreSQL database. 
If empty, PostgreSQL binaries will be downloaded f Type of auth to use when connecting to postgres. For AWS RDS, using IAM authentication (awsiamrds) is recommended. +### --postgres-conn-max-open + +| | | +|-------------|--------------------------------------| +| Type | int | +| Environment | $CODER_PG_CONN_MAX_OPEN | +| YAML | pgConnMaxOpen | +| Default | 10 | + +Maximum number of open connections to the database. Defaults to 10. + +### --postgres-conn-max-idle + +| | | +|-------------|--------------------------------------| +| Type | string | +| Environment | $CODER_PG_CONN_MAX_IDLE | +| YAML | pgConnMaxIdle | +| Default | auto | + +Maximum number of idle connections to the database. Set to "auto" (the default) to use max open / 3. Value must be greater or equal to 0; 0 means explicitly no idle connections. + ### --secure-auth-cookie | | | diff --git a/docs/tutorials/best-practices/scale-coder.md b/docs/tutorials/best-practices/scale-coder.md index 7fbb55c10a..b87d809fc3 100644 --- a/docs/tutorials/best-practices/scale-coder.md +++ b/docs/tutorials/best-practices/scale-coder.md @@ -218,6 +218,43 @@ performance. Coder's [validated architectures](../../admin/infrastructure/validated-architectures/index.md) give specific sizing recommendations for various user scales. +### Connection pool tuning + +Coder Server maintains a pool of connections to PostgreSQL. You can tune the +pool size with these settings: + +- `--postgres-conn-max-open` (env: `CODER_PG_CONN_MAX_OPEN`): Maximum number of open + connections. Default: 10. Ensure that your PostgreSQL Server has `max_connections` + set appropriately to accommodate all Coder Server replicas multiplied by the + maximum number of open connections. We recommend configuring an additional 20% + of connections to account for churn and other clients. +- `--postgres-conn-max-idle` (env: `CODER_PG_CONN_MAX_IDLE`): Maximum number of idle + connections kept in the pool. Default: "auto", which uses max open / 3. 
+ +When a connection is returned to the pool and the idle pool is already full, the +connection is closed immediately. This can cause connection establishment +overhead (churn) when load fluctuates. Monitor these metrics to understand your +connection pool behavior: + +- **Capacity**: `go_sql_max_open_connections - go_sql_in_use_connections` shows + how many connections are available for new requests. If this is 0, Coder + Server performance will start to degrade. This just provides a point-in-time view + of the connections, however. + + For a more systematic view, consider running + `sum by (pod) (increase(go_sql_wait_duration_seconds_total[1m]))` to see how long + each Coder replica spent waiting on the connection pool (i.e. no free connections); + `sum by (pod) (increase(go_sql_wait_count_total[$__interval]))` shows how many + connections were waited for. + + If either of these values seems unacceptably high, try tuning the above settings. +- **Churn**: `sum(rate(go_sql_max_idle_closed_total[$__rate_interval]))` shows + how many connections are being closed because the idle pool is full. + +If you see high churn, consider increasing `--postgres-conn-max-idle` to keep more +connections ready for reuse. If you see capacity consistently near zero, +consider increasing `--postgres-conn-max-open`. + ## Workspace proxies Workspace proxies proxy HTTP traffic from end users to workspaces for Coder apps diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 3b8393a286..10b772e2e4 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -66,6 +66,14 @@ OPTIONS: Type of auth to use when connecting to postgres. For AWS RDS, using IAM authentication (awsiamrds) is recommended. + --postgres-conn-max-idle string, $CODER_PG_CONN_MAX_IDLE (default: auto) + Maximum number of idle connections to the database. Set to "auto" (the + default) to use max open / 3. 
Value must be greater or equal to 0; 0 + means explicitly no idle connections. + + --postgres-conn-max-open int, $CODER_PG_CONN_MAX_OPEN (default: 10) + Maximum number of open connections to the database. Defaults to 10. + --postgres-url string, $CODER_PG_CONNECTION_URL URL of a PostgreSQL database. If empty, PostgreSQL binaries will be downloaded from Maven (https://repo1.maven.org/maven2) and store all diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index e38ed995a9..054b280dbf 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1736,6 +1736,8 @@ export interface DeploymentValues { readonly ephemeral_deployment?: boolean; readonly pg_connection_url?: string; readonly pg_auth?: string; + readonly pg_conn_max_open?: number; + readonly pg_conn_max_idle?: string; readonly oauth2?: OAuth2Config; readonly oidc?: OIDCConfig; readonly telemetry?: TelemetryConfig; @@ -3450,6 +3452,13 @@ export type PostgresAuth = "awsiamrds" | "password"; export const PostgresAuths: PostgresAuth[] = ["awsiamrds", "password"]; +// From codersdk/deployment.go +/** + * PostgresConnMaxIdleAuto is the value for auto-computing max idle connections + * based on max open connections. + */ +export const PostgresConnMaxIdleAuto = "auto"; + // From codersdk/deployment.go export interface PprofConfig { readonly enable: boolean;