mirror of
https://github.com/coder/coder.git
synced 2026-06-04 05:28:20 +00:00
6ef9670384
## Description

This PR addresses database connection pool exhaustion during prebuilds reconciliation by introducing two changes:

* `CanSkipReconciliation`: Filters out presets that don't need reconciliation before spawning goroutines. This ensures we only create goroutines for presets that will (_most likely_) perform database operations, avoiding unnecessary connection pool usage.
* Dynamic `eg.SetLimit`: Limits concurrent goroutines based on the configured database connection pool size (`CODER_PG_CONN_MAX_OPEN / 2`). This replaces the previous hardcoded limit of 5, ensuring the reconciliation loop scales appropriately with the configured pool size while leaving capacity for other database operations.

## Changes

* Add `CanSkipReconciliation()` method to `PresetSnapshot` that returns true for inactive presets with no running workspaces, no pending jobs, or expired prebuilds.
* Add `maxDBConnections` parameter to `NewStoreReconciler` and compute `reconciliationConcurrency` as half the pool size (minimum 1).
* Add `ReconciliationConcurrency()` getter method to `StoreReconciler`.
* Add `eg.SetLimit(c.reconciliationConcurrency)` to bound concurrent reconciliation goroutines.
* Add `PresetsTotal` and `PresetsReconciled` to `ReconcileStats` for observability.
* Add `TestCanSkipReconciliation` unit tests.
* Add `TestReconciliationConcurrency` unit tests.
* Add benchmark tests for reconciliation performance.

## Benchmarks

* `BenchmarkReconcileAll_NoOps`: Tests presets with no reconciliation actions. All presets are filtered by `CanSkipReconciliation`, resulting in no goroutines spawned and no database connections used.
* `BenchmarkReconcileAll_ConnectionContention`: Tests presets where all require reconciliation actions. All presets spawn goroutines, but concurrency is limited by `eg.SetLimit(reconciliationConcurrency)`.
* `BenchmarkReconcileAll_Mix`: Simulates a realistic scenario with a large subset of inactive presets (filtered by `CanSkipReconciliation`) and a smaller subset requiring reconciliation (limited by `eg.SetLimit`).

Closes: https://github.com/coder/coder/issues/20606
77 lines
2.8 KiB
Go
77 lines
2.8 KiB
Go
package prebuilds
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
|
|
)
|
|
|
|
var (
|
|
ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found")
|
|
ErrAGPLDoesNotSupportPrebuiltWorkspaces = xerrors.New("prebuilt workspaces functionality is not supported under the AGPL license")
|
|
)
|
|
|
|
// ReconciliationOrchestrator manages the lifecycle of prebuild reconciliation.
|
|
// It runs a continuous loop to check and reconcile prebuild states, and can be stopped gracefully.
|
|
type ReconciliationOrchestrator interface {
|
|
Reconciler
|
|
|
|
// Run starts a continuous reconciliation loop that periodically calls ReconcileAll
|
|
// to ensure all prebuilds are in their desired states. The loop runs until the context
|
|
// is canceled or Stop is called.
|
|
Run(ctx context.Context)
|
|
|
|
// Stop gracefully shuts down the orchestrator with the given cause.
|
|
// The cause is used for logging and error reporting.
|
|
Stop(ctx context.Context, cause error)
|
|
|
|
// TrackResourceReplacement handles a pathological situation whereby a terraform resource is replaced due to drift,
|
|
// which can obviate the whole point of pre-provisioning a prebuilt workspace.
|
|
// See more detail at https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces#preventing-resource-replacement.
|
|
TrackResourceReplacement(ctx context.Context, workspaceID, buildID uuid.UUID, replacements []*sdkproto.ResourceReplacement)
|
|
}
|
|
|
|
// ReconcileStats contains statistics about a reconciliation cycle.
type ReconcileStats struct {
	// Elapsed is a duration associated with the cycle.
	// NOTE(review): presumably the wall-clock time the cycle took — confirm
	// against the code that populates it.
	Elapsed time.Duration

	// PresetsTotal is a preset count for the cycle.
	// NOTE(review): presumably every preset considered, including skipped
	// ones — confirm against the code that populates it.
	PresetsTotal int

	// PresetsReconciled is a preset count for the cycle.
	// NOTE(review): presumably the presets that were actually reconciled
	// rather than skipped — confirm against the code that populates it.
	PresetsReconciled int
}
|
|
|
|
type Reconciler interface {
|
|
StateSnapshotter
|
|
|
|
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
|
|
// It takes a global snapshot of the system state and then reconciles each preset
|
|
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
|
|
ReconcileAll(ctx context.Context) (ReconcileStats, error)
|
|
}
|
|
|
|
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
|
|
type StateSnapshotter interface {
|
|
// SnapshotState captures the current state of all prebuilds across templates.
|
|
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
|
|
// each representing the state of prebuilds for a specific preset.
|
|
// MUST be called inside a repeatable-read transaction.
|
|
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
|
|
}
|
|
|
|
type Claimer interface {
|
|
Claim(
|
|
ctx context.Context,
|
|
now time.Time,
|
|
userID uuid.UUID,
|
|
name string,
|
|
presetID uuid.UUID,
|
|
autostartSchedule sql.NullString,
|
|
nextStartAt sql.NullTime,
|
|
ttl sql.NullInt64,
|
|
) (*uuid.UUID, error)
|
|
}
|