mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix: soft-delete stale workspace agents on new build (#25207)
This commit is contained in:
@@ -6272,6 +6272,27 @@ func (q *querier) SoftDeleteContextFileMessages(ctx context.Context, chatID uuid
|
||||
return q.db.SoftDeleteContextFileMessages(ctx, chatID)
|
||||
}
|
||||
|
||||
func (q *querier) SoftDeletePriorWorkspaceAgents(ctx context.Context, arg database.SoftDeletePriorWorkspaceAgentsParams) error {
|
||||
// Internal bookkeeping called from wsbuilder.Builder.Build inside the
|
||||
// same transaction as an already-authorized InsertWorkspaceBuild.
|
||||
// Callers pass a system-restricted context.
|
||||
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
|
||||
return err
|
||||
}
|
||||
return q.db.SoftDeletePriorWorkspaceAgents(ctx, arg)
|
||||
}
|
||||
|
||||
func (q *querier) SoftDeleteWorkspaceAgentsByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) error {
|
||||
// Internal bookkeeping called from wsbuilder (orphan-delete) and
|
||||
// provisionerdserver.CompleteJob (normal delete) inside the same
|
||||
// transaction as an already-authorized workspace deletion.
|
||||
// Callers pass a system-restricted context.
|
||||
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
|
||||
return err
|
||||
}
|
||||
return q.db.SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, workspaceID)
|
||||
}
|
||||
|
||||
func (q *querier) TouchChatDebugRunUpdatedAt(ctx context.Context, arg database.TouchChatDebugRunUpdatedAtParams) error {
|
||||
chat, err := q.db.GetChatByID(ctx, arg.ChatID)
|
||||
if err != nil {
|
||||
|
||||
@@ -4596,6 +4596,19 @@ func (s *MethodTestSuite) TestSystemFunctions() {
|
||||
dbm.EXPECT().UpdateWorkspaceAgentConnectionByID(gomock.Any(), arg).Return(nil).AnyTimes()
|
||||
check.Args(arg).Asserts(rbac.ResourceSystem, policy.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("SoftDeletePriorWorkspaceAgents", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
|
||||
arg := database.SoftDeletePriorWorkspaceAgentsParams{
|
||||
WorkspaceID: uuid.New(),
|
||||
CurrentBuildID: uuid.New(),
|
||||
}
|
||||
dbm.EXPECT().SoftDeletePriorWorkspaceAgents(gomock.Any(), arg).Return(nil).AnyTimes()
|
||||
check.Args(arg).Asserts(rbac.ResourceSystem, policy.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("SoftDeleteWorkspaceAgentsByWorkspaceID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
|
||||
wsID := uuid.New()
|
||||
dbm.EXPECT().SoftDeleteWorkspaceAgentsByWorkspaceID(gomock.Any(), wsID).Return(nil).AnyTimes()
|
||||
check.Args(wsID).Asserts(rbac.ResourceSystem, policy.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("AcquireProvisionerJob", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
|
||||
arg := database.AcquireProvisionerJobParams{StartedAt: sql.NullTime{Valid: true, Time: dbtime.Now()}, OrganizationID: uuid.New(), Types: []database.ProvisionerType{database.ProvisionerTypeEcho}, ProvisionerTags: json.RawMessage("{}")}
|
||||
dbm.EXPECT().AcquireProvisionerJob(gomock.Any(), arg).Return(testutil.Fake(s.T(), faker, database.ProvisionerJob{}), nil).AnyTimes()
|
||||
|
||||
@@ -4537,6 +4537,22 @@ func (m queryMetricsStore) SoftDeleteContextFileMessages(ctx context.Context, ch
|
||||
return r0
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) SoftDeletePriorWorkspaceAgents(ctx context.Context, arg database.SoftDeletePriorWorkspaceAgentsParams) error {
|
||||
start := time.Now()
|
||||
r0 := m.s.SoftDeletePriorWorkspaceAgents(ctx, arg)
|
||||
m.queryLatencies.WithLabelValues("SoftDeletePriorWorkspaceAgents").Observe(time.Since(start).Seconds())
|
||||
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "SoftDeletePriorWorkspaceAgents").Inc()
|
||||
return r0
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) SoftDeleteWorkspaceAgentsByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) error {
|
||||
start := time.Now()
|
||||
r0 := m.s.SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, workspaceID)
|
||||
m.queryLatencies.WithLabelValues("SoftDeleteWorkspaceAgentsByWorkspaceID").Observe(time.Since(start).Seconds())
|
||||
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "SoftDeleteWorkspaceAgentsByWorkspaceID").Inc()
|
||||
return r0
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) TouchChatDebugRunUpdatedAt(ctx context.Context, arg database.TouchChatDebugRunUpdatedAtParams) error {
|
||||
start := time.Now()
|
||||
r0 := m.s.TouchChatDebugRunUpdatedAt(ctx, arg)
|
||||
|
||||
@@ -8588,6 +8588,34 @@ func (mr *MockStoreMockRecorder) SoftDeleteContextFileMessages(ctx, chatID any)
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SoftDeleteContextFileMessages", reflect.TypeOf((*MockStore)(nil).SoftDeleteContextFileMessages), ctx, chatID)
|
||||
}
|
||||
|
||||
// SoftDeletePriorWorkspaceAgents mocks base method.
|
||||
func (m *MockStore) SoftDeletePriorWorkspaceAgents(ctx context.Context, arg database.SoftDeletePriorWorkspaceAgentsParams) error {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "SoftDeletePriorWorkspaceAgents", ctx, arg)
|
||||
ret0, _ := ret[0].(error)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// SoftDeletePriorWorkspaceAgents indicates an expected call of SoftDeletePriorWorkspaceAgents.
|
||||
func (mr *MockStoreMockRecorder) SoftDeletePriorWorkspaceAgents(ctx, arg any) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SoftDeletePriorWorkspaceAgents", reflect.TypeOf((*MockStore)(nil).SoftDeletePriorWorkspaceAgents), ctx, arg)
|
||||
}
|
||||
|
||||
// SoftDeleteWorkspaceAgentsByWorkspaceID mocks base method.
|
||||
func (m *MockStore) SoftDeleteWorkspaceAgentsByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) error {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "SoftDeleteWorkspaceAgentsByWorkspaceID", ctx, workspaceID)
|
||||
ret0, _ := ret[0].(error)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// SoftDeleteWorkspaceAgentsByWorkspaceID indicates an expected call of SoftDeleteWorkspaceAgentsByWorkspaceID.
|
||||
func (mr *MockStoreMockRecorder) SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, workspaceID any) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SoftDeleteWorkspaceAgentsByWorkspaceID", reflect.TypeOf((*MockStore)(nil).SoftDeleteWorkspaceAgentsByWorkspaceID), ctx, workspaceID)
|
||||
}
|
||||
|
||||
// TouchChatDebugRunUpdatedAt mocks base method.
|
||||
func (m *MockStore) TouchChatDebugRunUpdatedAt(ctx context.Context, arg database.TouchChatDebugRunUpdatedAtParams) error {
|
||||
m.ctrl.T.Helper()
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
-- The backfill is not reversed: soft-deleted agents tied to stopped/deleted
|
||||
-- builds are no longer referenced anywhere. Restoring deleted=FALSE would
|
||||
-- only re-create the ambiguity the forward migration fixed.
|
||||
@@ -0,0 +1,62 @@
|
||||
-- Soft-delete stale `workspace_agents` rows.
|
||||
--
|
||||
-- Before v2.33.0, the auth path `GetWorkspaceAgentByInstanceID :one` silently
|
||||
-- picked the newest matching row, so stale rows from earlier builds were
|
||||
-- harmless. After #24325 replaced that with a `:many` lookup that rejects
|
||||
-- ambiguity with HTTP 409, the accumulation becomes a hard failure: any
|
||||
-- workspace whose EC2 instance hosted more than one build can no longer
|
||||
-- re-authenticate its agent.
|
||||
--
|
||||
-- This migration backfills the "at most one non-deleted agent per workspace
|
||||
-- that is itself not deleted" invariant over existing data. Going forward:
|
||||
-- - `wsbuilder.Builder.Build` maintains it per-build via
|
||||
-- `SoftDeletePriorWorkspaceAgents`.
|
||||
-- - `provisionerdserver.CompleteJob` and `wsbuilder` also call
|
||||
-- `SoftDeleteWorkspaceAgentsByWorkspaceID` when a workspace itself is
|
||||
-- soft-deleted, so the table doesn't retain orphaned-but-non-deleted
|
||||
-- agents referencing a deleted workspace.
|
||||
--
|
||||
-- Backfill scope:
|
||||
-- 1. Every agent belonging to a soft-deleted workspace -> deleted = TRUE.
|
||||
-- 2. For each still-live workspace, keep only agents belonging to the
|
||||
-- current (highest build_number) build; soft-delete earlier builds'
|
||||
-- agents.
|
||||
--
|
||||
-- Related:
|
||||
-- #24325 (feature that regressed the behavior)
|
||||
-- #24973 (partial fix, pool starvation)
|
||||
-- #25031 (partial fix, handler cleanup + deleted-workspace filter)
|
||||
-- #25155 (bug report)
|
||||
|
||||
-- 1. Soft-delete all agents on workspaces that are themselves deleted.
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
JOIN workspaces w ON w.id = wb.workspace_id
|
||||
WHERE wa.deleted = FALSE
|
||||
AND w.deleted = TRUE
|
||||
);
|
||||
|
||||
-- 2. For every live workspace, soft-delete agents not tied to the latest build.
|
||||
WITH latest_builds AS (
|
||||
SELECT DISTINCT ON (workspace_id) id, workspace_id
|
||||
FROM workspace_builds
|
||||
ORDER BY workspace_id, build_number DESC
|
||||
)
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
JOIN workspaces w ON w.id = wb.workspace_id
|
||||
LEFT JOIN latest_builds lb ON lb.workspace_id = wb.workspace_id
|
||||
WHERE wa.deleted = FALSE
|
||||
AND w.deleted = FALSE
|
||||
AND (lb.id IS NULL OR wb.id <> lb.id)
|
||||
);
|
||||
@@ -1185,3 +1185,214 @@ func TestMigration000475AgentsAccessOrgRole(t *testing.T) {
|
||||
"trigger should only create org-member and org-service-account system roles",
|
||||
)
|
||||
}
|
||||
|
||||
func TestMigration000498SoftDeleteStaleWorkspaceAgents(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
const migrationVersion = 498
|
||||
|
||||
sqlDB := testSQLDB(t)
|
||||
|
||||
// Step up to migrationVersion - 1.
|
||||
next, err := migrations.Stepper(sqlDB)
|
||||
require.NoError(t, err)
|
||||
for {
|
||||
version, more, err := next()
|
||||
require.NoError(t, err)
|
||||
if !more {
|
||||
t.Fatalf("migration %d not found", migrationVersion)
|
||||
}
|
||||
if version == migrationVersion-1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
|
||||
// Seed the prerequisite tables. Two workspaces share the same EC2-style
|
||||
// instance id across several builds; a third workspace has a single
|
||||
// build on a different instance (baseline, must not be affected).
|
||||
userID := uuid.New()
|
||||
orgID := uuid.New()
|
||||
templateID := uuid.New()
|
||||
templateVersionID := uuid.New()
|
||||
fileID := uuid.New()
|
||||
|
||||
wsA := uuid.New()
|
||||
wsB := uuid.New()
|
||||
wsSingle := uuid.New()
|
||||
wsDeleted := uuid.New()
|
||||
|
||||
instanceAB := "i-shared-ab"
|
||||
instanceSingle := "i-solo"
|
||||
instanceDeleted := "i-deleted"
|
||||
|
||||
// For workspace A: 3 builds on the same instance.
|
||||
// For workspace B: 2 builds on the same instance (different workspace,
|
||||
// same instance id, exercises the cross-workspace scoping case).
|
||||
// For wsSingle: 1 build, should stay non-deleted after the backfill.
|
||||
// For wsDeleted: 1 build on a soft-deleted workspace. Agent should be
|
||||
// marked deleted even though it's on the latest build.
|
||||
type build struct {
|
||||
id uuid.UUID
|
||||
jobID uuid.UUID
|
||||
resourceID uuid.UUID
|
||||
agentID uuid.UUID
|
||||
buildNum int32
|
||||
wsID uuid.UUID
|
||||
instanceID string
|
||||
}
|
||||
|
||||
mkBuild := func(ws uuid.UUID, buildNum int32, instance string) build {
|
||||
return build{
|
||||
id: uuid.New(),
|
||||
jobID: uuid.New(),
|
||||
resourceID: uuid.New(),
|
||||
agentID: uuid.New(),
|
||||
buildNum: buildNum,
|
||||
wsID: ws,
|
||||
instanceID: instance,
|
||||
}
|
||||
}
|
||||
|
||||
aBuilds := []build{
|
||||
mkBuild(wsA, 1, instanceAB),
|
||||
mkBuild(wsA, 2, instanceAB),
|
||||
mkBuild(wsA, 3, instanceAB),
|
||||
}
|
||||
bBuilds := []build{
|
||||
mkBuild(wsB, 1, instanceAB),
|
||||
mkBuild(wsB, 2, instanceAB),
|
||||
}
|
||||
singleBuilds := []build{
|
||||
mkBuild(wsSingle, 1, instanceSingle),
|
||||
}
|
||||
deletedBuilds := []build{
|
||||
mkBuild(wsDeleted, 1, instanceDeleted),
|
||||
}
|
||||
allBuilds := append(append(append(append([]build{}, aBuilds...), bBuilds...), singleBuilds...), deletedBuilds...)
|
||||
|
||||
tx, err := sqlDB.BeginTx(ctx, nil)
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
// Minimal user / org / template / template_version / file.
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO users (id, username, email, hashed_password, created_at, updated_at, status, rbac_roles, login_type)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
|
||||
userID, "seed", "seed@test.com", []byte{}, now, now, "active", pq.StringArray{}, "password",
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO organizations (id, name, display_name, description, icon, created_at, updated_at, is_default)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
orgID, "seed-org", "Seed Org", "", "", now, now, false,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO files (id, hash, created_at, created_by, mimetype, data) VALUES ($1, $2, $3, $4, $5, $6)`,
|
||||
fileID, "hash", now, userID, "application/octet-stream", []byte{},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO templates (id, created_at, updated_at, organization_id, name, provisioner, active_version_id, description, created_by, group_acl, user_acl, display_name)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`,
|
||||
templateID, now, now, orgID, "tpl", "echo", templateVersionID, "", userID, "{}", "{}", "",
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO template_versions (id, template_id, organization_id, created_at, updated_at, name, readme, job_id, created_by, message)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
|
||||
templateVersionID, templateID, orgID, now, now, "v", "", uuid.New(), userID, "",
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, ws := range []uuid.UUID{wsA, wsB, wsSingle} {
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO workspaces (id, created_at, updated_at, owner_id, organization_id, template_id, name, deleted, automatic_updates)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, false, 'never')`,
|
||||
ws, now, now, userID, orgID, templateID, "ws-"+ws.String()[:8],
|
||||
)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
// wsDeleted is a soft-deleted workspace. Its agent is on the latest
|
||||
// build but must still be soft-deleted by the migration.
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO workspaces (id, created_at, updated_at, owner_id, organization_id, template_id, name, deleted, automatic_updates)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, true, 'never')`,
|
||||
wsDeleted, now, now, userID, orgID, templateID, "ws-"+wsDeleted.String()[:8],
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
// For every build: provisioner_job -> workspace_build -> workspace_resource -> workspace_agent.
|
||||
for _, b := range allBuilds {
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO provisioner_jobs (id, created_at, updated_at, organization_id, initiator_id, provisioner, storage_method, type, input, file_id)
|
||||
VALUES ($1, $2, $3, $4, $5, 'echo', 'file', 'workspace_build', '{}', $6)`,
|
||||
b.jobID, now, now, orgID, userID, fileID,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO workspace_builds (id, created_at, updated_at, workspace_id, template_version_id, build_number, transition, initiator_id, job_id, reason)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, 'start', $7, $8, 'initiator')`,
|
||||
b.id, now, now, b.wsID, templateVersionID, b.buildNum, userID, b.jobID,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO workspace_resources (id, created_at, job_id, transition, type, name)
|
||||
VALUES ($1, $2, $3, 'start', 'aws_instance', 'dev')`,
|
||||
b.resourceID, now, b.jobID,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx,
|
||||
`INSERT INTO workspace_agents (id, created_at, updated_at, name, resource_id, auth_token, auth_instance_id, architecture, operating_system, deleted)
|
||||
VALUES ($1, $2, $3, 'main', $4, $5, $6, 'amd64', 'linux', false)`,
|
||||
b.agentID, now, now, b.resourceID, uuid.New(), b.instanceID,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.NoError(t, tx.Commit())
|
||||
|
||||
// Sanity check pre-migration: all agents should be deleted=false.
|
||||
var preDeletedCount int
|
||||
err = sqlDB.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM workspace_agents WHERE deleted = true`).Scan(&preDeletedCount)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, preDeletedCount, "no agents should be deleted pre-migration")
|
||||
|
||||
// Run migration 491.
|
||||
version, more, err := next()
|
||||
require.NoError(t, err)
|
||||
require.True(t, more)
|
||||
require.EqualValues(t, migrationVersion, version)
|
||||
|
||||
// Backfill assertions:
|
||||
// wsA: builds 1,2,3 → keep agent for build 3, delete for 1 and 2.
|
||||
// wsB: builds 1,2 → keep agent for build 2, delete for 1.
|
||||
// wsSingle: 1 build → keep.
|
||||
// Per workspace, exactly one agent remains deleted=false.
|
||||
check := func(label string, expectDeleted bool, agent uuid.UUID) {
|
||||
var deleted bool
|
||||
err := sqlDB.QueryRowContext(ctx,
|
||||
`SELECT deleted FROM workspace_agents WHERE id = $1`, agent).Scan(&deleted)
|
||||
require.NoError(t, err, label)
|
||||
require.Equal(t, expectDeleted, deleted, label)
|
||||
}
|
||||
check("wsA build 1 (old) should be deleted", true, aBuilds[0].agentID)
|
||||
check("wsA build 2 (old) should be deleted", true, aBuilds[1].agentID)
|
||||
check("wsA build 3 (latest) should be kept", false, aBuilds[2].agentID)
|
||||
check("wsB build 1 (old) should be deleted", true, bBuilds[0].agentID)
|
||||
check("wsB build 2 (latest) should be kept", false, bBuilds[1].agentID)
|
||||
check("wsSingle build 1 (solo latest) should be kept", false, singleBuilds[0].agentID)
|
||||
check("wsDeleted: agent on deleted workspace should be soft-deleted even though it's the latest build",
|
||||
true, deletedBuilds[0].agentID)
|
||||
|
||||
// The ongoing invariants are enforced by wsbuilder.Builder.Build and
|
||||
// provisionerdserver.CompleteJob via SoftDeletePriorWorkspaceAgents and
|
||||
// SoftDeleteWorkspaceAgentsByWorkspaceID. Those paths are covered by
|
||||
// the querier tests TestSoftDeletePriorWorkspaceAgents and
|
||||
// TestSoftDeleteWorkspaceAgentsByWorkspaceID, plus integration tests
|
||||
// under coderd/coderd_test.go; not retested here.
|
||||
}
|
||||
|
||||
@@ -1077,6 +1077,18 @@ type sqlcQuerier interface {
|
||||
SoftDeleteChatMessageByID(ctx context.Context, id int64) error
|
||||
SoftDeleteChatMessagesAfterID(ctx context.Context, arg SoftDeleteChatMessagesAfterIDParams) error
|
||||
SoftDeleteContextFileMessages(ctx context.Context, chatID uuid.UUID) error
|
||||
// Marks agents from all prior builds of this workspace as deleted,
|
||||
// preserving only agents belonging to @current_build_id. Called from
|
||||
// provisionerdserver when a workspace build completes, after the new
|
||||
// build's agents have been inserted, so running agents are not
|
||||
// deleted while a build is still queued or provisioning.
|
||||
SoftDeletePriorWorkspaceAgents(ctx context.Context, arg SoftDeletePriorWorkspaceAgentsParams) error
|
||||
// Marks every non-deleted agent belonging to the given workspace as
|
||||
// deleted. Called alongside UpdateWorkspaceDeletedByID when a workspace
|
||||
// itself is soft-deleted, so the agent instance-identity auth path
|
||||
// (which filters on workspace_agents.deleted) doesn't keep seeing
|
||||
// orphaned rows.
|
||||
SoftDeleteWorkspaceAgentsByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) error
|
||||
// Overrides updated_at on the parent run without touching any
|
||||
// other column. Used by tests that need to stamp a run with a
|
||||
// specific timestamp after the InsertChatDebugStep CTE has
|
||||
|
||||
@@ -13765,3 +13765,230 @@ func TestChatHasUnread(t *testing.T) {
|
||||
insertMsg(database.ChatMessageRoleUser, "user msg")
|
||||
require.False(t, getHasUnread(), "user messages should not trigger unread")
|
||||
}
|
||||
|
||||
// TestSoftDeletePriorWorkspaceAgents verifies the invariant maintained by
|
||||
// wsbuilder.Builder.Build: when a new build of a workspace is created, all
|
||||
// agents belonging to prior builds of that same workspace are soft-deleted,
|
||||
// and agents belonging to *other* workspaces are untouched.
|
||||
func TestSoftDeletePriorWorkspaceAgents(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
db, _, sqlDB := dbtestutil.NewDBWithSQLDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
// Helper: create a workspace + one build + its agent. Returns the IDs we
|
||||
// need to assert on. The agent uses the shared EC2-style auth_instance_id
|
||||
// so we can prove per-workspace scoping.
|
||||
type buildBundle struct {
|
||||
workspaceID uuid.UUID
|
||||
buildID uuid.UUID
|
||||
agentID uuid.UUID
|
||||
}
|
||||
|
||||
user := dbgen.User(t, db, database.User{})
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
tpl := dbgen.Template(t, db, database.Template{
|
||||
OrganizationID: org.ID,
|
||||
CreatedBy: user.ID,
|
||||
})
|
||||
tplVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||||
OrganizationID: org.ID,
|
||||
CreatedBy: user.ID,
|
||||
})
|
||||
|
||||
newBuild := func(t *testing.T, wsID uuid.UUID, buildNumber int32, instanceID string) buildBundle {
|
||||
t.Helper()
|
||||
job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
|
||||
OrganizationID: org.ID,
|
||||
Type: database.ProvisionerJobTypeWorkspaceBuild,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||||
WorkspaceID: wsID,
|
||||
JobID: job.ID,
|
||||
TemplateVersionID: tplVersion.ID,
|
||||
BuildNumber: buildNumber,
|
||||
Transition: database.WorkspaceTransitionStart,
|
||||
})
|
||||
resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: job.ID})
|
||||
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resource.ID,
|
||||
AuthInstanceID: sql.NullString{String: instanceID, Valid: true},
|
||||
})
|
||||
return buildBundle{workspaceID: wsID, buildID: build.ID, agentID: agent.ID}
|
||||
}
|
||||
|
||||
// Read `deleted` via raw SQL. GetWorkspaceAgentByID filters deleted rows
|
||||
// out, which is exactly what we want to observe here.
|
||||
agentDeleted := func(id uuid.UUID) bool {
|
||||
t.Helper()
|
||||
var deleted bool
|
||||
err := sqlDB.QueryRowContext(ctx,
|
||||
`SELECT deleted FROM workspace_agents WHERE id = $1`, id).Scan(&deleted)
|
||||
require.NoError(t, err)
|
||||
return deleted
|
||||
}
|
||||
|
||||
// Two workspaces share a single EC2 instance ID across their lifetimes.
|
||||
wsA := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||||
OrganizationID: org.ID,
|
||||
TemplateID: tpl.ID,
|
||||
OwnerID: user.ID,
|
||||
}).ID
|
||||
wsB := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||||
OrganizationID: org.ID,
|
||||
TemplateID: tpl.ID,
|
||||
OwnerID: user.ID,
|
||||
}).ID
|
||||
instance := "i-shared"
|
||||
|
||||
a1 := newBuild(t, wsA, 1, instance)
|
||||
a2 := newBuild(t, wsA, 2, instance)
|
||||
a3 := newBuild(t, wsA, 3, instance)
|
||||
b1 := newBuild(t, wsB, 1, instance)
|
||||
b2 := newBuild(t, wsB, 2, instance)
|
||||
|
||||
// Sanity check: all agents start non-deleted.
|
||||
require.False(t, agentDeleted(a1.agentID))
|
||||
require.False(t, agentDeleted(a2.agentID))
|
||||
require.False(t, agentDeleted(a3.agentID))
|
||||
require.False(t, agentDeleted(b1.agentID))
|
||||
require.False(t, agentDeleted(b2.agentID))
|
||||
|
||||
// Run: "wsA's current build is a3; soft-delete all other wsA agents."
|
||||
err := db.SoftDeletePriorWorkspaceAgents(ctx, database.SoftDeletePriorWorkspaceAgentsParams{
|
||||
WorkspaceID: wsA,
|
||||
CurrentBuildID: a3.buildID,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.True(t, agentDeleted(a1.agentID), "wsA build 1 agent should be soft-deleted")
|
||||
assert.True(t, agentDeleted(a2.agentID), "wsA build 2 agent should be soft-deleted")
|
||||
assert.False(t, agentDeleted(a3.agentID), "wsA current build's agent must stay")
|
||||
assert.False(t, agentDeleted(b1.agentID), "wsB build 1 agent must not be touched")
|
||||
assert.False(t, agentDeleted(b2.agentID), "wsB build 2 agent must not be touched")
|
||||
|
||||
// Idempotency: re-running with the same params is a no-op.
|
||||
err = db.SoftDeletePriorWorkspaceAgents(ctx, database.SoftDeletePriorWorkspaceAgentsParams{
|
||||
WorkspaceID: wsA,
|
||||
CurrentBuildID: a3.buildID,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, agentDeleted(a3.agentID))
|
||||
|
||||
// Now age wsB: new current build is b2; b1's agent should flip.
|
||||
err = db.SoftDeletePriorWorkspaceAgents(ctx, database.SoftDeletePriorWorkspaceAgentsParams{
|
||||
WorkspaceID: wsB,
|
||||
CurrentBuildID: b2.buildID,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, agentDeleted(b1.agentID))
|
||||
assert.False(t, agentDeleted(b2.agentID))
|
||||
}
|
||||
|
||||
// TestSoftDeleteWorkspaceAgentsByWorkspaceID verifies the delete-path
|
||||
// invariant: when a workspace is soft-deleted, every one of its agents
|
||||
// (across all builds) gets soft-deleted in the same transaction. Agents on
|
||||
// *other* workspaces, even ones sharing an auth_instance_id, must be
|
||||
// untouched.
|
||||
func TestSoftDeleteWorkspaceAgentsByWorkspaceID(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
db, _, sqlDB := dbtestutil.NewDBWithSQLDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
type buildBundle struct {
|
||||
workspaceID uuid.UUID
|
||||
buildID uuid.UUID
|
||||
agentID uuid.UUID
|
||||
}
|
||||
|
||||
user := dbgen.User(t, db, database.User{})
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
tpl := dbgen.Template(t, db, database.Template{
|
||||
OrganizationID: org.ID,
|
||||
CreatedBy: user.ID,
|
||||
})
|
||||
tplVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||||
OrganizationID: org.ID,
|
||||
CreatedBy: user.ID,
|
||||
})
|
||||
|
||||
newBuild := func(t *testing.T, wsID uuid.UUID, buildNumber int32, instanceID string) buildBundle {
|
||||
t.Helper()
|
||||
job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
|
||||
OrganizationID: org.ID,
|
||||
Type: database.ProvisionerJobTypeWorkspaceBuild,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||||
WorkspaceID: wsID,
|
||||
JobID: job.ID,
|
||||
TemplateVersionID: tplVersion.ID,
|
||||
BuildNumber: buildNumber,
|
||||
Transition: database.WorkspaceTransitionStart,
|
||||
})
|
||||
resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: job.ID})
|
||||
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||||
ResourceID: resource.ID,
|
||||
AuthInstanceID: sql.NullString{String: instanceID, Valid: true},
|
||||
})
|
||||
return buildBundle{workspaceID: wsID, buildID: build.ID, agentID: agent.ID}
|
||||
}
|
||||
|
||||
agentDeleted := func(id uuid.UUID) bool {
|
||||
t.Helper()
|
||||
var deleted bool
|
||||
err := sqlDB.QueryRowContext(ctx,
|
||||
`SELECT deleted FROM workspace_agents WHERE id = $1`, id).Scan(&deleted)
|
||||
require.NoError(t, err)
|
||||
return deleted
|
||||
}
|
||||
|
||||
// wsA: 3 builds (so multiple agents to sweep on delete).
|
||||
// wsB: 1 build, same auth_instance_id as wsA (proves scoping).
|
||||
wsA := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||||
OrganizationID: org.ID,
|
||||
TemplateID: tpl.ID,
|
||||
OwnerID: user.ID,
|
||||
}).ID
|
||||
wsB := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||||
OrganizationID: org.ID,
|
||||
TemplateID: tpl.ID,
|
||||
OwnerID: user.ID,
|
||||
}).ID
|
||||
instance := "i-shared"
|
||||
|
||||
a1 := newBuild(t, wsA, 1, instance)
|
||||
a2 := newBuild(t, wsA, 2, instance)
|
||||
a3 := newBuild(t, wsA, 3, instance)
|
||||
b1 := newBuild(t, wsB, 1, instance)
|
||||
|
||||
// Sanity: all 4 agents start non-deleted.
|
||||
for _, id := range []uuid.UUID{a1.agentID, a2.agentID, a3.agentID, b1.agentID} {
|
||||
require.False(t, agentDeleted(id))
|
||||
}
|
||||
|
||||
err := db.SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, wsA)
|
||||
require.NoError(t, err)
|
||||
|
||||
// All wsA agents flipped; wsB's agent untouched.
|
||||
assert.True(t, agentDeleted(a1.agentID), "wsA build 1 agent")
|
||||
assert.True(t, agentDeleted(a2.agentID), "wsA build 2 agent")
|
||||
assert.True(t, agentDeleted(a3.agentID), "wsA build 3 agent")
|
||||
assert.False(t, agentDeleted(b1.agentID), "wsB agent must not be affected")
|
||||
|
||||
// Idempotency: re-running is a no-op.
|
||||
err = db.SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, wsA)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, agentDeleted(b1.agentID))
|
||||
|
||||
// Calling on an empty workspace (no agents) is a no-op and does not error.
|
||||
wsEmpty := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||||
OrganizationID: org.ID,
|
||||
TemplateID: tpl.ID,
|
||||
OwnerID: user.ID,
|
||||
}).ID
|
||||
err = db.SoftDeleteWorkspaceAgentsByWorkspaceID(ctx, wsEmpty)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
@@ -30641,6 +30641,58 @@ func (q *sqlQuerier) InsertWorkspaceAgentScriptTimings(ctx context.Context, arg
|
||||
return i, err
|
||||
}
|
||||
|
||||
const softDeletePriorWorkspaceAgents = `-- name: SoftDeletePriorWorkspaceAgents :exec
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
WHERE wb.workspace_id = $1
|
||||
AND wb.id <> $2
|
||||
AND wa.deleted = FALSE
|
||||
)
|
||||
`
|
||||
|
||||
type SoftDeletePriorWorkspaceAgentsParams struct {
|
||||
WorkspaceID uuid.UUID `db:"workspace_id" json:"workspace_id"`
|
||||
CurrentBuildID uuid.UUID `db:"current_build_id" json:"current_build_id"`
|
||||
}
|
||||
|
||||
// Marks agents from all prior builds of this workspace as deleted,
|
||||
// preserving only agents belonging to @current_build_id. Called from
|
||||
// provisionerdserver when a workspace build completes, after the new
|
||||
// build's agents have been inserted, so running agents are not
|
||||
// deleted while a build is still queued or provisioning.
|
||||
func (q *sqlQuerier) SoftDeletePriorWorkspaceAgents(ctx context.Context, arg SoftDeletePriorWorkspaceAgentsParams) error {
|
||||
_, err := q.db.ExecContext(ctx, softDeletePriorWorkspaceAgents, arg.WorkspaceID, arg.CurrentBuildID)
|
||||
return err
|
||||
}
|
||||
|
||||
const softDeleteWorkspaceAgentsByWorkspaceID = `-- name: SoftDeleteWorkspaceAgentsByWorkspaceID :exec
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
WHERE wb.workspace_id = $1
|
||||
AND wa.deleted = FALSE
|
||||
)
|
||||
`
|
||||
|
||||
// Marks every non-deleted agent belonging to the given workspace as
|
||||
// deleted. Called alongside UpdateWorkspaceDeletedByID when a workspace
|
||||
// itself is soft-deleted, so the agent instance-identity auth path
|
||||
// (which filters on workspace_agents.deleted) doesn't keep seeing
|
||||
// orphaned rows.
|
||||
func (q *sqlQuerier) SoftDeleteWorkspaceAgentsByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) error {
|
||||
_, err := q.db.ExecContext(ctx, softDeleteWorkspaceAgentsByWorkspaceID, workspaceID)
|
||||
return err
|
||||
}
|
||||
|
||||
const updateWorkspaceAgentConnectionByID = `-- name: UpdateWorkspaceAgentConnectionByID :exec
|
||||
UPDATE
|
||||
workspace_agents
|
||||
|
||||
@@ -517,3 +517,38 @@ WHERE
|
||||
AND workspaces.deleted = FALSE
|
||||
AND users.deleted = FALSE
|
||||
LIMIT 1;
|
||||
|
||||
-- name: SoftDeletePriorWorkspaceAgents :exec
|
||||
-- Marks agents from all prior builds of this workspace as deleted,
|
||||
-- preserving only agents belonging to @current_build_id. Called from
|
||||
-- provisionerdserver when a workspace build completes, after the new
|
||||
-- build's agents have been inserted, so running agents are not
|
||||
-- deleted while a build is still queued or provisioning.
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
WHERE wb.workspace_id = @workspace_id
|
||||
AND wb.id <> @current_build_id
|
||||
AND wa.deleted = FALSE
|
||||
);
|
||||
|
||||
-- name: SoftDeleteWorkspaceAgentsByWorkspaceID :exec
|
||||
-- Marks every non-deleted agent belonging to the given workspace as
|
||||
-- deleted. Called alongside UpdateWorkspaceDeletedByID when a workspace
|
||||
-- itself is soft-deleted, so the agent instance-identity auth path
|
||||
-- (which filters on workspace_agents.deleted) doesn't keep seeing
|
||||
-- orphaned rows.
|
||||
UPDATE workspace_agents
|
||||
SET deleted = TRUE
|
||||
WHERE id IN (
|
||||
SELECT wa.id
|
||||
FROM workspace_agents wa
|
||||
JOIN workspace_resources wr ON wr.id = wa.resource_id
|
||||
JOIN workspace_builds wb ON wb.job_id = wr.job_id
|
||||
WHERE wb.workspace_id = @workspace_id
|
||||
AND wa.deleted = FALSE
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user