fix: allow unhanger to unhang dormant workspaces (#20229)

This commit is contained in:
Dean Sheather
2025-10-09 17:33:57 +11:00
committed by GitHub
parent c84ab728a8
commit 8942b50872
4 changed files with 149 additions and 5 deletions
+5 -4
View File
@@ -274,10 +274,11 @@ var (
Identifier: rbac.RoleIdentifier{Name: "jobreaper"},
DisplayName: "Job Reaper Daemon",
Site: rbac.Permissions(map[string][]policy.Action{
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceWorkspaceDormant.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate},
}),
Org: map[string][]rbac.Permission{},
User: []rbac.Permission{},
+34 -1
View File
@@ -1639,10 +1639,43 @@ func (s *MethodTestSuite) TestUser() {
}
func (s *MethodTestSuite) TestWorkspace() {
// The Workspace object differs it's type based on whether it's dormant or
// not, which is why we have two tests for it. To ensure we are actually
// testing the correct RBAC objects, we also explicitly create the expected
// object here rather than passing in the model.
s.Run("GetWorkspaceByID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
ws := testutil.Fake(s.T(), faker, database.Workspace{})
ws.DormantAt = sql.NullTime{
Time: time.Time{},
Valid: false,
}
// Ensure the RBAC is not the dormant type.
require.Equal(s.T(), rbac.ResourceWorkspace.Type, ws.RBACObject().Type)
dbm.EXPECT().GetWorkspaceByID(gomock.Any(), ws.ID).Return(ws, nil).AnyTimes()
check.Args(ws.ID).Asserts(ws, policy.ActionRead).Returns(ws)
// Explicitly create the expected object.
expected := rbac.ResourceWorkspace.WithID(ws.ID).
InOrg(ws.OrganizationID).
WithOwner(ws.OwnerID.String()).
WithGroupACL(ws.GroupACL.RBACACL()).
WithACLUserList(ws.UserACL.RBACACL())
check.Args(ws.ID).Asserts(expected, policy.ActionRead).Returns(ws)
}))
s.Run("DormantWorkspace/GetWorkspaceByID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
ws := testutil.Fake(s.T(), faker, database.Workspace{
DormantAt: sql.NullTime{
Time: time.Now().Add(-time.Hour),
Valid: true,
},
})
// Ensure the RBAC changed automatically.
require.Equal(s.T(), rbac.ResourceWorkspaceDormant.Type, ws.RBACObject().Type)
dbm.EXPECT().GetWorkspaceByID(gomock.Any(), ws.ID).Return(ws, nil).AnyTimes()
// Explicitly create the expected object.
expected := rbac.ResourceWorkspaceDormant.
WithID(ws.ID).
InOrg(ws.OrganizationID).
WithOwner(ws.OwnerID.String())
check.Args(ws.ID).Asserts(expected, policy.ActionRead).Returns(ws)
}))
s.Run("GetWorkspaceByResourceID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
ws := testutil.Fake(s.T(), faker, database.Workspace{})
+8
View File
@@ -420,6 +420,14 @@ func Workspace(t testing.TB, db database.Store, orig database.WorkspaceTable) da
require.NoError(t, err, "set workspace as deleted")
workspace.Deleted = true
}
if orig.DormantAt.Valid {
_, err = db.UpdateWorkspaceDormantDeletingAt(genCtx, database.UpdateWorkspaceDormantDeletingAtParams{
ID: workspace.ID,
DormantAt: orig.DormantAt,
})
require.NoError(t, err, "set workspace as dormant")
workspace.DormantAt = orig.DormantAt
}
return workspace
}
+102
View File
@@ -533,6 +533,108 @@ func TestDetectorPendingWorkspaceBuildNoOverrideStateIfNoExistingBuild(t *testin
detector.Wait()
}
// TestDetectorWorkspaceBuildForDormantWorkspace ensures that the jobreaper has
// enough permissions to fix dormant workspaces.
//
// Dormant workspaces are treated as rbac.ResourceWorkspaceDormant rather than
// rbac.ResourceWorkspace, which resulted in a bug where the jobreaper would
// be able to see but not fix dormant workspaces.
func TestDetectorWorkspaceBuildForDormantWorkspace(t *testing.T) {
t.Parallel()
var (
ctx = testutil.Context(t, testutil.WaitLong)
db, pubsub = dbtestutil.NewDB(t)
log = testutil.Logger(t)
tickCh = make(chan time.Time)
statsCh = make(chan jobreaper.Stats)
)
var (
now = time.Now()
tenMinAgo = now.Add(-time.Minute * 10)
sixMinAgo = now.Add(-time.Minute * 6)
org = dbgen.Organization(t, db, database.Organization{})
user = dbgen.User(t, db, database.User{})
file = dbgen.File(t, db, database.File{})
template = dbgen.Template(t, db, database.Template{
OrganizationID: org.ID,
CreatedBy: user.ID,
})
templateVersion = dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: org.ID,
TemplateID: uuid.NullUUID{
UUID: template.ID,
Valid: true,
},
CreatedBy: user.ID,
})
workspace = dbgen.Workspace(t, db, database.WorkspaceTable{
OwnerID: user.ID,
OrganizationID: org.ID,
TemplateID: template.ID,
DormantAt: sql.NullTime{
Time: now.Add(-time.Hour),
Valid: true,
},
})
// First build.
expectedWorkspaceBuildState = []byte(`{"dean":"cool","colin":"also cool"}`)
currentWorkspaceBuildJob = dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{
CreatedAt: tenMinAgo,
UpdatedAt: sixMinAgo,
StartedAt: sql.NullTime{
Time: tenMinAgo,
Valid: true,
},
OrganizationID: org.ID,
InitiatorID: user.ID,
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: []byte("{}"),
})
_ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: workspace.ID,
TemplateVersionID: templateVersion.ID,
BuildNumber: 1,
JobID: currentWorkspaceBuildJob.ID,
// Should not be overridden.
ProvisionerState: expectedWorkspaceBuildState,
})
)
t.Log("current job ID: ", currentWorkspaceBuildJob.ID)
// Ensure the RBAC is the dormant type to ensure we're testing the right
// thing.
require.Equal(t, rbac.ResourceWorkspaceDormant.Type, workspace.RBACObject().Type)
detector := jobreaper.New(ctx, wrapDBAuthz(db, log), pubsub, log, tickCh).WithStatsChannel(statsCh)
detector.Start()
tickCh <- now
stats := <-statsCh
require.NoError(t, stats.Error)
require.Len(t, stats.TerminatedJobIDs, 1)
require.Equal(t, currentWorkspaceBuildJob.ID, stats.TerminatedJobIDs[0])
// Check that the current provisioner job was updated.
job, err := db.GetProvisionerJobByID(ctx, currentWorkspaceBuildJob.ID)
require.NoError(t, err)
require.WithinDuration(t, now, job.UpdatedAt, 30*time.Second)
require.True(t, job.CompletedAt.Valid)
require.WithinDuration(t, now, job.CompletedAt.Time, 30*time.Second)
require.True(t, job.Error.Valid)
require.Contains(t, job.Error.String, "Build has been detected as hung")
require.False(t, job.ErrorCode.Valid)
detector.Close()
detector.Wait()
}
func TestDetectorHungOtherJobTypes(t *testing.T) {
t.Parallel()