diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 2bbd14e267..0ad8e7ab66 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -1749,6 +1749,13 @@ func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, thresho return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold) } +func (q *querier) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) { + if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil { + return 0, err + } + return q.db.DeleteOldAuditLogs(ctx, arg) +} + func (q *querier) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) { if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil { return 0, err diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 4833498f45..1075ed3bfd 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -324,6 +324,10 @@ func (s *MethodTestSuite) TestAuditLogs() { dbm.EXPECT().DeleteOldAuditLogConnectionEvents(gomock.Any(), database.DeleteOldAuditLogConnectionEventsParams{}).Return(nil).AnyTimes() check.Args(database.DeleteOldAuditLogConnectionEventsParams{}).Asserts(rbac.ResourceSystem, policy.ActionDelete) })) + s.Run("DeleteOldAuditLogs", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + dbm.EXPECT().DeleteOldAuditLogs(gomock.Any(), database.DeleteOldAuditLogsParams{}).Return(int64(0), nil).AnyTimes() + check.Args(database.DeleteOldAuditLogsParams{}).Asserts(rbac.ResourceSystem, policy.ActionDelete) + })) } func (s *MethodTestSuite) TestConnectionLogs() { diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 26342761a7..68cc143593 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -410,6 +410,13 @@ func (m queryMetricsStore) DeleteOldAuditLogConnectionEvents(ctx context.Context return r0 } +func (m queryMetricsStore) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) { + start := time.Now() + r0, r1 := m.s.DeleteOldAuditLogs(ctx, arg) + m.queryLatencies.WithLabelValues("DeleteOldAuditLogs").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) { start := time.Now() r0, r1 := m.s.DeleteOldConnectionLogs(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 7d5714deea..d52c335470 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -753,6 +753,21 @@ func (mr *MockStoreMockRecorder) DeleteOldAuditLogConnectionEvents(ctx, arg any) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteOldAuditLogConnectionEvents", reflect.TypeOf((*MockStore)(nil).DeleteOldAuditLogConnectionEvents), ctx, arg) } +// DeleteOldAuditLogs mocks base method. +func (m *MockStore) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteOldAuditLogs", ctx, arg) + ret0, _ := ret[0].(int64) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DeleteOldAuditLogs indicates an expected call of DeleteOldAuditLogs. +func (mr *MockStoreMockRecorder) DeleteOldAuditLogs(ctx, arg any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteOldAuditLogs", reflect.TypeOf((*MockStore)(nil).DeleteOldAuditLogs), ctx, arg) +} + // DeleteOldConnectionLogs mocks base method. func (m *MockStore) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) { m.ctrl.T.Helper() diff --git a/coderd/database/dbpurge/dbpurge.go b/coderd/database/dbpurge/dbpurge.go index e9c32a5833..4b1f9f7628 100644 --- a/coderd/database/dbpurge/dbpurge.go +++ b/coderd/database/dbpurge/dbpurge.go @@ -27,6 +27,8 @@ const ( auditLogConnectionEventBatchSize = 1000 // Batch size for connection log deletion. connectionLogsBatchSize = 10000 + // Batch size for audit log deletion. + auditLogsBatchSize = 10000 // Telemetry heartbeats are used to deduplicate events across replicas. We // don't need to persist heartbeat rows for longer than 24 hours, as they // are only used for deduplication across replicas. The time needs to be @@ -126,10 +128,24 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder } } + var purgedAuditLogs int64 + auditLogsRetention := vals.Retention.AuditLogs.Value() + if auditLogsRetention > 0 { + deleteAuditLogsBefore := start.Add(-auditLogsRetention) + purgedAuditLogs, err = tx.DeleteOldAuditLogs(ctx, database.DeleteOldAuditLogsParams{ + BeforeTime: deleteAuditLogsBefore, + LimitCount: auditLogsBatchSize, + }) + if err != nil { + return xerrors.Errorf("failed to delete old audit logs: %w", err) + } + } + logger.Debug(ctx, "purged old database entries", slog.F("expired_api_keys", expiredAPIKeys), slog.F("aibridge_records", purgedAIBridgeRecords), slog.F("connection_logs", purgedConnectionLogs), + slog.F("audit_logs", purgedAuditLogs), slog.F("duration", clk.Since(start)), ) diff --git a/coderd/database/dbpurge/dbpurge_test.go b/coderd/database/dbpurge/dbpurge_test.go index 76d981959a..26e778b00a 100644 --- a/coderd/database/dbpurge/dbpurge_test.go +++ b/coderd/database/dbpurge/dbpurge_test.go @@ -1050,3 +1050,198 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) { require.NoError(t, err) require.Len(t, newToolUsages, 1, "near threshold tool usages should not be deleted") } + +func TestDeleteOldAuditLogs(t *testing.T) { + t.Parallel() + + now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC) + retentionPeriod := 30 * 24 * time.Hour + afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold) + beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold) + + testCases := []struct { + name string + retentionConfig codersdk.RetentionConfig + oldLogTime time.Time + recentLogTime *time.Time // nil means no recent log created + expectOldDeleted bool + expectedLogsRemaining int + }{ + { + name: "RetentionEnabled", + retentionConfig: codersdk.RetentionConfig{ + AuditLogs: serpent.Duration(retentionPeriod), + }, + oldLogTime: afterThreshold, + recentLogTime: &beforeThreshold, + expectOldDeleted: true, + expectedLogsRemaining: 1, // only recent log remains + }, + { + name: "RetentionDisabled", + retentionConfig: codersdk.RetentionConfig{ + AuditLogs: serpent.Duration(0), + }, + oldLogTime: now.Add(-365 * 24 * time.Hour), // 1 year ago + recentLogTime: nil, + expectOldDeleted: false, + expectedLogsRemaining: 1, // old log is kept + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitShort) + clk := quartz.NewMock(t) + clk.Set(now).MustWait(ctx) + + db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure()) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + + // Setup test fixtures. + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + + // Create old audit log. + oldLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: tc.oldLogTime, + Action: database.AuditActionCreate, + ResourceType: database.ResourceTypeWorkspace, + }) + + // Create recent audit log if specified. + var recentLog database.AuditLog + if tc.recentLogTime != nil { + recentLog = dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: *tc.recentLogTime, + Action: database.AuditActionCreate, + ResourceType: database.ResourceTypeWorkspace, + }) + } + + // Run the purge. + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ + Retention: tc.retentionConfig, + }, clk) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + // Verify results. + logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{ + LimitOpt: 100, + }) + require.NoError(t, err) + require.Len(t, logs, tc.expectedLogsRemaining, "unexpected number of logs remaining") + + logIDs := make([]uuid.UUID, len(logs)) + for i, log := range logs { + logIDs[i] = log.AuditLog.ID + } + + if tc.expectOldDeleted { + require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted") + } else { + require.Contains(t, logIDs, oldLog.ID, "old audit log should NOT be deleted") + } + + if tc.recentLogTime != nil { + require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept") + } + }) + } + + // ConnectionEventsNotDeleted is a special case that tests multiple audit + // action types, so it's kept as a separate subtest. + t.Run("ConnectionEventsNotDeleted", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitShort) + clk := quartz.NewMock(t) + clk.Set(now).MustWait(ctx) + + db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure()) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + + // Create old connection events (should NOT be deleted by audit logs retention). + oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: afterThreshold, + Action: database.AuditActionConnect, + ResourceType: database.ResourceTypeWorkspace, + }) + + oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: afterThreshold, + Action: database.AuditActionDisconnect, + ResourceType: database.ResourceTypeWorkspace, + }) + + oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: afterThreshold, + Action: database.AuditActionOpen, + ResourceType: database.ResourceTypeWorkspace, + }) + + oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: afterThreshold, + Action: database.AuditActionClose, + ResourceType: database.ResourceTypeWorkspace, + }) + + // Create old non-connection audit log (should be deleted). + oldCreateLog := dbgen.AuditLog(t, db, database.AuditLog{ + UserID: user.ID, + OrganizationID: org.ID, + Time: afterThreshold, + Action: database.AuditActionCreate, + ResourceType: database.ResourceTypeWorkspace, + }) + + // Run the purge with audit logs retention enabled. + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ + Retention: codersdk.RetentionConfig{ + AuditLogs: serpent.Duration(retentionPeriod), + }, + }, clk) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + // Verify results. + logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{ + LimitOpt: 100, + }) + require.NoError(t, err) + require.Len(t, logs, 4, "should have 4 connection event logs remaining") + + logIDs := make([]uuid.UUID, len(logs)) + for i, log := range logs { + logIDs[i] = log.AuditLog.ID + } + + // Connection events should NOT be deleted by audit logs retention. + require.Contains(t, logIDs, oldConnectLog.ID, "old connect log should NOT be deleted by audit logs retention") + require.Contains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should NOT be deleted by audit logs retention") + require.Contains(t, logIDs, oldOpenLog.ID, "old open log should NOT be deleted by audit logs retention") + require.Contains(t, logIDs, oldCloseLog.ID, "old close log should NOT be deleted by audit logs retention") + + // Non-connection event should be deleted. + require.NotContains(t, logIDs, oldCreateLog.ID, "old create log should be deleted by audit logs retention") + }) +} diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 1ec13cdf4d..db748129ae 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -106,6 +106,10 @@ type sqlcQuerier interface { // Cumulative count. DeleteOldAIBridgeRecords(ctx context.Context, beforeTime time.Time) (int32, error) DeleteOldAuditLogConnectionEvents(ctx context.Context, arg DeleteOldAuditLogConnectionEventsParams) error + // Deletes old audit logs based on retention policy, excluding deprecated + // connection events (connect, disconnect, open, close) which are handled + // separately by DeleteOldAuditLogConnectionEvents. + DeleteOldAuditLogs(ctx context.Context, arg DeleteOldAuditLogsParams) (int64, error) DeleteOldConnectionLogs(ctx context.Context, arg DeleteOldConnectionLogsParams) (int64, error) // Delete all notification messages which have not been updated for over a week. DeleteOldNotificationMessages(ctx context.Context) error diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 2de5ae3a2c..82d8639abf 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -1637,6 +1637,37 @@ func (q *sqlQuerier) DeleteOldAuditLogConnectionEvents(ctx context.Context, arg return err } +const deleteOldAuditLogs = `-- name: DeleteOldAuditLogs :execrows +WITH old_logs AS ( + SELECT id + FROM audit_logs + WHERE + "time" < $1::timestamp with time zone + AND action NOT IN ('connect', 'disconnect', 'open', 'close') + ORDER BY "time" ASC + LIMIT $2 +) +DELETE FROM audit_logs +USING old_logs +WHERE audit_logs.id = old_logs.id +` + +type DeleteOldAuditLogsParams struct { + BeforeTime time.Time `db:"before_time" json:"before_time"` + LimitCount int32 `db:"limit_count" json:"limit_count"` +} + +// Deletes old audit logs based on retention policy, excluding deprecated +// connection events (connect, disconnect, open, close) which are handled +// separately by DeleteOldAuditLogConnectionEvents. +func (q *sqlQuerier) DeleteOldAuditLogs(ctx context.Context, arg DeleteOldAuditLogsParams) (int64, error) { + result, err := q.db.ExecContext(ctx, deleteOldAuditLogs, arg.BeforeTime, arg.LimitCount) + if err != nil { + return 0, err + } + return result.RowsAffected() +} + const getAuditLogsOffset = `-- name: GetAuditLogsOffset :many SELECT audit_logs.id, audit_logs.time, audit_logs.user_id, audit_logs.organization_id, audit_logs.ip, audit_logs.user_agent, audit_logs.resource_type, audit_logs.resource_id, audit_logs.resource_target, audit_logs.action, audit_logs.diff, audit_logs.status_code, audit_logs.additional_fields, audit_logs.request_id, audit_logs.resource_icon, -- sqlc.embed(users) would be nice but it does not seem to play well with diff --git a/coderd/database/queries/auditlogs.sql b/coderd/database/queries/auditlogs.sql index 63e8c721c8..a1c219e702 100644 --- a/coderd/database/queries/auditlogs.sql +++ b/coderd/database/queries/auditlogs.sql @@ -253,3 +253,20 @@ WHERE id IN ( ORDER BY "time" ASC LIMIT @limit_count ); + +-- name: DeleteOldAuditLogs :execrows +-- Deletes old audit logs based on retention policy, excluding deprecated +-- connection events (connect, disconnect, open, close) which are handled +-- separately by DeleteOldAuditLogConnectionEvents. +WITH old_logs AS ( + SELECT id + FROM audit_logs + WHERE + "time" < @before_time::timestamp with time zone + AND action NOT IN ('connect', 'disconnect', 'open', 'close') + ORDER BY "time" ASC + LIMIT @limit_count +) +DELETE FROM audit_logs +USING old_logs +WHERE audit_logs.id = old_logs.id;