mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat(coderd/database/dbpurge): add retention for audit logs (#21025)
Add configurable retention policy for audit logs. The DeleteOldAuditLogs query excludes deprecated connection events (connect, disconnect, open, close) which are handled separately by DeleteOldAuditLogConnectionEvents. Disabled (0) by default. Depends on #21021 Updates #20743
This commit is contained in:
committed by
GitHub
parent
fa7bbe2f55
commit
c85d79bcdb
@@ -1749,6 +1749,13 @@ func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, thresho
|
||||
return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold)
|
||||
}
|
||||
|
||||
func (q *querier) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
|
||||
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return q.db.DeleteOldAuditLogs(ctx, arg)
|
||||
}
|
||||
|
||||
func (q *querier) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
|
||||
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
|
||||
return 0, err
|
||||
|
||||
@@ -324,6 +324,10 @@ func (s *MethodTestSuite) TestAuditLogs() {
|
||||
dbm.EXPECT().DeleteOldAuditLogConnectionEvents(gomock.Any(), database.DeleteOldAuditLogConnectionEventsParams{}).Return(nil).AnyTimes()
|
||||
check.Args(database.DeleteOldAuditLogConnectionEventsParams{}).Asserts(rbac.ResourceSystem, policy.ActionDelete)
|
||||
}))
|
||||
s.Run("DeleteOldAuditLogs", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) {
|
||||
dbm.EXPECT().DeleteOldAuditLogs(gomock.Any(), database.DeleteOldAuditLogsParams{}).Return(int64(0), nil).AnyTimes()
|
||||
check.Args(database.DeleteOldAuditLogsParams{}).Asserts(rbac.ResourceSystem, policy.ActionDelete)
|
||||
}))
|
||||
}
|
||||
|
||||
func (s *MethodTestSuite) TestConnectionLogs() {
|
||||
|
||||
@@ -410,6 +410,13 @@ func (m queryMetricsStore) DeleteOldAuditLogConnectionEvents(ctx context.Context
|
||||
return r0
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.DeleteOldAuditLogs(ctx, arg)
|
||||
m.queryLatencies.WithLabelValues("DeleteOldAuditLogs").Observe(time.Since(start).Seconds())
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.DeleteOldConnectionLogs(ctx, arg)
|
||||
|
||||
@@ -753,6 +753,21 @@ func (mr *MockStoreMockRecorder) DeleteOldAuditLogConnectionEvents(ctx, arg any)
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteOldAuditLogConnectionEvents", reflect.TypeOf((*MockStore)(nil).DeleteOldAuditLogConnectionEvents), ctx, arg)
|
||||
}
|
||||
|
||||
// DeleteOldAuditLogs mocks base method.
|
||||
func (m *MockStore) DeleteOldAuditLogs(ctx context.Context, arg database.DeleteOldAuditLogsParams) (int64, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "DeleteOldAuditLogs", ctx, arg)
|
||||
ret0, _ := ret[0].(int64)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// DeleteOldAuditLogs indicates an expected call of DeleteOldAuditLogs.
|
||||
func (mr *MockStoreMockRecorder) DeleteOldAuditLogs(ctx, arg any) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteOldAuditLogs", reflect.TypeOf((*MockStore)(nil).DeleteOldAuditLogs), ctx, arg)
|
||||
}
|
||||
|
||||
// DeleteOldConnectionLogs mocks base method.
|
||||
func (m *MockStore) DeleteOldConnectionLogs(ctx context.Context, arg database.DeleteOldConnectionLogsParams) (int64, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
||||
@@ -27,6 +27,8 @@ const (
|
||||
auditLogConnectionEventBatchSize = 1000
|
||||
// Batch size for connection log deletion.
|
||||
connectionLogsBatchSize = 10000
|
||||
// Batch size for audit log deletion.
|
||||
auditLogsBatchSize = 10000
|
||||
// Telemetry heartbeats are used to deduplicate events across replicas. We
|
||||
// don't need to persist heartbeat rows for longer than 24 hours, as they
|
||||
// are only used for deduplication across replicas. The time needs to be
|
||||
@@ -126,10 +128,24 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
|
||||
}
|
||||
}
|
||||
|
||||
var purgedAuditLogs int64
|
||||
auditLogsRetention := vals.Retention.AuditLogs.Value()
|
||||
if auditLogsRetention > 0 {
|
||||
deleteAuditLogsBefore := start.Add(-auditLogsRetention)
|
||||
purgedAuditLogs, err = tx.DeleteOldAuditLogs(ctx, database.DeleteOldAuditLogsParams{
|
||||
BeforeTime: deleteAuditLogsBefore,
|
||||
LimitCount: auditLogsBatchSize,
|
||||
})
|
||||
if err != nil {
|
||||
return xerrors.Errorf("failed to delete old audit logs: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debug(ctx, "purged old database entries",
|
||||
slog.F("expired_api_keys", expiredAPIKeys),
|
||||
slog.F("aibridge_records", purgedAIBridgeRecords),
|
||||
slog.F("connection_logs", purgedConnectionLogs),
|
||||
slog.F("audit_logs", purgedAuditLogs),
|
||||
slog.F("duration", clk.Since(start)),
|
||||
)
|
||||
|
||||
|
||||
@@ -1050,3 +1050,198 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Len(t, newToolUsages, 1, "near threshold tool usages should not be deleted")
|
||||
}
|
||||
|
||||
func TestDeleteOldAuditLogs(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
now := time.Date(2025, 1, 15, 7, 30, 0, 0, time.UTC)
|
||||
retentionPeriod := 30 * 24 * time.Hour
|
||||
afterThreshold := now.Add(-retentionPeriod).Add(-24 * time.Hour) // 31 days ago (older than threshold)
|
||||
beforeThreshold := now.Add(-15 * 24 * time.Hour) // 15 days ago (newer than threshold)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
retentionConfig codersdk.RetentionConfig
|
||||
oldLogTime time.Time
|
||||
recentLogTime *time.Time // nil means no recent log created
|
||||
expectOldDeleted bool
|
||||
expectedLogsRemaining int
|
||||
}{
|
||||
{
|
||||
name: "RetentionEnabled",
|
||||
retentionConfig: codersdk.RetentionConfig{
|
||||
AuditLogs: serpent.Duration(retentionPeriod),
|
||||
},
|
||||
oldLogTime: afterThreshold,
|
||||
recentLogTime: &beforeThreshold,
|
||||
expectOldDeleted: true,
|
||||
expectedLogsRemaining: 1, // only recent log remains
|
||||
},
|
||||
{
|
||||
name: "RetentionDisabled",
|
||||
retentionConfig: codersdk.RetentionConfig{
|
||||
AuditLogs: serpent.Duration(0),
|
||||
},
|
||||
oldLogTime: now.Add(-365 * 24 * time.Hour), // 1 year ago
|
||||
recentLogTime: nil,
|
||||
expectOldDeleted: false,
|
||||
expectedLogsRemaining: 1, // old log is kept
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
clk := quartz.NewMock(t)
|
||||
clk.Set(now).MustWait(ctx)
|
||||
|
||||
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
|
||||
|
||||
// Setup test fixtures.
|
||||
user := dbgen.User(t, db, database.User{})
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
|
||||
// Create old audit log.
|
||||
oldLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: tc.oldLogTime,
|
||||
Action: database.AuditActionCreate,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
// Create recent audit log if specified.
|
||||
var recentLog database.AuditLog
|
||||
if tc.recentLogTime != nil {
|
||||
recentLog = dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: *tc.recentLogTime,
|
||||
Action: database.AuditActionCreate,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
}
|
||||
|
||||
// Run the purge.
|
||||
done := awaitDoTick(ctx, t, clk)
|
||||
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
|
||||
Retention: tc.retentionConfig,
|
||||
}, clk)
|
||||
defer closer.Close()
|
||||
testutil.TryReceive(ctx, t, done)
|
||||
|
||||
// Verify results.
|
||||
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
|
||||
LimitOpt: 100,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, logs, tc.expectedLogsRemaining, "unexpected number of logs remaining")
|
||||
|
||||
logIDs := make([]uuid.UUID, len(logs))
|
||||
for i, log := range logs {
|
||||
logIDs[i] = log.AuditLog.ID
|
||||
}
|
||||
|
||||
if tc.expectOldDeleted {
|
||||
require.NotContains(t, logIDs, oldLog.ID, "old audit log should be deleted")
|
||||
} else {
|
||||
require.Contains(t, logIDs, oldLog.ID, "old audit log should NOT be deleted")
|
||||
}
|
||||
|
||||
if tc.recentLogTime != nil {
|
||||
require.Contains(t, logIDs, recentLog.ID, "recent audit log should be kept")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ConnectionEventsNotDeleted is a special case that tests multiple audit
|
||||
// action types, so it's kept as a separate subtest.
|
||||
t.Run("ConnectionEventsNotDeleted", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
clk := quartz.NewMock(t)
|
||||
clk.Set(now).MustWait(ctx)
|
||||
|
||||
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
|
||||
user := dbgen.User(t, db, database.User{})
|
||||
org := dbgen.Organization(t, db, database.Organization{})
|
||||
|
||||
// Create old connection events (should NOT be deleted by audit logs retention).
|
||||
oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: afterThreshold,
|
||||
Action: database.AuditActionConnect,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: afterThreshold,
|
||||
Action: database.AuditActionDisconnect,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: afterThreshold,
|
||||
Action: database.AuditActionOpen,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: afterThreshold,
|
||||
Action: database.AuditActionClose,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
// Create old non-connection audit log (should be deleted).
|
||||
oldCreateLog := dbgen.AuditLog(t, db, database.AuditLog{
|
||||
UserID: user.ID,
|
||||
OrganizationID: org.ID,
|
||||
Time: afterThreshold,
|
||||
Action: database.AuditActionCreate,
|
||||
ResourceType: database.ResourceTypeWorkspace,
|
||||
})
|
||||
|
||||
// Run the purge with audit logs retention enabled.
|
||||
done := awaitDoTick(ctx, t, clk)
|
||||
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
|
||||
Retention: codersdk.RetentionConfig{
|
||||
AuditLogs: serpent.Duration(retentionPeriod),
|
||||
},
|
||||
}, clk)
|
||||
defer closer.Close()
|
||||
testutil.TryReceive(ctx, t, done)
|
||||
|
||||
// Verify results.
|
||||
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{
|
||||
LimitOpt: 100,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, logs, 4, "should have 4 connection event logs remaining")
|
||||
|
||||
logIDs := make([]uuid.UUID, len(logs))
|
||||
for i, log := range logs {
|
||||
logIDs[i] = log.AuditLog.ID
|
||||
}
|
||||
|
||||
// Connection events should NOT be deleted by audit logs retention.
|
||||
require.Contains(t, logIDs, oldConnectLog.ID, "old connect log should NOT be deleted by audit logs retention")
|
||||
require.Contains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should NOT be deleted by audit logs retention")
|
||||
require.Contains(t, logIDs, oldOpenLog.ID, "old open log should NOT be deleted by audit logs retention")
|
||||
require.Contains(t, logIDs, oldCloseLog.ID, "old close log should NOT be deleted by audit logs retention")
|
||||
|
||||
// Non-connection event should be deleted.
|
||||
require.NotContains(t, logIDs, oldCreateLog.ID, "old create log should be deleted by audit logs retention")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -106,6 +106,10 @@ type sqlcQuerier interface {
|
||||
// Cumulative count.
|
||||
DeleteOldAIBridgeRecords(ctx context.Context, beforeTime time.Time) (int32, error)
|
||||
DeleteOldAuditLogConnectionEvents(ctx context.Context, arg DeleteOldAuditLogConnectionEventsParams) error
|
||||
// Deletes old audit logs based on retention policy, excluding deprecated
|
||||
// connection events (connect, disconnect, open, close) which are handled
|
||||
// separately by DeleteOldAuditLogConnectionEvents.
|
||||
DeleteOldAuditLogs(ctx context.Context, arg DeleteOldAuditLogsParams) (int64, error)
|
||||
DeleteOldConnectionLogs(ctx context.Context, arg DeleteOldConnectionLogsParams) (int64, error)
|
||||
// Delete all notification messages which have not been updated for over a week.
|
||||
DeleteOldNotificationMessages(ctx context.Context) error
|
||||
|
||||
@@ -1637,6 +1637,37 @@ func (q *sqlQuerier) DeleteOldAuditLogConnectionEvents(ctx context.Context, arg
|
||||
return err
|
||||
}
|
||||
|
||||
const deleteOldAuditLogs = `-- name: DeleteOldAuditLogs :execrows
|
||||
WITH old_logs AS (
|
||||
SELECT id
|
||||
FROM audit_logs
|
||||
WHERE
|
||||
"time" < $1::timestamp with time zone
|
||||
AND action NOT IN ('connect', 'disconnect', 'open', 'close')
|
||||
ORDER BY "time" ASC
|
||||
LIMIT $2
|
||||
)
|
||||
DELETE FROM audit_logs
|
||||
USING old_logs
|
||||
WHERE audit_logs.id = old_logs.id
|
||||
`
|
||||
|
||||
type DeleteOldAuditLogsParams struct {
|
||||
BeforeTime time.Time `db:"before_time" json:"before_time"`
|
||||
LimitCount int32 `db:"limit_count" json:"limit_count"`
|
||||
}
|
||||
|
||||
// Deletes old audit logs based on retention policy, excluding deprecated
|
||||
// connection events (connect, disconnect, open, close) which are handled
|
||||
// separately by DeleteOldAuditLogConnectionEvents.
|
||||
func (q *sqlQuerier) DeleteOldAuditLogs(ctx context.Context, arg DeleteOldAuditLogsParams) (int64, error) {
|
||||
result, err := q.db.ExecContext(ctx, deleteOldAuditLogs, arg.BeforeTime, arg.LimitCount)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return result.RowsAffected()
|
||||
}
|
||||
|
||||
const getAuditLogsOffset = `-- name: GetAuditLogsOffset :many
|
||||
SELECT audit_logs.id, audit_logs.time, audit_logs.user_id, audit_logs.organization_id, audit_logs.ip, audit_logs.user_agent, audit_logs.resource_type, audit_logs.resource_id, audit_logs.resource_target, audit_logs.action, audit_logs.diff, audit_logs.status_code, audit_logs.additional_fields, audit_logs.request_id, audit_logs.resource_icon,
|
||||
-- sqlc.embed(users) would be nice but it does not seem to play well with
|
||||
|
||||
@@ -253,3 +253,20 @@ WHERE id IN (
|
||||
ORDER BY "time" ASC
|
||||
LIMIT @limit_count
|
||||
);
|
||||
|
||||
-- name: DeleteOldAuditLogs :execrows
|
||||
-- Deletes old audit logs based on retention policy, excluding deprecated
|
||||
-- connection events (connect, disconnect, open, close) which are handled
|
||||
-- separately by DeleteOldAuditLogConnectionEvents.
|
||||
WITH old_logs AS (
|
||||
SELECT id
|
||||
FROM audit_logs
|
||||
WHERE
|
||||
"time" < @before_time::timestamp with time zone
|
||||
AND action NOT IN ('connect', 'disconnect', 'open', 'close')
|
||||
ORDER BY "time" ASC
|
||||
LIMIT @limit_count
|
||||
)
|
||||
DELETE FROM audit_logs
|
||||
USING old_logs
|
||||
WHERE audit_logs.id = old_logs.id;
|
||||
|
||||
Reference in New Issue
Block a user