feat: auto-archive inactive chats with audit trail (#24642)

Adds a background job in `dbpurge` that periodically archives chats
inactive beyond a configurable threshold. Each archived root chat gets a
background audit entry tagged `chat_auto_archive`. Disabled by default.

* New `AutoArchiveInactiveChats` SQL query with LATERAL last-activity
subquery and partial index on archive candidates
* `site_configs`-backed `auto_archive_days` setting with admin-only PUT,
any-authenticated-user GET
* Cascade archive via `root_chat_id`; pinned chats and active threads
exempt
* Root-only audit dispatch on detached context, matching manual archive
(`patchChat`) behavior
* 11 subtests covering disabled no-op, boundary, deleted messages, child
activity, pinned exemption, multi-owner, idempotency, and batch
pagination

PR #24643 adds per-owner digest notifications.
PR #24704 adds the requisite UI controls.

> 🤖
This commit is contained in:
Cian Johnston
2026-04-24 14:18:28 +01:00
committed by GitHub
parent 346b46228f
commit a876287d36
26 changed files with 1504 additions and 46 deletions
+1 -1
View File
@@ -1074,7 +1074,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
defer shutdownConns() defer shutdownConns()
// Ensures that old database entries are cleaned up over time! // Ensures that old database entries are cleaned up over time!
purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database, options.DeploymentValues, quartz.NewReal(), options.PrometheusRegistry) purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database, options.DeploymentValues, options.PrometheusRegistry, &coderAPI.Auditor)
defer purger.Close() defer purger.Close()
// Updates workspace usage // Updates workspace usage
+1
View File
@@ -11,6 +11,7 @@ type BackgroundSubsystem string
const ( const (
BackgroundSubsystemDormancy BackgroundSubsystem = "dormancy" BackgroundSubsystemDormancy BackgroundSubsystem = "dormancy"
BackgroundSubsystemChatAutoArchive BackgroundSubsystem = "chat_auto_archive"
) )
func BackgroundTaskFields(subsystem BackgroundSubsystem) map[string]string { func BackgroundTaskFields(subsystem BackgroundSubsystem) map[string]string {
+2
View File
@@ -1201,6 +1201,8 @@ func New(options *Options) *API {
r.Put("/workspace-ttl", api.putChatWorkspaceTTL) r.Put("/workspace-ttl", api.putChatWorkspaceTTL)
r.Get("/retention-days", api.getChatRetentionDays) r.Get("/retention-days", api.getChatRetentionDays)
r.Put("/retention-days", api.putChatRetentionDays) r.Put("/retention-days", api.putChatRetentionDays)
r.Get("/auto-archive-days", api.getChatAutoArchiveDays)
r.Put("/auto-archive-days", api.putChatAutoArchiveDays)
r.Get("/template-allowlist", api.getChatTemplateAllowlist) r.Get("/template-allowlist", api.getChatTemplateAllowlist)
r.Put("/template-allowlist", api.putChatTemplateAllowlist) r.Put("/template-allowlist", api.putChatTemplateAllowlist)
}) })
+30
View File
@@ -644,6 +644,8 @@ var (
rbac.ResourceNotificationMessage.Type: {policy.ActionDelete}, rbac.ResourceNotificationMessage.Type: {policy.ActionDelete},
rbac.ResourceApiKey.Type: {policy.ActionDelete}, rbac.ResourceApiKey.Type: {policy.ActionDelete},
rbac.ResourceAibridgeInterception.Type: {policy.ActionDelete}, rbac.ResourceAibridgeInterception.Type: {policy.ActionDelete},
// Chat auto-archive sets archived=true on inactive chats.
rbac.ResourceChat.Type: {policy.ActionRead, policy.ActionUpdate},
}), }),
User: []rbac.Permission{}, User: []rbac.Permission{},
ByOrgID: map[string]rbac.OrgPermissions{}, ByOrgID: map[string]rbac.OrgPermissions{},
@@ -1593,6 +1595,16 @@ func (q *querier) ArchiveUnusedTemplateVersions(ctx context.Context, arg databas
return q.db.ArchiveUnusedTemplateVersions(ctx, arg) return q.db.ArchiveUnusedTemplateVersions(ctx, arg)
} }
// AutoArchiveInactiveChats is the background write issued by dbpurge. The
// actor must hold policy.ActionUpdate on rbac.ResourceChat; the query's
// LATERAL read of chat_messages rows happens below the RBAC boundary, so
// only the chat row itself is authorized here.
func (q *querier) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) {
	authErr := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceChat)
	if authErr != nil {
		return nil, authErr
	}
	return q.db.AutoArchiveInactiveChats(ctx, arg)
}
func (q *querier) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { func (q *querier) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error {
// This is a system-level operation used by the gitsync // This is a system-level operation used by the gitsync
// background worker to reschedule failed refreshes. Same // background worker to reschedule failed refreshes. Same
@@ -2557,6 +2569,17 @@ func (q *querier) GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUI
return q.db.GetAuthorizationUserRoles(ctx, userID) return q.db.GetAuthorizationUserRoles(ctx, userID)
} }
// GetChatAutoArchiveDays returns the deployment-wide chat auto-archive
// setting that dbpurge reads. No RBAC object is checked: any valid actor in
// the context may read it, and ErrNoActor is returned when there is none.
// The HTTP GET handler allows any authenticated user; the PUT handler
// enforces admin access (policy.ActionUpdate on ResourceDeploymentConfig).
func (q *querier) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) {
	_, hasActor := ActorFromContext(ctx)
	if !hasActor {
		return 0, ErrNoActor
	}
	return q.db.GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays)
}
func (q *querier) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { func (q *querier) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) {
return fetch(q.log, q.auth, q.db.GetChatByID)(ctx, id) return fetch(q.log, q.auth, q.db.GetChatByID)(ctx, id)
} }
@@ -7374,6 +7397,13 @@ func (q *querier) UpsertBoundaryUsageStats(ctx context.Context, arg database.Ups
return q.db.UpsertBoundaryUsageStats(ctx, arg) return q.db.UpsertBoundaryUsageStats(ctx, arg)
} }
// UpsertChatAutoArchiveDays stores the deployment-wide chat auto-archive
// setting. The actor must hold policy.ActionUpdate on
// rbac.ResourceDeploymentConfig.
func (q *querier) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error {
	authErr := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceDeploymentConfig)
	if authErr != nil {
		return authErr
	}
	return q.db.UpsertChatAutoArchiveDays(ctx, autoArchiveDays)
}
func (q *querier) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { func (q *querier) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error {
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceDeploymentConfig); err != nil { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceDeploymentConfig); err != nil {
return err return err
+12
View File
@@ -741,6 +741,18 @@ func (s *MethodTestSuite) TestChats() {
dbm.EXPECT().UpsertChatRetentionDays(gomock.Any(), int32(30)).Return(nil).AnyTimes() dbm.EXPECT().UpsertChatRetentionDays(gomock.Any(), int32(30)).Return(nil).AnyTimes()
check.Args(int32(30)).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate) check.Args(int32(30)).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate)
})) }))
s.Run("GetChatAutoArchiveDays", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) {
dbm.EXPECT().GetChatAutoArchiveDays(gomock.Any(), gomock.Any()).Return(int32(90), nil).AnyTimes()
check.Args(int32(90)).Asserts()
}))
s.Run("UpsertChatAutoArchiveDays", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) {
dbm.EXPECT().UpsertChatAutoArchiveDays(gomock.Any(), int32(90)).Return(nil).AnyTimes()
check.Args(int32(90)).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate)
}))
s.Run("AutoArchiveInactiveChats", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) {
dbm.EXPECT().AutoArchiveInactiveChats(gomock.Any(), database.AutoArchiveInactiveChatsParams{}).Return([]database.AutoArchiveInactiveChatsRow{}, nil).AnyTimes()
check.Args(database.AutoArchiveInactiveChatsParams{}).Asserts(rbac.ResourceChat, policy.ActionUpdate)
}))
s.Run("GetChatMessageByID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { s.Run("GetChatMessageByID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
chat := testutil.Fake(s.T(), faker, database.Chat{}) chat := testutil.Fake(s.T(), faker, database.Chat{})
msg := testutil.Fake(s.T(), faker, database.ChatMessage{ChatID: chat.ID}) msg := testutil.Fake(s.T(), faker, database.ChatMessage{ChatID: chat.ID})
+24
View File
@@ -176,6 +176,14 @@ func (m queryMetricsStore) ArchiveUnusedTemplateVersions(ctx context.Context, ar
return r0, r1 return r0, r1
} }
// AutoArchiveInactiveChats forwards the call to the wrapped store, then
// observes its latency and increments the query counter (labelled by HTTP
// route, HTTP method and query name) before returning the store's results
// unchanged.
func (m queryMetricsStore) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) {
start := time.Now()
r0, r1 := m.s.AutoArchiveInactiveChats(ctx, arg)
m.queryLatencies.WithLabelValues("AutoArchiveInactiveChats").Observe(time.Since(start).Seconds())
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "AutoArchiveInactiveChats").Inc()
return r0, r1
}
func (m queryMetricsStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { func (m queryMetricsStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error {
start := time.Now() start := time.Now()
r0 := m.s.BackoffChatDiffStatus(ctx, arg) r0 := m.s.BackoffChatDiffStatus(ctx, arg)
@@ -1112,6 +1120,14 @@ func (m queryMetricsStore) GetAuthorizationUserRoles(ctx context.Context, userID
return r0, r1 return r0, r1
} }
// GetChatAutoArchiveDays forwards the call to the wrapped store, then
// observes its latency and increments the query counter (labelled by HTTP
// route, HTTP method and query name) before returning the store's results
// unchanged.
func (m queryMetricsStore) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) {
start := time.Now()
r0, r1 := m.s.GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays)
m.queryLatencies.WithLabelValues("GetChatAutoArchiveDays").Observe(time.Since(start).Seconds())
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetChatAutoArchiveDays").Inc()
return r0, r1
}
func (m queryMetricsStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { func (m queryMetricsStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) {
start := time.Now() start := time.Now()
r0, r1 := m.s.GetChatByID(ctx, id) r0, r1 := m.s.GetChatByID(ctx, id)
@@ -5272,6 +5288,14 @@ func (m queryMetricsStore) UpsertBoundaryUsageStats(ctx context.Context, arg dat
return r0, r1 return r0, r1
} }
// UpsertChatAutoArchiveDays forwards the call to the wrapped store, then
// observes its latency and increments the query counter (labelled by HTTP
// route, HTTP method and query name) before returning the store's error
// unchanged.
func (m queryMetricsStore) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error {
start := time.Now()
r0 := m.s.UpsertChatAutoArchiveDays(ctx, autoArchiveDays)
m.queryLatencies.WithLabelValues("UpsertChatAutoArchiveDays").Observe(time.Since(start).Seconds())
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "UpsertChatAutoArchiveDays").Inc()
return r0
}
func (m queryMetricsStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { func (m queryMetricsStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error {
start := time.Now() start := time.Now()
r0 := m.s.UpsertChatDebugLoggingAllowUsers(ctx, allowUsers) r0 := m.s.UpsertChatDebugLoggingAllowUsers(ctx, allowUsers)
+44
View File
@@ -177,6 +177,21 @@ func (mr *MockStoreMockRecorder) ArchiveUnusedTemplateVersions(ctx, arg any) *go
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ArchiveUnusedTemplateVersions", reflect.TypeOf((*MockStore)(nil).ArchiveUnusedTemplateVersions), ctx, arg) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ArchiveUnusedTemplateVersions", reflect.TypeOf((*MockStore)(nil).ArchiveUnusedTemplateVersions), ctx, arg)
} }
// AutoArchiveInactiveChats mocks base method.
// The call is routed through the gomock controller; each returned value is
// type-asserted with the ok flag discarded, so a result of an unexpected
// type comes back as the zero value.
func (m *MockStore) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "AutoArchiveInactiveChats", ctx, arg)
ret0, _ := ret[0].([]database.AutoArchiveInactiveChatsRow)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// AutoArchiveInactiveChats indicates an expected call of AutoArchiveInactiveChats.
// The expectation is recorded on the controller with the mocked method's
// reflected type, and the resulting *gomock.Call is returned to the caller.
func (mr *MockStoreMockRecorder) AutoArchiveInactiveChats(ctx, arg any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AutoArchiveInactiveChats", reflect.TypeOf((*MockStore)(nil).AutoArchiveInactiveChats), ctx, arg)
}
// BackoffChatDiffStatus mocks base method. // BackoffChatDiffStatus mocks base method.
func (m *MockStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { func (m *MockStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error {
m.ctrl.T.Helper() m.ctrl.T.Helper()
@@ -2041,6 +2056,21 @@ func (mr *MockStoreMockRecorder) GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx,
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAuthorizedWorkspacesAndAgentsByOwnerID", reflect.TypeOf((*MockStore)(nil).GetAuthorizedWorkspacesAndAgentsByOwnerID), ctx, ownerID, prepared) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAuthorizedWorkspacesAndAgentsByOwnerID", reflect.TypeOf((*MockStore)(nil).GetAuthorizedWorkspacesAndAgentsByOwnerID), ctx, ownerID, prepared)
} }
// GetChatAutoArchiveDays mocks base method.
// The call is routed through the gomock controller; each returned value is
// type-asserted with the ok flag discarded, so a result of an unexpected
// type comes back as the zero value.
func (m *MockStore) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "GetChatAutoArchiveDays", ctx, defaultAutoArchiveDays)
ret0, _ := ret[0].(int32)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// GetChatAutoArchiveDays indicates an expected call of GetChatAutoArchiveDays.
// The expectation is recorded on the controller with the mocked method's
// reflected type, and the resulting *gomock.Call is returned to the caller.
func (mr *MockStoreMockRecorder) GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetChatAutoArchiveDays", reflect.TypeOf((*MockStore)(nil).GetChatAutoArchiveDays), ctx, defaultAutoArchiveDays)
}
// GetChatByID mocks base method. // GetChatByID mocks base method.
func (m *MockStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { func (m *MockStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) {
m.ctrl.T.Helper() m.ctrl.T.Helper()
@@ -9906,6 +9936,20 @@ func (mr *MockStoreMockRecorder) UpsertBoundaryUsageStats(ctx, arg any) *gomock.
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertBoundaryUsageStats", reflect.TypeOf((*MockStore)(nil).UpsertBoundaryUsageStats), ctx, arg) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertBoundaryUsageStats", reflect.TypeOf((*MockStore)(nil).UpsertBoundaryUsageStats), ctx, arg)
} }
// UpsertChatAutoArchiveDays mocks base method.
// The call is routed through the gomock controller; the returned value is
// type-asserted with the ok flag discarded, so a result that is not an
// error comes back as nil.
func (m *MockStore) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "UpsertChatAutoArchiveDays", ctx, autoArchiveDays)
ret0, _ := ret[0].(error)
return ret0
}
// UpsertChatAutoArchiveDays indicates an expected call of UpsertChatAutoArchiveDays.
// The expectation is recorded on the controller with the mocked method's
// reflected type, and the resulting *gomock.Call is returned to the caller.
func (mr *MockStoreMockRecorder) UpsertChatAutoArchiveDays(ctx, autoArchiveDays any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertChatAutoArchiveDays", reflect.TypeOf((*MockStore)(nil).UpsertChatAutoArchiveDays), ctx, autoArchiveDays)
}
// UpsertChatDebugLoggingAllowUsers mocks base method. // UpsertChatDebugLoggingAllowUsers mocks base method.
func (m *MockStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { func (m *MockStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error {
m.ctrl.T.Helper() m.ctrl.T.Helper()
+163 -11
View File
@@ -3,12 +3,15 @@ package dbpurge
import ( import (
"context" "context"
"io" "io"
"net/http"
"sync/atomic"
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"cdr.dev/slog/v3" "cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/audit"
"github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/dbtime"
@@ -34,18 +37,38 @@ const (
// long enough to cover the maximum interval of a heartbeat event (currently // long enough to cover the maximum interval of a heartbeat event (currently
// 1 hour) plus some buffer. // 1 hour) plus some buffer.
maxTelemetryHeartbeatAge = 24 * time.Hour maxTelemetryHeartbeatAge = 24 * time.Hour
// Batch sizes for chat purging. Both use 1000, which is smaller // Chat batch sizes stay smaller than audit/connection log batches because
// than audit/connection log batches (10000), because chat_files // chat_files rows carry bytea blobs.
// rows contain bytea blob data that make large batches heavier.
chatsBatchSize = 1000 chatsBatchSize = 1000
chatFilesBatchSize = 1000 chatFilesBatchSize = 1000
) )
// defaultChatAutoArchiveBatchSize bounds how many root chats one
// tick will archive by default.
const defaultChatAutoArchiveBatchSize int32 = 1000
// Option customizes the purger instance built by New. Options are applied
// in the order given, after New has filled in its defaults, so an option
// overrides the corresponding default.
type Option func(*instance)
// WithClock returns an Option that swaps the purger's clock for clk.
// When the option is not supplied, New uses quartz.NewReal().
func WithClock(clk quartz.Clock) Option {
	return func(inst *instance) {
		inst.clk = clk
	}
}
// WithChatAutoArchiveBatchSize overrides how many root chats a
// single tick will auto-archive. Defaults to
// defaultChatAutoArchiveBatchSize (1000).
//
// Non-positive values are ignored and the current batch size is kept. The
// value is handed to the auto-archive query as its LimitCount, so zero
// would silently turn the job into a no-op, and a negative limit would
// presumably be rejected by the database and fail the whole purge
// transaction on every tick.
func WithChatAutoArchiveBatchSize(n int32) Option {
	return func(i *instance) {
		if n <= 0 {
			// Keep whatever batch size is already configured.
			return
		}
		i.chatAutoArchiveBatchSize = n
	}
}
// New creates a new periodically purging database instance. // New creates a new periodically purging database instance.
// It is the caller's responsibility to call Close on the returned instance. // Callers must Close the returned instance.
// //
// This is for cleaning up old, unused resources from the database that take up space. // The auditor pointer is loaded on each dispatch tick so runtime
func New(ctx context.Context, logger slog.Logger, db database.Store, vals *codersdk.DeploymentValues, clk quartz.Clock, reg prometheus.Registerer) io.Closer { // entitlement changes (e.g. toggling the audit-log feature) take
// effect without restarting the process.
func New(ctx context.Context, logger slog.Logger, db database.Store, vals *codersdk.DeploymentValues, reg prometheus.Registerer, auditor *atomic.Pointer[audit.Auditor], opts ...Option) io.Closer {
closed := make(chan struct{}) closed := make(chan struct{})
ctx, cancelFunc := context.WithCancel(ctx) ctx, cancelFunc := context.WithCancel(ctx)
@@ -69,18 +92,32 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
}, []string{"record_type"}) }, []string{"record_type"})
reg.MustRegister(recordsPurged) reg.MustRegister(recordsPurged)
chatAutoArchiveRecords := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "coderd",
Subsystem: "chat_auto_archive",
Name: "records_archived_total",
Help: "Total number of chats archived by the auto-archive job (counting both roots and cascaded children).",
})
reg.MustRegister(chatAutoArchiveRecords)
inst := &instance{ inst := &instance{
cancel: cancelFunc, cancel: cancelFunc,
closed: closed, closed: closed,
logger: logger, logger: logger,
vals: vals, vals: vals,
clk: clk, clk: quartz.NewReal(),
auditor: auditor,
iterationDuration: iterationDuration, iterationDuration: iterationDuration,
recordsPurged: recordsPurged, recordsPurged: recordsPurged,
chatAutoArchiveRecords: chatAutoArchiveRecords,
chatAutoArchiveBatchSize: defaultChatAutoArchiveBatchSize,
}
for _, opt := range opts {
opt(inst)
} }
// Start the ticker with the initial delay. // Start the ticker with the initial delay.
ticker := clk.NewTicker(delay) ticker := inst.clk.NewTicker(delay)
doTick := func(ctx context.Context, start time.Time) { doTick := func(ctx context.Context, start time.Time) {
defer ticker.Reset(delay) defer ticker.Reset(delay)
err := inst.purgeTick(ctx, db, start) err := inst.purgeTick(ctx, db, start)
@@ -88,7 +125,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
logger.Error(ctx, "failed to purge old database entries", slog.Error(err)) logger.Error(ctx, "failed to purge old database entries", slog.Error(err))
// Record metrics for failed purge iteration. // Record metrics for failed purge iteration.
duration := clk.Since(start) duration := inst.clk.Since(start)
iterationDuration.WithLabelValues("false").Observe(duration.Seconds()) iterationDuration.WithLabelValues("false").Observe(duration.Seconds())
} }
} }
@@ -97,7 +134,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder
defer close(closed) defer close(closed)
defer ticker.Stop() defer ticker.Stop()
// Force an initial tick. // Force an initial tick.
doTick(ctx, dbtime.Time(clk.Now()).UTC()) doTick(ctx, dbtime.Time(inst.clk.Now()).UTC())
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@@ -125,9 +162,19 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time.
chatRetentionDays = 0 chatRetentionDays = 0
} }
// Same rationale as chat_retention_days: read outside the tx.
chatAutoArchiveDays, err := db.GetChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays)
if err != nil {
i.logger.Warn(ctx, "failed to read chat auto-archive config, skipping auto-archive", slog.Error(err))
chatAutoArchiveDays = 0
}
// Populated inside the tx; dispatched post-commit.
var archivedChats []database.AutoArchiveInactiveChatsRow
// Start a transaction to grab advisory lock, we don't want to run // Start a transaction to grab advisory lock, we don't want to run
// multiple purges at the same time (multiple replicas). // multiple purges at the same time (multiple replicas).
return db.InTx(func(tx database.Store) error { err = db.InTx(func(tx database.Store) error {
// Acquire a lock to ensure that only one instance of the // Acquire a lock to ensure that only one instance of the
// purge is running at a time. // purge is running at a time.
ok, err := tx.TryAcquireLock(ctx, database.LockIDDBPurge) ok, err := tx.TryAcquireLock(ctx, database.LockIDDBPurge)
@@ -258,6 +305,20 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time.
return xerrors.Errorf("failed to delete old chat files: %w", err) return xerrors.Errorf("failed to delete old chat files: %w", err)
} }
} }
// Auto-archive runs after the delete pass so newly
// archived chats aren't eligible for deletion this tick.
if chatAutoArchiveDays > 0 {
archiveCutoff := start.Add(-time.Duration(chatAutoArchiveDays) * 24 * time.Hour)
archivedChats, err = tx.AutoArchiveInactiveChats(ctx, database.AutoArchiveInactiveChatsParams{
ArchiveCutoff: archiveCutoff,
LimitCount: i.chatAutoArchiveBatchSize,
})
if err != nil {
return xerrors.Errorf("failed to auto-archive inactive chats: %w", err)
}
}
i.logger.Debug(ctx, "purged old database entries", i.logger.Debug(ctx, "purged old database entries",
slog.F("workspace_agent_logs", purgedWorkspaceAgentLogs), slog.F("workspace_agent_logs", purgedWorkspaceAgentLogs),
slog.F("expired_api_keys", expiredAPIKeys), slog.F("expired_api_keys", expiredAPIKeys),
@@ -266,6 +327,7 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time.
slog.F("audit_logs", purgedAuditLogs), slog.F("audit_logs", purgedAuditLogs),
slog.F("chats", purgedChats), slog.F("chats", purgedChats),
slog.F("chat_files", purgedChatFiles), slog.F("chat_files", purgedChatFiles),
slog.F("auto_archived_chats", len(archivedChats)),
slog.F("duration", i.clk.Since(start)), slog.F("duration", i.clk.Since(start)),
) )
@@ -285,6 +347,22 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time.
return nil return nil
}, database.DefaultTXOptions().WithID("db_purge")) }, database.DefaultTXOptions().WithID("db_purge"))
if err != nil {
return err
}
// Dispatch audits post-commit on a detached context so ticker
// cancellation doesn't interrupt the loop. No timeout: every root
// must be audited to avoid gaps in the trail. Children inherit
// their root's archival decision and are not audited individually,
// matching the manual archive path (patchChat audits the root only).
if len(archivedChats) > 0 {
i.chatAutoArchiveRecords.Add(float64(len(archivedChats)))
dispatchCtx := context.WithoutCancel(ctx)
i.dispatchChatAutoArchive(dispatchCtx, archivedChats)
}
return nil
} }
type instance struct { type instance struct {
@@ -293,8 +371,11 @@ type instance struct {
logger slog.Logger logger slog.Logger
vals *codersdk.DeploymentValues vals *codersdk.DeploymentValues
clk quartz.Clock clk quartz.Clock
auditor *atomic.Pointer[audit.Auditor]
iterationDuration *prometheus.HistogramVec iterationDuration *prometheus.HistogramVec
recordsPurged *prometheus.CounterVec recordsPurged *prometheus.CounterVec
chatAutoArchiveRecords prometheus.Counter
chatAutoArchiveBatchSize int32
} }
func (i *instance) Close() error { func (i *instance) Close() error {
@@ -302,3 +383,74 @@ func (i *instance) Close() error {
<-i.closed <-i.closed
return nil return nil
} }
// chatFromAutoArchiveRow copies an auto-archive query row into a
// database.Chat so it can be used as audit.Auditable[database.Chat].
//
// Every column maps one-to-one except Labels: sqlc's StringMap override
// doesn't reach CTE-aliased columns, so Labels arrives as raw JSON bytes
// and is decoded here with StringMap.Scan. A decode failure is logged as a
// warning and does not stop the conversion.
func chatFromAutoArchiveRow(logger slog.Logger, r database.AutoArchiveInactiveChatsRow) database.Chat {
	chat := database.Chat{
		ID:                  r.ID,
		OwnerID:             r.OwnerID,
		OrganizationID:      r.OrganizationID,
		WorkspaceID:         r.WorkspaceID,
		BuildID:             r.BuildID,
		AgentID:             r.AgentID,
		Title:               r.Title,
		Status:              r.Status,
		WorkerID:            r.WorkerID,
		StartedAt:           r.StartedAt,
		HeartbeatAt:         r.HeartbeatAt,
		CreatedAt:           r.CreatedAt,
		UpdatedAt:           r.UpdatedAt,
		ParentChatID:        r.ParentChatID,
		RootChatID:          r.RootChatID,
		LastModelConfigID:   r.LastModelConfigID,
		Archived:            r.Archived,
		LastError:           r.LastError,
		Mode:                r.Mode,
		MCPServerIDs:        r.MCPServerIDs,
		PinOrder:            r.PinOrder,
		LastReadMessageID:   r.LastReadMessageID,
		LastInjectedContext: r.LastInjectedContext,
		DynamicTools:        r.DynamicTools,
		PlanMode:            r.PlanMode,
		ClientType:          r.ClientType,
	}

	// Labels starts as the zero StringMap and is filled in place from the
	// raw JSON column.
	scanErr := chat.Labels.Scan([]byte(r.Labels))
	if scanErr != nil {
		logger.Warn(context.Background(), "failed to parse chat labels from auto-archive row",
			slog.F("chat_id", r.ID),
			slog.F("raw_labels", string(r.Labels)),
			slog.Error(scanErr),
		)
	}
	return chat
}
// dispatchChatAutoArchive writes one background audit entry per archived
// root chat. Rows that have a parent chat are skipped: children inherit
// their root's archival decision, matching the manual archive path
// (patchChat audits the root only). purgeTick passes a detached context so
// ticker cancellation cannot truncate the trail.
//
// Each entry records the chat with Archived=false as the old state and the
// archived row as the new state, attributed to the chat's owner and tagged
// with the chat_auto_archive background subsystem.
//
// If no auditor is available (nil holder, or nothing stored in it yet) the
// function logs a warning and returns rather than panicking on a nil
// dereference inside the purge loop.
func (i *instance) dispatchChatAutoArchive(ctx context.Context, archived []database.AutoArchiveInactiveChatsRow) {
	if len(archived) == 0 {
		return
	}

	// Loaded once per dispatch, so a swapped auditor takes effect on the
	// next tick.
	var loaded *audit.Auditor
	if i.auditor != nil {
		loaded = i.auditor.Load()
	}
	if loaded == nil {
		i.logger.Warn(ctx, "no auditor available, skipping chat auto-archive audit entries",
			slog.F("archived_chats", len(archived)),
		)
		return
	}
	auditor := *loaded

	// The payload depends only on the subsystem, so build it once instead
	// of once per row.
	additionalFields := audit.BackgroundTaskFieldsBytes(ctx, i.logger, audit.BackgroundSubsystemChatAutoArchive)

	for _, row := range archived {
		if row.ParentChatID.Valid {
			continue // Children inherit root's archival; audit roots only.
		}
		after := chatFromAutoArchiveRow(i.logger, row)
		// The pre-archive state differs from the row only in the flag.
		before := after
		before.Archived = false
		audit.BackgroundAudit(ctx, &audit.BackgroundAuditParams[database.Chat]{
			Audit:            auditor,
			Log:              i.logger,
			UserID:           row.OwnerID,
			OrganizationID:   row.OrganizationID,
			Action:           database.AuditActionWrite,
			Old:              before,
			New:              after,
			Status:           http.StatusOK,
			AdditionalFields: additionalFields,
		})
	}
}
+646 -19
View File
@@ -8,6 +8,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"slices" "slices"
"sync/atomic"
"testing" "testing"
"time" "time"
@@ -22,6 +23,7 @@ import (
"cdr.dev/slog/v3" "cdr.dev/slog/v3"
"cdr.dev/slog/v3/sloggers/slogtest" "cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/audit"
"github.com/coder/coder/v2/coderd/coderdtest/promhelp" "github.com/coder/coder/v2/coderd/coderdtest/promhelp"
"github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbgen"
@@ -55,8 +57,9 @@ func TestPurge(t *testing.T) {
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB := dbmock.NewMockStore(gomock.NewController(t))
mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes() mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes()
mDB.EXPECT().GetChatAutoArchiveDays(gomock.Any(), codersdk.DefaultChatAutoArchiveDays).Return(int32(0), nil).AnyTimes()
mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")).Return(nil).Times(2) mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")).Return(nil).Times(2)
purger := dbpurge.New(context.Background(), testutil.Logger(t), mDB, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) purger := dbpurge.New(context.Background(), testutil.Logger(t), mDB, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
<-done // wait for doTick() to run. <-done // wait for doTick() to run.
require.NoError(t, purger.Close()) require.NoError(t, purger.Close())
} }
@@ -90,7 +93,7 @@ func TestMetrics(t *testing.T) {
Retention: codersdk.RetentionConfig{ Retention: codersdk.RetentionConfig{
APIKeys: serpent.Duration(7 * 24 * time.Hour), // 7 days retention APIKeys: serpent.Duration(7 * 24 * time.Hour), // 7 days retention
}, },
}, clk, reg) }, reg, nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -151,6 +154,7 @@ func TestMetrics(t *testing.T) {
ctrl := gomock.NewController(t) ctrl := gomock.NewController(t)
mDB := dbmock.NewMockStore(ctrl) mDB := dbmock.NewMockStore(ctrl)
mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes() mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes()
mDB.EXPECT().GetChatAutoArchiveDays(gomock.Any(), codersdk.DefaultChatAutoArchiveDays).Return(int32(0), nil).AnyTimes()
mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")). mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")).
Return(xerrors.New("simulated database error")). Return(xerrors.New("simulated database error")).
MinTimes(1) MinTimes(1)
@@ -158,7 +162,7 @@ func TestMetrics(t *testing.T) {
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, mDB, &codersdk.DeploymentValues{}, clk, reg) closer := dbpurge.New(ctx, logger, mDB, &codersdk.DeploymentValues{}, reg, nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -248,7 +252,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
}) })
// when // when
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
// then // then
@@ -273,7 +277,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
// Start a new purger to immediately trigger delete after rollup. // Start a new purger to immediately trigger delete after rollup.
_ = closer.Close() _ = closer.Close()
closer = dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer = dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
// then // then
@@ -368,7 +372,7 @@ func TestDeleteOldWorkspaceAgentLogs(t *testing.T) {
Retention: codersdk.RetentionConfig{ Retention: codersdk.RetentionConfig{
WorkspaceAgentLogs: serpent.Duration(7 * 24 * time.Hour), WorkspaceAgentLogs: serpent.Duration(7 * 24 * time.Hour),
}, },
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
<-done // doTick() has now run. <-done // doTick() has now run.
@@ -424,6 +428,63 @@ func awaitDoTick(ctx context.Context, t *testing.T, clk *quartz.Mock) chan struc
return ch return ch
} }
// tickDriver drives one or more dbpurge ticks against a single
// dbpurge.New instance. Unlike awaitDoTick it must be constructed
// *before* dbpurge.New so its traps are installed when the forced
// initial tick fires. awaitInitial waits for the forced tick's
// doTick to complete without advancing the clock, so no loop
// iteration has yet run; awaitNext then explicitly drives each
// subsequent iteration. This keeps each tick's observable state
// isolated and deterministic, which matters for tests where
// per-tick work differs (e.g. batch-size pagination).
type tickDriver struct {
// clk is the mock clock the traps are installed on; awaitNext
// advances it.
clk *quartz.Mock
// trapNow intercepts Now() calls on clk.
trapNow *quartz.Trap
// trapStop intercepts ticker Stop() calls on clk.
trapStop *quartz.Trap
// trapReset intercepts ticker Reset() calls on clk.
trapReset *quartz.Trap
}
// newTickDriver installs Now, TickerStop and TickerReset traps on clk
// and returns a tickDriver that owns them. The caller is responsible
// for releasing the traps with close.
func newTickDriver(t *testing.T, clk *quartz.Mock) *tickDriver {
	t.Helper()

	// Install the traps one at a time, in the same order as the
	// struct fields: Now, then TickerStop, then TickerReset.
	nowTrap := clk.Trap().Now()
	stopTrap := clk.Trap().TickerStop()
	resetTrap := clk.Trap().TickerReset()

	return &tickDriver{
		clk:       clk,
		trapNow:   nowTrap,
		trapStop:  stopTrap,
		trapReset: resetTrap,
	}
}
// close releases every trap held by the driver. Register it with
// defer *after* the defer that closes the dbpurge instance: defers
// run last-in-first-out, so the traps are closed first and the
// shutdown path's ticker.Stop() is released instead of blocking on
// a trap nobody is servicing.
func (d *tickDriver) close() {
	// Closed in this fixed order: Reset, Stop, Now.
	for _, trap := range []*quartz.Trap{d.trapReset, d.trapStop, d.trapNow} {
		trap.Close()
	}
}
// awaitInitial blocks until the forced initial tick has been
// observed: first the trapped Now() call, then the trapped ticker
// Reset(), releasing each one as it arrives. The clock is never
// advanced here, so no loop iteration runs as a result of this call.
func (d *tickDriver) awaitInitial(ctx context.Context, t *testing.T) {
	t.Helper()

	nowCall := d.trapNow.MustWait(ctx)
	nowCall.MustRelease(ctx)

	resetCall := d.trapReset.MustWait(ctx)
	resetCall.MustRelease(ctx)
}
// awaitNext drives exactly one loop iteration. It advances the clock
// to the next scheduled event (asserting that this is the 10-minute
// tick interval), waits for the advance to be processed, then
// releases the trapped ticker Stop() and Reset() calls in that
// order. Once the Reset has been released the driver is ready for
// another awaitNext.
func (d *tickDriver) awaitNext(ctx context.Context, t *testing.T) {
	t.Helper()

	step, advance := d.clk.AdvanceNext()
	require.Equal(t, 10*time.Minute, step)
	advance.MustWait(ctx)

	stopCall := d.trapStop.MustWait(ctx)
	stopCall.MustRelease(ctx)

	resetCall := d.trapReset.MustWait(ctx)
	resetCall.MustRelease(ctx)
}
func assertNoWorkspaceAgentLogs(ctx context.Context, t *testing.T, db database.Store, agentID uuid.UUID) { func assertNoWorkspaceAgentLogs(ctx context.Context, t *testing.T, db database.Store, agentID uuid.UUID) {
t.Helper() t.Helper()
agentLogs, err := db.GetWorkspaceAgentLogsAfter(ctx, database.GetWorkspaceAgentLogsAfterParams{ agentLogs, err := db.GetWorkspaceAgentLogsAfter(ctx, database.GetWorkspaceAgentLogsAfterParams{
@@ -583,7 +644,7 @@ func TestDeleteOldWorkspaceAgentLogsRetention(t *testing.T) {
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
Retention: tc.retentionConfig, Retention: tc.retentionConfig,
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -674,7 +735,7 @@ func TestDeleteOldProvisionerDaemons(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
// when // when
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
// then // then
@@ -778,7 +839,7 @@ func TestDeleteOldAuditLogConnectionEvents(t *testing.T) {
// Run the purge // Run the purge
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
// Wait for tick // Wait for tick
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -941,7 +1002,7 @@ func TestDeleteOldTelemetryHeartbeats(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
<-done // doTick() has now run. <-done // doTick() has now run.
@@ -1060,7 +1121,7 @@ func TestDeleteOldConnectionLogs(t *testing.T) {
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
Retention: tc.retentionConfig, Retention: tc.retentionConfig,
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1316,7 +1377,7 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
Retention: serpent.Duration(tc.retention), Retention: serpent.Duration(tc.retention),
}, },
}, },
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1403,7 +1464,7 @@ func TestDeleteOldAuditLogs(t *testing.T) {
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
Retention: tc.retentionConfig, Retention: tc.retentionConfig,
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1493,7 +1554,7 @@ func TestDeleteOldAuditLogs(t *testing.T) {
Retention: codersdk.RetentionConfig{ Retention: codersdk.RetentionConfig{
AuditLogs: serpent.Duration(retentionPeriod), AuditLogs: serpent.Duration(retentionPeriod),
}, },
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1613,7 +1674,7 @@ func TestDeleteExpiredAPIKeys(t *testing.T) {
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{
Retention: tc.retentionConfig, Retention: tc.retentionConfig,
}, clk, prometheus.NewRegistry()) }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1648,6 +1709,23 @@ func ptr[T any](v T) *T {
return &v return &v
} }
// nopAuditorPtr builds the auditor argument for dbpurge.New in tests
// that do not inspect audit output: an atomic pointer that already
// holds a no-op auditor.
func nopAuditorPtr(t *testing.T) *atomic.Pointer[audit.Auditor] {
	t.Helper()

	auditor := audit.NewNop()
	holder := new(atomic.Pointer[audit.Auditor])
	holder.Store(&auditor)
	return holder
}
// mockAuditorPtr stores m, converted to the audit.Auditor interface,
// in a fresh atomic pointer so tests can hand a mock auditor to
// dbpurge.New and still read the recorded entries from m afterwards.
func mockAuditorPtr(m *audit.MockAuditor) *atomic.Pointer[audit.Auditor] {
	var asAuditor audit.Auditor = m

	holder := new(atomic.Pointer[audit.Auditor])
	holder.Store(&asAuditor)
	return holder
}
//nolint:paralleltest // It uses LockIDDBPurge. //nolint:paralleltest // It uses LockIDDBPurge.
func TestDeleteOldChatFiles(t *testing.T) { func TestDeleteOldChatFiles(t *testing.T) {
now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC)
@@ -1742,7 +1820,7 @@ func TestDeleteOldChatFiles(t *testing.T) {
oldFileID := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-31*24*time.Hour)) oldFileID := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-31*24*time.Hour))
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1799,7 +1877,7 @@ func TestDeleteOldChatFiles(t *testing.T) {
activeChat := createChat(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, deps.modelConfig.ID, false, now) activeChat := createChat(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, deps.modelConfig.ID, false, now)
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1856,7 +1934,7 @@ func TestDeleteOldChatFiles(t *testing.T) {
fileBoundary := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-30*24*time.Hour).Add(time.Hour)) fileBoundary := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-30*24*time.Hour).Add(time.Hour))
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -1936,7 +2014,7 @@ func TestDeleteOldChatFiles(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
done := awaitDoTick(ctx, t, clk) done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk))
defer closer.Close() defer closer.Close()
testutil.TryReceive(ctx, t, done) testutil.TryReceive(ctx, t, done)
@@ -2137,3 +2215,552 @@ func TestDeleteOldChatFiles(t *testing.T) {
}) })
} }
} }
// Helpers for TestAutoArchiveInactiveChats.

// archiveTestDeps seeds the rows a chat needs before it can be
// inserted: a user, an organization with that user as a member, an
// enabled "openai" chat provider, and a chat model config for that
// provider. The user, organization and model config are returned;
// the provider row is not.
func archiveTestDeps(ctx context.Context, t *testing.T, db database.Store) chatAutoArchiveDeps {
	t.Helper()

	owner := dbgen.User(t, db, database.User{})
	organization := dbgen.Organization(t, db, database.Organization{})
	_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
		UserID:         owner.ID,
		OrganizationID: organization.ID,
	})

	providerParams := database.InsertChatProviderParams{
		Provider:             "openai",
		DisplayName:          "OpenAI",
		Enabled:              true,
		CentralApiKeyEnabled: true,
	}
	_, err := db.InsertChatProvider(ctx, providerParams)
	require.NoError(t, err)

	modelParams := database.InsertChatModelConfigParams{
		Provider:     "openai",
		Model:        "test-model",
		ContextLimit: 8192,
		Options:      json.RawMessage("{}"),
	}
	modelConfig, err := db.InsertChatModelConfig(ctx, modelParams)
	require.NoError(t, err)

	return chatAutoArchiveDeps{
		user:        owner,
		org:         organization,
		modelConfig: modelConfig,
	}
}
// chatAutoArchiveDeps carries the rows seeded by archiveTestDeps that
// the chat helpers need when inserting chats and messages.
type chatAutoArchiveDeps struct {
// user becomes the OwnerID of chats inserted by createArchiveChat.
user database.User
// org becomes the OrganizationID of chats inserted by
// createArchiveChat.
org database.Organization
// modelConfig becomes the LastModelConfigID of chats inserted by
// createArchiveChat.
modelConfig database.ChatModelConfig
}
// archiveHarness bundles the per-subtest setup shared by every
// TestAutoArchiveInactiveChats case. Subtests read fields off the
// harness directly instead of repeating six lines of identical
// plumbing.
type archiveHarness struct {
// ctx is the subtest context, created with testutil.WaitLong.
ctx context.Context
// clk is the mock clock, pre-set to the subtest's "now".
clk *quartz.Mock
// db is the store handle used for inserts and reads.
db database.Store
// rawDB is the raw SQL handle used to backdate timestamps and flip
// columns directly.
rawDB *sql.DB
// logger is a test logger configured with IgnoreErrors.
logger slog.Logger
// deps holds the user, organization and model config seeded by
// archiveTestDeps.
deps chatAutoArchiveDeps
}
// newArchiveHarness builds a fresh harness for one subtest: a
// WaitLong context, a mock clock set to now, a new test database
// (dumped on failure), an error-tolerant test logger, and the seed
// rows produced by archiveTestDeps.
func newArchiveHarness(t *testing.T, now time.Time) *archiveHarness {
	t.Helper()

	h := &archiveHarness{}

	h.ctx = testutil.Context(t, testutil.WaitLong)

	// Pin the mock clock to the requested instant before anything
	// else can observe it.
	h.clk = quartz.NewMock(t)
	h.clk.Set(now).MustWait(h.ctx)

	h.db, _, h.rawDB = dbtestutil.NewDBWithSQLDB(t, dbtestutil.WithDumpOnFailure())
	h.logger = slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
	h.deps = archiveTestDeps(h.ctx, t, h.db)

	return h
}
// createArchiveChat inserts a UI chat in the "waiting" status owned
// by deps.user in deps.org, then rewrites its created_at and
// updated_at columns to createdAt with a raw UPDATE. The title is
// stored as given. The returned value is the row as InsertChat
// returned it, so its timestamp fields do not reflect the backdate.
func createArchiveChat(ctx context.Context, t *testing.T, db database.Store, rawDB *sql.DB, deps chatAutoArchiveDeps, title string, createdAt time.Time) database.Chat {
	t.Helper()

	params := database.InsertChatParams{
		OrganizationID:    deps.org.ID,
		OwnerID:           deps.user.ID,
		LastModelConfigID: deps.modelConfig.ID,
		Title:             title,
		Status:            database.ChatStatusWaiting,
		ClientType:        database.ChatClientTypeUi,
	}
	inserted, err := db.InsertChat(ctx, params)
	require.NoError(t, err)

	// Backdate both timestamps directly in the table.
	const backdateChat = "UPDATE chats SET created_at = $1, updated_at = $1 WHERE id = $2"
	_, err = rawDB.ExecContext(ctx, backdateChat, createdAt, inserted.ID)
	require.NoError(t, err)

	return inserted
}
// insertTextMessage adds a single user-role text message ("hello")
// to chatID and then rewrites that message's created_at to createdAt
// with a raw UPDATE. Tests use it to control the timestamp of a
// chat's most recent message, which is what the auto-archive query
// treats as last activity.
func insertTextMessage(ctx context.Context, t *testing.T, db database.Store, rawDB *sql.DB, chatID, userID, modelConfigID uuid.UUID, createdAt time.Time) {
	t.Helper()

	// Every column is a one-element slice: exactly one message is
	// inserted, with all token, cost and runtime counters at zero.
	params := database.InsertChatMessagesParams{
		ChatID:              chatID,
		CreatedBy:           []uuid.UUID{userID},
		ModelConfigID:       []uuid.UUID{modelConfigID},
		Role:                []database.ChatMessageRole{database.ChatMessageRoleUser},
		Content:             []string{`[{"type":"text","text":"hello"}]`},
		ContentVersion:      []int16{0},
		Visibility:          []database.ChatMessageVisibility{database.ChatMessageVisibilityBoth},
		InputTokens:         []int64{0},
		OutputTokens:        []int64{0},
		TotalTokens:         []int64{0},
		ReasoningTokens:     []int64{0},
		CacheCreationTokens: []int64{0},
		CacheReadTokens:     []int64{0},
		ContextLimit:        []int64{0},
		Compressed:          []bool{false},
		TotalCostMicros:     []int64{0},
		RuntimeMs:           []int64{0},
		ProviderResponseID:  []string{""},
	}
	inserted, err := db.InsertChatMessages(ctx, params)
	require.NoError(t, err)
	require.Len(t, inserted, 1)

	const backdateMessage = "UPDATE chat_messages SET created_at = $1 WHERE id = $2"
	_, err = rawDB.ExecContext(ctx, backdateMessage, createdAt, inserted[0].ID)
	require.NoError(t, err)
}
// TestAutoArchiveInactiveChats exercises the dbpurge auto-archive job
// end to end. Every case builds its own archiveHarness (fresh
// database and mock clock), seeds chats and messages with backdated
// timestamps, starts a dbpurge instance wired to a mock auditor, and
// then checks both the archived flag on the chats and the audit
// entries recorded by the mock.
//
// Cases that only need the forced initial tick use awaitDoTick;
// cases that must observe individual ticks use tickDriver.
//
//nolint:paralleltest // It uses LockIDDBPurge.
func TestAutoArchiveInactiveChats(t *testing.T) {
// Fixed "now" for every case; all backdated timestamps are relative
// to this instant.
now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC)
tests := []struct {
name string
run func(t *testing.T)
}{
// Setting left at its default value: nothing is archived or
// audited.
{
name: "AutoArchiveDisabled",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
// Guard: this case relies on the default being zero.
require.Zero(t, codersdk.DefaultChatAutoArchiveDays)
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays))
// Chat older than any reasonable cutoff.
staleChat := createArchiveChat(ctx, t, db, rawDB, deps, "stale-chat", now.Add(-365*24*time.Hour))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshed, err := db.GetChatByID(ctx, staleChat.ID)
require.NoError(t, err)
require.False(t, refreshed.Archived, "chat should stay active when auto-archive is disabled")
require.Empty(t, auditor.AuditLogs(), "no audit log entries expected")
},
},
// Happy path: one stale root is archived and audited, one active
// root is left alone.
{
name: "ArchivesInactiveRoot",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90)))
// Inactive root: newest message 100 days old.
staleChat := createArchiveChat(ctx, t, db, rawDB, deps, "stale-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, staleChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
// Active root: message 10 days old, within cutoff.
activeChat := createArchiveChat(ctx, t, db, rawDB, deps, "active-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, activeChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshedStale, err := db.GetChatByID(ctx, staleChat.ID)
require.NoError(t, err)
require.True(t, refreshedStale.Archived, "stale chat should be auto-archived")
refreshedActive, err := db.GetChatByID(ctx, activeChat.ID)
require.NoError(t, err)
require.False(t, refreshedActive.Archived, "active chat should stay live")
logs := auditor.AuditLogs()
require.Len(t, logs, 1, "expected one audit entry")
require.Equal(t, staleChat.ID, logs[0].ResourceID)
require.Equal(t, database.ResourceTypeChat, logs[0].ResourceType)
require.Equal(t, database.AuditActionWrite, logs[0].Action)
require.Contains(t, string(logs[0].AdditionalFields), "chat_auto_archive",
"audit entry must carry the auto-archive subsystem tag")
},
},
// Boundary: last activity exactly at the cutoff survives; one
// second older is archived.
{
name: "ExactCutoffBoundary",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90)))
// The forced initial tick uses start = now. Compute
// the cutoff from that tick's perspective so the
// boundary is deterministic.
cutoff := now.Add(-90 * 24 * time.Hour)
// Message exactly at the cutoff: query uses strict <,
// so this chat must survive.
exactChat := createArchiveChat(ctx, t, db, rawDB, deps, "exact", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, exactChat.ID, deps.user.ID, deps.modelConfig.ID, cutoff)
// Message one second before the cutoff: should be archived.
justOverChat := createArchiveChat(ctx, t, db, rawDB, deps, "just-over", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, justOverChat.ID, deps.user.ID, deps.modelConfig.ID, cutoff.Add(-time.Second))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
// Use newTickDriver for precise tick control so we
// observe the forced initial tick's results without
// racing with a second tick.
driver := newTickDriver(t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
// Defer driver.close() after closer.Close(): defers
// run LIFO, so driver cleanup frees shutdown's
// ticker.Stop() before the dbpurge goroutine blocks
// on it.
defer closer.Close()
defer driver.close()
driver.awaitInitial(ctx, t)
refreshedExact, err := db.GetChatByID(ctx, exactChat.ID)
require.NoError(t, err)
require.False(t, refreshedExact.Archived, "chat at exact cutoff must survive (strict <)")
refreshedOver, err := db.GetChatByID(ctx, justOverChat.ID)
require.NoError(t, err)
require.True(t, refreshedOver.Archived, "chat one second past cutoff must be archived")
require.Len(t, auditor.AuditLogs(), 1, "only the just-over chat should produce an audit entry")
},
},
// Soft-deleted messages must not count as activity.
{
name: "DeletedMessagesIgnored",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90)))
// Chat created 120 days ago with a recent message
// (10 days old) that is then soft-deleted. The
// LATERAL subquery filters cm.deleted = false, so
// the chat should fall back to created_at and be
// archived.
chat := createArchiveChat(ctx, t, db, rawDB, deps, "deleted-msg", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, chat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour))
// Soft-delete all messages on this chat.
_, err := rawDB.ExecContext(ctx, "UPDATE chat_messages SET deleted = true WHERE chat_id = $1", chat.ID)
require.NoError(t, err)
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshed, err := db.GetChatByID(ctx, chat.ID)
require.NoError(t, err)
require.True(t, refreshed.Archived, "chat with only deleted messages should be archived")
require.Len(t, auditor.AuditLogs(), 1)
},
},
// Recent activity on a child chat keeps its stale root (and the
// child) unarchived.
{
name: "ChildActivityKeepsRootAlive",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90)))
// Stale root with no messages of its own.
root := createArchiveChat(ctx, t, db, rawDB, deps, "stale-root", now.Add(-120*24*time.Hour))
// Child linked to root with a recent message (10 days old,
// well within the 90-day cutoff).
child := createArchiveChat(ctx, t, db, rawDB, deps, "active-child", now.Add(-120*24*time.Hour))
_, err := rawDB.ExecContext(ctx, "UPDATE chats SET parent_chat_id = $1, root_chat_id = $1 WHERE id = $2", root.ID, child.ID)
require.NoError(t, err)
insertTextMessage(ctx, t, db, rawDB, child.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshedRoot, err := db.GetChatByID(ctx, root.ID)
require.NoError(t, err)
require.False(t, refreshedRoot.Archived, "root must stay active because child has recent activity")
refreshedChild, err := db.GetChatByID(ctx, child.ID)
require.NoError(t, err)
require.False(t, refreshedChild.Archived, "child must stay active")
require.Empty(t, auditor.AuditLogs(), "no chats should be archived")
},
},
// Stale chats in running, requires_action, pending or paused
// status are skipped; a stale completed chat is the control.
{
name: "SkipsActiveStatusChats",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90)))
// Stale chats whose status prevents archiving.
runningChat := createArchiveChat(ctx, t, db, rawDB, deps, "running-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, runningChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
_, err := rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRunning, runningChat.ID)
require.NoError(t, err)
requiresActionChat := createArchiveChat(ctx, t, db, rawDB, deps, "requires-action-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, requiresActionChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRequiresAction, requiresActionChat.ID)
require.NoError(t, err)
pendingChat := createArchiveChat(ctx, t, db, rawDB, deps, "pending-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, pendingChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusPending, pendingChat.ID)
require.NoError(t, err)
pausedChat := createArchiveChat(ctx, t, db, rawDB, deps, "paused-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, pausedChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusPaused, pausedChat.ID)
require.NoError(t, err)
// Control: a stale chat with archivable status that
// should be archived.
completedChat := createArchiveChat(ctx, t, db, rawDB, deps, "completed-chat", now.Add(-120*24*time.Hour))
insertTextMessage(ctx, t, db, rawDB, completedChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour))
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusCompleted, completedChat.ID)
require.NoError(t, err)
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshedRunning, err := db.GetChatByID(ctx, runningChat.ID)
require.NoError(t, err)
require.False(t, refreshedRunning.Archived, "running chat must not be archived")
refreshedRA, err := db.GetChatByID(ctx, requiresActionChat.ID)
require.NoError(t, err)
require.False(t, refreshedRA.Archived, "requires_action chat must not be archived")
refreshedPending, err := db.GetChatByID(ctx, pendingChat.ID)
require.NoError(t, err)
require.False(t, refreshedPending.Archived, "pending chat must not be archived")
refreshedPaused, err := db.GetChatByID(ctx, pausedChat.ID)
require.NoError(t, err)
require.False(t, refreshedPaused.Archived, "paused chat must not be archived")
refreshedCompleted, err := db.GetChatByID(ctx, completedChat.ID)
require.NoError(t, err)
require.True(t, refreshedCompleted.Archived, "completed stale chat should be archived")
logs := auditor.AuditLogs()
require.Len(t, logs, 1, "only the completed chat should produce an audit entry")
require.Equal(t, completedChat.ID, logs[0].ResourceID)
},
},
// Pinned chats are exempt; archiving a root cascades to its child
// with a single audit entry for the root.
{
name: "SkipsPinnedAndChildren",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30)))
// Pinned stale chat: should be skipped.
pinnedChat := createArchiveChat(ctx, t, db, rawDB, deps, "pinned-chat", now.Add(-90*24*time.Hour))
_, err := rawDB.ExecContext(ctx, "UPDATE chats SET pin_order = 1 WHERE id = $1", pinnedChat.ID)
require.NoError(t, err)
// Stale root with a child.
root := createArchiveChat(ctx, t, db, rawDB, deps, "root-chat", now.Add(-90*24*time.Hour))
child := createArchiveChat(ctx, t, db, rawDB, deps, "child-chat", now.Add(-90*24*time.Hour))
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET parent_chat_id = $1, root_chat_id = $1 WHERE id = $2", root.ID, child.ID)
require.NoError(t, err)
// Give the child an active status to prove the cascade is
// status-blind by design. If someone adds a status filter
// to the cascade CTE, this assertion will catch it.
_, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRunning, child.ID)
require.NoError(t, err)
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
refreshedPinned, err := db.GetChatByID(ctx, pinnedChat.ID)
require.NoError(t, err)
require.False(t, refreshedPinned.Archived, "pinned chat must be skipped")
refreshedRoot, err := db.GetChatByID(ctx, root.ID)
require.NoError(t, err)
require.True(t, refreshedRoot.Archived, "root should be archived")
refreshedChild, err := db.GetChatByID(ctx, child.ID)
require.NoError(t, err)
require.True(t, refreshedChild.Archived, "child should be cascade-archived")
// One audit entry for the root; the cascaded child is
// not audited individually.
require.Len(t, auditor.AuditLogs(), 1)
},
},
// Audit entries are attributed to the owner of each archived chat.
{
name: "MultipleOwners",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
user2 := dbgen.User(t, db, database.User{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user2.ID, OrganizationID: deps.org.ID})
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30)))
// Two stale roots per owner, backdated well past the
// 30-day cutoff.
u1Deps := deps
u2Deps := chatAutoArchiveDeps{user: user2, org: deps.org, modelConfig: deps.modelConfig}
createArchiveChat(ctx, t, db, rawDB, u1Deps, "u1-a", now.Add(-60*24*time.Hour))
createArchiveChat(ctx, t, db, rawDB, u1Deps, "u1-b", now.Add(-60*24*time.Hour))
createArchiveChat(ctx, t, db, rawDB, u2Deps, "u2-a", now.Add(-60*24*time.Hour))
createArchiveChat(ctx, t, db, rawDB, u2Deps, "u2-b", now.Add(-60*24*time.Hour))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
done := awaitDoTick(ctx, t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
defer closer.Close()
testutil.TryReceive(ctx, t, done)
// Four audit rows, one per archived root. Each entry
// carries the owning UserID so downstream consumers can
// correlate per-owner activity.
logs := auditor.AuditLogs()
require.Len(t, logs, 4)
byUser := map[uuid.UUID]int{}
for _, l := range logs {
byUser[l.UserID]++
}
require.Equal(t, 2, byUser[deps.user.ID])
require.Equal(t, 2, byUser[user2.ID])
},
},
// A second tick must not re-audit chats archived by the first.
{
name: "SecondTickIdempotent",
run: func(t *testing.T) {
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30)))
// Two stale roots seeded before the first tick.
firstA := createArchiveChat(ctx, t, db, rawDB, deps, "first-a", now.Add(-60*24*time.Hour))
firstB := createArchiveChat(ctx, t, db, rawDB, deps, "first-b", now.Add(-60*24*time.Hour))
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
driver := newTickDriver(t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk))
// Defer driver.close() after closer.Close(): defers
// run LIFO, so this frees shutdown's ticker.Stop()
// before the dbpurge goroutine blocks on it.
defer closer.Close()
defer driver.close()
driver.awaitInitial(ctx, t)
// Tick 1: both archived.
require.Len(t, auditor.AuditLogs(), 2, "tick 1 audits")
// Seed a third stale root between ticks so tick 2 has
// genuine work and we can distinguish "ignored already
// archived" from "ignored everything".
third := createArchiveChat(ctx, t, db, rawDB, deps, "second-c", now.Add(-60*24*time.Hour))
driver.awaitNext(ctx, t)
// Tick 2: exactly one new audit for the third chat;
// tick 1's rows must not be re-archived.
require.Len(t, auditor.AuditLogs(), 3, "tick 2 cumulative audits")
// All three chats should remain archived.
for _, id := range []uuid.UUID{firstA.ID, firstB.ID, third.ID} {
refreshed, err := db.GetChatByID(ctx, id)
require.NoError(t, err)
require.True(t, refreshed.Archived, "chat %s should remain archived", id)
}
},
},
// The per-tick batch size caps how many roots one tick archives.
{
name: "BatchSizePagination",
run: func(t *testing.T) {
// With 27 stale roots and batch size 20, tick 1
// archives 20, tick 2 archives the remaining 7, tick 3
// archives none. We assert the audit dispatch follows
// the same pattern: no dispatch runs when rows == 0,
// so tick 3 emits no new audits.
h := newArchiveHarness(t, now)
ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps
require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30)))
const total = 27
for i := range total {
createArchiveChat(ctx, t, db, rawDB, deps,
fmt.Sprintf("page-%02d", i),
now.Add(-60*24*time.Hour))
}
auditor := audit.NewMock()
auditorPtr := mockAuditorPtr(auditor)
driver := newTickDriver(t, clk)
closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk), dbpurge.WithChatAutoArchiveBatchSize(20))
// Defer driver.close() after closer.Close() so trap
// cleanup frees shutdown's ticker.Stop() before the
// dbpurge goroutine blocks on it.
defer closer.Close()
defer driver.close()
driver.awaitInitial(ctx, t)
require.Len(t, auditor.AuditLogs(), 20, "tick 1 audits")
driver.awaitNext(ctx, t)
require.Len(t, auditor.AuditLogs(), 27, "tick 2 cumulative audits")
driver.awaitNext(ctx, t)
// Tick 3: nothing left to archive; dispatch is gated
// on len(archivedChats) > 0 so no new audits.
require.Len(t, auditor.AuditLogs(), 27, "tick 3 cumulative audits unchanged")
},
},
}
// Subtests run one after another; none of them calls t.Parallel.
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
tc.run(t)
})
}
}
+2
View File
@@ -3860,6 +3860,8 @@ CREATE INDEX idx_chat_queued_messages_chat_id ON chat_queued_messages USING btre
CREATE INDEX idx_chats_agent_id ON chats USING btree (agent_id) WHERE (agent_id IS NOT NULL); CREATE INDEX idx_chats_agent_id ON chats USING btree (agent_id) WHERE (agent_id IS NOT NULL);
CREATE INDEX idx_chats_auto_archive_candidates ON chats USING btree (created_at) WHERE ((archived = false) AND (pin_order = 0) AND (parent_chat_id IS NULL));
CREATE INDEX idx_chats_labels ON chats USING gin (labels); CREATE INDEX idx_chats_labels ON chats USING gin (labels);
CREATE INDEX idx_chats_last_model_config_id ON chats USING btree (last_model_config_id); CREATE INDEX idx_chats_last_model_config_id ON chats USING btree (last_model_config_id);
@@ -0,0 +1 @@
DROP INDEX IF EXISTS idx_chats_auto_archive_candidates;
@@ -0,0 +1,10 @@
-- Partial index matching the AutoArchiveInactiveChats WHERE clause so
-- dbpurge can skip the bulk of archived / pinned / child chats.
-- The status predicate lives in the query, not the index, because
-- enum values added by earlier migrations cannot be referenced in
-- index predicates within the same transaction batch.
CREATE INDEX IF NOT EXISTS idx_chats_auto_archive_candidates
ON chats (created_at)
WHERE archived = false
AND pin_order = 0
AND parent_chat_id IS NULL;
+7
View File
@@ -61,6 +61,10 @@ type sqlcQuerier interface {
// Only unused template versions will be archived, which are any versions not // Only unused template versions will be archived, which are any versions not
// referenced by the latest build of a workspace. // referenced by the latest build of a workspace.
ArchiveUnusedTemplateVersions(ctx context.Context, arg ArchiveUnusedTemplateVersionsParams) ([]uuid.UUID, error) ArchiveUnusedTemplateVersions(ctx context.Context, arg ArchiveUnusedTemplateVersionsParams) ([]uuid.UUID, error)
// Archives inactive root chats (pinned and already-archived chats skipped),
// cascading to children via root_chat_id. Limits apply to roots, not total
// rows. Used by dbpurge.
AutoArchiveInactiveChats(ctx context.Context, arg AutoArchiveInactiveChatsParams) ([]AutoArchiveInactiveChatsRow, error)
BackoffChatDiffStatus(ctx context.Context, arg BackoffChatDiffStatusParams) error BackoffChatDiffStatus(ctx context.Context, arg BackoffChatDiffStatusParams) error
BatchUpdateWorkspaceAgentMetadata(ctx context.Context, arg BatchUpdateWorkspaceAgentMetadataParams) error BatchUpdateWorkspaceAgentMetadata(ctx context.Context, arg BatchUpdateWorkspaceAgentMetadataParams) error
BatchUpdateWorkspaceLastUsedAt(ctx context.Context, arg BatchUpdateWorkspaceLastUsedAtParams) error BatchUpdateWorkspaceLastUsedAt(ctx context.Context, arg BatchUpdateWorkspaceLastUsedAtParams) error
@@ -267,6 +271,8 @@ type sqlcQuerier interface {
// This function returns roles for authorization purposes. Implied member roles // This function returns roles for authorization purposes. Implied member roles
// are included. // are included.
GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUID) (GetAuthorizationUserRolesRow, error) GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUID) (GetAuthorizationUserRolesRow, error)
// Auto-archive window in days. 0 disables.
GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error)
GetChatByID(ctx context.Context, id uuid.UUID) (Chat, error) GetChatByID(ctx context.Context, id uuid.UUID) (Chat, error)
GetChatByIDForUpdate(ctx context.Context, id uuid.UUID) (Chat, error) GetChatByIDForUpdate(ctx context.Context, id uuid.UUID) (Chat, error)
// Per-root-chat cost breakdown for a single user within a date range. // Per-root-chat cost breakdown for a single user within a date range.
@@ -1170,6 +1176,7 @@ type sqlcQuerier interface {
// cumulative values for unique counts (accurate period totals). Request counts // cumulative values for unique counts (accurate period totals). Request counts
// are always deltas, accumulated in DB. Returns true if insert, false if update. // are always deltas, accumulated in DB. Returns true if insert, false if update.
UpsertBoundaryUsageStats(ctx context.Context, arg UpsertBoundaryUsageStatsParams) (bool, error) UpsertBoundaryUsageStats(ctx context.Context, arg UpsertBoundaryUsageStatsParams) (bool, error)
UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error
// UpsertChatDebugLoggingAllowUsers updates the runtime admin setting that // UpsertChatDebugLoggingAllowUsers updates the runtime admin setting that
// allows users to opt into chat debug logging. // allows users to opt into chat debug logging.
UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error
+169
View File
@@ -5330,6 +5330,147 @@ func (q *sqlQuerier) ArchiveChatByID(ctx context.Context, id uuid.UUID) ([]Chat,
return items, nil return items, nil
} }
const autoArchiveInactiveChats = `-- name: AutoArchiveInactiveChats :many
WITH to_archive AS (
SELECT
c.id,
-- Activity = MAX(cm.created_at) across the family, or c.created_at
-- when the family has no non-deleted messages.
COALESCE(activity.last_activity_at, c.created_at) AS last_activity_at
FROM chats c
LEFT JOIN LATERAL (
SELECT MAX(cm.created_at) AS last_activity_at
FROM chat_messages cm
JOIN chats fc ON fc.id = cm.chat_id
WHERE (fc.id = c.id OR fc.root_chat_id = c.id)
AND cm.deleted = false
) activity ON TRUE
WHERE c.archived = false
AND c.pin_order = 0
AND c.parent_chat_id IS NULL -- roots only
AND c.created_at < $1::timestamptz
-- New active statuses must be added here to prevent archiving.
AND c.status NOT IN ('running', 'pending', 'paused', 'requires_action')
AND COALESCE(activity.last_activity_at, c.created_at) < $1::timestamptz
-- Sorting by created_at lets Postgres drive the scan from the
-- partial index instead of evaluating every LATERAL subquery
-- before sorting. All candidates are past the cutoff, so the
-- archive order is immaterial once the backlog drains.
ORDER BY c.created_at ASC
LIMIT $2
),
archived AS (
UPDATE chats c
SET archived = true, pin_order = 0, updated_at = NOW()
FROM to_archive t
WHERE (c.id = t.id OR c.root_chat_id = t.id) -- cascade to children
AND c.archived = false
RETURNING c.id, c.owner_id, c.workspace_id, c.title, c.status, c.worker_id, c.started_at, c.heartbeat_at, c.created_at, c.updated_at, c.parent_chat_id, c.root_chat_id, c.last_model_config_id, c.archived, c.last_error, c.mode, c.mcp_server_ids, c.labels, c.build_id, c.agent_id, c.pin_order, c.last_read_message_id, c.last_injected_context, c.dynamic_tools, c.organization_id, c.plan_mode, c.client_type
)
SELECT
a.id, a.owner_id, a.workspace_id, a.title, a.status, a.worker_id, a.started_at, a.heartbeat_at, a.created_at, a.updated_at, a.parent_chat_id, a.root_chat_id, a.last_model_config_id, a.archived, a.last_error, a.mode, a.mcp_server_ids, a.labels, a.build_id, a.agent_id, a.pin_order, a.last_read_message_id, a.last_injected_context, a.dynamic_tools, a.organization_id, a.plan_mode, a.client_type,
-- Children inherit their root's activity so last_activity_at is never null.
COALESCE(
t.last_activity_at,
(SELECT tr.last_activity_at FROM to_archive tr WHERE tr.id = a.root_chat_id),
a.created_at
)::timestamptz AS last_activity_at
FROM archived a
LEFT JOIN to_archive t ON t.id = a.id
ORDER BY (a.root_chat_id IS NULL) DESC, a.owner_id ASC, a.created_at ASC, a.id ASC
`
// AutoArchiveInactiveChatsParams carries the bind parameters for the
// AutoArchiveInactiveChats query.
type AutoArchiveInactiveChatsParams struct {
	// ArchiveCutoff is bound to $1. A root chat is selected only when both
	// its created_at and its computed last activity are strictly earlier
	// than this instant.
	ArchiveCutoff time.Time `db:"archive_cutoff" json:"archive_cutoff"`
	// LimitCount is bound to $2 and caps how many root chats are selected
	// per call. Children archived by the cascade are not counted.
	LimitCount int32 `db:"limit_count" json:"limit_count"`
}
// AutoArchiveInactiveChatsRow is one chat row updated by
// AutoArchiveInactiveChats: the columns returned by the UPDATE for that
// chat (root or cascaded child) plus the computed LastActivityAt.
type AutoArchiveInactiveChatsRow struct {
	ID uuid.UUID `db:"id" json:"id"`
	OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
	WorkspaceID uuid.NullUUID `db:"workspace_id" json:"workspace_id"`
	Title string `db:"title" json:"title"`
	Status ChatStatus `db:"status" json:"status"`
	WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"`
	StartedAt sql.NullTime `db:"started_at" json:"started_at"`
	HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"`
	CreatedAt time.Time `db:"created_at" json:"created_at"`
	UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
	ParentChatID uuid.NullUUID `db:"parent_chat_id" json:"parent_chat_id"`
	RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"`
	LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"`
	Archived bool `db:"archived" json:"archived"`
	LastError sql.NullString `db:"last_error" json:"last_error"`
	Mode NullChatMode `db:"mode" json:"mode"`
	MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"`
	Labels json.RawMessage `db:"labels" json:"labels"`
	BuildID uuid.NullUUID `db:"build_id" json:"build_id"`
	AgentID uuid.NullUUID `db:"agent_id" json:"agent_id"`
	PinOrder int32 `db:"pin_order" json:"pin_order"`
	LastReadMessageID sql.NullInt64 `db:"last_read_message_id" json:"last_read_message_id"`
	LastInjectedContext pqtype.NullRawMessage `db:"last_injected_context" json:"last_injected_context"`
	DynamicTools pqtype.NullRawMessage `db:"dynamic_tools" json:"dynamic_tools"`
	OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
	PlanMode NullChatPlanMode `db:"plan_mode" json:"plan_mode"`
	ClientType ChatClientType `db:"client_type" json:"client_type"`
	// LastActivityAt is computed by the query and is never null: it is the
	// row's own selected activity time when the row is a selected root,
	// otherwise the activity time of the root named by root_chat_id, and
	// finally the row's created_at as a fallback.
	LastActivityAt time.Time `db:"last_activity_at" json:"last_activity_at"`
}
// AutoArchiveInactiveChats archives inactive root chats and cascades the
// change to their children through root_chat_id. Pinned and
// already-archived chats are skipped, and the limit counts roots rather
// than total rows. Used by dbpurge.
func (q *sqlQuerier) AutoArchiveInactiveChats(ctx context.Context, arg AutoArchiveInactiveChatsParams) ([]AutoArchiveInactiveChatsRow, error) {
	rows, err := q.db.QueryContext(ctx, autoArchiveInactiveChats, arg.ArchiveCutoff, arg.LimitCount)
	if err != nil {
		return nil, err
	}
	// Safety net for the early returns below; the explicit Close further
	// down is what surfaces a close error on the success path.
	defer rows.Close()

	var archived []AutoArchiveInactiveChatsRow
	for rows.Next() {
		var chat AutoArchiveInactiveChatsRow
		// Scan destinations follow the column order of the query's final
		// SELECT list, with last_activity_at as the trailing column.
		scanErr := rows.Scan(
			&chat.ID,
			&chat.OwnerID,
			&chat.WorkspaceID,
			&chat.Title,
			&chat.Status,
			&chat.WorkerID,
			&chat.StartedAt,
			&chat.HeartbeatAt,
			&chat.CreatedAt,
			&chat.UpdatedAt,
			&chat.ParentChatID,
			&chat.RootChatID,
			&chat.LastModelConfigID,
			&chat.Archived,
			&chat.LastError,
			&chat.Mode,
			pq.Array(&chat.MCPServerIDs),
			&chat.Labels,
			&chat.BuildID,
			&chat.AgentID,
			&chat.PinOrder,
			&chat.LastReadMessageID,
			&chat.LastInjectedContext,
			&chat.DynamicTools,
			&chat.OrganizationID,
			&chat.PlanMode,
			&chat.ClientType,
			&chat.LastActivityAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		archived = append(archived, chat)
	}

	if closeErr := rows.Close(); closeErr != nil {
		return nil, closeErr
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return archived, nil
}
const backoffChatDiffStatus = `-- name: BackoffChatDiffStatus :exec const backoffChatDiffStatus = `-- name: BackoffChatDiffStatus :exec
UPDATE UPDATE
chat_diff_statuses chat_diff_statuses
@@ -20367,6 +20508,22 @@ func (q *sqlQuerier) GetApplicationName(ctx context.Context) (string, error) {
return value, err return value, err
} }
const getChatAutoArchiveDays = `-- name: GetChatAutoArchiveDays :one
SELECT COALESCE(
(SELECT value::integer FROM site_configs
WHERE key = 'agents_chat_auto_archive_days'),
$1::integer
) :: integer AS auto_archive_days
`
// GetChatAutoArchiveDays returns the auto-archive window in days. When the
// site config lookup yields no value, defaultAutoArchiveDays is returned
// instead. 0 disables.
func (q *sqlQuerier) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) {
	var days int32
	err := q.db.QueryRowContext(ctx, getChatAutoArchiveDays, defaultAutoArchiveDays).Scan(&days)
	return days, err
}
const getChatDebugLoggingAllowUsers = `-- name: GetChatDebugLoggingAllowUsers :one const getChatDebugLoggingAllowUsers = `-- name: GetChatDebugLoggingAllowUsers :one
SELECT SELECT
COALESCE((SELECT value = 'true' FROM site_configs WHERE key = 'agents_chat_debug_logging_allow_users'), false) :: boolean AS allow_users COALESCE((SELECT value = 'true' FROM site_configs WHERE key = 'agents_chat_debug_logging_allow_users'), false) :: boolean AS allow_users
@@ -20729,6 +20886,18 @@ func (q *sqlQuerier) UpsertApplicationName(ctx context.Context, value string) er
return err return err
} }
const upsertChatAutoArchiveDays = `-- name: UpsertChatAutoArchiveDays :exec
INSERT INTO site_configs (key, value)
VALUES ('agents_chat_auto_archive_days', CAST($1 AS integer)::text)
ON CONFLICT (key) DO UPDATE SET value = CAST($1 AS integer)::text
WHERE site_configs.key = 'agents_chat_auto_archive_days'
`
// UpsertChatAutoArchiveDays stores autoArchiveDays under the
// agents_chat_auto_archive_days site config key, inserting the row or
// overwriting the value already stored there.
func (q *sqlQuerier) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error {
	if _, err := q.db.ExecContext(ctx, upsertChatAutoArchiveDays, autoArchiveDays); err != nil {
		return err
	}
	return nil
}
const upsertChatDebugLoggingAllowUsers = `-- name: UpsertChatDebugLoggingAllowUsers :exec const upsertChatDebugLoggingAllowUsers = `-- name: UpsertChatDebugLoggingAllowUsers :exec
INSERT INTO site_configs (key, value) INSERT INTO site_configs (key, value)
VALUES ( VALUES (
+52
View File
@@ -1427,3 +1427,55 @@ UPDATE chat_messages SET deleted = true
WHERE chat_id = @chat_id::uuid WHERE chat_id = @chat_id::uuid
AND deleted = false AND deleted = false
AND content::jsonb @> '[{"type": "context-file"}]'; AND content::jsonb @> '[{"type": "context-file"}]';
-- name: AutoArchiveInactiveChats :many
-- Archives inactive root chats (pinned and already-archived chats skipped),
-- cascading to children via root_chat_id. Limits apply to roots, not total
-- rows. Used by dbpurge.
WITH to_archive AS (
SELECT
c.id,
-- Activity = MAX(cm.created_at) across the family, or c.created_at
-- when the family has no non-deleted messages.
COALESCE(activity.last_activity_at, c.created_at) AS last_activity_at
FROM chats c
LEFT JOIN LATERAL (
SELECT MAX(cm.created_at) AS last_activity_at
FROM chat_messages cm
JOIN chats fc ON fc.id = cm.chat_id
WHERE (fc.id = c.id OR fc.root_chat_id = c.id)
AND cm.deleted = false
) activity ON TRUE
WHERE c.archived = false
AND c.pin_order = 0
AND c.parent_chat_id IS NULL -- roots only
AND c.created_at < @archive_cutoff::timestamptz
-- New active statuses must be added here to prevent archiving.
AND c.status NOT IN ('running', 'pending', 'paused', 'requires_action')
AND COALESCE(activity.last_activity_at, c.created_at) < @archive_cutoff::timestamptz
-- Sorting by created_at lets Postgres drive the scan from the
-- partial index instead of evaluating every LATERAL subquery
-- before sorting. All candidates are past the cutoff, so the
-- archive order is immaterial once the backlog drains.
ORDER BY c.created_at ASC
LIMIT @limit_count
),
archived AS (
UPDATE chats c
SET archived = true, pin_order = 0, updated_at = NOW()
FROM to_archive t
WHERE (c.id = t.id OR c.root_chat_id = t.id) -- cascade to children
AND c.archived = false
RETURNING c.*
)
SELECT
a.*,
-- Children inherit their root's activity so last_activity_at is never null.
COALESCE(
t.last_activity_at,
(SELECT tr.last_activity_at FROM to_archive tr WHERE tr.id = a.root_chat_id),
a.created_at
)::timestamptz AS last_activity_at
FROM archived a
LEFT JOIN to_archive t ON t.id = a.id
ORDER BY (a.root_chat_id IS NULL) DESC, a.owner_id ASC, a.created_at ASC, a.id ASC;
+14
View File
@@ -302,3 +302,17 @@ INSERT INTO site_configs (key, value)
VALUES ('agents_chat_retention_days', CAST(@retention_days AS integer)::text) VALUES ('agents_chat_retention_days', CAST(@retention_days AS integer)::text)
ON CONFLICT (key) DO UPDATE SET value = CAST(@retention_days AS integer)::text ON CONFLICT (key) DO UPDATE SET value = CAST(@retention_days AS integer)::text
WHERE site_configs.key = 'agents_chat_retention_days'; WHERE site_configs.key = 'agents_chat_retention_days';
-- name: GetChatAutoArchiveDays :one
-- Auto-archive window in days. 0 disables.
SELECT COALESCE(
(SELECT value::integer FROM site_configs
WHERE key = 'agents_chat_auto_archive_days'),
@default_auto_archive_days::integer
) :: integer AS auto_archive_days;
-- name: UpsertChatAutoArchiveDays :exec
INSERT INTO site_configs (key, value)
VALUES ('agents_chat_auto_archive_days', CAST(@auto_archive_days AS integer)::text)
ON CONFLICT (key) DO UPDATE SET value = CAST(@auto_archive_days AS integer)::text
WHERE site_configs.key = 'agents_chat_auto_archive_days';
+52
View File
@@ -4329,6 +4329,58 @@ func (api *API) putChatRetentionDays(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusNoContent) rw.WriteHeader(http.StatusNoContent)
} }
// getChatAutoArchiveDays serves the deployment-wide chat auto-archive
// window, falling back to codersdk.DefaultChatAutoArchiveDays when nothing
// is stored. The handler performs no authorization check of its own: like
// retention days, the value is readable by any authenticated user, while
// writes require admin.
//
//nolint:revive // get-return: revive assumes get* must be a getter, but this is an HTTP handler.
func (api *API) getChatAutoArchiveDays(rw http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	days, err := api.Database.GetChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays)
	if err != nil {
		failure := codersdk.Response{
			Message: "Failed to get chat auto-archive days.",
			Detail:  err.Error(),
		}
		httpapi.Write(ctx, rw, http.StatusInternalServerError, failure)
		return
	}

	body := codersdk.ChatAutoArchiveDaysResponse{AutoArchiveDays: days}
	httpapi.Write(ctx, rw, http.StatusOK, body)
}
// autoArchiveDaysMaximum is the inclusive upper bound, in days, that
// putChatAutoArchiveDays accepts for the auto-archive window. Update the
// frontend constant if a settings UI is added.
const autoArchiveDaysMaximum = 3650 // ~10 years
// putChatAutoArchiveDays replaces the deployment-wide auto-archive window.
// The caller must be authorized to update the deployment config
// (admin-only), and the requested value must lie between 0 and
// autoArchiveDaysMaximum inclusive. Responds 204 No Content on success.
// Documented in docs/ai-coder/agents/chats-api.md.
func (api *API) putChatAutoArchiveDays(rw http.ResponseWriter, r *http.Request) {
	if !api.Authorize(r, policy.ActionUpdate, rbac.ResourceDeploymentConfig) {
		httpapi.Forbidden(rw)
		return
	}

	ctx := r.Context()
	var req codersdk.UpdateChatAutoArchiveDaysRequest
	if ok := httpapi.Read(ctx, rw, r, &req); !ok {
		// httpapi.Read has already written the error response.
		return
	}

	days := req.AutoArchiveDays
	withinRange := days >= 0 && days <= autoArchiveDaysMaximum
	if !withinRange {
		httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
			Message: fmt.Sprintf("Auto-archive days must be between 0 and %d.", autoArchiveDaysMaximum),
		})
		return
	}

	err := api.Database.UpsertChatAutoArchiveDays(ctx, days)
	if err != nil {
		httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
			Message: "Failed to update chat auto-archive days.",
			Detail:  err.Error(),
		})
		return
	}

	rw.WriteHeader(http.StatusNoContent)
}
// EXPERIMENTAL: this endpoint is experimental and is subject to change. // EXPERIMENTAL: this endpoint is experimental and is subject to change.
// //
//nolint:revive // get-return: revive assumes get* must be a getter, but this is an HTTP handler. //nolint:revive // get-return: revive assumes get* must be a getter, but this is an HTTP handler.
+63
View File
@@ -10962,6 +10962,69 @@ func TestChatRetentionDays(t *testing.T) {
requireSDKError(t, err, http.StatusBadRequest) requireSDKError(t, err, http.StatusBadRequest)
} }
// TestChatAutoArchiveDays walks the auto-archive-days config endpoints end
// to end: the unconfigured default, an admin write and read-back, a member
// read, a rejected member write, disabling with 0, and range validation.
// The steps share server state and therefore run sequentially.
func TestChatAutoArchiveDays(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitLong)
	adminClient := newChatClient(t)
	firstUser := coderdtest.CreateFirstUser(t, adminClient.Client)
	memberClientRaw, _ := coderdtest.CreateAnotherUser(t, adminClient.Client, firstUser.OrganizationID)
	memberClient := codersdk.NewExperimentalClient(memberClientRaw)

	// adminSet issues the PUT as the admin with the given window.
	adminSet := func(days int32) error {
		return adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{
			AutoArchiveDays: days,
		})
	}

	// With nothing configured the endpoint reports
	// DefaultChatAutoArchiveDays (0, disabled).
	got, err := adminClient.GetChatAutoArchiveDays(ctx)
	require.NoError(t, err, "get default")
	require.Equal(t, codersdk.DefaultChatAutoArchiveDays, got.AutoArchiveDays, "default should match DefaultChatAutoArchiveDays")

	// An admin write of 45 is accepted and read back.
	require.NoError(t, adminSet(45), "admin set 45")
	got, err = adminClient.GetChatAutoArchiveDays(ctx)
	require.NoError(t, err, "get after set")
	require.Equal(t, int32(45), got.AutoArchiveDays, "should return 45")

	// A non-admin member may read the value (same as retention days).
	memberGot, err := memberClient.GetChatAutoArchiveDays(ctx)
	require.NoError(t, err, "member read")
	require.Equal(t, int32(45), memberGot.AutoArchiveDays, "member sees same value")

	// A non-admin member may not write it.
	err = memberClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{AutoArchiveDays: 7})
	requireSDKError(t, err, http.StatusForbidden)

	// Writing 0 disables auto-archive.
	require.NoError(t, adminSet(0), "admin set 0")
	got, err = adminClient.GetChatAutoArchiveDays(ctx)
	require.NoError(t, err, "get after zero")
	require.Equal(t, int32(0), got.AutoArchiveDays, "should be 0 after disable")

	// An aggressive value of 1 is accepted (no pre-warn to break).
	require.NoError(t, adminSet(1), "admin set 1")

	// Validation: a negative value is rejected.
	requireSDKError(t, adminSet(-1), http.StatusBadRequest)

	// Validation: exceeding the 3650-day maximum is rejected.
	// 3651 is autoArchiveDaysMaximum + 1; keep in sync with coderd/exp_chats.go.
	requireSDKError(t, adminSet(3651), http.StatusBadRequest)
}
//nolint:tparallel // subtests share state via client, firstUser, modelConfig //nolint:tparallel // subtests share state via client, firstUser, modelConfig
func TestUserChatCompactionThresholds(t *testing.T) { func TestUserChatCompactionThresholds(t *testing.T) {
t.Parallel() t.Parallel()
+43
View File
@@ -796,6 +796,11 @@ type ChatDebugStep struct {
// Zero means disabled — the template's own autostop setting applies. // Zero means disabled — the template's own autostop setting applies.
const DefaultChatWorkspaceTTL = 0 const DefaultChatWorkspaceTTL = 0
// DefaultChatAutoArchiveDays is the default auto-archive window, in
// days, applied when no site config row exists. Zero disables
// auto-archival.
const DefaultChatAutoArchiveDays int32 = 0
// ChatWorkspaceTTLResponse is the response for getting the chat // ChatWorkspaceTTLResponse is the response for getting the chat
// workspace TTL setting. // workspace TTL setting.
type ChatWorkspaceTTLResponse struct { type ChatWorkspaceTTLResponse struct {
@@ -823,6 +828,17 @@ type UpdateChatRetentionDaysRequest struct {
RetentionDays int32 `json:"retention_days"` RetentionDays int32 `json:"retention_days"`
} }
// ChatAutoArchiveDaysResponse contains the current chat auto-archive setting.
type ChatAutoArchiveDaysResponse struct {
	// AutoArchiveDays is the auto-archive window in days. Zero means
	// auto-archival is disabled.
	AutoArchiveDays int32 `json:"auto_archive_days"`
}
// UpdateChatAutoArchiveDaysRequest is a request to update the chat
// auto-archive period.
type UpdateChatAutoArchiveDaysRequest struct {
	// AutoArchiveDays is the new auto-archive window in days. Zero
	// disables auto-archival; the server rejects negative values and
	// values above its configured maximum.
	AutoArchiveDays int32 `json:"auto_archive_days"`
}
// ParseChatWorkspaceTTL parses a stored TTL string, returning the // ParseChatWorkspaceTTL parses a stored TTL string, returning the
// default when the value is empty. // default when the value is empty.
func ParseChatWorkspaceTTL(s string) (time.Duration, error) { func ParseChatWorkspaceTTL(s string) (time.Duration, error) {
@@ -2183,6 +2199,33 @@ func (c *ExperimentalClient) UpdateChatRetentionDays(ctx context.Context, req Up
return nil return nil
} }
// GetChatAutoArchiveDays returns the configured chat auto-archive period.
// A response with any status other than 200 OK is converted to an error
// with ReadBodyAsError. On every error path the returned response is the
// zero value.
func (c *ExperimentalClient) GetChatAutoArchiveDays(ctx context.Context) (ChatAutoArchiveDaysResponse, error) {
	res, err := c.Request(ctx, http.MethodGet, "/api/experimental/chats/config/auto-archive-days", nil)
	if err != nil {
		return ChatAutoArchiveDaysResponse{}, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return ChatAutoArchiveDaysResponse{}, ReadBodyAsError(res)
	}
	// Decode in its own statement. In `return resp, Decode(&resp)` the Go
	// spec leaves the order between reading resp and calling Decode
	// unspecified, so the returned value would not be guaranteed to be the
	// decoded one.
	var resp ChatAutoArchiveDaysResponse
	if err := json.NewDecoder(res.Body).Decode(&resp); err != nil {
		return ChatAutoArchiveDaysResponse{}, err
	}
	return resp, nil
}
// UpdateChatAutoArchiveDays sends req as a PUT to the auto-archive-days
// config endpoint. The server signals success with 204 No Content; any
// other status is turned into an error with ReadBodyAsError.
func (c *ExperimentalClient) UpdateChatAutoArchiveDays(ctx context.Context, req UpdateChatAutoArchiveDaysRequest) error {
	res, err := c.Request(ctx, http.MethodPut, "/api/experimental/chats/config/auto-archive-days", req)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if res.StatusCode == http.StatusNoContent {
		return nil
	}
	return ReadBodyAsError(res)
}
// GetChatTemplateAllowlist returns the deployment-wide chat template allowlist. // GetChatTemplateAllowlist returns the deployment-wide chat template allowlist.
func (c *ExperimentalClient) GetChatTemplateAllowlist(ctx context.Context) (ChatTemplateAllowlist, error) { func (c *ExperimentalClient) GetChatTemplateAllowlist(ctx context.Context) (ChatTemplateAllowlist, error) {
res, err := c.Request(ctx, http.MethodGet, "/api/experimental/chats/config/template-allowlist", nil) res, err := c.Request(ctx, http.MethodGet, "/api/experimental/chats/config/template-allowlist", nil)
+1
View File
@@ -198,6 +198,7 @@ deployment. They will always be available from the agent.
| `coderd_authz_authorize_duration_seconds` | histogram | Duration of the 'Authorize' call in seconds. Only counts calls that succeed. | `allowed` | | `coderd_authz_authorize_duration_seconds` | histogram | Duration of the 'Authorize' call in seconds. Only counts calls that succeed. | `allowed` |
| `coderd_authz_prepare_authorize_duration_seconds` | histogram | Duration of the 'PrepareAuthorize' call in seconds. | | | `coderd_authz_prepare_authorize_duration_seconds` | histogram | Duration of the 'PrepareAuthorize' call in seconds. | |
| `coderd_build_info` | gauge | Describes the current build/version of the Coder server. Value is always 1. | `revision` `version` | | `coderd_build_info` | gauge | Describes the current build/version of the Coder server. Value is always 1. | `revision` `version` |
| `coderd_chat_auto_archive_records_archived_total` | counter | Total number of chats archived by the auto-archive job (counting both roots and cascaded children). | |
| `coderd_chatd_chats` | gauge | Number of chats being processed, by state. | `state` | | `coderd_chatd_chats` | gauge | Number of chats being processed, by state. | `state` |
| `coderd_chatd_compaction_total` | counter | Total compaction outcomes (only recorded when compaction was triggered or failed). | `model` `provider` `result` | | `coderd_chatd_compaction_total` | counter | Total compaction outcomes (only recorded when compaction was triggered or failed). | `model` `provider` `result` |
| `coderd_chatd_message_count` | histogram | Number of messages in the prompt per LLM request. | `model` `provider` | | `coderd_chatd_message_count` | histogram | Number of messages in the prompt per LLM request. | `model` `provider` |
+31 -1
View File
@@ -326,8 +326,38 @@ appear in the `files` field on subsequent
| Status | Meaning | | Status | Meaning |
|-------------------|------------------------------------------------------------------------------| |-------------------|------------------------------------------------------------------------------|
| `waiting` | No pending work (newly created, finished, or interrupted). | | `waiting` | Idle. Newly created, finished successfully, or interrupted. |
| `pending` | Queued for processing. | | `pending` | Queued for processing. |
| `running` | Agent is actively working. | | `running` | Agent is actively working. |
| `paused` | Agent is paused (for example, waiting for user input). |
| `completed` | Agent finished and the task is complete. |
| `error` | Agent encountered an error. | | `error` | Agent encountered an error. |
| `requires_action` | Agent invoked a client-provided tool and needs the result before continuing. | | `requires_action` | Agent invoked a client-provided tool and needs the result before continuing. |
## Configuration
Deployment-wide chat settings are read and written under
`/api/experimental/chats/config/*`. Reading config requires authentication; writing requires
deployment-admin privileges.
### Auto-archive window
Chats whose newest non-deleted message is older than
`auto_archive_days` are automatically archived by a background job.
Pinned chats and chats belonging to a still-active thread are
exempt. The default is `0`, which disables the feature.
```sh
# Read
curl -H "Coder-Session-Token: $CODER_SESSION_TOKEN" \
https://coder.example.com/api/experimental/chats/config/auto-archive-days
# { "auto_archive_days": 90 }
# Update
curl -X PUT -H "Coder-Session-Token: $CODER_SESSION_TOKEN" \
-H "Content-Type: application/json" \
-d '{"auto_archive_days": 60}' \
https://coder.example.com/api/experimental/chats/config/auto-archive-days
```
Accepted range: `0` to `3650` (~10 years).
@@ -0,0 +1,84 @@
# Conversation Auto-Archive
Coder Agents automatically archives long-inactive conversations so they
drop out of active chat lists without any user intervention. Archived
conversations are still visible (and can be unarchived) until they age
out of the separate retention window, at which point they are purged.
## How it works
A background process runs approximately every 10 minutes. On each tick
it scans the chat database for root conversations whose most recent
non-deleted message is older than the configured auto-archive window
and flips them from "active" to "archived". Cascaded children (chats
linked into a larger conversation via `root_chat_id`) are archived
alongside their parent so the conversation stays coherent.
Activity is defined as the most recent non-deleted message in the
conversation family, counting messages from every role. Root chats
whose status indicates ongoing work (`running`, `pending`, `paused`,
or `requires_action`) are never selected for auto-archiving.
Children inherit their root's archival decision.
Pinned root conversations (those with a non-zero pin order) are never
selected for auto-archiving. Children are archived alongside their
root regardless of individual pin status. Admins and users who want
to retain a conversation long after its last message should pin the
root.
## Interaction with retention
Auto-archive and deletion are two independent controls:
| Control | What it does | Default |
|---------------------|---------------------------------------------------------------------------|-------------------|
| Auto-archive window | Moves inactive chats to the archived state | 0 days (disabled) |
| Retention window | Deletes chats that have been archived long enough and orphaned chat files | 30 days |
A conversation needs to be inactive for `auto_archive_days`, then
archived for `retention_days`, before it is deleted. The two windows
stack additively. With auto-archive disabled by default, inactive
chats are never auto-archived; once an admin opts in by setting a
non-zero `auto_archive_days`, a conversation lives for at least
`auto_archive_days + retention_days` from its last message before it
is permanently removed.
Auto-archive (like manual archive) resets the per-chat retention
clock, so the full `retention_days` runs from the tick that archived
the chat, not from its last message.
Setting either value to `0` disables that step. Setting
`auto_archive_days` to `0` means inactive chats are never
auto-archived (users still archive manually). Setting
`retention_days` to `0` means archived chats are kept indefinitely.
## Configuration
The auto-archive window is stored as the
`agents_chat_auto_archive_days` key in the `site_configs` table.
The default is `0` (disabled); set to a positive number of days to
enable auto-archiving.
Use the admin API to read or update the value:
GET /api/experimental/chats/config/auto-archive-days
PUT /api/experimental/chats/config/auto-archive-days
## Rollout advice
Auto-archive is disabled by default, so upgrading to a release that
includes this feature will not archive any existing chats until an
admin opts in. The first tick after enabling auto-archive on a
deployment with a long history will process up to 1,000 root chats
(and their children). If your deployment has a large backlog, the
initial rollout will span many ticks. This is intentional and avoids
stalling the rest of `dbpurge` during the first run. To disable,
set `auto_archive_days` back to `0`.
## Audit trail
Each auto-archived root chat produces an audit log entry with the
background subsystem tag `chat_auto_archive`. Cascaded children are
not audited individually. The audit entry records the chat ID, owner
ID, and organization ID, and the diff shows `archived` flipping from
`false` to `true`.
@@ -4,6 +4,10 @@ Coder Agents automatically cleans up old conversation data to manage database
growth. Archived conversations and their associated files are periodically growth. Archived conversations and their associated files are periodically
purged based on a configurable retention period. purged based on a configurable retention period.
Conversations become eligible for purging only after they are archived. Old
conversations can be archived either manually or automatically. See
[Auto-Archive](./chat-auto-archive.md) for how the two controls interact.
## How it works ## How it works
A background process runs approximately every 10 minutes to remove expired A background process runs approximately every 10 minutes to remove expired
+6
View File
@@ -1268,6 +1268,12 @@
"description": "Automatic cleanup of old conversation data", "description": "Automatic cleanup of old conversation data",
"path": "./ai-coder/agents/platform-controls/chat-retention.md", "path": "./ai-coder/agents/platform-controls/chat-retention.md",
"state": ["early access"] "state": ["early access"]
},
{
"title": "Auto-Archive",
"description": "Automatic archiving of inactive conversations",
"path": "./ai-coder/agents/platform-controls/chat-auto-archive.md",
"state": ["early access"]
} }
] ]
}, },
+3
View File
@@ -226,6 +226,9 @@ coderd_authz_prepare_authorize_duration_seconds 0
# HELP coderd_build_info Describes the current build/version of the Coder server. Value is always 1. # HELP coderd_build_info Describes the current build/version of the Coder server. Value is always 1.
# TYPE coderd_build_info gauge # TYPE coderd_build_info gauge
coderd_build_info{version="",revision=""} 0 coderd_build_info{version="",revision=""} 0
# HELP coderd_chat_auto_archive_records_archived_total Total number of chats archived by the auto-archive job (counting both roots and cascaded children).
# TYPE coderd_chat_auto_archive_records_archived_total counter
coderd_chat_auto_archive_records_archived_total 0
# HELP coderd_chatd_chats Number of chats being processed, by state. # HELP coderd_chatd_chats Number of chats being processed, by state.
# TYPE coderd_chatd_chats gauge # TYPE coderd_chatd_chats gauge
coderd_chatd_chats{state=""} 0 coderd_chatd_chats{state=""} 0
+25
View File
@@ -1291,6 +1291,14 @@ export interface ChatAgentModelOverrideResponse {
readonly is_malformed: boolean; readonly is_malformed: boolean;
} }
// From codersdk/chats.go
/**
* ChatAutoArchiveDaysResponse contains the current chat auto-archive setting.
*/
export interface ChatAutoArchiveDaysResponse {
readonly auto_archive_days: number;
}
// From codersdk/chats.go // From codersdk/chats.go
export type ChatBusyBehavior = "interrupt" | "queue"; export type ChatBusyBehavior = "interrupt" | "queue";
@@ -3403,6 +3411,14 @@ export interface DebugProfileOptions {
readonly Profiles: readonly string[]; readonly Profiles: readonly string[];
} }
// From codersdk/chats.go
/**
* DefaultChatAutoArchiveDays is the default auto-archive window, in
* days, applied when no site config row exists. Zero disables
* auto-archival.
*/
export const DefaultChatAutoArchiveDays = 0;
// From codersdk/chats.go // From codersdk/chats.go
/** /**
* DefaultChatWorkspaceTTL is the default TTL for chat workspaces. * DefaultChatWorkspaceTTL is the default TTL for chat workspaces.
@@ -7685,6 +7701,15 @@ export interface UpdateChatAgentModelOverrideRequest {
readonly model_config_id: string; readonly model_config_id: string;
} }
// From codersdk/chats.go
/**
* UpdateChatAutoArchiveDaysRequest is a request to update the chat
* auto-archive period.
*/
export interface UpdateChatAutoArchiveDaysRequest {
readonly auto_archive_days: number;
}
// From codersdk/chats.go // From codersdk/chats.go
/** /**
* UpdateChatDebugLoggingAllowUsersRequest is the admin request to * UpdateChatDebugLoggingAllowUsersRequest is the admin request to