diff --git a/cli/server.go b/cli/server.go index 19c4c841ab..98a318cf53 100644 --- a/cli/server.go +++ b/cli/server.go @@ -1074,7 +1074,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. defer shutdownConns() // Ensures that old database entries are cleaned up over time! - purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database, options.DeploymentValues, quartz.NewReal(), options.PrometheusRegistry) + purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database, options.DeploymentValues, options.PrometheusRegistry, &coderAPI.Auditor) defer purger.Close() // Updates workspace usage diff --git a/coderd/audit/fields.go b/coderd/audit/fields.go index cd6eaeb7ce..1b21ed4dba 100644 --- a/coderd/audit/fields.go +++ b/coderd/audit/fields.go @@ -10,7 +10,8 @@ import ( type BackgroundSubsystem string const ( - BackgroundSubsystemDormancy BackgroundSubsystem = "dormancy" + BackgroundSubsystemDormancy BackgroundSubsystem = "dormancy" + BackgroundSubsystemChatAutoArchive BackgroundSubsystem = "chat_auto_archive" ) func BackgroundTaskFields(subsystem BackgroundSubsystem) map[string]string { diff --git a/coderd/coderd.go b/coderd/coderd.go index 4feb7d8dba..2a5b8aca76 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -1201,6 +1201,8 @@ func New(options *Options) *API { r.Put("/workspace-ttl", api.putChatWorkspaceTTL) r.Get("/retention-days", api.getChatRetentionDays) r.Put("/retention-days", api.putChatRetentionDays) + r.Get("/auto-archive-days", api.getChatAutoArchiveDays) + r.Put("/auto-archive-days", api.putChatAutoArchiveDays) r.Get("/template-allowlist", api.getChatTemplateAllowlist) r.Put("/template-allowlist", api.putChatTemplateAllowlist) }) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index e0c93fe07f..ba06bf45cc 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -644,6 +644,8 @@ var ( rbac.ResourceNotificationMessage.Type: 
{policy.ActionDelete}, rbac.ResourceApiKey.Type: {policy.ActionDelete}, rbac.ResourceAibridgeInterception.Type: {policy.ActionDelete}, + // Chat auto-archive sets archived=true on inactive chats. + rbac.ResourceChat.Type: {policy.ActionRead, policy.ActionUpdate}, }), User: []rbac.Permission{}, ByOrgID: map[string]rbac.OrgPermissions{}, @@ -1593,6 +1595,16 @@ func (q *querier) ArchiveUnusedTemplateVersions(ctx context.Context, arg databas return q.db.ArchiveUnusedTemplateVersions(ctx, arg) } +func (q *querier) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) { + // Background write by dbpurge. The LATERAL read of chat_messages rows + // happens below the RBAC boundary; only the chat row itself requires + // authorization. + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceChat); err != nil { + return nil, err + } + return q.db.AutoArchiveInactiveChats(ctx, arg) +} + func (q *querier) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { // This is a system-level operation used by the gitsync // background worker to reschedule failed refreshes. Same @@ -2557,6 +2569,17 @@ func (q *querier) GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUI return q.db.GetAuthorizationUserRoles(ctx, userID) } +func (q *querier) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) { + // Chat auto-archive is a deployment-wide config read by dbpurge. + // Only requires a valid actor in context. The HTTP GET handler + // allows any authenticated user; the PUT handler enforces admin + // access (policy.ActionUpdate on ResourceDeploymentConfig). 
+ if _, ok := ActorFromContext(ctx); !ok { + return 0, ErrNoActor + } + return q.db.GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays) +} + func (q *querier) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { return fetch(q.log, q.auth, q.db.GetChatByID)(ctx, id) } @@ -7374,6 +7397,13 @@ func (q *querier) UpsertBoundaryUsageStats(ctx context.Context, arg database.Ups return q.db.UpsertBoundaryUsageStats(ctx, arg) } +func (q *querier) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error { + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceDeploymentConfig); err != nil { + return err + } + return q.db.UpsertChatAutoArchiveDays(ctx, autoArchiveDays) +} + func (q *querier) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceDeploymentConfig); err != nil { return err diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 5657714c30..1128f757e6 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -741,6 +741,18 @@ func (s *MethodTestSuite) TestChats() { dbm.EXPECT().UpsertChatRetentionDays(gomock.Any(), int32(30)).Return(nil).AnyTimes() check.Args(int32(30)).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate) })) + s.Run("GetChatAutoArchiveDays", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + dbm.EXPECT().GetChatAutoArchiveDays(gomock.Any(), gomock.Any()).Return(int32(90), nil).AnyTimes() + check.Args(int32(90)).Asserts() + })) + s.Run("UpsertChatAutoArchiveDays", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + dbm.EXPECT().UpsertChatAutoArchiveDays(gomock.Any(), int32(90)).Return(nil).AnyTimes() + check.Args(int32(90)).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate) + })) + s.Run("AutoArchiveInactiveChats", s.Mocked(func(dbm *dbmock.MockStore, _ 
*gofakeit.Faker, check *expects) { + dbm.EXPECT().AutoArchiveInactiveChats(gomock.Any(), database.AutoArchiveInactiveChatsParams{}).Return([]database.AutoArchiveInactiveChatsRow{}, nil).AnyTimes() + check.Args(database.AutoArchiveInactiveChatsParams{}).Asserts(rbac.ResourceChat, policy.ActionUpdate) + })) s.Run("GetChatMessageByID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { chat := testutil.Fake(s.T(), faker, database.Chat{}) msg := testutil.Fake(s.T(), faker, database.ChatMessage{ChatID: chat.ID}) diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 502f5a1b45..9739359aac 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -176,6 +176,14 @@ func (m queryMetricsStore) ArchiveUnusedTemplateVersions(ctx context.Context, ar return r0, r1 } +func (m queryMetricsStore) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) { + start := time.Now() + r0, r1 := m.s.AutoArchiveInactiveChats(ctx, arg) + m.queryLatencies.WithLabelValues("AutoArchiveInactiveChats").Observe(time.Since(start).Seconds()) + m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "AutoArchiveInactiveChats").Inc() + return r0, r1 +} + func (m queryMetricsStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { start := time.Now() r0 := m.s.BackoffChatDiffStatus(ctx, arg) @@ -1112,6 +1120,14 @@ func (m queryMetricsStore) GetAuthorizationUserRoles(ctx context.Context, userID return r0, r1 } +func (m queryMetricsStore) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) { + start := time.Now() + r0, r1 := m.s.GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays) + m.queryLatencies.WithLabelValues("GetChatAutoArchiveDays").Observe(time.Since(start).Seconds()) + 
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetChatAutoArchiveDays").Inc() + return r0, r1 +} + func (m queryMetricsStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { start := time.Now() r0, r1 := m.s.GetChatByID(ctx, id) @@ -5272,6 +5288,14 @@ func (m queryMetricsStore) UpsertBoundaryUsageStats(ctx context.Context, arg dat return r0, r1 } +func (m queryMetricsStore) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error { + start := time.Now() + r0 := m.s.UpsertChatAutoArchiveDays(ctx, autoArchiveDays) + m.queryLatencies.WithLabelValues("UpsertChatAutoArchiveDays").Observe(time.Since(start).Seconds()) + m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "UpsertChatAutoArchiveDays").Inc() + return r0 +} + func (m queryMetricsStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { start := time.Now() r0 := m.s.UpsertChatDebugLoggingAllowUsers(ctx, allowUsers) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index fa9d2a729b..34369f4288 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -177,6 +177,21 @@ func (mr *MockStoreMockRecorder) ArchiveUnusedTemplateVersions(ctx, arg any) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ArchiveUnusedTemplateVersions", reflect.TypeOf((*MockStore)(nil).ArchiveUnusedTemplateVersions), ctx, arg) } +// AutoArchiveInactiveChats mocks base method. +func (m *MockStore) AutoArchiveInactiveChats(ctx context.Context, arg database.AutoArchiveInactiveChatsParams) ([]database.AutoArchiveInactiveChatsRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AutoArchiveInactiveChats", ctx, arg) + ret0, _ := ret[0].([]database.AutoArchiveInactiveChatsRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// AutoArchiveInactiveChats indicates an expected call of AutoArchiveInactiveChats. 
+func (mr *MockStoreMockRecorder) AutoArchiveInactiveChats(ctx, arg any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AutoArchiveInactiveChats", reflect.TypeOf((*MockStore)(nil).AutoArchiveInactiveChats), ctx, arg) +} + // BackoffChatDiffStatus mocks base method. func (m *MockStore) BackoffChatDiffStatus(ctx context.Context, arg database.BackoffChatDiffStatusParams) error { m.ctrl.T.Helper() @@ -2041,6 +2056,21 @@ func (mr *MockStoreMockRecorder) GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAuthorizedWorkspacesAndAgentsByOwnerID", reflect.TypeOf((*MockStore)(nil).GetAuthorizedWorkspacesAndAgentsByOwnerID), ctx, ownerID, prepared) } +// GetChatAutoArchiveDays mocks base method. +func (m *MockStore) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetChatAutoArchiveDays", ctx, defaultAutoArchiveDays) + ret0, _ := ret[0].(int32) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetChatAutoArchiveDays indicates an expected call of GetChatAutoArchiveDays. +func (mr *MockStoreMockRecorder) GetChatAutoArchiveDays(ctx, defaultAutoArchiveDays any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetChatAutoArchiveDays", reflect.TypeOf((*MockStore)(nil).GetChatAutoArchiveDays), ctx, defaultAutoArchiveDays) +} + // GetChatByID mocks base method. func (m *MockStore) GetChatByID(ctx context.Context, id uuid.UUID) (database.Chat, error) { m.ctrl.T.Helper() @@ -9906,6 +9936,20 @@ func (mr *MockStoreMockRecorder) UpsertBoundaryUsageStats(ctx, arg any) *gomock. return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertBoundaryUsageStats", reflect.TypeOf((*MockStore)(nil).UpsertBoundaryUsageStats), ctx, arg) } +// UpsertChatAutoArchiveDays mocks base method. 
+func (m *MockStore) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpsertChatAutoArchiveDays", ctx, autoArchiveDays) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpsertChatAutoArchiveDays indicates an expected call of UpsertChatAutoArchiveDays. +func (mr *MockStoreMockRecorder) UpsertChatAutoArchiveDays(ctx, autoArchiveDays any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertChatAutoArchiveDays", reflect.TypeOf((*MockStore)(nil).UpsertChatAutoArchiveDays), ctx, autoArchiveDays) +} + // UpsertChatDebugLoggingAllowUsers mocks base method. func (m *MockStore) UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error { m.ctrl.T.Helper() diff --git a/coderd/database/dbpurge/dbpurge.go b/coderd/database/dbpurge/dbpurge.go index 6c649b2145..61d0ccbd30 100644 --- a/coderd/database/dbpurge/dbpurge.go +++ b/coderd/database/dbpurge/dbpurge.go @@ -3,12 +3,15 @@ package dbpurge import ( "context" "io" + "net/http" + "sync/atomic" "time" "github.com/prometheus/client_golang/prometheus" "golang.org/x/xerrors" "cdr.dev/slog/v3" + "github.com/coder/coder/v2/coderd/audit" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbtime" @@ -34,18 +37,38 @@ const ( // long enough to cover the maximum interval of a heartbeat event (currently // 1 hour) plus some buffer. maxTelemetryHeartbeatAge = 24 * time.Hour - // Batch sizes for chat purging. Both use 1000, which is smaller - // than audit/connection log batches (10000), because chat_files - // rows contain bytea blob data that make large batches heavier. + // Chat batch sizes stay smaller than audit/connection log batches because + // chat_files rows carry bytea blobs. 
chatsBatchSize = 1000 chatFilesBatchSize = 1000 ) +// defaultChatAutoArchiveBatchSize bounds how many root chats one +// tick will archive by default. +const defaultChatAutoArchiveBatchSize int32 = 1000 + +type Option func(*instance) + +// WithClock overrides the clock used by the purger. Defaults to +// quartz.NewReal(). +func WithClock(clk quartz.Clock) Option { + return func(i *instance) { i.clk = clk } +} + +// WithChatAutoArchiveBatchSize overrides how many root chats a +// single tick will auto-archive. Defaults to +// defaultChatAutoArchiveBatchSize (1000). +func WithChatAutoArchiveBatchSize(n int32) Option { + return func(i *instance) { i.chatAutoArchiveBatchSize = n } +} + // New creates a new periodically purging database instance. -// It is the caller's responsibility to call Close on the returned instance. +// Callers must Close the returned instance. // -// This is for cleaning up old, unused resources from the database that take up space. -func New(ctx context.Context, logger slog.Logger, db database.Store, vals *codersdk.DeploymentValues, clk quartz.Clock, reg prometheus.Registerer) io.Closer { +// The auditor pointer is loaded on each dispatch tick so runtime +// entitlement changes (e.g. toggling the audit-log feature) take +// effect without restarting the process. 
+func New(ctx context.Context, logger slog.Logger, db database.Store, vals *codersdk.DeploymentValues, reg prometheus.Registerer, auditor *atomic.Pointer[audit.Auditor], opts ...Option) io.Closer { closed := make(chan struct{}) ctx, cancelFunc := context.WithCancel(ctx) @@ -69,18 +92,32 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder }, []string{"record_type"}) reg.MustRegister(recordsPurged) + chatAutoArchiveRecords := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "chat_auto_archive", + Name: "records_archived_total", + Help: "Total number of chats archived by the auto-archive job (counting both roots and cascaded children).", + }) + reg.MustRegister(chatAutoArchiveRecords) + inst := &instance{ - cancel: cancelFunc, - closed: closed, - logger: logger, - vals: vals, - clk: clk, - iterationDuration: iterationDuration, - recordsPurged: recordsPurged, + cancel: cancelFunc, + closed: closed, + logger: logger, + vals: vals, + clk: quartz.NewReal(), + auditor: auditor, + iterationDuration: iterationDuration, + recordsPurged: recordsPurged, + chatAutoArchiveRecords: chatAutoArchiveRecords, + chatAutoArchiveBatchSize: defaultChatAutoArchiveBatchSize, + } + for _, opt := range opts { + opt(inst) } // Start the ticker with the initial delay. - ticker := clk.NewTicker(delay) + ticker := inst.clk.NewTicker(delay) doTick := func(ctx context.Context, start time.Time) { defer ticker.Reset(delay) err := inst.purgeTick(ctx, db, start) @@ -88,7 +125,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder logger.Error(ctx, "failed to purge old database entries", slog.Error(err)) // Record metrics for failed purge iteration. 
- duration := clk.Since(start) + duration := inst.clk.Since(start) iterationDuration.WithLabelValues("false").Observe(duration.Seconds()) } } @@ -97,7 +134,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, vals *coder defer close(closed) defer ticker.Stop() // Force an initial tick. - doTick(ctx, dbtime.Time(clk.Now()).UTC()) + doTick(ctx, dbtime.Time(inst.clk.Now()).UTC()) for { select { case <-ctx.Done(): @@ -125,9 +162,19 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time. chatRetentionDays = 0 } + // Same rationale as chat_retention_days: read outside the tx. + chatAutoArchiveDays, err := db.GetChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays) + if err != nil { + i.logger.Warn(ctx, "failed to read chat auto-archive config, skipping auto-archive", slog.Error(err)) + chatAutoArchiveDays = 0 + } + + // Populated inside the tx; dispatched post-commit. + var archivedChats []database.AutoArchiveInactiveChatsRow + // Start a transaction to grab advisory lock, we don't want to run // multiple purges at the same time (multiple replicas). - return db.InTx(func(tx database.Store) error { + err = db.InTx(func(tx database.Store) error { // Acquire a lock to ensure that only one instance of the // purge is running at a time. ok, err := tx.TryAcquireLock(ctx, database.LockIDDBPurge) @@ -258,6 +305,20 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time. return xerrors.Errorf("failed to delete old chat files: %w", err) } } + + // Auto-archive runs after the delete pass so newly + // archived chats aren't eligible for deletion this tick. 
+ if chatAutoArchiveDays > 0 { + archiveCutoff := start.Add(-time.Duration(chatAutoArchiveDays) * 24 * time.Hour) + archivedChats, err = tx.AutoArchiveInactiveChats(ctx, database.AutoArchiveInactiveChatsParams{ + ArchiveCutoff: archiveCutoff, + LimitCount: i.chatAutoArchiveBatchSize, + }) + if err != nil { + return xerrors.Errorf("failed to auto-archive inactive chats: %w", err) + } + } + i.logger.Debug(ctx, "purged old database entries", slog.F("workspace_agent_logs", purgedWorkspaceAgentLogs), slog.F("expired_api_keys", expiredAPIKeys), @@ -266,6 +327,7 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time. slog.F("audit_logs", purgedAuditLogs), slog.F("chats", purgedChats), slog.F("chat_files", purgedChatFiles), + slog.F("auto_archived_chats", len(archivedChats)), slog.F("duration", i.clk.Since(start)), ) @@ -285,16 +347,35 @@ func (i *instance) purgeTick(ctx context.Context, db database.Store, start time. return nil }, database.DefaultTXOptions().WithID("db_purge")) + if err != nil { + return err + } + + // Dispatch audits post-commit on a detached context so ticker + // cancellation doesn't interrupt the loop. No timeout: every root + // must be audited to avoid gaps in the trail. Children inherit + // their root's archival decision and are not audited individually, + // matching the manual archive path (patchChat audits the root only). 
+ if len(archivedChats) > 0 { + i.chatAutoArchiveRecords.Add(float64(len(archivedChats))) + dispatchCtx := context.WithoutCancel(ctx) + i.dispatchChatAutoArchive(dispatchCtx, archivedChats) + } + + return nil } type instance struct { - cancel context.CancelFunc - closed chan struct{} - logger slog.Logger - vals *codersdk.DeploymentValues - clk quartz.Clock - iterationDuration *prometheus.HistogramVec - recordsPurged *prometheus.CounterVec + cancel context.CancelFunc + closed chan struct{} + logger slog.Logger + vals *codersdk.DeploymentValues + clk quartz.Clock + auditor *atomic.Pointer[audit.Auditor] + iterationDuration *prometheus.HistogramVec + recordsPurged *prometheus.CounterVec + chatAutoArchiveRecords prometheus.Counter + chatAutoArchiveBatchSize int32 } func (i *instance) Close() error { @@ -302,3 +383,74 @@ func (i *instance) Close() error { <-i.closed return nil } + +// chatFromAutoArchiveRow reshapes the query row into a database.Chat for +// audit.Auditable[database.Chat]. +func chatFromAutoArchiveRow(logger slog.Logger, r database.AutoArchiveInactiveChatsRow) database.Chat { + var labels database.StringMap + // sqlc's StringMap override doesn't reach CTE-aliased columns, so Labels + // arrives as raw JSON bytes. StringMap.Scan handles []byte and nil. 
+ if err := labels.Scan([]byte(r.Labels)); err != nil { + logger.Warn(context.Background(), "failed to parse chat labels from auto-archive row", + slog.F("chat_id", r.ID), + slog.F("raw_labels", string(r.Labels)), + slog.Error(err), + ) + } + return database.Chat{ + ID: r.ID, + OwnerID: r.OwnerID, + OrganizationID: r.OrganizationID, + WorkspaceID: r.WorkspaceID, + BuildID: r.BuildID, + AgentID: r.AgentID, + Title: r.Title, + Status: r.Status, + WorkerID: r.WorkerID, + StartedAt: r.StartedAt, + HeartbeatAt: r.HeartbeatAt, + CreatedAt: r.CreatedAt, + UpdatedAt: r.UpdatedAt, + ParentChatID: r.ParentChatID, + RootChatID: r.RootChatID, + LastModelConfigID: r.LastModelConfigID, + Archived: r.Archived, + LastError: r.LastError, + Mode: r.Mode, + MCPServerIDs: r.MCPServerIDs, + Labels: labels, + PinOrder: r.PinOrder, + LastReadMessageID: r.LastReadMessageID, + LastInjectedContext: r.LastInjectedContext, + DynamicTools: r.DynamicTools, + PlanMode: r.PlanMode, + ClientType: r.ClientType, + } +} + +// dispatchChatAutoArchive audits every archived root chat. Children +// inherit their root's archival decision and are skipped, matching +// the manual archive path (patchChat audits the root only). Runs on +// a detached context so ticker cancellation cannot truncate the trail. +func (i *instance) dispatchChatAutoArchive(ctx context.Context, archived []database.AutoArchiveInactiveChatsRow) { + auditor := *i.auditor.Load() + for _, row := range archived { + if row.ParentChatID.Valid { + continue // Children inherit root's archival; audit roots only. 
+ } + after := chatFromAutoArchiveRow(i.logger, row) + before := after + before.Archived = false + audit.BackgroundAudit(ctx, &audit.BackgroundAuditParams[database.Chat]{ + Audit: auditor, + Log: i.logger, + UserID: row.OwnerID, + OrganizationID: row.OrganizationID, + Action: database.AuditActionWrite, + Old: before, + New: after, + Status: http.StatusOK, + AdditionalFields: audit.BackgroundTaskFieldsBytes(ctx, i.logger, audit.BackgroundSubsystemChatAutoArchive), + }) + } +} diff --git a/coderd/database/dbpurge/dbpurge_test.go b/coderd/database/dbpurge/dbpurge_test.go index d1d5676ec8..69b9d34428 100644 --- a/coderd/database/dbpurge/dbpurge_test.go +++ b/coderd/database/dbpurge/dbpurge_test.go @@ -8,6 +8,7 @@ import ( "encoding/json" "fmt" "slices" + "sync/atomic" "testing" "time" @@ -22,6 +23,7 @@ import ( "cdr.dev/slog/v3" "cdr.dev/slog/v3/sloggers/slogtest" + "github.com/coder/coder/v2/coderd/audit" "github.com/coder/coder/v2/coderd/coderdtest/promhelp" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbgen" @@ -55,8 +57,9 @@ func TestPurge(t *testing.T) { done := awaitDoTick(ctx, t, clk) mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes() + mDB.EXPECT().GetChatAutoArchiveDays(gomock.Any(), codersdk.DefaultChatAutoArchiveDays).Return(int32(0), nil).AnyTimes() mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")).Return(nil).Times(2) - purger := dbpurge.New(context.Background(), testutil.Logger(t), mDB, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + purger := dbpurge.New(context.Background(), testutil.Logger(t), mDB, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) <-done // wait for doTick() to run. 
require.NoError(t, purger.Close()) } @@ -90,7 +93,7 @@ func TestMetrics(t *testing.T) { Retention: codersdk.RetentionConfig{ APIKeys: serpent.Duration(7 * 24 * time.Hour), // 7 days retention }, - }, clk, reg) + }, reg, nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -151,6 +154,7 @@ func TestMetrics(t *testing.T) { ctrl := gomock.NewController(t) mDB := dbmock.NewMockStore(ctrl) mDB.EXPECT().GetChatRetentionDays(gomock.Any()).Return(int32(0), nil).AnyTimes() + mDB.EXPECT().GetChatAutoArchiveDays(gomock.Any(), codersdk.DefaultChatAutoArchiveDays).Return(int32(0), nil).AnyTimes() mDB.EXPECT().InTx(gomock.Any(), database.DefaultTXOptions().WithID("db_purge")). Return(xerrors.New("simulated database error")). MinTimes(1) @@ -158,7 +162,7 @@ func TestMetrics(t *testing.T) { logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, mDB, &codersdk.DeploymentValues{}, clk, reg) + closer := dbpurge.New(ctx, logger, mDB, &codersdk.DeploymentValues{}, reg, nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -248,7 +252,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) { }) // when - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() // then @@ -273,7 +277,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) { // Start a new purger to immediately trigger delete after rollup. 
_ = closer.Close() - closer = dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer = dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() // then @@ -368,7 +372,7 @@ func TestDeleteOldWorkspaceAgentLogs(t *testing.T) { Retention: codersdk.RetentionConfig{ WorkspaceAgentLogs: serpent.Duration(7 * 24 * time.Hour), }, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() <-done // doTick() has now run. @@ -424,6 +428,63 @@ func awaitDoTick(ctx context.Context, t *testing.T, clk *quartz.Mock) chan struc return ch } +// tickDriver drives one or more dbpurge ticks against a single +// dbpurge.New instance. Unlike awaitDoTick it must be constructed +// *before* dbpurge.New so its traps are installed when the forced +// initial tick fires. awaitInitial waits for the forced tick's +// doTick to complete without advancing the clock, so no loop +// iteration has yet run; awaitNext then explicitly drives each +// subsequent iteration. This keeps each tick's observable state +// isolated and deterministic, which matters for tests where +// per-tick work differs (e.g. batch-size pagination). +type tickDriver struct { + clk *quartz.Mock + trapNow *quartz.Trap + trapStop *quartz.Trap + trapReset *quartz.Trap +} + +func newTickDriver(t *testing.T, clk *quartz.Mock) *tickDriver { + t.Helper() + d := &tickDriver{ + clk: clk, + trapNow: clk.Trap().Now(), + trapStop: clk.Trap().TickerStop(), + trapReset: clk.Trap().TickerReset(), + } + return d +} + +// close releases all traps. Call this via defer *after* the defer +// that closes the dbpurge instance so trap closure releases the +// shutdown ticker.Stop() rather than blocking on it. 
+func (d *tickDriver) close() { + d.trapReset.Close() + d.trapStop.Close() + d.trapNow.Close() +} + +// awaitInitial waits for the forced initial tick's doTick to +// complete. No loop iteration runs because the clock has not been +// advanced. +func (d *tickDriver) awaitInitial(ctx context.Context, t *testing.T) { + t.Helper() + d.trapNow.MustWait(ctx).MustRelease(ctx) + d.trapReset.MustWait(ctx).MustRelease(ctx) +} + +// awaitNext advances the clock by the tick interval, lets the loop +// receive the tick and run doTick, and waits for the ensuing +// ticker.Reset so the driver is ready for another awaitNext. +func (d *tickDriver) awaitNext(ctx context.Context, t *testing.T) { + t.Helper() + dur, w := d.clk.AdvanceNext() + require.Equal(t, 10*time.Minute, dur) + w.MustWait(ctx) + d.trapStop.MustWait(ctx).MustRelease(ctx) + d.trapReset.MustWait(ctx).MustRelease(ctx) +} + func assertNoWorkspaceAgentLogs(ctx context.Context, t *testing.T, db database.Store, agentID uuid.UUID) { t.Helper() agentLogs, err := db.GetWorkspaceAgentLogsAfter(ctx, database.GetWorkspaceAgentLogsAfterParams{ @@ -583,7 +644,7 @@ func TestDeleteOldWorkspaceAgentLogsRetention(t *testing.T) { done := awaitDoTick(ctx, t, clk) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ Retention: tc.retentionConfig, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -674,7 +735,7 @@ func TestDeleteOldProvisionerDaemons(t *testing.T) { require.NoError(t, err) // when - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() // then @@ -778,7 +839,7 @@ func TestDeleteOldAuditLogConnectionEvents(t *testing.T) { // Run the purge done := awaitDoTick(ctx, t, clk) - closer := 
dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() // Wait for tick testutil.TryReceive(ctx, t, done) @@ -941,7 +1002,7 @@ func TestDeleteOldTelemetryHeartbeats(t *testing.T) { require.NoError(t, err) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() <-done // doTick() has now run. @@ -1060,7 +1121,7 @@ func TestDeleteOldConnectionLogs(t *testing.T) { done := awaitDoTick(ctx, t, clk) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ Retention: tc.retentionConfig, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1316,7 +1377,7 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) { Retention: serpent.Duration(tc.retention), }, }, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1403,7 +1464,7 @@ func TestDeleteOldAuditLogs(t *testing.T) { done := awaitDoTick(ctx, t, clk) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ Retention: tc.retentionConfig, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1493,7 +1554,7 @@ func TestDeleteOldAuditLogs(t *testing.T) { Retention: codersdk.RetentionConfig{ AuditLogs: serpent.Duration(retentionPeriod), }, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) 
defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1613,7 +1674,7 @@ func TestDeleteExpiredAPIKeys(t *testing.T) { done := awaitDoTick(ctx, t, clk) closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{ Retention: tc.retentionConfig, - }, clk, prometheus.NewRegistry()) + }, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1648,6 +1709,23 @@ func ptr[T any](v T) *T { return &v } +// nopAuditorPtr returns an atomic pointer to a nop auditor for tests. +func nopAuditorPtr(t *testing.T) *atomic.Pointer[audit.Auditor] { + t.Helper() + nop := audit.NewNop() + var p atomic.Pointer[audit.Auditor] + p.Store(&nop) + return &p +} + +// mockAuditorPtr wraps a *MockAuditor in an atomic pointer for tests. +func mockAuditorPtr(m *audit.MockAuditor) *atomic.Pointer[audit.Auditor] { + a := audit.Auditor(m) + var p atomic.Pointer[audit.Auditor] + p.Store(&a) + return &p +} + //nolint:paralleltest // It uses LockIDDBPurge. 
func TestDeleteOldChatFiles(t *testing.T) { now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) @@ -1742,7 +1820,7 @@ func TestDeleteOldChatFiles(t *testing.T) { oldFileID := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-31*24*time.Hour)) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1799,7 +1877,7 @@ func TestDeleteOldChatFiles(t *testing.T) { activeChat := createChat(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, deps.modelConfig.ID, false, now) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1856,7 +1934,7 @@ func TestDeleteOldChatFiles(t *testing.T) { fileBoundary := createChatFile(ctx, t, db, rawDB, deps.user.ID, deps.org.ID, now.Add(-30*24*time.Hour).Add(time.Hour)) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() testutil.TryReceive(ctx, t, done) @@ -1936,7 +2014,7 @@ func TestDeleteOldChatFiles(t *testing.T) { require.NoError(t, err) done := awaitDoTick(ctx, t, clk) - closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, clk, prometheus.NewRegistry()) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), nopAuditorPtr(t), dbpurge.WithClock(clk)) defer closer.Close() 
testutil.TryReceive(ctx, t, done) @@ -2137,3 +2215,552 @@ func TestDeleteOldChatFiles(t *testing.T) { }) } } + +// helpers for TestAutoArchiveInactiveChats. Kept scoped to the +// test so they don't leak into the package surface area. +func archiveTestDeps(ctx context.Context, t *testing.T, db database.Store) chatAutoArchiveDeps { + t.Helper() + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user.ID, OrganizationID: org.ID}) + _, err := db.InsertChatProvider(ctx, database.InsertChatProviderParams{ + Provider: "openai", + DisplayName: "OpenAI", + Enabled: true, + CentralApiKeyEnabled: true, + }) + require.NoError(t, err) + mc, err := db.InsertChatModelConfig(ctx, database.InsertChatModelConfigParams{ + Provider: "openai", + Model: "test-model", + ContextLimit: 8192, + Options: json.RawMessage("{}"), + }) + require.NoError(t, err) + return chatAutoArchiveDeps{user: user, org: org, modelConfig: mc} +} + +type chatAutoArchiveDeps struct { + user database.User + org database.Organization + modelConfig database.ChatModelConfig +} + +// archiveHarness bundles the per-subtest setup shared by every +// TestAutoArchiveInactiveChats case. Subtests read fields off the +// harness directly instead of repeating six lines of identical +// plumbing. 
+type archiveHarness struct { + ctx context.Context + clk *quartz.Mock + db database.Store + rawDB *sql.DB + logger slog.Logger + deps chatAutoArchiveDeps +} + +func newArchiveHarness(t *testing.T, now time.Time) *archiveHarness { + t.Helper() + ctx := testutil.Context(t, testutil.WaitLong) + clk := quartz.NewMock(t) + clk.Set(now).MustWait(ctx) + db, _, rawDB := dbtestutil.NewDBWithSQLDB(t, dbtestutil.WithDumpOnFailure()) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + return &archiveHarness{ + ctx: ctx, + clk: clk, + db: db, + rawDB: rawDB, + logger: logger, + deps: archiveTestDeps(ctx, t, db), + } +} + +// createArchiveChat inserts a chat with an optional backdated +// created_at. Title is propagated through so tests can assert on +// digest contents. +func createArchiveChat(ctx context.Context, t *testing.T, db database.Store, rawDB *sql.DB, deps chatAutoArchiveDeps, title string, createdAt time.Time) database.Chat { + t.Helper() + chat, err := db.InsertChat(ctx, database.InsertChatParams{ + OrganizationID: deps.org.ID, + OwnerID: deps.user.ID, + LastModelConfigID: deps.modelConfig.ID, + Title: title, + Status: database.ChatStatusWaiting, + ClientType: database.ChatClientTypeUi, + }) + require.NoError(t, err) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET created_at = $1, updated_at = $1 WHERE id = $2", createdAt, chat.ID) + require.NoError(t, err) + return chat +} + +// insertTextMessage appends a non-deleted user message with a +// backdated created_at. Used to establish "last activity" for the +// auto-archive query's LATERAL subquery. 
+func insertTextMessage(ctx context.Context, t *testing.T, db database.Store, rawDB *sql.DB, chatID, userID, modelConfigID uuid.UUID, createdAt time.Time) { + t.Helper() + msgs, err := db.InsertChatMessages(ctx, database.InsertChatMessagesParams{ + ChatID: chatID, + CreatedBy: []uuid.UUID{userID}, + ModelConfigID: []uuid.UUID{modelConfigID}, + Role: []database.ChatMessageRole{database.ChatMessageRoleUser}, + Content: []string{`[{"type":"text","text":"hello"}]`}, + ContentVersion: []int16{0}, + Visibility: []database.ChatMessageVisibility{database.ChatMessageVisibilityBoth}, + InputTokens: []int64{0}, + OutputTokens: []int64{0}, + TotalTokens: []int64{0}, + ReasoningTokens: []int64{0}, + CacheCreationTokens: []int64{0}, + CacheReadTokens: []int64{0}, + ContextLimit: []int64{0}, + Compressed: []bool{false}, + TotalCostMicros: []int64{0}, + RuntimeMs: []int64{0}, + ProviderResponseID: []string{""}, + }) + require.NoError(t, err) + require.Len(t, msgs, 1) + _, err = rawDB.ExecContext(ctx, "UPDATE chat_messages SET created_at = $1 WHERE id = $2", createdAt, msgs[0].ID) + require.NoError(t, err) +} + +//nolint:paralleltest // It uses LockIDDBPurge. +func TestAutoArchiveInactiveChats(t *testing.T) { + now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) + + tests := []struct { + name string + run func(t *testing.T) + }{ + { + name: "AutoArchiveDisabled", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.Zero(t, codersdk.DefaultChatAutoArchiveDays) + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays)) + + // Chat older than any reasonable cutoff. 
+ staleChat := createArchiveChat(ctx, t, db, rawDB, deps, "stale-chat", now.Add(-365*24*time.Hour)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshed, err := db.GetChatByID(ctx, staleChat.ID) + require.NoError(t, err) + require.False(t, refreshed.Archived, "chat should stay active when auto-archive is disabled") + require.Empty(t, auditor.AuditLogs(), "no audit log entries expected") + }, + }, + { + name: "ArchivesInactiveRoot", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90))) + + // Inactive root: newest message 100 days old. + staleChat := createArchiveChat(ctx, t, db, rawDB, deps, "stale-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, staleChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + + // Active root: message 10 days old, within cutoff. 
+ activeChat := createArchiveChat(ctx, t, db, rawDB, deps, "active-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, activeChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshedStale, err := db.GetChatByID(ctx, staleChat.ID) + require.NoError(t, err) + require.True(t, refreshedStale.Archived, "stale chat should be auto-archived") + + refreshedActive, err := db.GetChatByID(ctx, activeChat.ID) + require.NoError(t, err) + require.False(t, refreshedActive.Archived, "active chat should stay live") + + logs := auditor.AuditLogs() + require.Len(t, logs, 1, "expected one audit entry") + require.Equal(t, staleChat.ID, logs[0].ResourceID) + require.Equal(t, database.ResourceTypeChat, logs[0].ResourceType) + require.Equal(t, database.AuditActionWrite, logs[0].Action) + require.Contains(t, string(logs[0].AdditionalFields), "chat_auto_archive", + "audit entry must carry the auto-archive subsystem tag") + }, + }, + { + name: "ExactCutoffBoundary", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90))) + // The forced initial tick uses start = now. Compute + // the cutoff from that tick's perspective so the + // boundary is deterministic. + cutoff := now.Add(-90 * 24 * time.Hour) + + // Message exactly at the cutoff: query uses strict <, + // so this chat must survive. 
+ exactChat := createArchiveChat(ctx, t, db, rawDB, deps, "exact", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, exactChat.ID, deps.user.ID, deps.modelConfig.ID, cutoff) + + // Message one second before the cutoff: should be archived. + justOverChat := createArchiveChat(ctx, t, db, rawDB, deps, "just-over", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, justOverChat.ID, deps.user.ID, deps.modelConfig.ID, cutoff.Add(-time.Second)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + // Use newTickDriver for precise tick control so we + // observe the forced initial tick's results without + // racing with a second tick. + driver := newTickDriver(t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + // Defer driver.close() after closer.Close(): defers + // run LIFO, so driver cleanup frees shutdown's + // ticker.Stop() before the dbpurge goroutine blocks + // on it. + defer closer.Close() + defer driver.close() + driver.awaitInitial(ctx, t) + + refreshedExact, err := db.GetChatByID(ctx, exactChat.ID) + require.NoError(t, err) + require.False(t, refreshedExact.Archived, "chat at exact cutoff must survive (strict <)") + + refreshedOver, err := db.GetChatByID(ctx, justOverChat.ID) + require.NoError(t, err) + require.True(t, refreshedOver.Archived, "chat one second past cutoff must be archived") + + require.Len(t, auditor.AuditLogs(), 1, "only the just-over chat should produce an audit entry") + }, + }, + { + name: "DeletedMessagesIgnored", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90))) + + // Chat created 120 days ago with a recent message + // (10 days old) that is then soft-deleted. 
The + // LATERAL subquery filters cm.deleted = false, so + // the chat should fall back to created_at and be + // archived. + chat := createArchiveChat(ctx, t, db, rawDB, deps, "deleted-msg", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, chat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour)) + // Soft-delete all messages on this chat. + _, err := rawDB.ExecContext(ctx, "UPDATE chat_messages SET deleted = true WHERE chat_id = $1", chat.ID) + require.NoError(t, err) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshed, err := db.GetChatByID(ctx, chat.ID) + require.NoError(t, err) + require.True(t, refreshed.Archived, "chat with only deleted messages should be archived") + require.Len(t, auditor.AuditLogs(), 1) + }, + }, + { + name: "ChildActivityKeepsRootAlive", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90))) + + // Stale root with no messages of its own. + root := createArchiveChat(ctx, t, db, rawDB, deps, "stale-root", now.Add(-120*24*time.Hour)) + + // Child linked to root with a recent message (10 days old, + // well within the 90-day cutoff). 
+ child := createArchiveChat(ctx, t, db, rawDB, deps, "active-child", now.Add(-120*24*time.Hour)) + _, err := rawDB.ExecContext(ctx, "UPDATE chats SET parent_chat_id = $1, root_chat_id = $1 WHERE id = $2", root.ID, child.ID) + require.NoError(t, err) + insertTextMessage(ctx, t, db, rawDB, child.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-10*24*time.Hour)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshedRoot, err := db.GetChatByID(ctx, root.ID) + require.NoError(t, err) + require.False(t, refreshedRoot.Archived, "root must stay active because child has recent activity") + + refreshedChild, err := db.GetChatByID(ctx, child.ID) + require.NoError(t, err) + require.False(t, refreshedChild.Archived, "child must stay active") + + require.Empty(t, auditor.AuditLogs(), "no chats should be archived") + }, + }, + { + name: "SkipsActiveStatusChats", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(90))) + + // Stale chats whose status prevents archiving. 
+ runningChat := createArchiveChat(ctx, t, db, rawDB, deps, "running-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, runningChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + _, err := rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRunning, runningChat.ID) + require.NoError(t, err) + + requiresActionChat := createArchiveChat(ctx, t, db, rawDB, deps, "requires-action-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, requiresActionChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRequiresAction, requiresActionChat.ID) + require.NoError(t, err) + + pendingChat := createArchiveChat(ctx, t, db, rawDB, deps, "pending-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, pendingChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusPending, pendingChat.ID) + require.NoError(t, err) + + pausedChat := createArchiveChat(ctx, t, db, rawDB, deps, "paused-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, pausedChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusPaused, pausedChat.ID) + require.NoError(t, err) + + // Control: a stale chat with archivable status that + // should be archived. 
+ completedChat := createArchiveChat(ctx, t, db, rawDB, deps, "completed-chat", now.Add(-120*24*time.Hour)) + insertTextMessage(ctx, t, db, rawDB, completedChat.ID, deps.user.ID, deps.modelConfig.ID, now.Add(-100*24*time.Hour)) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusCompleted, completedChat.ID) + require.NoError(t, err) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshedRunning, err := db.GetChatByID(ctx, runningChat.ID) + require.NoError(t, err) + require.False(t, refreshedRunning.Archived, "running chat must not be archived") + + refreshedRA, err := db.GetChatByID(ctx, requiresActionChat.ID) + require.NoError(t, err) + require.False(t, refreshedRA.Archived, "requires_action chat must not be archived") + + refreshedPending, err := db.GetChatByID(ctx, pendingChat.ID) + require.NoError(t, err) + require.False(t, refreshedPending.Archived, "pending chat must not be archived") + + refreshedPaused, err := db.GetChatByID(ctx, pausedChat.ID) + require.NoError(t, err) + require.False(t, refreshedPaused.Archived, "paused chat must not be archived") + + refreshedCompleted, err := db.GetChatByID(ctx, completedChat.ID) + require.NoError(t, err) + require.True(t, refreshedCompleted.Archived, "completed stale chat should be archived") + + logs := auditor.AuditLogs() + require.Len(t, logs, 1, "only the completed chat should produce an audit entry") + require.Equal(t, completedChat.ID, logs[0].ResourceID) + }, + }, + { + name: "SkipsPinnedAndChildren", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30))) + 
+ // Pinned stale chat: should be skipped. + pinnedChat := createArchiveChat(ctx, t, db, rawDB, deps, "pinned-chat", now.Add(-90*24*time.Hour)) + _, err := rawDB.ExecContext(ctx, "UPDATE chats SET pin_order = 1 WHERE id = $1", pinnedChat.ID) + require.NoError(t, err) + + // Stale root with a child. + root := createArchiveChat(ctx, t, db, rawDB, deps, "root-chat", now.Add(-90*24*time.Hour)) + child := createArchiveChat(ctx, t, db, rawDB, deps, "child-chat", now.Add(-90*24*time.Hour)) + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET parent_chat_id = $1, root_chat_id = $1 WHERE id = $2", root.ID, child.ID) + require.NoError(t, err) + // Give the child an active status to prove the cascade is + // status-blind by design. If someone adds a status filter + // to the cascade CTE, this assertion will catch it. + _, err = rawDB.ExecContext(ctx, "UPDATE chats SET status = $1 WHERE id = $2", database.ChatStatusRunning, child.ID) + require.NoError(t, err) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + refreshedPinned, err := db.GetChatByID(ctx, pinnedChat.ID) + require.NoError(t, err) + require.False(t, refreshedPinned.Archived, "pinned chat must be skipped") + + refreshedRoot, err := db.GetChatByID(ctx, root.ID) + require.NoError(t, err) + require.True(t, refreshedRoot.Archived, "root should be archived") + + refreshedChild, err := db.GetChatByID(ctx, child.ID) + require.NoError(t, err) + require.True(t, refreshedChild.Archived, "child should be cascade-archived") + + // One audit entry for the root; the cascaded child is + // not audited individually. 
+ require.Len(t, auditor.AuditLogs(), 1) + }, + }, + { + name: "MultipleOwners", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + user2 := dbgen.User(t, db, database.User{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user2.ID, OrganizationID: deps.org.ID}) + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30))) + + // Two stale roots per owner, backdated well past the + // 30-day cutoff. + u1Deps := deps + u2Deps := chatAutoArchiveDeps{user: user2, org: deps.org, modelConfig: deps.modelConfig} + createArchiveChat(ctx, t, db, rawDB, u1Deps, "u1-a", now.Add(-60*24*time.Hour)) + createArchiveChat(ctx, t, db, rawDB, u1Deps, "u1-b", now.Add(-60*24*time.Hour)) + createArchiveChat(ctx, t, db, rawDB, u2Deps, "u2-a", now.Add(-60*24*time.Hour)) + createArchiveChat(ctx, t, db, rawDB, u2Deps, "u2-b", now.Add(-60*24*time.Hour)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + done := awaitDoTick(ctx, t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + defer closer.Close() + testutil.TryReceive(ctx, t, done) + + // Four audit rows, one per archived root. Each entry + // carries the owning UserID so downstream consumers can + // correlate per-owner activity. + logs := auditor.AuditLogs() + require.Len(t, logs, 4) + byUser := map[uuid.UUID]int{} + for _, l := range logs { + byUser[l.UserID]++ + } + require.Equal(t, 2, byUser[deps.user.ID]) + require.Equal(t, 2, byUser[user2.ID]) + }, + }, + { + name: "SecondTickIdempotent", + run: func(t *testing.T) { + h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30))) + + // Two stale roots seeded before the first tick. 
+ firstA := createArchiveChat(ctx, t, db, rawDB, deps, "first-a", now.Add(-60*24*time.Hour)) + firstB := createArchiveChat(ctx, t, db, rawDB, deps, "first-b", now.Add(-60*24*time.Hour)) + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + driver := newTickDriver(t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk)) + // Defer driver.close() after closer.Close(): defers + // run LIFO, so this frees shutdown's ticker.Stop() + // before the dbpurge goroutine blocks on it. + defer closer.Close() + defer driver.close() + driver.awaitInitial(ctx, t) + + // Tick 1: both archived. + require.Len(t, auditor.AuditLogs(), 2, "tick 1 audits") + + // Seed a third stale root between ticks so tick 2 has + // genuine work and we can distinguish "ignored already + // archived" from "ignored everything". + third := createArchiveChat(ctx, t, db, rawDB, deps, "second-c", now.Add(-60*24*time.Hour)) + + driver.awaitNext(ctx, t) + + // Tick 2: exactly one new audit for the third chat; + // tick 1's rows must not be re-archived. + require.Len(t, auditor.AuditLogs(), 3, "tick 2 cumulative audits") + + // All three chats should remain archived. + for _, id := range []uuid.UUID{firstA.ID, firstB.ID, third.ID} { + refreshed, err := db.GetChatByID(ctx, id) + require.NoError(t, err) + require.True(t, refreshed.Archived, "chat %s should remain archived", id) + } + }, + }, + { + name: "BatchSizePagination", + run: func(t *testing.T) { + // With 27 stale roots and batch size 20, tick 1 + // archives 20, tick 2 archives the remaining 7, tick 3 + // archives none. We assert the audit dispatch follows + // the same pattern: no dispatch runs when rows == 0, + // so tick 3 emits no new audits. 
+ h := newArchiveHarness(t, now) + ctx, clk, db, rawDB, logger, deps := h.ctx, h.clk, h.db, h.rawDB, h.logger, h.deps + + require.NoError(t, db.UpsertChatAutoArchiveDays(ctx, int32(30))) + + const total = 27 + for i := range total { + createArchiveChat(ctx, t, db, rawDB, deps, + fmt.Sprintf("page-%02d", i), + now.Add(-60*24*time.Hour)) + } + + auditor := audit.NewMock() + auditorPtr := mockAuditorPtr(auditor) + driver := newTickDriver(t, clk) + closer := dbpurge.New(ctx, logger, db, &codersdk.DeploymentValues{}, prometheus.NewRegistry(), auditorPtr, dbpurge.WithClock(clk), dbpurge.WithChatAutoArchiveBatchSize(20)) + // Defer driver.close() after closer.Close() so trap + // cleanup frees shutdown's ticker.Stop() before the + // dbpurge goroutine blocks on it. + defer closer.Close() + defer driver.close() + driver.awaitInitial(ctx, t) + + require.Len(t, auditor.AuditLogs(), 20, "tick 1 audits") + + driver.awaitNext(ctx, t) + require.Len(t, auditor.AuditLogs(), 27, "tick 2 cumulative audits") + + driver.awaitNext(ctx, t) + // Tick 3: nothing left to archive; dispatch is gated + // on len(archivedChats) > 0 so no new audits. 
+ require.Len(t, auditor.AuditLogs(), 27, "tick 3 cumulative audits unchanged") + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.run(t) + }) + } +} diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 9706259cd5..464433656f 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -3860,6 +3860,8 @@ CREATE INDEX idx_chat_queued_messages_chat_id ON chat_queued_messages USING btre CREATE INDEX idx_chats_agent_id ON chats USING btree (agent_id) WHERE (agent_id IS NOT NULL); +CREATE INDEX idx_chats_auto_archive_candidates ON chats USING btree (created_at) WHERE ((archived = false) AND (pin_order = 0) AND (parent_chat_id IS NULL)); + CREATE INDEX idx_chats_labels ON chats USING gin (labels); CREATE INDEX idx_chats_last_model_config_id ON chats USING btree (last_model_config_id); diff --git a/coderd/database/migrations/000477_chat_auto_archive.down.sql b/coderd/database/migrations/000477_chat_auto_archive.down.sql new file mode 100644 index 0000000000..fabb6e22c3 --- /dev/null +++ b/coderd/database/migrations/000477_chat_auto_archive.down.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS idx_chats_auto_archive_candidates; diff --git a/coderd/database/migrations/000477_chat_auto_archive.up.sql b/coderd/database/migrations/000477_chat_auto_archive.up.sql new file mode 100644 index 0000000000..501983c6c6 --- /dev/null +++ b/coderd/database/migrations/000477_chat_auto_archive.up.sql @@ -0,0 +1,10 @@ +-- Partial index matching the AutoArchiveInactiveChats WHERE clause so +-- dbpurge can skip the bulk of archived / pinned / child chats. +-- The status predicate lives in the query, not the index, because +-- enum values added by earlier migrations cannot be referenced in +-- index predicates within the same transaction batch. 
+CREATE INDEX IF NOT EXISTS idx_chats_auto_archive_candidates + ON chats (created_at) + WHERE archived = false + AND pin_order = 0 + AND parent_chat_id IS NULL; diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 48ce76b94c..ab0ac71e33 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -61,6 +61,10 @@ type sqlcQuerier interface { // Only unused template versions will be archived, which are any versions not // referenced by the latest build of a workspace. ArchiveUnusedTemplateVersions(ctx context.Context, arg ArchiveUnusedTemplateVersionsParams) ([]uuid.UUID, error) + // Archives inactive root chats (pinned and already-archived chats skipped), + // cascading to children via root_chat_id. Limits apply to roots, not total + // rows. Used by dbpurge. + AutoArchiveInactiveChats(ctx context.Context, arg AutoArchiveInactiveChatsParams) ([]AutoArchiveInactiveChatsRow, error) BackoffChatDiffStatus(ctx context.Context, arg BackoffChatDiffStatusParams) error BatchUpdateWorkspaceAgentMetadata(ctx context.Context, arg BatchUpdateWorkspaceAgentMetadataParams) error BatchUpdateWorkspaceLastUsedAt(ctx context.Context, arg BatchUpdateWorkspaceLastUsedAtParams) error @@ -267,6 +271,8 @@ type sqlcQuerier interface { // This function returns roles for authorization purposes. Implied member roles // are included. GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUID) (GetAuthorizationUserRolesRow, error) + // Auto-archive window in days. 0 disables. + GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) GetChatByID(ctx context.Context, id uuid.UUID) (Chat, error) GetChatByIDForUpdate(ctx context.Context, id uuid.UUID) (Chat, error) // Per-root-chat cost breakdown for a single user within a date range. @@ -1170,6 +1176,7 @@ type sqlcQuerier interface { // cumulative values for unique counts (accurate period totals). Request counts // are always deltas, accumulated in DB. 
Returns true if insert, false if update. UpsertBoundaryUsageStats(ctx context.Context, arg UpsertBoundaryUsageStatsParams) (bool, error) + UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error // UpsertChatDebugLoggingAllowUsers updates the runtime admin setting that // allows users to opt into chat debug logging. UpsertChatDebugLoggingAllowUsers(ctx context.Context, allowUsers bool) error diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 355bdff168..f909dab25e 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -5330,6 +5330,147 @@ func (q *sqlQuerier) ArchiveChatByID(ctx context.Context, id uuid.UUID) ([]Chat, return items, nil } +const autoArchiveInactiveChats = `-- name: AutoArchiveInactiveChats :many +WITH to_archive AS ( + SELECT + c.id, + -- Activity = MAX(cm.created_at) across the family, or c.created_at + -- when the family has no non-deleted messages. + COALESCE(activity.last_activity_at, c.created_at) AS last_activity_at + FROM chats c + LEFT JOIN LATERAL ( + SELECT MAX(cm.created_at) AS last_activity_at + FROM chat_messages cm + JOIN chats fc ON fc.id = cm.chat_id + WHERE (fc.id = c.id OR fc.root_chat_id = c.id) + AND cm.deleted = false + ) activity ON TRUE + WHERE c.archived = false + AND c.pin_order = 0 + AND c.parent_chat_id IS NULL -- roots only + AND c.created_at < $1::timestamptz + -- New active statuses must be added here to prevent archiving. + AND c.status NOT IN ('running', 'pending', 'paused', 'requires_action') + AND COALESCE(activity.last_activity_at, c.created_at) < $1::timestamptz + -- Sorting by created_at lets Postgres drive the scan from the + -- partial index instead of evaluating every LATERAL subquery + -- before sorting. All candidates are past the cutoff, so the + -- archive order is immaterial once the backlog drains. 
+ ORDER BY c.created_at ASC + LIMIT $2 +), +archived AS ( + UPDATE chats c + SET archived = true, pin_order = 0, updated_at = NOW() + FROM to_archive t + WHERE (c.id = t.id OR c.root_chat_id = t.id) -- cascade to children + AND c.archived = false + RETURNING c.id, c.owner_id, c.workspace_id, c.title, c.status, c.worker_id, c.started_at, c.heartbeat_at, c.created_at, c.updated_at, c.parent_chat_id, c.root_chat_id, c.last_model_config_id, c.archived, c.last_error, c.mode, c.mcp_server_ids, c.labels, c.build_id, c.agent_id, c.pin_order, c.last_read_message_id, c.last_injected_context, c.dynamic_tools, c.organization_id, c.plan_mode, c.client_type +) +SELECT + a.id, a.owner_id, a.workspace_id, a.title, a.status, a.worker_id, a.started_at, a.heartbeat_at, a.created_at, a.updated_at, a.parent_chat_id, a.root_chat_id, a.last_model_config_id, a.archived, a.last_error, a.mode, a.mcp_server_ids, a.labels, a.build_id, a.agent_id, a.pin_order, a.last_read_message_id, a.last_injected_context, a.dynamic_tools, a.organization_id, a.plan_mode, a.client_type, + -- Children inherit their root's activity so last_activity_at is never null. 
+ COALESCE( + t.last_activity_at, + (SELECT tr.last_activity_at FROM to_archive tr WHERE tr.id = a.root_chat_id), + a.created_at + )::timestamptz AS last_activity_at +FROM archived a +LEFT JOIN to_archive t ON t.id = a.id +ORDER BY (a.root_chat_id IS NULL) DESC, a.owner_id ASC, a.created_at ASC, a.id ASC +` + +type AutoArchiveInactiveChatsParams struct { + ArchiveCutoff time.Time `db:"archive_cutoff" json:"archive_cutoff"` + LimitCount int32 `db:"limit_count" json:"limit_count"` +} + +type AutoArchiveInactiveChatsRow struct { + ID uuid.UUID `db:"id" json:"id"` + OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` + WorkspaceID uuid.NullUUID `db:"workspace_id" json:"workspace_id"` + Title string `db:"title" json:"title"` + Status ChatStatus `db:"status" json:"status"` + WorkerID uuid.NullUUID `db:"worker_id" json:"worker_id"` + StartedAt sql.NullTime `db:"started_at" json:"started_at"` + HeartbeatAt sql.NullTime `db:"heartbeat_at" json:"heartbeat_at"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + ParentChatID uuid.NullUUID `db:"parent_chat_id" json:"parent_chat_id"` + RootChatID uuid.NullUUID `db:"root_chat_id" json:"root_chat_id"` + LastModelConfigID uuid.UUID `db:"last_model_config_id" json:"last_model_config_id"` + Archived bool `db:"archived" json:"archived"` + LastError sql.NullString `db:"last_error" json:"last_error"` + Mode NullChatMode `db:"mode" json:"mode"` + MCPServerIDs []uuid.UUID `db:"mcp_server_ids" json:"mcp_server_ids"` + Labels json.RawMessage `db:"labels" json:"labels"` + BuildID uuid.NullUUID `db:"build_id" json:"build_id"` + AgentID uuid.NullUUID `db:"agent_id" json:"agent_id"` + PinOrder int32 `db:"pin_order" json:"pin_order"` + LastReadMessageID sql.NullInt64 `db:"last_read_message_id" json:"last_read_message_id"` + LastInjectedContext pqtype.NullRawMessage `db:"last_injected_context" json:"last_injected_context"` + DynamicTools pqtype.NullRawMessage `db:"dynamic_tools" 
json:"dynamic_tools"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + PlanMode NullChatPlanMode `db:"plan_mode" json:"plan_mode"` + ClientType ChatClientType `db:"client_type" json:"client_type"` + LastActivityAt time.Time `db:"last_activity_at" json:"last_activity_at"` +} + +// Archives inactive root chats (pinned and already-archived chats skipped), +// cascading to children via root_chat_id. Limits apply to roots, not total +// rows. Used by dbpurge. +func (q *sqlQuerier) AutoArchiveInactiveChats(ctx context.Context, arg AutoArchiveInactiveChatsParams) ([]AutoArchiveInactiveChatsRow, error) { + rows, err := q.db.QueryContext(ctx, autoArchiveInactiveChats, arg.ArchiveCutoff, arg.LimitCount) + if err != nil { + return nil, err + } + defer rows.Close() + var items []AutoArchiveInactiveChatsRow + for rows.Next() { + var i AutoArchiveInactiveChatsRow + if err := rows.Scan( + &i.ID, + &i.OwnerID, + &i.WorkspaceID, + &i.Title, + &i.Status, + &i.WorkerID, + &i.StartedAt, + &i.HeartbeatAt, + &i.CreatedAt, + &i.UpdatedAt, + &i.ParentChatID, + &i.RootChatID, + &i.LastModelConfigID, + &i.Archived, + &i.LastError, + &i.Mode, + pq.Array(&i.MCPServerIDs), + &i.Labels, + &i.BuildID, + &i.AgentID, + &i.PinOrder, + &i.LastReadMessageID, + &i.LastInjectedContext, + &i.DynamicTools, + &i.OrganizationID, + &i.PlanMode, + &i.ClientType, + &i.LastActivityAt, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const backoffChatDiffStatus = `-- name: BackoffChatDiffStatus :exec UPDATE chat_diff_statuses @@ -20367,6 +20508,22 @@ func (q *sqlQuerier) GetApplicationName(ctx context.Context) (string, error) { return value, err } +const getChatAutoArchiveDays = `-- name: GetChatAutoArchiveDays :one +SELECT COALESCE( + (SELECT value::integer FROM site_configs + WHERE key = 
'agents_chat_auto_archive_days'), + $1::integer +) :: integer AS auto_archive_days +` + +// Auto-archive window in days. 0 disables. +func (q *sqlQuerier) GetChatAutoArchiveDays(ctx context.Context, defaultAutoArchiveDays int32) (int32, error) { + row := q.db.QueryRowContext(ctx, getChatAutoArchiveDays, defaultAutoArchiveDays) + var auto_archive_days int32 + err := row.Scan(&auto_archive_days) + return auto_archive_days, err +} + const getChatDebugLoggingAllowUsers = `-- name: GetChatDebugLoggingAllowUsers :one SELECT COALESCE((SELECT value = 'true' FROM site_configs WHERE key = 'agents_chat_debug_logging_allow_users'), false) :: boolean AS allow_users @@ -20729,6 +20886,18 @@ func (q *sqlQuerier) UpsertApplicationName(ctx context.Context, value string) er return err } +const upsertChatAutoArchiveDays = `-- name: UpsertChatAutoArchiveDays :exec +INSERT INTO site_configs (key, value) +VALUES ('agents_chat_auto_archive_days', CAST($1 AS integer)::text) +ON CONFLICT (key) DO UPDATE SET value = CAST($1 AS integer)::text +WHERE site_configs.key = 'agents_chat_auto_archive_days' +` + +func (q *sqlQuerier) UpsertChatAutoArchiveDays(ctx context.Context, autoArchiveDays int32) error { + _, err := q.db.ExecContext(ctx, upsertChatAutoArchiveDays, autoArchiveDays) + return err +} + const upsertChatDebugLoggingAllowUsers = `-- name: UpsertChatDebugLoggingAllowUsers :exec INSERT INTO site_configs (key, value) VALUES ( diff --git a/coderd/database/queries/chats.sql b/coderd/database/queries/chats.sql index 3365e943bb..e88f0abc41 100644 --- a/coderd/database/queries/chats.sql +++ b/coderd/database/queries/chats.sql @@ -1427,3 +1427,55 @@ UPDATE chat_messages SET deleted = true WHERE chat_id = @chat_id::uuid AND deleted = false AND content::jsonb @> '[{"type": "context-file"}]'; + +-- name: AutoArchiveInactiveChats :many +-- Archives inactive root chats (pinned and already-archived chats skipped), +-- cascading to children via root_chat_id. 
Limits apply to roots, not total +-- rows. Used by dbpurge. +WITH to_archive AS ( + SELECT + c.id, + -- Activity = MAX(cm.created_at) across the family, or c.created_at + -- when the family has no non-deleted messages. + COALESCE(activity.last_activity_at, c.created_at) AS last_activity_at + FROM chats c + LEFT JOIN LATERAL ( + SELECT MAX(cm.created_at) AS last_activity_at + FROM chat_messages cm + JOIN chats fc ON fc.id = cm.chat_id + WHERE (fc.id = c.id OR fc.root_chat_id = c.id) + AND cm.deleted = false + ) activity ON TRUE + WHERE c.archived = false + AND c.pin_order = 0 + AND c.parent_chat_id IS NULL -- roots only + AND c.created_at < @archive_cutoff::timestamptz + -- New active statuses must be added here to prevent archiving. + AND c.status NOT IN ('running', 'pending', 'paused', 'requires_action') + AND COALESCE(activity.last_activity_at, c.created_at) < @archive_cutoff::timestamptz + -- Sorting by created_at lets Postgres drive the scan from the + -- partial index instead of evaluating every LATERAL subquery + -- before sorting. All candidates are past the cutoff, so the + -- archive order is immaterial once the backlog drains. + ORDER BY c.created_at ASC + LIMIT @limit_count +), +archived AS ( + UPDATE chats c + SET archived = true, pin_order = 0, updated_at = NOW() + FROM to_archive t + WHERE (c.id = t.id OR c.root_chat_id = t.id) -- cascade to children + AND c.archived = false + RETURNING c.* +) +SELECT + a.*, + -- Children inherit their root's activity so last_activity_at is never null. 
+ COALESCE( + t.last_activity_at, + (SELECT tr.last_activity_at FROM to_archive tr WHERE tr.id = a.root_chat_id), + a.created_at + )::timestamptz AS last_activity_at +FROM archived a +LEFT JOIN to_archive t ON t.id = a.id +ORDER BY (a.root_chat_id IS NULL) DESC, a.owner_id ASC, a.created_at ASC, a.id ASC; diff --git a/coderd/database/queries/siteconfig.sql b/coderd/database/queries/siteconfig.sql index 1bf22448e1..0a02ace3a8 100644 --- a/coderd/database/queries/siteconfig.sql +++ b/coderd/database/queries/siteconfig.sql @@ -302,3 +302,17 @@ INSERT INTO site_configs (key, value) VALUES ('agents_chat_retention_days', CAST(@retention_days AS integer)::text) ON CONFLICT (key) DO UPDATE SET value = CAST(@retention_days AS integer)::text WHERE site_configs.key = 'agents_chat_retention_days'; + +-- name: GetChatAutoArchiveDays :one +-- Auto-archive window in days. 0 disables. +SELECT COALESCE( + (SELECT value::integer FROM site_configs + WHERE key = 'agents_chat_auto_archive_days'), + @default_auto_archive_days::integer +) :: integer AS auto_archive_days; + +-- name: UpsertChatAutoArchiveDays :exec +INSERT INTO site_configs (key, value) +VALUES ('agents_chat_auto_archive_days', CAST(@auto_archive_days AS integer)::text) +ON CONFLICT (key) DO UPDATE SET value = CAST(@auto_archive_days AS integer)::text +WHERE site_configs.key = 'agents_chat_auto_archive_days'; diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index 78b39a156d..fd31ff9e16 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -4329,6 +4329,58 @@ func (api *API) putChatRetentionDays(rw http.ResponseWriter, r *http.Request) { rw.WriteHeader(http.StatusNoContent) } +// getChatAutoArchiveDays returns the deployment-wide auto-archive +// window. Any authenticated user can read it (same as retention +// days); writes require admin. +// +//nolint:revive // get-return: revive assumes get* must be a getter, but this is an HTTP handler. 
+func (api *API) getChatAutoArchiveDays(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + autoArchiveDays, err := api.Database.GetChatAutoArchiveDays(ctx, codersdk.DefaultChatAutoArchiveDays) + if err != nil { + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to get chat auto-archive days.", + Detail: err.Error(), + }) + return + } + httpapi.Write(ctx, rw, http.StatusOK, codersdk.ChatAutoArchiveDaysResponse{ + AutoArchiveDays: autoArchiveDays, + }) +} + +// Upper bound for the auto-archive window. Update the frontend +// constant if a settings UI is added. +const autoArchiveDaysMaximum = 3650 // ~10 years + +// putChatAutoArchiveDays updates the deployment-wide auto-archive +// window. Admin-only; documented in docs/ai-coder/agents/chats-api.md. +func (api *API) putChatAutoArchiveDays(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + if !api.Authorize(r, policy.ActionUpdate, rbac.ResourceDeploymentConfig) { + httpapi.Forbidden(rw) + return + } + var req codersdk.UpdateChatAutoArchiveDaysRequest + if !httpapi.Read(ctx, rw, r, &req) { + return + } + if req.AutoArchiveDays < 0 || req.AutoArchiveDays > autoArchiveDaysMaximum { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: fmt.Sprintf("Auto-archive days must be between 0 and %d.", autoArchiveDaysMaximum), + }) + return + } + if err := api.Database.UpsertChatAutoArchiveDays(ctx, req.AutoArchiveDays); err != nil { + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to update chat auto-archive days.", + Detail: err.Error(), + }) + return + } + rw.WriteHeader(http.StatusNoContent) +} + // EXPERIMENTAL: this endpoint is experimental and is subject to change. // //nolint:revive // get-return: revive assumes get* must be a getter, but this is an HTTP handler. 
diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go index c3f21d1df4..5d93be2e6a 100644 --- a/coderd/exp_chats_test.go +++ b/coderd/exp_chats_test.go @@ -10962,6 +10962,69 @@ func TestChatRetentionDays(t *testing.T) { requireSDKError(t, err, http.StatusBadRequest) } +func TestChatAutoArchiveDays(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitLong) + + adminClient := newChatClient(t) + firstUser := coderdtest.CreateFirstUser(t, adminClient.Client) + memberClientRaw, _ := coderdtest.CreateAnotherUser(t, adminClient.Client, firstUser.OrganizationID) + memberClient := codersdk.NewExperimentalClient(memberClientRaw) + + // Default value is DefaultChatAutoArchiveDays (0, disabled) when + // nothing has been configured. + resp, err := adminClient.GetChatAutoArchiveDays(ctx) + require.NoError(t, err, "get default") + require.Equal(t, codersdk.DefaultChatAutoArchiveDays, resp.AutoArchiveDays, "default should match DefaultChatAutoArchiveDays") + + // Admin can set auto-archive days to 45. + err = adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{ + AutoArchiveDays: 45, + }) + require.NoError(t, err, "admin set 45") + + resp, err = adminClient.GetChatAutoArchiveDays(ctx) + require.NoError(t, err, "get after set") + require.Equal(t, int32(45), resp.AutoArchiveDays, "should return 45") + + // Non-admin member can read the value (same as retention days). + memberResp, err := memberClient.GetChatAutoArchiveDays(ctx) + require.NoError(t, err, "member read") + require.Equal(t, int32(45), memberResp.AutoArchiveDays, "member sees same value") + + // Non-admin member cannot write. + err = memberClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{AutoArchiveDays: 7}) + requireSDKError(t, err, http.StatusForbidden) + + // Admin can disable auto-archive by setting 0. 
+ err = adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{ + AutoArchiveDays: 0, + }) + require.NoError(t, err, "admin set 0") + + resp, err = adminClient.GetChatAutoArchiveDays(ctx) + require.NoError(t, err, "get after zero") + require.Equal(t, int32(0), resp.AutoArchiveDays, "should be 0 after disable") + + // An aggressive value of 1 is accepted (no pre-warn to break). + err = adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{ + AutoArchiveDays: 1, + }) + require.NoError(t, err, "admin set 1") + + // Validation: negative value is rejected. + err = adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{ + AutoArchiveDays: -1, + }) + requireSDKError(t, err, http.StatusBadRequest) + + // Validation: exceeding the 3650-day maximum is rejected. + err = adminClient.UpdateChatAutoArchiveDays(ctx, codersdk.UpdateChatAutoArchiveDaysRequest{ + AutoArchiveDays: 3651, // autoArchiveDaysMaximum + 1; keep in sync with coderd/exp_chats.go. + }) + requireSDKError(t, err, http.StatusBadRequest) +} + //nolint:tparallel // subtests share state via client, firstUser, modelConfig func TestUserChatCompactionThresholds(t *testing.T) { t.Parallel() diff --git a/codersdk/chats.go b/codersdk/chats.go index 7f66cb4203..fe66e77a00 100644 --- a/codersdk/chats.go +++ b/codersdk/chats.go @@ -796,6 +796,11 @@ type ChatDebugStep struct { // Zero means disabled — the template's own autostop setting applies. const DefaultChatWorkspaceTTL = 0 +// DefaultChatAutoArchiveDays is the default auto-archive window, in +// days, applied when no site config row exists. Zero disables +// auto-archival. +const DefaultChatAutoArchiveDays int32 = 0 + // ChatWorkspaceTTLResponse is the response for getting the chat // workspace TTL setting. 
type ChatWorkspaceTTLResponse struct { @@ -823,6 +828,17 @@ type UpdateChatRetentionDaysRequest struct { RetentionDays int32 `json:"retention_days"` } +// ChatAutoArchiveDaysResponse contains the current chat auto-archive setting. +type ChatAutoArchiveDaysResponse struct { + AutoArchiveDays int32 `json:"auto_archive_days"` +} + +// UpdateChatAutoArchiveDaysRequest is a request to update the chat +// auto-archive period. +type UpdateChatAutoArchiveDaysRequest struct { + AutoArchiveDays int32 `json:"auto_archive_days"` +} + // ParseChatWorkspaceTTL parses a stored TTL string, returning the // default when the value is empty. func ParseChatWorkspaceTTL(s string) (time.Duration, error) { @@ -2183,6 +2199,33 @@ func (c *ExperimentalClient) UpdateChatRetentionDays(ctx context.Context, req Up return nil } +// GetChatAutoArchiveDays returns the configured chat auto-archive period. +func (c *ExperimentalClient) GetChatAutoArchiveDays(ctx context.Context) (ChatAutoArchiveDaysResponse, error) { + res, err := c.Request(ctx, http.MethodGet, "/api/experimental/chats/config/auto-archive-days", nil) + if err != nil { + return ChatAutoArchiveDaysResponse{}, err + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return ChatAutoArchiveDaysResponse{}, ReadBodyAsError(res) + } + var resp ChatAutoArchiveDaysResponse + return resp, json.NewDecoder(res.Body).Decode(&resp) +} + +// UpdateChatAutoArchiveDays updates the chat auto-archive period. +func (c *ExperimentalClient) UpdateChatAutoArchiveDays(ctx context.Context, req UpdateChatAutoArchiveDaysRequest) error { + res, err := c.Request(ctx, http.MethodPut, "/api/experimental/chats/config/auto-archive-days", req) + if err != nil { + return err + } + defer res.Body.Close() + if res.StatusCode != http.StatusNoContent { + return ReadBodyAsError(res) + } + return nil +} + // GetChatTemplateAllowlist returns the deployment-wide chat template allowlist. 
func (c *ExperimentalClient) GetChatTemplateAllowlist(ctx context.Context) (ChatTemplateAllowlist, error) { res, err := c.Request(ctx, http.MethodGet, "/api/experimental/chats/config/template-allowlist", nil) diff --git a/docs/admin/integrations/prometheus.md b/docs/admin/integrations/prometheus.md index ec33e3a0c3..b78dbfe3b1 100644 --- a/docs/admin/integrations/prometheus.md +++ b/docs/admin/integrations/prometheus.md @@ -198,6 +198,7 @@ deployment. They will always be available from the agent. | `coderd_authz_authorize_duration_seconds` | histogram | Duration of the 'Authorize' call in seconds. Only counts calls that succeed. | `allowed` | | `coderd_authz_prepare_authorize_duration_seconds` | histogram | Duration of the 'PrepareAuthorize' call in seconds. | | | `coderd_build_info` | gauge | Describes the current build/version of the Coder server. Value is always 1. | `revision` `version` | +| `coderd_chat_auto_archive_records_archived_total` | counter | Total number of chats archived by the auto-archive job (counting both roots and cascaded children). | | | `coderd_chatd_chats` | gauge | Number of chats being processed, by state. | `state` | | `coderd_chatd_compaction_total` | counter | Total compaction outcomes (only recorded when compaction was triggered or failed). | `model` `provider` `result` | | `coderd_chatd_message_count` | histogram | Number of messages in the prompt per LLM request. | `model` `provider` | diff --git a/docs/ai-coder/agents/chats-api.md b/docs/ai-coder/agents/chats-api.md index 2d6c9f664a..03a4560893 100644 --- a/docs/ai-coder/agents/chats-api.md +++ b/docs/ai-coder/agents/chats-api.md @@ -326,8 +326,38 @@ appear in the `files` field on subsequent | Status | Meaning | |-------------------|------------------------------------------------------------------------------| -| `waiting` | No pending work (newly created, finished, or interrupted). | +| `waiting` | Idle. Newly created, finished successfully, or interrupted. 
| | `pending` | Queued for processing. | | `running` | Agent is actively working. | +| `paused` | Agent is paused (for example, waiting for user input). | +| `completed` | Agent finished and the task is complete. | | `error` | Agent encountered an error. | | `requires_action` | Agent invoked a client-provided tool and needs the result before continuing. | + +## Configuration + +Deployment-wide chat settings are read and written under +`/api/experimental/chats/config/*`. Reading config requires authentication; writing requires +deployment-admin privileges. + +### Auto-archive window + +Chats whose newest non-deleted message is older than +`auto_archive_days` are automatically archived by a background job. +Pinned chats and chats belonging to a still-active thread are +exempt. `0` disables the feature and is the default. + +```sh +# Read +curl -H "Coder-Session-Token: $CODER_SESSION_TOKEN" \ + https://coder.example.com/api/experimental/chats/config/auto-archive-days +# { "auto_archive_days": 90 } + +# Update +curl -X PUT -H "Coder-Session-Token: $CODER_SESSION_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"auto_archive_days": 60}' \ + https://coder.example.com/api/experimental/chats/config/auto-archive-days +``` + +Accepted range: `0` to `3650` (~10 years). diff --git a/docs/ai-coder/agents/platform-controls/chat-auto-archive.md b/docs/ai-coder/agents/platform-controls/chat-auto-archive.md new file mode 100644 index 0000000000..ad458dbda5 --- /dev/null +++ b/docs/ai-coder/agents/platform-controls/chat-auto-archive.md @@ -0,0 +1,84 @@ +# Conversation Auto-Archive + +Coder Agents automatically archives long-inactive conversations so they +drop out of active chat lists without any user intervention. Archived +conversations are still visible (and can be unarchived) until they age +out of the separate retention window, at which point they are purged. + +## How it works + +A background process runs approximately every 10 minutes.
On each tick +it scans the chat database for root conversations whose most recent +non-deleted message is older than the configured auto-archive window +and flips them from "active" to "archived". Cascaded children (chats +linked into a larger conversation via `root_chat_id`) are archived +alongside their parent so the conversation stays coherent. + +Activity is defined as the most recent non-deleted message in the +conversation family, counting messages from every role. Root chats +whose status indicates ongoing work (`running`, `pending`, `paused`, +or `requires_action`) are never selected for auto-archiving. +Children inherit their root's archival decision. + +Pinned root conversations (those with a non-zero pin order) are never +selected for auto-archiving. Children are archived alongside their +root regardless of individual pin status. Admins and users who want +to retain a conversation long after its last message should pin the +root. + +## Interaction with retention + +Auto-archive and deletion are two independent controls: + +| Control | What it does | Default | +|---------------------|---------------------------------------------------------------------------|-------------------| +| Auto-archive window | Moves inactive chats to the archived state | 0 days (disabled) | +| Retention window | Deletes chats that have been archived long enough and orphaned chat files | 30 days | + +A conversation needs to be inactive for `auto_archive_days`, then +archived for `retention_days`, before it is deleted. The two windows +stack additively. With auto-archive disabled by default, inactive +chats are never auto-archived; once an admin opts in by setting a +non-zero `auto_archive_days`, a conversation lives for at least +`auto_archive_days + retention_days` from its last message before it +is permanently removed. 
+ +Auto-archive (like manual archive) resets the per-chat retention +clock, so the full `retention_days` runs from the tick that archived +the chat, not from its last message. + +Setting either value to `0` disables that step. Setting +`auto_archive_days` to `0` means inactive chats are never +auto-archived (users still archive manually). Setting +`retention_days` to `0` means archived chats are kept indefinitely. + +## Configuration + +The auto-archive window is stored as the +`agents_chat_auto_archive_days` key in the `site_configs` table. +The default is `0` (disabled); set to a positive number of days to +enable auto-archiving. + +Use the admin API to read or update the value: + + GET /api/experimental/chats/config/auto-archive-days + PUT /api/experimental/chats/config/auto-archive-days + +## Rollout advice + +Auto-archive is disabled by default, so upgrading to a release that +includes this feature will not archive any existing chats until an +admin opts in. The first tick after enabling auto-archive on a +deployment with a long history will process up to 1,000 root chats +(and their children). If your deployment has a large backlog, the +initial rollout will span many ticks. This is intentional and avoids +stalling the rest of `dbpurge` during the first run. To disable, +set `auto_archive_days` back to `0`. + +## Audit trail + +Each auto-archived root chat produces an audit log entry with the +background subsystem tag `chat_auto_archive`. Cascaded children are +not audited individually. The audit entry records the chat ID, owner +ID, and organization ID, and the diff shows `archived` flipping from +`false` to `true`. 
diff --git a/docs/ai-coder/agents/platform-controls/chat-retention.md b/docs/ai-coder/agents/platform-controls/chat-retention.md index be30f46459..2b12df9af3 100644 --- a/docs/ai-coder/agents/platform-controls/chat-retention.md +++ b/docs/ai-coder/agents/platform-controls/chat-retention.md @@ -4,6 +4,10 @@ Coder Agents automatically cleans up old conversation data to manage database growth. Archived conversations and their associated files are periodically purged based on a configurable retention period. +Conversations become eligible for purging only after they are archived. Old +conversations can be archived manually, or automatically. See +[Auto-Archive](./chat-auto-archive.md) for how the two controls interact. + ## How it works A background process runs approximately every 10 minutes to remove expired diff --git a/docs/manifest.json b/docs/manifest.json index 1776952ec3..32d777824d 100644 --- a/docs/manifest.json +++ b/docs/manifest.json @@ -1268,6 +1268,12 @@ "description": "Automatic cleanup of old conversation data", "path": "./ai-coder/agents/platform-controls/chat-retention.md", "state": ["early access"] + }, + { + "title": "Auto-Archive", + "description": "Automatic archiving of inactive conversations", + "path": "./ai-coder/agents/platform-controls/chat-auto-archive.md", + "state": ["early access"] } ] }, diff --git a/scripts/metricsdocgen/generated_metrics b/scripts/metricsdocgen/generated_metrics index ce5e9f289d..43dd6cfdeb 100644 --- a/scripts/metricsdocgen/generated_metrics +++ b/scripts/metricsdocgen/generated_metrics @@ -226,6 +226,9 @@ coderd_authz_prepare_authorize_duration_seconds 0 # HELP coderd_build_info Describes the current build/version of the Coder server. Value is always 1. # TYPE coderd_build_info gauge coderd_build_info{version="",revision=""} 0 +# HELP coderd_chat_auto_archive_records_archived_total Total number of chats archived by the auto-archive job (counting both roots and cascaded children). 
+# TYPE coderd_chat_auto_archive_records_archived_total counter +coderd_chat_auto_archive_records_archived_total 0 # HELP coderd_chatd_chats Number of chats being processed, by state. # TYPE coderd_chatd_chats gauge coderd_chatd_chats{state=""} 0 diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index ddedc3e99d..8886232468 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1291,6 +1291,14 @@ export interface ChatAgentModelOverrideResponse { readonly is_malformed: boolean; } +// From codersdk/chats.go +/** + * ChatAutoArchiveDaysResponse contains the current chat auto-archive setting. + */ +export interface ChatAutoArchiveDaysResponse { + readonly auto_archive_days: number; +} + // From codersdk/chats.go export type ChatBusyBehavior = "interrupt" | "queue"; @@ -3403,6 +3411,14 @@ export interface DebugProfileOptions { readonly Profiles: readonly string[]; } +// From codersdk/chats.go +/** + * DefaultChatAutoArchiveDays is the default auto-archive window, in + * days, applied when no site config row exists. Zero disables + * auto-archival. + */ +export const DefaultChatAutoArchiveDays = 0; + // From codersdk/chats.go /** * DefaultChatWorkspaceTTL is the default TTL for chat workspaces. @@ -7685,6 +7701,15 @@ export interface UpdateChatAgentModelOverrideRequest { readonly model_config_id: string; } +// From codersdk/chats.go +/** + * UpdateChatAutoArchiveDaysRequest is a request to update the chat + * auto-archive period. + */ +export interface UpdateChatAutoArchiveDaysRequest { + readonly auto_archive_days: number; +} + // From codersdk/chats.go /** * UpdateChatDebugLoggingAllowUsersRequest is the admin request to