mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: report user secrets adoption summary in telemetry (#24854)
Add a deployment-wide user secrets summary to the telemetry snapshot so we can track adoption of user secrets. The summary reports: (1) a breakdown of secrets by which injection fields are populated (EnvNameOnly, FilePathOnly, Both, Neither); and (2) the distribution of secrets per user (max, p25, p50, p75, p90). All metrics are scoped to active non-system users. Soft-deleted users are excluded. The percentile distribution is computed across the entire active non-system user base, including users with zero secrets, so the percentiles reflect deployment-wide adoption. Assisted by Coder Agents.
This commit is contained in:
@@ -4445,6 +4445,17 @@ func (q *querier) GetUserSecretByUserIDAndName(ctx context.Context, arg database
|
||||
return q.db.GetUserSecretByUserIDAndName(ctx, arg)
|
||||
}
|
||||
|
||||
func (q *querier) GetUserSecretsTelemetrySummary(ctx context.Context) (database.GetUserSecretsTelemetrySummaryRow, error) {
|
||||
// Telemetry queries are called from system contexts only. The
|
||||
// query reads aggregate counts across all users' secrets, so
|
||||
// authorize against the resource type rather than a per-user
|
||||
// owner.
|
||||
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceUserSecret); err != nil {
|
||||
return database.GetUserSecretsTelemetrySummaryRow{}, err
|
||||
}
|
||||
return q.db.GetUserSecretsTelemetrySummary(ctx)
|
||||
}
|
||||
|
||||
func (q *querier) GetUserStatusCounts(ctx context.Context, arg database.GetUserStatusCountsParams) ([]database.GetUserStatusCountsRow, error) {
|
||||
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceUser); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -5765,6 +5765,10 @@ func (s *MethodTestSuite) TestUserSecrets() {
|
||||
Asserts(secret, policy.ActionRead).
|
||||
Returns(secret)
|
||||
}))
|
||||
s.Run("GetUserSecretsTelemetrySummary", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) {
|
||||
dbm.EXPECT().GetUserSecretsTelemetrySummary(gomock.Any()).Return(database.GetUserSecretsTelemetrySummaryRow{}, nil).AnyTimes()
|
||||
check.Args().Asserts(rbac.ResourceUserSecret, policy.ActionRead)
|
||||
}))
|
||||
}
|
||||
|
||||
func (s *MethodTestSuite) TestUsageEvents() {
|
||||
|
||||
@@ -2904,6 +2904,14 @@ func (m queryMetricsStore) GetUserSecretByUserIDAndName(ctx context.Context, arg
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) GetUserSecretsTelemetrySummary(ctx context.Context) (database.GetUserSecretsTelemetrySummaryRow, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.GetUserSecretsTelemetrySummary(ctx)
|
||||
m.queryLatencies.WithLabelValues("GetUserSecretsTelemetrySummary").Observe(time.Since(start).Seconds())
|
||||
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetUserSecretsTelemetrySummary").Inc()
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) GetUserStatusCounts(ctx context.Context, arg database.GetUserStatusCountsParams) ([]database.GetUserStatusCountsRow, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.GetUserStatusCounts(ctx, arg)
|
||||
|
||||
@@ -5432,6 +5432,21 @@ func (mr *MockStoreMockRecorder) GetUserSecretByUserIDAndName(ctx, arg any) *gom
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetUserSecretByUserIDAndName", reflect.TypeOf((*MockStore)(nil).GetUserSecretByUserIDAndName), ctx, arg)
|
||||
}
|
||||
|
||||
// GetUserSecretsTelemetrySummary mocks base method.
|
||||
func (m *MockStore) GetUserSecretsTelemetrySummary(ctx context.Context) (database.GetUserSecretsTelemetrySummaryRow, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "GetUserSecretsTelemetrySummary", ctx)
|
||||
ret0, _ := ret[0].(database.GetUserSecretsTelemetrySummaryRow)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// GetUserSecretsTelemetrySummary indicates an expected call of GetUserSecretsTelemetrySummary.
|
||||
func (mr *MockStoreMockRecorder) GetUserSecretsTelemetrySummary(ctx any) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetUserSecretsTelemetrySummary", reflect.TypeOf((*MockStore)(nil).GetUserSecretsTelemetrySummary), ctx)
|
||||
}
|
||||
|
||||
// GetUserStatusCounts mocks base method.
|
||||
func (m *MockStore) GetUserStatusCounts(ctx context.Context, arg database.GetUserStatusCountsParams) ([]database.GetUserStatusCountsRow, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
||||
Generated
+1
-1
@@ -2439,7 +2439,7 @@ CREATE TABLE telemetry_items (
|
||||
CREATE TABLE telemetry_locks (
|
||||
event_type text NOT NULL,
|
||||
period_ending_at timestamp with time zone NOT NULL,
|
||||
CONSTRAINT telemetry_lock_event_type_constraint CHECK ((event_type = ANY (ARRAY['aibridge_interceptions_summary'::text, 'boundary_usage_summary'::text])))
|
||||
CONSTRAINT telemetry_lock_event_type_constraint CHECK ((event_type = ANY (ARRAY['aibridge_interceptions_summary'::text, 'boundary_usage_summary'::text, 'user_secrets_summary'::text])))
|
||||
);
|
||||
|
||||
COMMENT ON TABLE telemetry_locks IS 'Telemetry lock tracking table for deduplication of heartbeat events across replicas.';
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
-- Roll telemetry_locks back to the event_type constraint that predates
-- user_secrets_summary. Rows carrying that event type are deleted up
-- front; otherwise re-adding the narrower CHECK would be rejected.
DELETE FROM telemetry_locks
WHERE event_type = 'user_secrets_summary';

-- Drop and re-create the constraint in a single ALTER TABLE.
ALTER TABLE telemetry_locks
    DROP CONSTRAINT telemetry_lock_event_type_constraint,
    ADD CONSTRAINT telemetry_lock_event_type_constraint
        CHECK (event_type IN ('aibridge_interceptions_summary', 'boundary_usage_summary'));
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Allow 'user_secrets_summary' as a telemetry_locks event_type.
--
-- The user secrets aggregate has no natural per-row UUID that the
-- telemetry server could dedupe on, so a single replica per snapshot
-- period is elected to report it through this lock table.
ALTER TABLE telemetry_locks
    DROP CONSTRAINT telemetry_lock_event_type_constraint,
    ADD CONSTRAINT telemetry_lock_event_type_constraint
        CHECK (event_type IN ('aibridge_interceptions_summary', 'boundary_usage_summary', 'user_secrets_summary'));
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
-- Smoke fixture: one user_secrets_summary lock row with a fixed period end.
INSERT INTO
    telemetry_locks (event_type, period_ending_at)
VALUES
    ('user_secrets_summary', '2026-01-01 00:00:00+00');
|
||||
@@ -717,6 +717,31 @@ type sqlcQuerier interface {
|
||||
GetUserNotificationPreferences(ctx context.Context, userID uuid.UUID) ([]NotificationPreference, error)
|
||||
GetUserSecretByID(ctx context.Context, id uuid.UUID) (UserSecret, error)
|
||||
GetUserSecretByUserIDAndName(ctx context.Context, arg GetUserSecretByUserIDAndNameParams) (UserSecret, error)
|
||||
// Returns deployment-wide aggregates for the telemetry snapshot.
|
||||
//
|
||||
// The denominator for both user-level counts and the per-user
|
||||
// distribution is active non-system users. Specifically:
|
||||
//
|
||||
// * deleted = false: Coder soft-deletes by flipping users.deleted
|
||||
// rather than removing rows, so secrets persist after delete but
|
||||
// are unreachable.
|
||||
// * status = 'active': dormant users (no recent activity) and
|
||||
// suspended users (explicitly disabled) cannot use secrets, so
|
||||
// they shouldn't dilute the percentile distribution as
|
||||
// zero-secret entries.
|
||||
// * is_system = false: internal subjects like the prebuilds user
|
||||
// never use secrets in the normal flow.
|
||||
//
|
||||
// Status transitions move users in and out of this denominator, so a
|
||||
// snapshot's UsersWithSecrets can drop without any secret being
|
||||
// deleted.
|
||||
//
|
||||
// The percentile distribution is computed across all active non-system
|
||||
// users, including those with zero secrets, so the percentiles reflect
|
||||
// deployment-wide adoption rather than only the power-user subset.
|
||||
// percentile_disc returns an actual integer count from the underlying
|
||||
// values rather than interpolating between rows.
|
||||
GetUserSecretsTelemetrySummary(ctx context.Context) (GetUserSecretsTelemetrySummaryRow, error)
|
||||
// GetUserStatusCounts returns the count of users in each status over time.
|
||||
// The time range is inclusively defined by the start_time and end_time parameters.
|
||||
GetUserStatusCounts(ctx context.Context, arg GetUserStatusCountsParams) ([]GetUserStatusCountsRow, error)
|
||||
|
||||
@@ -24890,6 +24890,97 @@ func (q *sqlQuerier) GetUserSecretByUserIDAndName(ctx context.Context, arg GetUs
|
||||
return i, err
|
||||
}
|
||||
|
||||
// getUserSecretsTelemetrySummary is the SQL text for the
// GetUserSecretsTelemetrySummary query. It selects one row of aggregates
// over user_secrets, restricted to users that are not deleted, are not
// system users, and have status 'active'. The literal is left untouched:
// its contents are what gets sent to the database.
const getUserSecretsTelemetrySummary = `-- name: GetUserSecretsTelemetrySummary :one
|
||||
WITH active_users AS (
|
||||
SELECT id AS user_id
|
||||
FROM users
|
||||
WHERE deleted = false
|
||||
AND is_system = false
|
||||
AND status = 'active'::user_status
|
||||
),
|
||||
per_user AS (
|
||||
SELECT au.user_id, COUNT(us.id)::bigint AS n
|
||||
FROM active_users au
|
||||
LEFT JOIN user_secrets us ON us.user_id = au.user_id
|
||||
GROUP BY au.user_id
|
||||
),
|
||||
secrets_filtered AS (
|
||||
SELECT us.env_name, us.file_path
|
||||
FROM user_secrets us
|
||||
JOIN active_users au ON au.user_id = us.user_id
|
||||
)
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE n > 0)::bigint AS users_with_secrets,
|
||||
(SELECT COUNT(*) FROM secrets_filtered)::bigint AS total_secrets,
|
||||
(SELECT COUNT(*) FROM secrets_filtered WHERE env_name != '' AND file_path = '' )::bigint AS env_name_only,
|
||||
(SELECT COUNT(*) FROM secrets_filtered WHERE env_name = '' AND file_path != '')::bigint AS file_path_only,
|
||||
(SELECT COUNT(*) FROM secrets_filtered WHERE env_name != '' AND file_path != '')::bigint AS both,
|
||||
(SELECT COUNT(*) FROM secrets_filtered WHERE env_name = '' AND file_path = '' )::bigint AS neither,
|
||||
COALESCE(MAX(n), 0)::bigint AS secrets_per_user_max,
|
||||
COALESCE(percentile_disc(0.25) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p25,
|
||||
COALESCE(percentile_disc(0.50) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p50,
|
||||
COALESCE(percentile_disc(0.75) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p75,
|
||||
COALESCE(percentile_disc(0.90) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p90
|
||||
FROM per_user
|
||||
`
|
||||
|
||||
// GetUserSecretsTelemetrySummaryRow is the single result row of the
// GetUserSecretsTelemetrySummary query. Field order matches the query's
// SELECT list.
type GetUserSecretsTelemetrySummaryRow struct {
	// Adoption totals.
	UsersWithSecrets int64 `db:"users_with_secrets" json:"users_with_secrets"`
	TotalSecrets     int64 `db:"total_secrets" json:"total_secrets"`

	// Secrets split by which of env_name / file_path is non-empty.
	EnvNameOnly  int64 `db:"env_name_only" json:"env_name_only"`
	FilePathOnly int64 `db:"file_path_only" json:"file_path_only"`
	Both         int64 `db:"both" json:"both"`
	Neither      int64 `db:"neither" json:"neither"`

	// Secrets-per-user distribution: maximum and discrete percentiles.
	SecretsPerUserMax int64 `db:"secrets_per_user_max" json:"secrets_per_user_max"`
	SecretsPerUserP25 int64 `db:"secrets_per_user_p25" json:"secrets_per_user_p25"`
	SecretsPerUserP50 int64 `db:"secrets_per_user_p50" json:"secrets_per_user_p50"`
	SecretsPerUserP75 int64 `db:"secrets_per_user_p75" json:"secrets_per_user_p75"`
	SecretsPerUserP90 int64 `db:"secrets_per_user_p90" json:"secrets_per_user_p90"`
}
|
||||
|
||||
// Returns deployment-wide aggregates for the telemetry snapshot.
|
||||
//
|
||||
// The denominator for both user-level counts and the per-user
|
||||
// distribution is active non-system users. Specifically:
|
||||
//
|
||||
// - deleted = false: Coder soft-deletes by flipping users.deleted
|
||||
// rather than removing rows, so secrets persist after delete but
|
||||
// are unreachable.
|
||||
// - status = 'active': dormant users (no recent activity) and
|
||||
// suspended users (explicitly disabled) cannot use secrets, so
|
||||
// they shouldn't dilute the percentile distribution as
|
||||
// zero-secret entries.
|
||||
// - is_system = false: internal subjects like the prebuilds user
|
||||
// never use secrets in the normal flow.
|
||||
//
|
||||
// Status transitions move users in and out of this denominator, so a
|
||||
// snapshot's UsersWithSecrets can drop without any secret being
|
||||
// deleted.
|
||||
//
|
||||
// The percentile distribution is computed across all active non-system
|
||||
// users, including those with zero secrets, so the percentiles reflect
|
||||
// deployment-wide adoption rather than only the power-user subset.
|
||||
// percentile_disc returns an actual integer count from the underlying
|
||||
// values rather than interpolating between rows.
|
||||
func (q *sqlQuerier) GetUserSecretsTelemetrySummary(ctx context.Context) (GetUserSecretsTelemetrySummaryRow, error) {
|
||||
row := q.db.QueryRowContext(ctx, getUserSecretsTelemetrySummary)
|
||||
var i GetUserSecretsTelemetrySummaryRow
|
||||
err := row.Scan(
|
||||
&i.UsersWithSecrets,
|
||||
&i.TotalSecrets,
|
||||
&i.EnvNameOnly,
|
||||
&i.FilePathOnly,
|
||||
&i.Both,
|
||||
&i.Neither,
|
||||
&i.SecretsPerUserMax,
|
||||
&i.SecretsPerUserP25,
|
||||
&i.SecretsPerUserP50,
|
||||
&i.SecretsPerUserP75,
|
||||
&i.SecretsPerUserP90,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const listUserSecrets = `-- name: ListUserSecrets :many
|
||||
SELECT
|
||||
id, user_id, name, description,
|
||||
|
||||
@@ -65,3 +65,60 @@ RETURNING *;
|
||||
DELETE FROM user_secrets
|
||||
WHERE user_id = @user_id AND name = @name
|
||||
RETURNING *;
|
||||
|
||||
-- name: GetUserSecretsTelemetrySummary :one
-- Deployment-wide user secrets aggregates for the telemetry snapshot.
--
-- Both the user-level counts and the per-user distribution are taken
-- over active non-system users only:
--
-- * deleted = false: users are soft-deleted by flipping users.deleted
--   rather than removing rows, so their secrets persist after delete
--   but are unreachable.
-- * status = 'active': dormant users (no recent activity) and
--   suspended users (explicitly disabled) cannot use secrets, so they
--   should not dilute the percentile distribution as zero-secret
--   entries.
-- * is_system = false: internal subjects such as the prebuilds user
--   never use secrets in the normal flow.
--
-- Because status transitions move users in and out of that population,
-- UsersWithSecrets can drop between snapshots without any secret being
-- deleted.
--
-- The percentiles cover every active non-system user, including those
-- with zero secrets, so they describe deployment-wide adoption rather
-- than only the power-user subset. percentile_disc yields an actual
-- integer count from the underlying values instead of interpolating
-- between rows.
WITH active_users AS (
    SELECT id AS user_id
    FROM users
    WHERE NOT deleted
      AND NOT is_system
      AND status = 'active'::user_status
),
-- One row per active user; users without secrets get n = 0.
per_user AS (
    SELECT au.user_id, COUNT(us.id)::bigint AS n
    FROM active_users au
    LEFT JOIN user_secrets us ON us.user_id = au.user_id
    GROUP BY au.user_id
),
-- Secrets owned by active users only.
secrets_filtered AS (
    SELECT us.env_name, us.file_path
    FROM user_secrets us
    JOIN active_users au ON au.user_id = us.user_id
),
-- Always exactly one row: totals split by populated injection fields.
breakdown AS (
    SELECT
        COUNT(*)                                                       AS total_secrets,
        COUNT(*) FILTER (WHERE env_name != '' AND file_path = '')      AS env_name_only,
        COUNT(*) FILTER (WHERE env_name = ''  AND file_path != '')     AS file_path_only,
        COUNT(*) FILTER (WHERE env_name != '' AND file_path != '')     AS both,
        COUNT(*) FILTER (WHERE env_name = ''  AND file_path = '')      AS neither
    FROM secrets_filtered
)
SELECT
    COUNT(*) FILTER (WHERE n > 0)::bigint AS users_with_secrets,
    (SELECT total_secrets  FROM breakdown)::bigint AS total_secrets,
    (SELECT env_name_only  FROM breakdown)::bigint AS env_name_only,
    (SELECT file_path_only FROM breakdown)::bigint AS file_path_only,
    (SELECT both           FROM breakdown)::bigint AS both,
    (SELECT neither        FROM breakdown)::bigint AS neither,
    COALESCE(MAX(n), 0)::bigint AS secrets_per_user_max,
    COALESCE(percentile_disc(0.25) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p25,
    COALESCE(percentile_disc(0.50) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p50,
    COALESCE(percentile_disc(0.75) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p75,
    COALESCE(percentile_disc(0.90) WITHIN GROUP (ORDER BY n), 0)::bigint AS secrets_per_user_p90
FROM per_user;
|
||||
|
||||
@@ -822,6 +822,18 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
|
||||
}
|
||||
return nil
|
||||
})
|
||||
eg.Go(func() error {
|
||||
summary, err := r.collectUserSecretsSummary(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("collect user secrets summary: %w", err)
|
||||
}
|
||||
// summary is nil when another replica already claimed the
|
||||
// telemetry lock for this period.
|
||||
if summary != nil {
|
||||
snapshot.UserSecretsSummary = summary
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
err := eg.Wait()
|
||||
if err != nil {
|
||||
@@ -952,6 +964,49 @@ func (r *remoteReporter) collectBoundaryUsageSummary(ctx context.Context) (*Boun
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectUserSecretsSummary returns a deployment-wide aggregate of user
|
||||
// secrets configuration. Returns nil if another replica has already
|
||||
// collected for this period.
|
||||
//
|
||||
// The summary has no natural per-row UUID for the telemetry server to
|
||||
// de-duplicate on, so we elect a single replica per snapshot period
|
||||
// via the telemetry_locks table.
|
||||
func (r *remoteReporter) collectUserSecretsSummary(ctx context.Context) (*UserSecretsSummary, error) {
|
||||
// Claim the telemetry lock for this period. Use snapshot frequency so
|
||||
// each telemetry snapshot period gets exactly one collection across
|
||||
// replicas.
|
||||
periodEndingAt := dbtime.Time(r.options.Clock.Now()).UTC().Truncate(r.options.SnapshotFrequency)
|
||||
err := r.options.Database.InsertTelemetryLock(ctx, database.InsertTelemetryLockParams{
|
||||
EventType: "user_secrets_summary",
|
||||
PeriodEndingAt: periodEndingAt,
|
||||
})
|
||||
if database.IsUniqueViolation(err, database.UniqueTelemetryLocksPkey) {
|
||||
r.options.Logger.Debug(ctx, "user secrets telemetry lock already claimed by another replica, skipping", slog.F("period_ending_at", periodEndingAt))
|
||||
return nil, nil //nolint:nilnil // This is simple to handle when dealing with telemetry.
|
||||
}
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("insert user secrets telemetry lock (period_ending_at=%q): %w", periodEndingAt, err)
|
||||
}
|
||||
|
||||
row, err := r.options.Database.GetUserSecretsTelemetrySummary(ctx)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("get user secrets telemetry summary: %w", err)
|
||||
}
|
||||
return &UserSecretsSummary{
|
||||
UsersWithSecrets: row.UsersWithSecrets,
|
||||
TotalSecrets: row.TotalSecrets,
|
||||
EnvNameOnly: row.EnvNameOnly,
|
||||
FilePathOnly: row.FilePathOnly,
|
||||
Both: row.Both,
|
||||
Neither: row.Neither,
|
||||
SecretsPerUserMax: row.SecretsPerUserMax,
|
||||
SecretsPerUserP25: row.SecretsPerUserP25,
|
||||
SecretsPerUserP50: row.SecretsPerUserP50,
|
||||
SecretsPerUserP75: row.SecretsPerUserP75,
|
||||
SecretsPerUserP90: row.SecretsPerUserP90,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func CollectTasks(ctx context.Context, db database.Store) ([]Task, error) {
|
||||
dbTasks, err := db.ListTasks(ctx, database.ListTasksParams{
|
||||
OwnerID: uuid.Nil,
|
||||
@@ -1554,6 +1609,7 @@ type Snapshot struct {
|
||||
ChatMessageSummaries []ChatMessageSummary `json:"chat_message_summaries"`
|
||||
ChatModelConfigs []ChatModelConfig `json:"chat_model_configs"`
|
||||
ChatDiffStatusSummary *ChatDiffStatusSummary `json:"chat_diff_status_summary"`
|
||||
UserSecretsSummary *UserSecretsSummary `json:"user_secrets_summary"`
|
||||
}
|
||||
|
||||
// Deployment contains information about the host running Coder.
|
||||
@@ -2409,6 +2465,38 @@ type ChatDiffStatusSummary struct {
|
||||
Closed int64 `json:"closed"`
|
||||
}
|
||||
|
||||
// UserSecretsSummary holds deployment-wide aggregates about user secrets.
//
// Every count is scoped to active non-system users, so soft-deleted
// accounts, dormant or suspended users, and internal subjects (e.g. the
// prebuilds user) do not skew the results. Status transitions move users
// in and out of that population, which means UsersWithSecrets can drop
// between snapshots without any secret being deleted.
//
// The SecretsPerUser* fields describe the distribution of secrets per
// user across the whole active non-system user base, including users
// with zero secrets, so the percentiles reflect deployment-wide adoption
// rather than only the power-user subset.
type UserSecretsSummary struct {
	// UsersWithSecrets counts active non-system users that own at least
	// one secret.
	UsersWithSecrets int64 `json:"users_with_secrets"`
	// TotalSecrets counts the secrets owned by those users.
	TotalSecrets int64 `json:"total_secrets"`

	// EnvNameOnly, FilePathOnly, Both, and Neither break TotalSecrets
	// down by which injection fields are populated.
	EnvNameOnly  int64 `json:"env_name_only"`
	FilePathOnly int64 `json:"file_path_only"`
	Both         int64 `json:"both"`
	Neither      int64 `json:"neither"`

	// Maximum and 25th/50th/75th/90th percentiles of secrets per user.
	SecretsPerUserMax int64 `json:"secrets_per_user_max"`
	SecretsPerUserP25 int64 `json:"secrets_per_user_p25"`
	SecretsPerUserP50 int64 `json:"secrets_per_user_p50"`
	SecretsPerUserP75 int64 `json:"secrets_per_user_p75"`
	SecretsPerUserP90 int64 `json:"secrets_per_user_p90"`
}
|
||||
|
||||
func ConvertAIBridgeInterceptionsSummary(endTime time.Time, provider, model, client string, summary database.CalculateAIBridgeInterceptionsTelemetrySummaryRow) AIBridgeInterceptionsSummary {
|
||||
return AIBridgeInterceptionsSummary{
|
||||
ID: uuid.New(),
|
||||
|
||||
@@ -1998,3 +1998,260 @@ func TestChatDiffStatusSummaryTelemetry(t *testing.T) {
|
||||
assert.Equal(t, int64(2), snapshot2.ChatDiffStatusSummary.Merged)
|
||||
assert.Equal(t, int64(1), snapshot2.ChatDiffStatusSummary.Closed)
|
||||
}
|
||||
|
||||
func TestUserSecretsTelemetry(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("Empty", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
// Empty deployment should report a non-nil summary with zeros.
|
||||
_, snap := collectSnapshot(ctx, t, db, nil)
|
||||
require.Equal(t, &telemetry.UserSecretsSummary{}, snap.UserSecretsSummary)
|
||||
})
|
||||
|
||||
t.Run("ConfigurationBreakdown", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
userA := dbgen.User(t, db, database.User{})
|
||||
userB := dbgen.User(t, db, database.User{})
|
||||
|
||||
// userA: env-only and file-only. dbgen.UserSecret defaults
|
||||
// EnvName and FilePath to non-empty, so use mutators to clear
|
||||
// them where the test wants empty values.
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: userA.ID,
|
||||
Name: "a-env",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = "A_ENV"
|
||||
p.FilePath = ""
|
||||
})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: userA.ID,
|
||||
Name: "a-file",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = "/home/coder/a.file"
|
||||
})
|
||||
// userB: both and neither.
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: userB.ID,
|
||||
Name: "b-both",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = "B_BOTH"
|
||||
p.FilePath = "/home/coder/b.both"
|
||||
})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: userB.ID,
|
||||
Name: "b-neither",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = ""
|
||||
})
|
||||
|
||||
_, snap := collectSnapshot(ctx, t, db, nil)
|
||||
// Each user has exactly two secrets, so every percentile and
|
||||
// the max collapse to 2.
|
||||
require.Equal(t, &telemetry.UserSecretsSummary{
|
||||
UsersWithSecrets: 2,
|
||||
TotalSecrets: 4,
|
||||
EnvNameOnly: 1,
|
||||
FilePathOnly: 1,
|
||||
Both: 1,
|
||||
Neither: 1,
|
||||
SecretsPerUserMax: 2,
|
||||
SecretsPerUserP25: 2,
|
||||
SecretsPerUserP50: 2,
|
||||
SecretsPerUserP75: 2,
|
||||
SecretsPerUserP90: 2,
|
||||
}, snap.UserSecretsSummary)
|
||||
})
|
||||
|
||||
t.Run("PercentileDistribution", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
// Five users have secret counts 1, 2, 4, 8, 16 and five other
|
||||
// users have zero secrets. Including the zero-secret users in
|
||||
// the distribution gives a sorted vector of length 10:
|
||||
// [0, 0, 0, 0, 0, 1, 2, 4, 8, 16]
|
||||
// percentile_disc(p) returns the value at the smallest
|
||||
// 1-indexed position i where i/n >= p, so the buckets land at:
|
||||
// p25 -> position 3 -> 0
|
||||
// p50 -> position 5 -> 0
|
||||
// p75 -> position 8 -> 4
|
||||
// p90 -> position 9 -> 8
|
||||
adopters := []int{1, 2, 4, 8, 16}
|
||||
for _, n := range adopters {
|
||||
u := dbgen.User(t, db, database.User{})
|
||||
for i := 0; i < n; i++ {
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: u.ID,
|
||||
Name: fmt.Sprintf("secret-%d", i),
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
// Clear EnvName and FilePath so the unique
|
||||
// (user_id, env_name) and (user_id, file_path)
|
||||
// indexes don't collide across multiple secrets
|
||||
// for the same user.
|
||||
p.EnvName = ""
|
||||
p.FilePath = ""
|
||||
})
|
||||
}
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
_ = dbgen.User(t, db, database.User{})
|
||||
}
|
||||
|
||||
_, snap := collectSnapshot(ctx, t, db, nil)
|
||||
require.Equal(t, &telemetry.UserSecretsSummary{
|
||||
UsersWithSecrets: 5,
|
||||
TotalSecrets: 31,
|
||||
EnvNameOnly: 0,
|
||||
FilePathOnly: 0,
|
||||
Both: 0,
|
||||
Neither: 31,
|
||||
SecretsPerUserMax: 16,
|
||||
SecretsPerUserP25: 0,
|
||||
SecretsPerUserP50: 0,
|
||||
SecretsPerUserP75: 4,
|
||||
SecretsPerUserP90: 8,
|
||||
}, snap.UserSecretsSummary)
|
||||
})
|
||||
|
||||
t.Run("FilterSkipsInactiveUsers", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
// Active user with two secrets contributes the only entries
|
||||
// to UsersWithSecrets, TotalSecrets, and the percentile
|
||||
// distribution.
|
||||
active := dbgen.User(t, db, database.User{})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: active.ID,
|
||||
Name: "active-env",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = "ACTIVE_ENV"
|
||||
p.FilePath = ""
|
||||
})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: active.ID,
|
||||
Name: "active-file",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = "/home/coder/active.file"
|
||||
})
|
||||
|
||||
// Soft-deleted user. user_secrets has ON DELETE CASCADE on
|
||||
// users, but Coder soft-deletes by setting users.deleted, so
|
||||
// the secret row persists. The summary should ignore it.
|
||||
deleted := dbgen.User(t, db, database.User{Deleted: true})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: deleted.ID,
|
||||
Name: "deleted-secret",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = "DELETED_ENV"
|
||||
p.FilePath = ""
|
||||
})
|
||||
|
||||
// User secret owned by a dormant user should be excluded.
|
||||
dormant := dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: dormant.ID,
|
||||
Name: "dormant-secret",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = "DORMANT_ENV"
|
||||
p.FilePath = ""
|
||||
})
|
||||
|
||||
// User secret owned by a suspended user should be excluded.
|
||||
suspended := dbgen.User(t, db, database.User{Status: database.UserStatusSuspended})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: suspended.ID,
|
||||
Name: "suspended-secret",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = "/home/coder/suspended.file"
|
||||
})
|
||||
|
||||
// System user. Only its UUID is needed. Tying a secret to it
|
||||
// proves the is_system filter excludes it.
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: database.PrebuildsSystemUserID,
|
||||
Name: "prebuilds-secret",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = "/home/coder/prebuilds.file"
|
||||
})
|
||||
|
||||
_, snap := collectSnapshot(ctx, t, db, nil)
|
||||
require.Equal(t, &telemetry.UserSecretsSummary{
|
||||
UsersWithSecrets: 1,
|
||||
TotalSecrets: 2,
|
||||
EnvNameOnly: 1,
|
||||
FilePathOnly: 1,
|
||||
Both: 0,
|
||||
Neither: 0,
|
||||
SecretsPerUserMax: 2,
|
||||
SecretsPerUserP25: 2,
|
||||
SecretsPerUserP50: 2,
|
||||
SecretsPerUserP75: 2,
|
||||
SecretsPerUserP90: 2,
|
||||
}, snap.UserSecretsSummary)
|
||||
})
|
||||
|
||||
t.Run("OnlyOneReplicaCollects", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
// Seed one user with one secret so the summary would normally
|
||||
// be populated. The user_secrets_summary aggregate has no
|
||||
// natural per-row UUID for the telemetry server to dedupe on,
|
||||
// so a telemetry lock elects a single replica per period.
|
||||
u := dbgen.User(t, db, database.User{})
|
||||
_ = dbgen.UserSecret(t, db, database.UserSecret{
|
||||
UserID: u.ID,
|
||||
Name: "only-secret",
|
||||
}, func(p *database.CreateUserSecretParams) {
|
||||
p.EnvName = ""
|
||||
p.FilePath = ""
|
||||
})
|
||||
|
||||
clock := quartz.NewMock(t)
|
||||
clock.Set(dbtime.Now())
|
||||
|
||||
// First snapshot claims the lock and reports the summary.
|
||||
_, snap1 := collectSnapshot(ctx, t, db, func(opts telemetry.Options) telemetry.Options {
|
||||
opts.Clock = clock
|
||||
return opts
|
||||
})
|
||||
require.Equal(t, &telemetry.UserSecretsSummary{
|
||||
UsersWithSecrets: 1,
|
||||
TotalSecrets: 1,
|
||||
EnvNameOnly: 0,
|
||||
FilePathOnly: 0,
|
||||
Both: 0,
|
||||
Neither: 1,
|
||||
SecretsPerUserMax: 1,
|
||||
SecretsPerUserP25: 1,
|
||||
SecretsPerUserP50: 1,
|
||||
SecretsPerUserP75: 1,
|
||||
SecretsPerUserP90: 1,
|
||||
}, snap1.UserSecretsSummary)
|
||||
|
||||
// A second snapshot in the same period simulates a second
|
||||
// replica racing to claim the lock; it should observe the
|
||||
// unique violation and skip reporting.
|
||||
_, snap2 := collectSnapshot(ctx, t, db, func(opts telemetry.Options) telemetry.Options {
|
||||
opts.Clock = clock
|
||||
return opts
|
||||
})
|
||||
require.Nil(t, snap2.UserSecretsSummary)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user