Files
coder/coderd/x/chatd/usagelimit.go
T
Cian Johnston c552f9f281 fix: stop group spend limits from leaking across org boundaries (#24294)
Three SQL queries (`GetUserGroupSpendLimit`,
`ResolveUserChatSpendLimit`, `GetUserChatSpendInPeriod`) aggregated chat
spend limits and usage globally across all organizations. A restrictive
group limit in org A would bleed into org B.

## Changes

- Add `organization_id` parameter to all three SQL queries in
`coderd/database/queries/chats.sql`
- When nil UUID is passed, queries fall back to global behavior
(backward compat for HTTP dashboard endpoints)
- When real org ID is passed, limits and spend are scoped to that
organization
- Thread `organizationID` through `ResolveUsageLimitStatus` →
`checkUsageLimit` → all chatd call sites
- Update dbauthz wrappers for new param structs
- HTTP endpoints (`chatCostSummary`, `getMyChatUsageLimitStatus`) pass
`uuid.Nil` with TODO for future org-scoped UI
- Add `TestResolveUsageLimitStatus_OrgScoped` with 5 test cases covering
org isolation, nil-UUID fallback, spend scoping, and user override
priority

Closes coder/internal#1466

> 🤖
2026-04-14 16:56:17 +01:00

153 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package chatd
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/codersdk"
)
// ComputeUsagePeriodBounds returns the UTC-aligned start and end bounds for the
// active usage-limit period containing now.
func ComputeUsagePeriodBounds(now time.Time, period codersdk.ChatUsageLimitPeriod) (start, end time.Time) {
utcNow := now.UTC()
switch period {
case codersdk.ChatUsageLimitPeriodDay:
start = time.Date(utcNow.Year(), utcNow.Month(), utcNow.Day(), 0, 0, 0, 0, time.UTC)
end = start.AddDate(0, 0, 1)
case codersdk.ChatUsageLimitPeriodWeek:
// Walk backward to Monday of the current ISO week.
// ISO 8601 weeks always start on Monday, so this never
// crosses an ISO-week boundary.
start = time.Date(utcNow.Year(), utcNow.Month(), utcNow.Day(), 0, 0, 0, 0, time.UTC)
for start.Weekday() != time.Monday {
start = start.AddDate(0, 0, -1)
}
end = start.AddDate(0, 0, 7)
case codersdk.ChatUsageLimitPeriodMonth:
start = time.Date(utcNow.Year(), utcNow.Month(), 1, 0, 0, 0, 0, time.UTC)
end = start.AddDate(0, 1, 0)
default:
panic(fmt.Sprintf("unknown chat usage limit period: %q", period))
}
return start, end
}
// ResolveUsageLimitStatus resolves the current usage-limit status for
// userID within organizationID. When organizationID is invalid (Valid
// == false), limits and spend are computed globally across all
// organizations (legacy behavior).
//
// Note: There is a potential race condition where two concurrent messages
// from the same user can both pass the limit check if processed in
// parallel, allowing brief overage. This is acceptable because:
// - Cost is only known after the LLM API returns.
// - Overage is bounded by message cost × concurrency.
// - Fail-open is the deliberate design choice for this feature.
//
// Architecture note: today this path enforces one period globally
// (day/week/month) from config.
// To support simultaneous periods, add nullable
// daily/weekly/monthly_limit_micros columns on override tables, where NULL
// means no limit for that period.
// Then scan spend once over the widest active window with conditional SUMs
// for each period and compare each spend/limit pair Go-side, blocking on
// whichever period is tightest.
func ResolveUsageLimitStatus(ctx context.Context, db database.Store, userID uuid.UUID, organizationID uuid.NullUUID, now time.Time) (*codersdk.ChatUsageLimitStatus, error) {
//nolint:gocritic // AsChatd provides narrowly-scoped daemon access for
// deployment config reads and cross-user chat spend aggregation.
authCtx := dbauthz.AsChatd(ctx)
config, err := db.GetChatUsageLimitConfig(authCtx)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, nil //nolint:nilnil // Nil status cleanly signals disabled limits.
}
return nil, err
}
if !config.Enabled {
return nil, nil //nolint:nilnil // Nil status cleanly signals disabled limits.
}
period, ok := mapDBPeriodToSDK(config.Period)
if !ok {
return nil, xerrors.Errorf("invalid chat usage limit period %q", config.Period)
}
// Resolve effective limit in a single query:
// individual override > group limit > global default.
limitResult, err := db.ResolveUserChatSpendLimit(authCtx, database.ResolveUserChatSpendLimitParams{
UserID: userID,
OrganizationID: organizationID,
})
if err != nil {
return nil, err
}
// -1 means limits are disabled (shouldn't happen since we checked
// above, but handle gracefully).
if limitResult.EffectiveLimitMicros < 0 {
return nil, nil //nolint:nilnil // Nil status cleanly signals disabled limits.
}
start, end := ComputeUsagePeriodBounds(now, period)
// When the winning limit tier is org-scoped (group), scope spend
// to the same org. When the limit is global (user override or
// deployment default), check spend globally to prevent a user
// from exceeding their limit by spreading spend across orgs.
spendOrgID := organizationID
if limitResult.LimitSource != limitSourceGroup {
spendOrgID = uuid.NullUUID{}
}
spendTotal, err := db.GetUserChatSpendInPeriod(authCtx, database.GetUserChatSpendInPeriodParams{
UserID: userID,
OrganizationID: spendOrgID,
StartTime: start,
EndTime: end,
})
if err != nil {
return nil, err
}
effectiveLimit := limitResult.EffectiveLimitMicros
return &codersdk.ChatUsageLimitStatus{
IsLimited: true,
Period: period,
SpendLimitMicros: &effectiveLimit,
CurrentSpend: spendTotal,
PeriodStart: start,
PeriodEnd: end,
}, nil
}
// Limit source constants returned by ResolveUserChatSpendLimit.
const (
limitSourceUser = "user"
limitSourceGroup = "group"
limitSourceDefault = "default"
)
func mapDBPeriodToSDK(dbPeriod string) (codersdk.ChatUsageLimitPeriod, bool) {
switch dbPeriod {
case string(codersdk.ChatUsageLimitPeriodDay):
return codersdk.ChatUsageLimitPeriodDay, true
case string(codersdk.ChatUsageLimitPeriodWeek):
return codersdk.ChatUsageLimitPeriodWeek, true
case string(codersdk.ChatUsageLimitPeriodMonth):
return codersdk.ChatUsageLimitPeriodMonth, true
default:
return "", false
}
}