mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
15f2fa55c6
## Summary

Adds a process-wide cache for three hot database queries in `chatd` that were hitting Postgres on **every chat turn** despite returning rarely-changing configuration data:

| Query | Before (50k turns) | After | Reduction |
|---|---|---|---|
| `GetEnabledChatProviders` | ~98.6k calls | ~500-1000 | ~99% |
| `GetChatModelConfigByID` | ~49.2k calls | ~500-1000 | ~98% |
| `GetUserChatCustomPrompt` | ~46.7k calls | ~1000-2000 | ~97% |

These were identified via `coder exp scaletest chat` (5000 concurrent chats × 10 turns) as the dominant source of Postgres load during chat processing.

## Design

Follows the established **webpush subscription cache pattern** (`coderd/webpush/webpush.go`):

- `sync.RWMutex` + `tailscale.com/util/singleflight` (generic) + generation-based stale prevention + TTL
- 10s TTL for provider/model config, 5s TTL for user prompts
- Negative caching for `sql.ErrNoRows` on user prompts (the common case — most users don't set custom prompts)
- Deep-clones `ChatModelConfig.Options` (`json.RawMessage` = `[]byte`) on both store and read paths

### Invalidation

Single pubsub channel (`chat:config_change`) with kind discriminator for cross-replica cache invalidation. Seven publish points in `coderd/chats.go` cover all admin mutation endpoints (create/update/delete for providers and model configs, put for user prompts).

_This PR was generated with mux and was reviewed by a human_
413 lines
10 KiB
Go
413 lines
10 KiB
Go
package chatd
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"slices"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/ammario/tlru"
|
|
"github.com/google/uuid"
|
|
"tailscale.com/util/singleflight"
|
|
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
// Lifetimes and sizing used by chatConfigCache.
const (
	// chatConfigProvidersTTL is how long a stored provider list is
	// served before it is treated as expired.
	chatConfigProvidersTTL = time.Second * 10
	// chatConfigModelConfigTTL is the lifetime given to cached model
	// configs, both the per-ID entries and the default entry.
	chatConfigModelConfigTTL = time.Second * 10
	// chatConfigUserPromptTTL is the lifetime given to a cached user
	// prompt.
	chatConfigUserPromptTTL = time.Second * 5
	// chatConfigUserPromptEntryLimit is the limit handed to the
	// user-prompt cache (64Ki). It keeps one-shot users from piling up
	// without bound in a long-lived chatd process.
	chatConfigUserPromptEntryLimit = 1 << 16
)
|
|
|
|
type cachedProviders struct {
|
|
providers []database.ChatProvider
|
|
expiresAt time.Time
|
|
}
|
|
|
|
type cachedModelConfig struct {
|
|
config database.ChatModelConfig
|
|
expiresAt time.Time
|
|
}
|
|
|
|
// modelConfigSnapshot records the invalidation counters observed before
// a model-config fetch starts. The store helpers compare it against the
// current counters and drop the fetched value when they differ.
type modelConfigSnapshot struct {
	// epoch is the model topology epoch at snapshot time; generation is
	// the default-model-config generation and is only filled in for
	// default-config fetches.
	epoch, generation uint64
}
|
|
|
|
// cloneModelConfig returns a shallow copy of cfg with Options
|
|
// deep-cloned so the cache owns its own backing array.
|
|
func cloneModelConfig(cfg database.ChatModelConfig) database.ChatModelConfig {
|
|
cfg.Options = slices.Clone(cfg.Options)
|
|
return cfg
|
|
}
|
|
|
|
type chatConfigCache struct {
|
|
db database.Store
|
|
clock quartz.Clock
|
|
// ctx is the server-scoped context used for all DB fills.
|
|
// Cache fills run inside singleflight.Do where one caller
|
|
// becomes the leader for all coalesced waiters. Using a
|
|
// per-request context would mean the leader's cancellation
|
|
// (timeout, user disconnect) fans the error to every waiter.
|
|
// Storing the server context here makes that impossible by
|
|
// construction — callers cannot pass a request context into
|
|
// the shared fill path.
|
|
ctx context.Context
|
|
|
|
mu sync.RWMutex
|
|
|
|
// Providers (singleton).
|
|
providers *cachedProviders
|
|
providerGeneration uint64
|
|
providerFetches singleflight.Group[string, []database.ChatProvider]
|
|
|
|
// Model configs (keyed by ID).
|
|
modelTopologyEpoch uint64
|
|
modelConfigs map[uuid.UUID]cachedModelConfig
|
|
modelConfigFetches singleflight.Group[string, database.ChatModelConfig]
|
|
|
|
// Default model config (singleton).
|
|
defaultModelConfig *cachedModelConfig
|
|
defaultModelConfigGeneration uint64
|
|
defaultModelConfigFetches singleflight.Group[string, database.ChatModelConfig]
|
|
|
|
// User custom prompts (keyed by user ID).
|
|
userPromptEpoch uint64
|
|
userPrompts *tlru.Cache[uuid.UUID, string]
|
|
userPromptFetches singleflight.Group[string, string]
|
|
}
|
|
|
|
func newChatConfigCache(ctx context.Context, db database.Store, clock quartz.Clock) *chatConfigCache {
|
|
return &chatConfigCache{
|
|
db: db,
|
|
clock: clock,
|
|
ctx: ctx,
|
|
modelConfigs: make(map[uuid.UUID]cachedModelConfig),
|
|
userPrompts: tlru.New[uuid.UUID](
|
|
tlru.ConstantCost[string],
|
|
chatConfigUserPromptEntryLimit,
|
|
),
|
|
}
|
|
}
|
|
|
|
// singleflightDoChan wraps a singleflight group's DoChan method,
|
|
// allowing the caller to abandon the wait if their context is
|
|
// canceled while the shared fill continues running to completion.
|
|
// This separates two lifetimes: the fill runs under the server-scoped
|
|
// context, while each caller waits under its own request-scoped context.
|
|
func singleflightDoChan[K comparable, V any](
|
|
ctx context.Context,
|
|
group *singleflight.Group[K, V],
|
|
key K,
|
|
fn func() (V, error),
|
|
) (V, error) {
|
|
ch := group.DoChan(key, fn)
|
|
select {
|
|
case <-ctx.Done():
|
|
var zero V
|
|
return zero, ctx.Err()
|
|
case res := <-ch:
|
|
return res.Val, res.Err
|
|
}
|
|
}
|
|
|
|
func (c *chatConfigCache) EnabledProviders(ctx context.Context) ([]database.ChatProvider, error) {
|
|
if providers, ok := c.cachedProviders(); ok {
|
|
return providers, nil
|
|
}
|
|
|
|
generation := c.providersGeneration()
|
|
providers, err := singleflightDoChan(
|
|
ctx,
|
|
&c.providerFetches,
|
|
fmt.Sprintf("%d:providers", generation),
|
|
func() ([]database.ChatProvider, error) {
|
|
if cached, ok := c.cachedProviders(); ok {
|
|
return cached, nil
|
|
}
|
|
|
|
fetched, err := c.db.GetEnabledChatProviders(c.ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c.storeProviders(generation, fetched)
|
|
return slices.Clone(fetched), nil
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return slices.Clone(providers), nil
|
|
}
|
|
|
|
func (c *chatConfigCache) cachedProviders() ([]database.ChatProvider, bool) {
|
|
c.mu.RLock()
|
|
entry := c.providers
|
|
c.mu.RUnlock()
|
|
if entry == nil {
|
|
return nil, false
|
|
}
|
|
if c.clock.Now().Before(entry.expiresAt) {
|
|
return slices.Clone(entry.providers), true
|
|
}
|
|
|
|
c.mu.Lock()
|
|
if current := c.providers; current != nil && !c.clock.Now().Before(current.expiresAt) {
|
|
c.providers = nil
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
return nil, false
|
|
}
|
|
|
|
func (c *chatConfigCache) providersGeneration() uint64 {
|
|
c.mu.RLock()
|
|
generation := c.providerGeneration
|
|
c.mu.RUnlock()
|
|
return generation
|
|
}
|
|
|
|
func (c *chatConfigCache) storeProviders(generation uint64, providers []database.ChatProvider) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if c.providerGeneration != generation {
|
|
return
|
|
}
|
|
|
|
c.providers = &cachedProviders{
|
|
providers: slices.Clone(providers),
|
|
expiresAt: c.clock.Now().Add(chatConfigProvidersTTL),
|
|
}
|
|
}
|
|
|
|
func (c *chatConfigCache) InvalidateProviders() {
|
|
c.mu.Lock()
|
|
c.providers = nil
|
|
c.providerGeneration++
|
|
// Provider topology changed — model selections depend on
|
|
// provider existence, so flush all model-config state.
|
|
clear(c.modelConfigs)
|
|
c.modelTopologyEpoch++
|
|
c.defaultModelConfig = nil
|
|
c.defaultModelConfigGeneration++
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *chatConfigCache) ModelConfigByID(ctx context.Context, id uuid.UUID) (database.ChatModelConfig, error) {
|
|
if config, ok := c.cachedModelConfig(id); ok {
|
|
return config, nil
|
|
}
|
|
|
|
snap := c.modelConfigSnapshot()
|
|
config, err := singleflightDoChan(ctx, &c.modelConfigFetches, fmt.Sprintf("%d:%s", snap.epoch, id), func() (database.ChatModelConfig, error) {
|
|
if cached, ok := c.cachedModelConfig(id); ok {
|
|
return cached, nil
|
|
}
|
|
|
|
fetched, err := c.db.GetChatModelConfigByID(c.ctx, id)
|
|
if err != nil {
|
|
return database.ChatModelConfig{}, err
|
|
}
|
|
c.storeModelConfig(snap, fetched)
|
|
return cloneModelConfig(fetched), nil
|
|
})
|
|
if err != nil {
|
|
return database.ChatModelConfig{}, err
|
|
}
|
|
|
|
return config, nil
|
|
}
|
|
|
|
func (c *chatConfigCache) cachedModelConfig(id uuid.UUID) (database.ChatModelConfig, bool) {
|
|
c.mu.RLock()
|
|
entry, ok := c.modelConfigs[id]
|
|
c.mu.RUnlock()
|
|
if !ok {
|
|
return database.ChatModelConfig{}, false
|
|
}
|
|
if c.clock.Now().Before(entry.expiresAt) {
|
|
return cloneModelConfig(entry.config), true
|
|
}
|
|
|
|
c.mu.Lock()
|
|
if current, ok := c.modelConfigs[id]; ok && !c.clock.Now().Before(current.expiresAt) {
|
|
delete(c.modelConfigs, id)
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
return database.ChatModelConfig{}, false
|
|
}
|
|
|
|
func (c *chatConfigCache) modelConfigSnapshot() modelConfigSnapshot {
|
|
c.mu.RLock()
|
|
snap := modelConfigSnapshot{epoch: c.modelTopologyEpoch}
|
|
c.mu.RUnlock()
|
|
return snap
|
|
}
|
|
|
|
func (c *chatConfigCache) storeModelConfig(snap modelConfigSnapshot, config database.ChatModelConfig) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if c.modelTopologyEpoch != snap.epoch {
|
|
return
|
|
}
|
|
|
|
c.modelConfigs[config.ID] = cachedModelConfig{
|
|
config: cloneModelConfig(config),
|
|
expiresAt: c.clock.Now().Add(chatConfigModelConfigTTL),
|
|
}
|
|
}
|
|
|
|
func (c *chatConfigCache) DefaultModelConfig(ctx context.Context) (database.ChatModelConfig, error) {
|
|
if config, ok := c.cachedDefaultModelConfig(); ok {
|
|
return config, nil
|
|
}
|
|
|
|
snap := c.defaultModelConfigSnapshot()
|
|
config, err := singleflightDoChan(ctx, &c.defaultModelConfigFetches, fmt.Sprintf("%d:default", snap.epoch), func() (database.ChatModelConfig, error) {
|
|
if cached, ok := c.cachedDefaultModelConfig(); ok {
|
|
return cached, nil
|
|
}
|
|
|
|
fetched, err := c.db.GetDefaultChatModelConfig(c.ctx)
|
|
if err != nil {
|
|
return database.ChatModelConfig{}, err
|
|
}
|
|
c.storeDefaultModelConfig(snap, fetched)
|
|
return cloneModelConfig(fetched), nil
|
|
})
|
|
if err != nil {
|
|
return database.ChatModelConfig{}, err
|
|
}
|
|
|
|
return config, nil
|
|
}
|
|
|
|
func (c *chatConfigCache) cachedDefaultModelConfig() (database.ChatModelConfig, bool) {
|
|
c.mu.RLock()
|
|
entry := c.defaultModelConfig
|
|
c.mu.RUnlock()
|
|
if entry == nil {
|
|
return database.ChatModelConfig{}, false
|
|
}
|
|
if c.clock.Now().Before(entry.expiresAt) {
|
|
return cloneModelConfig(entry.config), true
|
|
}
|
|
|
|
c.mu.Lock()
|
|
if current := c.defaultModelConfig; current != nil && !c.clock.Now().Before(current.expiresAt) {
|
|
c.defaultModelConfig = nil
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
return database.ChatModelConfig{}, false
|
|
}
|
|
|
|
func (c *chatConfigCache) defaultModelConfigSnapshot() modelConfigSnapshot {
|
|
c.mu.RLock()
|
|
snap := modelConfigSnapshot{
|
|
epoch: c.modelTopologyEpoch,
|
|
generation: c.defaultModelConfigGeneration,
|
|
}
|
|
c.mu.RUnlock()
|
|
return snap
|
|
}
|
|
|
|
func (c *chatConfigCache) storeDefaultModelConfig(snap modelConfigSnapshot, config database.ChatModelConfig) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if c.modelTopologyEpoch != snap.epoch {
|
|
return
|
|
}
|
|
if c.defaultModelConfigGeneration != snap.generation {
|
|
return
|
|
}
|
|
|
|
c.defaultModelConfig = &cachedModelConfig{
|
|
config: cloneModelConfig(config),
|
|
expiresAt: c.clock.Now().Add(chatConfigModelConfigTTL),
|
|
}
|
|
}
|
|
|
|
func (c *chatConfigCache) UserPrompt(ctx context.Context, userID uuid.UUID) (string, error) {
|
|
if prompt, ok := c.cachedUserPrompt(userID); ok {
|
|
return prompt, nil
|
|
}
|
|
|
|
epoch := c.currentUserPromptEpoch()
|
|
prompt, err := singleflightDoChan(ctx, &c.userPromptFetches, fmt.Sprintf("%d:%s", epoch, userID), func() (string, error) {
|
|
if cached, ok := c.cachedUserPrompt(userID); ok {
|
|
return cached, nil
|
|
}
|
|
|
|
fetched, err := c.db.GetUserChatCustomPrompt(c.ctx, userID)
|
|
if err != nil {
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
c.storeUserPrompt(epoch, userID, "")
|
|
return "", nil
|
|
}
|
|
return "", err
|
|
}
|
|
c.storeUserPrompt(epoch, userID, fetched)
|
|
return fetched, nil
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return prompt, nil
|
|
}
|
|
|
|
func (c *chatConfigCache) cachedUserPrompt(userID uuid.UUID) (string, bool) {
|
|
prompt, _, ok := c.userPrompts.Get(userID)
|
|
if !ok {
|
|
return "", false
|
|
}
|
|
return prompt, true
|
|
}
|
|
|
|
func (c *chatConfigCache) currentUserPromptEpoch() uint64 {
|
|
c.mu.RLock()
|
|
epoch := c.userPromptEpoch
|
|
c.mu.RUnlock()
|
|
return epoch
|
|
}
|
|
|
|
func (c *chatConfigCache) storeUserPrompt(epoch uint64, userID uuid.UUID, prompt string) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if c.userPromptEpoch != epoch {
|
|
return
|
|
}
|
|
|
|
c.userPrompts.Set(userID, prompt, chatConfigUserPromptTTL)
|
|
}
|
|
|
|
func (c *chatConfigCache) InvalidateModelConfig(id uuid.UUID) {
|
|
c.mu.Lock()
|
|
delete(c.modelConfigs, id)
|
|
c.modelTopologyEpoch++
|
|
c.defaultModelConfig = nil
|
|
c.defaultModelConfigGeneration++
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *chatConfigCache) InvalidateUserPrompt(userID uuid.UUID) {
|
|
c.mu.Lock()
|
|
c.userPrompts.Delete(userID)
|
|
c.userPromptEpoch++
|
|
c.mu.Unlock()
|
|
}
|