mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
282ab7de34
Replace the env-based `BuildProviders` with a DB-backed loader. The database is now the single source of truth for runtime provider configuration; env config arrives via `SeedAIProvidersFromEnv` (run at boot) and `BuildProviders` reads it back as `aibridge.Provider` instances. `cli/server.go` and `enterprise/cli/server.go` both call the same path, so aibridged and aibridgeproxyd see the same provider set. Per-provider `DumpDir` is replaced by a top-level `CODER_AI_GATEWAY_DUMP_DIR` base; each provider's effective dump path is `<base>/<provider name>`.
250 lines
8.5 KiB
Go
250 lines
8.5 KiB
Go
//go:build !slim
|
|
|
|
package cli
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/aibridge"
|
|
"github.com/coder/coder/v2/aibridge/config"
|
|
"github.com/coder/coder/v2/aibridge/keypool"
|
|
"github.com/coder/coder/v2/coderd"
|
|
"github.com/coder/coder/v2/coderd/aibridged"
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/database/db2sdk"
|
|
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
|
"github.com/coder/coder/v2/coderd/tracing"
|
|
"github.com/coder/coder/v2/coderd/util/ptr"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
func newAIBridgeDaemon(coderAPI *coderd.API, providers []aibridge.Provider) (*aibridged.Server, error) {
|
|
ctx := context.Background()
|
|
coderAPI.Logger.Debug(ctx, "starting in-memory aibridge daemon")
|
|
|
|
logger := coderAPI.Logger.Named("aibridged")
|
|
|
|
reg := prometheus.WrapRegistererWithPrefix("coder_aibridged_", coderAPI.PrometheusRegistry)
|
|
metrics := aibridge.NewMetrics(reg)
|
|
tracer := coderAPI.TracerProvider.Tracer(tracing.TracerName)
|
|
|
|
// Create pool for reusable stateful [aibridge.RequestBridge] instances (one per user).
|
|
pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger.Named("pool"), metrics, tracer) // TODO: configurable size.
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("create request pool: %w", err)
|
|
}
|
|
|
|
// Create daemon.
|
|
srv, err := aibridged.New(ctx, pool, func(dialCtx context.Context) (aibridged.DRPCClient, error) {
|
|
return coderAPI.CreateInMemoryAIBridgeServer(dialCtx)
|
|
}, logger, tracer)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("start in-memory aibridge daemon: %w", err)
|
|
}
|
|
return srv, nil
|
|
}
|
|
|
|
// BuildProviders loads every enabled ai_providers row, attaches its
|
|
// keys, and constructs the equivalent [aibridge.Provider] instances.
|
|
// The database is the single source of truth for runtime provider
|
|
// configuration.
|
|
//
|
|
// Per-provider construction errors are logged and the offending row is
|
|
// excluded from the returned snapshot; only a failure of the DB query
|
|
// itself is propagated. This keeps a single misconfigured row from
|
|
// taking the whole daemon down.
|
|
func BuildProviders(ctx context.Context, db database.Store, cfg codersdk.AIBridgeConfig, logger slog.Logger) ([]aibridge.Provider, error) {
|
|
//nolint:gocritic // AsAIBridged has a minimal permission set for this purpose.
|
|
authCtx := dbauthz.AsAIBridged(ctx)
|
|
|
|
var rows []database.AIProvider
|
|
keysByProvider := make(map[uuid.UUID][]database.AIProviderKey)
|
|
|
|
// Wrap both queries in a read-only transaction so the provider list
|
|
// and the key list are consistent with each other.
|
|
err := db.InTx(func(tx database.Store) error {
|
|
var err error
|
|
rows, err = tx.GetAIProviders(authCtx, database.GetAIProvidersParams{
|
|
IncludeDisabled: false,
|
|
})
|
|
if err != nil {
|
|
return xerrors.Errorf("load ai providers: %w", err)
|
|
}
|
|
|
|
if len(rows) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Load keys only for the enabled providers to avoid materializing
|
|
// secrets for disabled rows.
|
|
ids := make([]uuid.UUID, len(rows))
|
|
for i, r := range rows {
|
|
ids[i] = r.ID
|
|
}
|
|
keyRows, err := tx.GetAIProviderKeysByProviderIDs(authCtx, ids)
|
|
if err != nil {
|
|
return xerrors.Errorf("load ai provider keys: %w", err)
|
|
}
|
|
for _, k := range keyRows {
|
|
keysByProvider[k.ProviderID] = append(keysByProvider[k.ProviderID], k)
|
|
}
|
|
return nil
|
|
}, &database.TxOptions{ReadOnly: true, TxIdentifier: "build_ai_providers"})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out := make([]aibridge.Provider, 0, len(rows))
|
|
for _, row := range rows {
|
|
prov, err := buildAIProviderFromRow(row, keysByProvider[row.ID], cfg)
|
|
if err != nil {
|
|
logger.Error(ctx, "skipping misconfigured ai provider",
|
|
slog.F("provider_id", row.ID),
|
|
slog.F("provider_name", row.Name),
|
|
slog.F("provider_type", string(row.Type)),
|
|
slog.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
out = append(out, prov)
|
|
}
|
|
|
|
if len(rows) > 0 && len(out) == 0 {
|
|
logger.Warn(ctx, "all enabled ai providers failed to build; daemon will start with zero providers")
|
|
}
|
|
|
|
return out, nil
|
|
}
|
|
|
|
// buildAIProviderFromRow decodes the settings blob and constructs the
|
|
// appropriate [aibridge.Provider] for a single ai_providers row.
|
|
func buildAIProviderFromRow(
|
|
row database.AIProvider,
|
|
keys []database.AIProviderKey,
|
|
cfg codersdk.AIBridgeConfig,
|
|
) (aibridge.Provider, error) {
|
|
settings, err := db2sdk.AIProviderSettings(row.Settings)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("decode settings: %w", err)
|
|
}
|
|
|
|
cbCfg := circuitBreakerConfig(cfg)
|
|
sendActorHeaders := cfg.SendActorHeaders.Value()
|
|
dumpDir := cfg.APIDumpDir.Value()
|
|
|
|
switch row.Type {
|
|
case database.AiProviderTypeOpenai:
|
|
if len(keys) == 0 && !cfg.AllowBYOK.Value() {
|
|
return nil, xerrors.New("openai provider has no api keys configured and BYOK is not enabled")
|
|
}
|
|
var pool *keypool.Pool
|
|
if len(keys) > 0 {
|
|
var err error
|
|
pool, err = buildAIProviderKeyPool(keys)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("openai key pool: %w", err)
|
|
}
|
|
}
|
|
return aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{
|
|
Name: row.Name,
|
|
BaseURL: row.BaseUrl,
|
|
KeyPool: pool,
|
|
APIDumpDir: dumpDir,
|
|
CircuitBreaker: cbCfg,
|
|
SendActorHeaders: sendActorHeaders,
|
|
}), nil
|
|
|
|
case database.AiProviderTypeAnthropic:
|
|
bedrock := bedrockConfigFromRow(row, settings)
|
|
// Bedrock-backed Anthropic authenticates via AWS credentials in
|
|
// the settings blob, not the api_keys table. A bearer-token
|
|
// Anthropic without any key cannot make upstream calls.
|
|
if bedrock == nil && len(keys) == 0 && !cfg.AllowBYOK.Value() {
|
|
return nil, xerrors.New("anthropic provider has no api keys, no bedrock credentials, and BYOK is not enabled")
|
|
}
|
|
var pool *keypool.Pool
|
|
if len(keys) > 0 {
|
|
var err error
|
|
pool, err = buildAIProviderKeyPool(keys)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("anthropic key pool: %w", err)
|
|
}
|
|
}
|
|
return aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{
|
|
Name: row.Name,
|
|
BaseURL: row.BaseUrl,
|
|
KeyPool: pool,
|
|
APIDumpDir: dumpDir,
|
|
CircuitBreaker: cbCfg,
|
|
SendActorHeaders: sendActorHeaders,
|
|
}, bedrock), nil
|
|
|
|
case database.AiProviderTypeCopilot:
|
|
// Copilot is always BYOK; the per-user token is supplied on each
|
|
// request via the Authorization header, so no keypool is built.
|
|
return aibridge.NewCopilotProvider(aibridge.CopilotConfig{
|
|
Name: row.Name,
|
|
BaseURL: row.BaseUrl,
|
|
APIDumpDir: dumpDir,
|
|
CircuitBreaker: cbCfg,
|
|
}), nil
|
|
|
|
default:
|
|
return nil, xerrors.Errorf("unsupported provider type: %q", row.Type)
|
|
}
|
|
}
|
|
|
|
// buildAIProviderKeyPool builds a [keypool.Pool]. Callers must check
|
|
// len(keys) > 0 first; keypool.New rejects empty input.
|
|
func buildAIProviderKeyPool(keys []database.AIProviderKey) (*keypool.Pool, error) {
|
|
raw := make([]string, 0, len(keys))
|
|
for _, k := range keys {
|
|
raw = append(raw, k.APIKey)
|
|
}
|
|
return keypool.New(raw, quartz.NewReal())
|
|
}
|
|
|
|
// bedrockConfigFromRow returns nil when the settings have no Bedrock
|
|
// discriminator or when the Bedrock fields are not actually configured.
|
|
// The provider row's BaseUrl is the generic upstream endpoint and is
|
|
// always non-empty, so it cannot serve as a Bedrock detection signal;
|
|
// gate on the settings blob alone via [codersdk.AIProviderBedrockSettings.IsConfigured].
|
|
func bedrockConfigFromRow(row database.AIProvider, settings codersdk.AIProviderSettings) *aibridge.AWSBedrockConfig {
|
|
if settings.Bedrock == nil {
|
|
return nil
|
|
}
|
|
bedrockSettings := *settings.Bedrock
|
|
if !bedrockSettings.IsConfigured() {
|
|
return nil
|
|
}
|
|
accessKey := ptr.NilToEmpty(bedrockSettings.AccessKey)
|
|
accessKeySecret := ptr.NilToEmpty(bedrockSettings.AccessKeySecret)
|
|
return &aibridge.AWSBedrockConfig{
|
|
BaseURL: row.BaseUrl,
|
|
Region: bedrockSettings.Region,
|
|
AccessKey: accessKey,
|
|
AccessKeySecret: accessKeySecret,
|
|
Model: bedrockSettings.Model,
|
|
SmallFastModel: bedrockSettings.SmallFastModel,
|
|
}
|
|
}
|
|
|
|
// circuitBreakerConfig returns nil when the breaker is disabled.
|
|
func circuitBreakerConfig(cfg codersdk.AIBridgeConfig) *config.CircuitBreaker {
|
|
if !cfg.CircuitBreakerEnabled.Value() {
|
|
return nil
|
|
}
|
|
return &config.CircuitBreaker{
|
|
FailureThreshold: uint32(cfg.CircuitBreakerFailureThreshold.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options.
|
|
Interval: cfg.CircuitBreakerInterval.Value(),
|
|
Timeout: cfg.CircuitBreakerTimeout.Value(),
|
|
MaxRequests: uint32(cfg.CircuitBreakerMaxRequests.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options.
|
|
}
|
|
}
|