Files
coder/enterprise/cli/server.go
Danny Kopping 08045c2aac feat: configure multiple AI Bridge providers of the same type (#23948)
_Disclaimer: produced mostly by Claude Opus 4.6 following detailed
planning._

## Summary
- Support multiple instances of the same AI Bridge provider type via
indexed env vars (`CODER_AIBRIDGE_PROVIDER_<N>_<KEY>`), following the
`CODER_EXTERNAL_AUTH_<N>_<KEY>` pattern
- Existing single-provider env vars (`CODER_AIBRIDGE_OPENAI_KEY`, etc.)
continue to work unchanged
- Setting both a legacy env var and an indexed provider with the same
name errors at startup to prevent silent misconfiguration
- Mark legacy provider fields (`OpenAI`, `Anthropic`, `Bedrock`) as
deprecated in `AIBridgeConfig` in favor of `Providers`
## Example
```sh
CODER_AIBRIDGE_PROVIDER_0_TYPE=anthropic
CODER_AIBRIDGE_PROVIDER_0_NAME=anthropic-corp
CODER_AIBRIDGE_PROVIDER_0_KEY=sk-ant-corp-xxx

CODER_AIBRIDGE_PROVIDER_0_BASE_URL=https://llm-proxy.internal.example.com/anthropic

CODER_AIBRIDGE_PROVIDER_1_TYPE=anthropic
CODER_AIBRIDGE_PROVIDER_1_NAME=anthropic-direct
CODER_AIBRIDGE_PROVIDER_1_KEY=sk-ant-direct-yyy
```

Each instance is routed by name:
- /api/v2/aibridge/**anthropic-corp**/v1/messages
- /api/v2/aibridge/**anthropic-direct**/v1/messages
Closes
[AIGOV-157](https://linear.app/codercom/issue/AIGOV-157/spike-to-understand-if-there-is-a-simple-way-to-handle-multi-api-key)

---------

Signed-off-by: Danny Kopping <danny@coder.com>
2026-04-15 07:59:37 +00:00

237 lines
7.7 KiB
Go

//go:build !slim
package cli
import (
"context"
"database/sql"
"encoding/base64"
"errors"
"io"
"net/url"
"time"
"golang.org/x/xerrors"
"tailscale.com/derp"
"tailscale.com/types/key"
agplcoderd "github.com/coder/coder/v2/coderd"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/cryptorand"
"github.com/coder/coder/v2/enterprise/audit"
"github.com/coder/coder/v2/enterprise/audit/backends"
"github.com/coder/coder/v2/enterprise/coderd"
"github.com/coder/coder/v2/enterprise/coderd/dormancy"
"github.com/coder/coder/v2/enterprise/coderd/usage"
"github.com/coder/coder/v2/enterprise/dbcrypt"
"github.com/coder/coder/v2/enterprise/trialer"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/quartz"
"github.com/coder/serpent"
)
// Server returns the enterprise server command. It delegates to the Server
// command of r.RootCmd, supplying a callback that builds the enterprise API
// and its background routines, and registers the dbcrypt subcommand on the
// result. The func() argument is ignored.
//
// The callback returns the underlying AGPL API, an io.Closer that tears down
// everything it started, and an error. Once any closer has been registered,
// every error path closes the accumulated closers before returning so that
// already-started components are not leaked.
func (r *RootCmd) Server(_ func()) *serpent.Command {
	cmd := r.RootCmd.Server(func(ctx context.Context, options *agplcoderd.Options) (*agplcoderd.API, io.Closer, error) {
		// Reject an unparseable relay URL up front, before touching the
		// database.
		if options.DeploymentValues.DERP.Server.RelayURL.String() != "" {
			_, err := url.Parse(options.DeploymentValues.DERP.Server.RelayURL.String())
			if err != nil {
				return nil, nil, xerrors.Errorf("derp-server-relay-address must be a valid HTTP URL: %w", err)
			}
		}

		// Always generate a mesh key, even if the built-in DERP server is
		// disabled. This mesh key is still used by workspace proxies running
		// HA.
		var meshKey string
		err := options.Database.InTx(func(tx database.Store) error {
			// This will block until the lock is acquired, and will be
			// automatically released when the transaction ends.
			err := tx.AcquireLock(ctx, database.LockIDEnterpriseDeploymentSetup)
			if err != nil {
				return xerrors.Errorf("acquire lock: %w", err)
			}
			meshKey, err = tx.GetDERPMeshKey(ctx)
			if err == nil {
				// A key already exists; reuse it.
				return nil
			}
			if !errors.Is(err, sql.ErrNoRows) {
				return xerrors.Errorf("get DERP mesh key: %w", err)
			}
			// No key stored yet: generate one and persist it within the
			// same transaction.
			meshKey, err = cryptorand.String(32)
			if err != nil {
				return xerrors.Errorf("generate DERP mesh key: %w", err)
			}
			err = tx.InsertDERPMeshKey(ctx, meshKey)
			if err != nil {
				return xerrors.Errorf("insert DERP mesh key: %w", err)
			}
			return nil
		}, nil)
		if err != nil {
			return nil, nil, err
		}
		if meshKey == "" {
			return nil, nil, xerrors.New("mesh key is empty")
		}

		if options.DeploymentValues.DERP.Server.Enable {
			options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp")))
			options.DERPServer.SetMeshKey(meshKey)
		}

		options.Auditor = audit.NewAuditor(
			options.Database,
			audit.DefaultFilter,
			backends.NewPostgres(options.Database, true),
			backends.NewSlog(options.Logger),
		)

		options.TrialGenerator = trialer.New(options.Database, "https://v2-licensor.coder.com/trial", coderd.Keys)

		o := &coderd.Options{
			Options:                   options,
			AuditLogging:              true,
			ConnectionLogging:         true,
			BrowserOnly:               options.DeploymentValues.BrowserOnly.Value(),
			SCIMAPIKey:                []byte(options.DeploymentValues.SCIMAPIKey.Value()),
			RBAC:                      true,
			DERPServerRelayAddress:    options.DeploymentValues.DERP.Server.RelayURL.String(),
			DERPServerRegionID:        int(options.DeploymentValues.DERP.Server.RegionID.Value()),
			ProxyHealthInterval:       options.DeploymentValues.ProxyHealthStatusInterval.Value(),
			DefaultQuietHoursSchedule: options.DeploymentValues.UserQuietHoursSchedule.DefaultSchedule.Value(),
			ProvisionerDaemonPSK:      options.DeploymentValues.Provisioner.DaemonPSK.Value(),

			CheckInactiveUsersCancelFunc: dormancy.CheckInactiveUsers(ctx, options.Logger, quartz.NewReal(), options.Database, options.Auditor),
		}

		// Decode the configured base64 encryption keys, if any, and build
		// the ciphers used for external token encryption.
		if encKeys := options.DeploymentValues.ExternalTokenEncryptionKeys.Value(); len(encKeys) != 0 {
			keys := make([][]byte, 0, len(encKeys))
			for idx, ek := range encKeys {
				dk, err := base64.StdEncoding.DecodeString(ek)
				if err != nil {
					return nil, nil, xerrors.Errorf("decode external-token-encryption-key %d: %w", idx, err)
				}
				keys = append(keys, dk)
			}
			cs, err := dbcrypt.NewCiphers(keys...)
			if err != nil {
				return nil, nil, xerrors.Errorf("initialize encryption: %w", err)
			}
			o.ExternalTokenEncryption = cs
		}

		if o.LicenseKeys == nil {
			o.LicenseKeys = coderd.Keys
		}

		// closers collects everything started below. From here on, every
		// error return must close it first.
		closers := &multiCloser{}

		// Create the enterprise API.
		api, err := coderd.New(ctx, o)
		if err != nil {
			// Nothing has been registered yet, so there is nothing to close.
			return nil, nil, err
		}
		closers.Add(api)

		// Start the enterprise usage publisher routine. This won't do anything
		// unless the deployment is licensed and one of the licenses has usage
		// publishing enabled.
		publisher := usage.NewTallymanPublisher(ctx, options.Logger, options.Database, o.LicenseKeys,
			usage.PublisherWithHTTPClient(api.HTTPClient),
		)
		err = publisher.Start()
		if err != nil {
			// The start failure is the error worth reporting; a secondary
			// close error is intentionally dropped.
			_ = closers.Close()
			return nil, nil, xerrors.Errorf("start usage publisher: %w", err)
		}
		closers.Add(publisher)

		// usageCron schedules heartbeat events for the usage table. These
		// events are eventually sent to Tallyman.
		usageCron := usage.NewCron(quartz.NewReal(), options.Logger.Named("usage-cron"), options.Database, *options.UsageInserter.Load())

		// ai-seats heartbeats track the number of users that have used an AI feature.
		// These users consume a seat for the AI addon to our License.
		// NOTE(review): the result of Register is discarded here; confirm
		// that ignoring it is intentional.
		_ = usageCron.Register(usage.CronJob{
			Name:     "ai-seats",
			Interval: usage.AISeatsInterval,
			Jitter:   10 * time.Minute,
			Fn:       usage.AISeatsHeartbeat(options.Database),
		})
		usageCron.Start(ctx)
		closers.Add(usageCron)

		// Build the provider list and start AI Bridge daemons only when
		// at least one of the bridge or proxy features is enabled.
		bridgeEnabled := options.DeploymentValues.AI.BridgeConfig.Enabled.Value()
		proxyEnabled := options.DeploymentValues.AI.BridgeProxyConfig.Enabled.Value()
		if bridgeEnabled || proxyEnabled {
			providers, err := buildProviders(options.DeploymentValues.AI.BridgeConfig)
			if err != nil {
				// The API, publisher and usage cron are already running;
				// release them instead of leaking them on a config error.
				_ = closers.Close()
				return nil, nil, xerrors.Errorf("build aibridge providers: %w", err)
			}

			// In-memory aibridge daemon.
			// TODO(@deansheather): the lifecycle of the aibridged server is
			// probably better managed by the enterprise API type itself. Managing
			// it in the API type means we can avoid starting it up when the license
			// is not entitled to the feature.
			if bridgeEnabled {
				aibridgeDaemon, err := newAIBridgeDaemon(api, providers)
				if err != nil {
					// Mirror the proxy daemon's error path below: tear down
					// everything started so far before bailing out.
					_ = closers.Close()
					return nil, nil, xerrors.Errorf("create aibridged: %w", err)
				}

				api.RegisterInMemoryAIBridgedHTTPHandler(aibridgeDaemon)

				// When running as an in-memory daemon, the HTTP handler is
				// wired into the coderd API and therefore is subject to its
				// context. Calling Close() on aibridged will NOT affect
				// in-flight requests but those will be closed once the API
				// server is itself shutdown.
				closers.Add(aibridgeDaemon)
			}

			// In-memory AI Bridge Proxy daemon.
			if proxyEnabled {
				aiBridgeProxyServer, err := newAIBridgeProxyDaemon(api, providers)
				if err != nil {
					_ = closers.Close()
					return nil, nil, xerrors.Errorf("create aibridgeproxyd: %w", err)
				}
				closers.Add(aiBridgeProxyServer)

				// Register the handler so coderd can serve the proxy endpoints.
				api.RegisterInMemoryAIBridgeProxydHTTPHandler(aiBridgeProxyServer.Handler())
			}
		}

		return api.AGPL, closers, nil
	})

	cmd.AddSubcommands(
		r.dbcryptCmd(),
	)
	return cmd
}
// multiCloser aggregates several io.Closer values so they can be handed
// around and closed as a single io.Closer.
type multiCloser struct {
	// closers holds the registered closers in the order they were added.
	closers []io.Closer
}

// Compile-time check that *multiCloser satisfies io.Closer.
var _ io.Closer = (*multiCloser)(nil)

// Add registers closer so that a later call to Close will close it.
func (m *multiCloser) Add(closer io.Closer) {
	m.closers = append(m.closers, closer)
}

// Close closes every registered closer in the order it was added. A failure
// does not stop the remaining closers from being closed; each failure is
// wrapped with the concrete type of the closer that produced it, and all
// failures are returned joined together. The result is nil when every closer
// succeeds.
func (m *multiCloser) Close() error {
	failures := make([]error, 0, len(m.closers))
	for i := range m.closers {
		c := m.closers[i]
		closeErr := c.Close()
		if closeErr == nil {
			continue
		}
		failures = append(failures, xerrors.Errorf("close %T: %w", c, closeErr))
	}
	return errors.Join(failures...)
}