mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
8b1705eb65
## Summary

Routes chatd model calls backed by concrete AI Provider rows through the in-process aibridge transport by default, with deployment options to use direct provider routing when AI Gateway is disabled or chat AI Gateway routing is disabled.

- Splits model routing into common, direct provider, and AI Gateway paths behind a single deployment-mode entry point.
- Builds chatd models through explicit request, route, and options data. Active API key attribution is passed explicitly instead of being hidden inside generic model construction.
- For AI Gateway BYOK routes, resolves the user's provider key in chatd, forwards it through provider-specific auth headers, and sets `X-Coder-AI-Governance-Token` to the `delegated` marker so aibridge preserves those headers while still stripping Coder-specific metadata.
- Keeps central provider credentials and deployment fallback credentials out of forwarded provider auth headers, so AI Gateway central policy remains authoritative.
- Redacts delegated provider auth from default string formatting to avoid accidental plaintext logging of user BYOK credentials.
- Covers selected chat models, advisor overrides, title and quickgen paths, subagent overrides, computer use model selection, and an integration-style chat turn through the aibridge transport path.
- Persists initiating API key IDs on chat and queued user messages, including subagent child messages, and fails closed for AI Gateway-routed model builds without an active key.
- Removes unused `api_key_id` indexes while keeping the persistence columns and foreign keys.
- Keeps the deployment option available through config and env parsing, but hides it from CLI help and generated docs.
- Stabilizes the subagent poll fallback test so background CreateChat processing cannot win the state transition under slower CI environments.
## Tests

- `go test ./coderd/x/chatd -run 'TestAIGatewayProviderAuthForUser|TestAIGatewayProviderAuthRedactsFormatting|TestResolveModelRouteForConfigAIGatewayProviderAuth|TestAIGatewayModelForwardsProviderAuth|TestProcessChat_AIGatewayRoutingUsesDelegatedAPIKey|TestAwaitSubagentCompletion' -count=1`
- `go test ./coderd/aibridged -run 'TestServeHTTP_DelegatedAPIKey|TestServeHTTP_StripCoderToken' -count=1`
- `git diff --check HEAD~1..HEAD`
- `make lint`

> Mux working on behalf of Mike.
145 lines
3.9 KiB
Go
145 lines
3.9 KiB
Go
package chatd
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chatdebug"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chatprovider"
|
|
)
|
|
|
|
// Tunables for best-effort chat debug persistence and cleanup.
const (
	// debugCleanupRetryDelay is the pause between consecutive cleanup
	// attempts scheduled by scheduleDebugCleanup.
	debugCleanupRetryDelay = 500 * time.Millisecond

	// debugCleanupAttempts is the maximum number of cleanup passes
	// scheduleDebugCleanup runs before giving up.
	debugCleanupAttempts = 3

	// debugCleanupTimeout bounds a single cleanup pass.
	debugCleanupTimeout = 5 * time.Second

	// debugCreateRunTimeout limits how long a CreateRun insert may hold
	// up the caller's critical path. Debug persistence is best-effort:
	// when the DB is slow or locked the turn continues without debug
	// rows. The value matches the manual-title budget.
	debugCreateRunTimeout = 5 * time.Second

	// debugCleanupClockSkew is the tolerance applied to cleanup cutoffs
	// to absorb clock drift between replicas. The cutoff comes from the
	// DB (the updated_at returned by the status transition), whereas
	// chat_debug_runs.started_at is stamped by whichever replica handles
	// the replacement turn. A replica whose clock lags the DB can stamp
	// a started_at that falls behind a commit-time cutoff even though
	// the insert physically happened after the commit. Subtracting this
	// buffer keeps the fast retry path from deleting replacement rows
	// when clocks drift by up to this amount. Rows inside the buffer
	// survive the fast cleanup but are still finalized (and remain
	// eligible for stale-sweep cleanup) by the existing FinalizeStale
	// background loop.
	debugCleanupClockSkew = 30 * time.Second
)
|
|
|
|
func (p *Server) debugService() *chatdebug.Service {
|
|
if p == nil {
|
|
return nil
|
|
}
|
|
if p.debugSvcFactory == nil {
|
|
return p.debugSvc
|
|
}
|
|
p.debugSvcInit.Do(func() {
|
|
p.debugSvc = p.debugSvcFactory()
|
|
p.debugSvcReady.Store(p.debugSvc != nil)
|
|
})
|
|
return p.debugSvc
|
|
}
|
|
|
|
func (p *Server) existingDebugService() *chatdebug.Service {
|
|
if p == nil {
|
|
return nil
|
|
}
|
|
if p.debugSvcFactory == nil {
|
|
return p.debugSvc
|
|
}
|
|
if !p.debugSvcReady.Load() {
|
|
return nil
|
|
}
|
|
return p.debugSvc
|
|
}
|
|
|
|
func (p *Server) scheduleDebugCleanup(
|
|
ctx context.Context,
|
|
logMessage string,
|
|
fields []slog.Field,
|
|
cleanup func(context.Context, *chatdebug.Service) error,
|
|
) {
|
|
debugSvc := p.debugService()
|
|
if debugSvc == nil {
|
|
return
|
|
}
|
|
|
|
// Acquire inflightMu around the positive Add so Close() cannot
|
|
// call drainInflight concurrently when the counter is at zero.
|
|
// See drainInflight for the WaitGroup contract this preserves.
|
|
p.inflightMu.Lock()
|
|
p.inflight.Add(1)
|
|
p.inflightMu.Unlock()
|
|
go func() {
|
|
defer p.inflight.Done()
|
|
|
|
cleanupCtx := context.WithoutCancel(ctx)
|
|
for attempt := 0; attempt < debugCleanupAttempts; attempt++ {
|
|
if attempt > 0 {
|
|
timer := p.clock.NewTimer(debugCleanupRetryDelay, "chatd", "debug_cleanup")
|
|
<-timer.C
|
|
}
|
|
|
|
passCtx, cancel := context.WithTimeout(cleanupCtx, debugCleanupTimeout)
|
|
err := cleanup(passCtx, debugSvc)
|
|
cancel()
|
|
if err == nil {
|
|
return
|
|
}
|
|
|
|
logFields := append([]slog.Field{
|
|
slog.F("attempt", attempt+1),
|
|
slog.F("max_attempts", debugCleanupAttempts),
|
|
}, fields...)
|
|
logFields = append(logFields, slog.Error(err))
|
|
p.logger.Warn(cleanupCtx, logMessage, logFields...)
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (p *Server) newDebugAwareModel(
|
|
ctx context.Context,
|
|
req modelClientRequest,
|
|
route resolvedModelRoute,
|
|
opts modelBuildOptions,
|
|
) (fantasy.LanguageModel, bool, error) {
|
|
providerHint, err := route.providerHint()
|
|
if err != nil {
|
|
return nil, false, err
|
|
}
|
|
provider, resolvedModel, err := chatprovider.ResolveModelWithProviderHint(req.ModelName, providerHint)
|
|
if err != nil {
|
|
return nil, false, err
|
|
}
|
|
route = route.withProviderHint(provider)
|
|
req.ModelName = resolvedModel
|
|
|
|
debugSvc := p.debugService()
|
|
debugEnabled := debugSvc != nil && debugSvc.IsEnabled(ctx, req.Chat.ID, req.Chat.OwnerID)
|
|
opts.RecordHTTP = debugEnabled
|
|
|
|
model, err := p.newModel(ctx, req, route, opts)
|
|
if err != nil {
|
|
return nil, debugEnabled, err
|
|
}
|
|
if !debugEnabled {
|
|
return model, false, nil
|
|
}
|
|
|
|
return chatdebug.WrapModel(model, debugSvc, chatdebug.RecorderOptions{
|
|
ChatID: req.Chat.ID,
|
|
OwnerID: req.Chat.OwnerID,
|
|
Provider: provider,
|
|
Model: resolvedModel,
|
|
}), true, nil
|
|
}
|