mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
2bdacae5f5
## Summary Adds automatic retry with exponential backoff for transient LLM errors during chat streaming and title generation. Inspired by [coder/mux](https://github.com/coder/mux)'s retry mechanism. ## Key Behaviors - **Infinite retries** with exponential backoff: 1s → 2s → 4s → ... → 60s cap - **Deterministic delays** (no jitter) - **Error classification**: retryable (429, 5xx, overloaded, rate limit, network errors) vs non-retryable (auth, quota, context exceeded, model not found, canceled) - **Retry status published to SSE stream** so frontend can show "Retrying in Xs..." UI - **Title generation** retries silently (best-effort, nil onRetry callback) ## New Package: `coderd/chatd/chatretry/` | File | Purpose | |------|---------| | `classify.go` | `IsRetryable(err)` and `StatusCodeRetryable(code)` | | `backoff.go` | `Delay(attempt)` — exponential doubling with 60s cap | | `retry.go` | `Retry(ctx, fn, onRetry)` — infinite loop with context-aware timer | ## Test Helpers: `coderd/chatd/chattest/errors.go` Anthropic and OpenAI error response builders for use in chattest providers: - `AnthropicErrorResponse()`, `AnthropicOverloadedResponse()`, `AnthropicRateLimitResponse()` - `OpenAIErrorResponse()`, `OpenAIRateLimitResponse()`, `OpenAIServerErrorResponse()` ## SDK Changes: `codersdk/chats.go` - New `ChatStreamEventType: "retry"` - New `ChatStreamRetry` struct with `Attempt`, `DelayMs`, `Error`, `RetryingAt` fields - TypeScript types auto-generated ## Changed Files - `coderd/chatd/chatloop/chatloop.go` — wraps `agent.Stream()` in `chatretry.Retry()` - `coderd/chatd/chatd.go` — publishes retry events to SSE stream with logging - `coderd/chatd/title.go` — wraps `model.Generate()` in silent retry - `coderd/chatd/chattest/anthropic.go` / `openai.go` — error injection support ## Tests 42 tests covering classification (33), backoff (9), and retry scenarios (8).
709 lines
19 KiB
Go
709 lines
19 KiB
Go
package chatloop
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"encoding/json"
|
|
"errors"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
fantasyanthropic "charm.land/fantasy/providers/anthropic"
|
|
"github.com/google/uuid"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/chatd/chatprompt"
|
|
"github.com/coder/coder/v2/coderd/chatd/chatretry"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
)
|
|
|
|
const (
|
|
interruptedToolResultErrorMessage = "tool call was interrupted before it produced a result"
|
|
)
|
|
|
|
var ErrInterrupted = xerrors.New("chat interrupted")
|
|
|
|
// PersistedStep contains the full content of a completed or
|
|
// interrupted agent step. Content includes both assistant blocks
|
|
// (text, reasoning, tool calls) and tool result blocks, mirroring
|
|
// what fantasy provides in StepResult.Content. The persistence
|
|
// layer is responsible for splitting these into separate database
|
|
// messages by role.
|
|
type PersistedStep struct {
|
|
Content []fantasy.Content
|
|
Usage fantasy.Usage
|
|
ContextLimit sql.NullInt64
|
|
}
|
|
|
|
// RunOptions configures a single streaming chat loop run.
|
|
type RunOptions struct {
|
|
Model fantasy.LanguageModel
|
|
Messages []fantasy.Message
|
|
Tools []fantasy.AgentTool
|
|
StreamCall fantasy.AgentStreamCall
|
|
MaxSteps int
|
|
|
|
ActiveTools []string
|
|
ContextLimitFallback int64
|
|
|
|
PersistStep func(context.Context, PersistedStep) error
|
|
PublishMessagePart func(
|
|
role fantasy.MessageRole,
|
|
part codersdk.ChatMessagePart,
|
|
)
|
|
Compaction *CompactionOptions
|
|
|
|
// OnRetry is called before each retry attempt when the LLM
|
|
// stream fails with a retryable error. It provides the attempt
|
|
// number, error, and backoff delay so callers can publish status
|
|
// events to connected clients.
|
|
OnRetry chatretry.OnRetryFn
|
|
|
|
OnInterruptedPersistError func(error)
|
|
}
|
|
|
|
// Run executes the chat step-stream loop and delegates persistence/publishing to callbacks.
|
|
func Run(ctx context.Context, opts RunOptions) (*fantasy.AgentResult, error) {
|
|
if opts.Model == nil {
|
|
return nil, xerrors.New("chat model is required")
|
|
}
|
|
if opts.PersistStep == nil {
|
|
return nil, xerrors.New("persist step callback is required")
|
|
}
|
|
if opts.MaxSteps <= 0 {
|
|
opts.MaxSteps = 1
|
|
}
|
|
|
|
publishMessagePart := func(role fantasy.MessageRole, part codersdk.ChatMessagePart) {
|
|
if opts.PublishMessagePart == nil {
|
|
return
|
|
}
|
|
opts.PublishMessagePart(role, part)
|
|
}
|
|
|
|
var (
|
|
stepStateMu sync.Mutex
|
|
streamToolNames map[string]string
|
|
streamReasoningTitles map[string]string
|
|
streamReasoningText map[string]string
|
|
// stepToolResultContents tracks tool results received during
|
|
// streaming. These are needed for the interrupted-step path
|
|
// where OnStepFinish never fires.
|
|
stepToolResultContents []fantasy.ToolResultContent
|
|
stepAssistantDraft []fantasy.Content
|
|
stepToolCallIndexByID map[string]int
|
|
)
|
|
|
|
resetStepState := func() {
|
|
stepStateMu.Lock()
|
|
streamToolNames = make(map[string]string)
|
|
streamReasoningTitles = make(map[string]string)
|
|
streamReasoningText = make(map[string]string)
|
|
stepToolResultContents = nil
|
|
stepAssistantDraft = nil
|
|
stepToolCallIndexByID = make(map[string]int)
|
|
stepStateMu.Unlock()
|
|
}
|
|
|
|
setReasoningTitleFromText := func(id string, text string) {
|
|
if id == "" || strings.TrimSpace(text) == "" {
|
|
return
|
|
}
|
|
|
|
stepStateMu.Lock()
|
|
defer stepStateMu.Unlock()
|
|
|
|
if streamReasoningTitles[id] != "" {
|
|
return
|
|
}
|
|
|
|
streamReasoningText[id] += text
|
|
if !strings.ContainsAny(streamReasoningText[id], "\r\n") {
|
|
return
|
|
}
|
|
title := chatprompt.ReasoningTitleFromFirstLine(streamReasoningText[id])
|
|
if title == "" {
|
|
return
|
|
}
|
|
|
|
streamReasoningTitles[id] = title
|
|
}
|
|
|
|
appendDraftText := func(text string) {
|
|
if text == "" {
|
|
return
|
|
}
|
|
|
|
stepStateMu.Lock()
|
|
defer stepStateMu.Unlock()
|
|
|
|
if len(stepAssistantDraft) > 0 {
|
|
lastIndex := len(stepAssistantDraft) - 1
|
|
switch last := stepAssistantDraft[lastIndex].(type) {
|
|
case fantasy.TextContent:
|
|
last.Text += text
|
|
stepAssistantDraft[lastIndex] = last
|
|
return
|
|
case *fantasy.TextContent:
|
|
last.Text += text
|
|
stepAssistantDraft[lastIndex] = fantasy.TextContent{Text: last.Text}
|
|
return
|
|
}
|
|
}
|
|
stepAssistantDraft = append(stepAssistantDraft, fantasy.TextContent{Text: text})
|
|
}
|
|
|
|
appendDraftReasoning := func(text string) {
|
|
if text == "" {
|
|
return
|
|
}
|
|
|
|
stepStateMu.Lock()
|
|
defer stepStateMu.Unlock()
|
|
|
|
if len(stepAssistantDraft) > 0 {
|
|
lastIndex := len(stepAssistantDraft) - 1
|
|
switch last := stepAssistantDraft[lastIndex].(type) {
|
|
case fantasy.ReasoningContent:
|
|
last.Text += text
|
|
stepAssistantDraft[lastIndex] = last
|
|
return
|
|
case *fantasy.ReasoningContent:
|
|
last.Text += text
|
|
stepAssistantDraft[lastIndex] = fantasy.ReasoningContent{Text: last.Text}
|
|
return
|
|
}
|
|
}
|
|
stepAssistantDraft = append(stepAssistantDraft, fantasy.ReasoningContent{Text: text})
|
|
}
|
|
|
|
upsertDraftToolCall := func(toolCallID, toolName, input string, appendInput bool) {
|
|
if toolCallID == "" {
|
|
return
|
|
}
|
|
|
|
stepStateMu.Lock()
|
|
defer stepStateMu.Unlock()
|
|
|
|
if strings.TrimSpace(toolName) != "" {
|
|
streamToolNames[toolCallID] = toolName
|
|
}
|
|
|
|
index, exists := stepToolCallIndexByID[toolCallID]
|
|
if !exists {
|
|
stepToolCallIndexByID[toolCallID] = len(stepAssistantDraft)
|
|
stepAssistantDraft = append(stepAssistantDraft, fantasy.ToolCallContent{
|
|
ToolCallID: toolCallID,
|
|
ToolName: toolName,
|
|
Input: input,
|
|
})
|
|
return
|
|
}
|
|
|
|
if index < 0 || index >= len(stepAssistantDraft) {
|
|
stepToolCallIndexByID[toolCallID] = len(stepAssistantDraft)
|
|
stepAssistantDraft = append(stepAssistantDraft, fantasy.ToolCallContent{
|
|
ToolCallID: toolCallID,
|
|
ToolName: toolName,
|
|
Input: input,
|
|
})
|
|
return
|
|
}
|
|
|
|
existingCall, ok := fantasy.AsContentType[fantasy.ToolCallContent](stepAssistantDraft[index])
|
|
if !ok {
|
|
if ptrCall, ptrOK := fantasy.AsContentType[*fantasy.ToolCallContent](stepAssistantDraft[index]); ptrOK && ptrCall != nil {
|
|
existingCall = *ptrCall
|
|
ok = true
|
|
}
|
|
}
|
|
if !ok {
|
|
stepToolCallIndexByID[toolCallID] = len(stepAssistantDraft)
|
|
stepAssistantDraft = append(stepAssistantDraft, fantasy.ToolCallContent{
|
|
ToolCallID: toolCallID,
|
|
ToolName: toolName,
|
|
Input: input,
|
|
})
|
|
return
|
|
}
|
|
|
|
if strings.TrimSpace(toolName) != "" {
|
|
existingCall.ToolName = toolName
|
|
}
|
|
if appendInput {
|
|
existingCall.Input += input
|
|
} else if input != "" || existingCall.Input == "" {
|
|
existingCall.Input = input
|
|
}
|
|
stepAssistantDraft[index] = existingCall
|
|
}
|
|
|
|
appendDraftSource := func(source fantasy.SourceContent) {
|
|
stepStateMu.Lock()
|
|
stepAssistantDraft = append(stepAssistantDraft, source)
|
|
stepStateMu.Unlock()
|
|
}
|
|
|
|
persistInterruptedStep := func() error {
|
|
stepStateMu.Lock()
|
|
draft := append([]fantasy.Content(nil), stepAssistantDraft...)
|
|
toolResults := append([]fantasy.ToolResultContent(nil), stepToolResultContents...)
|
|
toolNameByCallID := make(map[string]string, len(streamToolNames))
|
|
for id, name := range streamToolNames {
|
|
toolNameByCallID[id] = name
|
|
}
|
|
stepStateMu.Unlock()
|
|
|
|
if len(draft) == 0 && len(toolResults) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Track which tool calls already have results.
|
|
answeredToolCalls := make(map[string]struct{}, len(toolResults))
|
|
for _, tr := range toolResults {
|
|
if tr.ToolCallID != "" {
|
|
answeredToolCalls[tr.ToolCallID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
// Build the combined content: draft + received tool results
|
|
// + synthetic interrupted results for unanswered tool calls.
|
|
content := make([]fantasy.Content, 0, len(draft)+len(toolResults))
|
|
content = append(content, draft...)
|
|
for _, tr := range toolResults {
|
|
content = append(content, tr)
|
|
}
|
|
|
|
for _, block := range draft {
|
|
toolCall, ok := fantasy.AsContentType[fantasy.ToolCallContent](block)
|
|
if !ok {
|
|
if ptrCall, ptrOK := fantasy.AsContentType[*fantasy.ToolCallContent](block); ptrOK && ptrCall != nil {
|
|
toolCall = *ptrCall
|
|
ok = true
|
|
}
|
|
}
|
|
if !ok || toolCall.ToolCallID == "" {
|
|
continue
|
|
}
|
|
if _, exists := answeredToolCalls[toolCall.ToolCallID]; exists {
|
|
continue
|
|
}
|
|
|
|
toolName := strings.TrimSpace(toolCall.ToolName)
|
|
if toolName == "" {
|
|
toolName = strings.TrimSpace(toolNameByCallID[toolCall.ToolCallID])
|
|
}
|
|
|
|
content = append(content, fantasy.ToolResultContent{
|
|
ToolCallID: toolCall.ToolCallID,
|
|
ToolName: toolName,
|
|
Result: fantasy.ToolResultOutputContentError{
|
|
Error: xerrors.New(interruptedToolResultErrorMessage),
|
|
},
|
|
})
|
|
answeredToolCalls[toolCall.ToolCallID] = struct{}{}
|
|
}
|
|
|
|
persistCtx := context.WithoutCancel(ctx)
|
|
return opts.PersistStep(persistCtx, PersistedStep{
|
|
Content: content,
|
|
})
|
|
}
|
|
|
|
resetStepState()
|
|
|
|
agent := fantasy.NewAgent(
|
|
opts.Model,
|
|
fantasy.WithTools(opts.Tools...),
|
|
fantasy.WithStopConditions(fantasy.StepCountIs(opts.MaxSteps)),
|
|
)
|
|
applyAnthropicCaching := shouldApplyAnthropicPromptCaching(opts.Model)
|
|
// Fantasy's AgentStreamCall currently requires a non-empty Prompt and always
|
|
// appends it as a user message. chatd already supplies the full history in
|
|
// Messages, so we pass and then strip a sentinel user message in PrepareStep.
|
|
sentinelPrompt := "__chatd_agent_prompt_sentinel_" + uuid.NewString()
|
|
|
|
streamCall := opts.StreamCall
|
|
streamCall.Prompt = sentinelPrompt
|
|
streamCall.Messages = opts.Messages
|
|
streamCall.PrepareStep = func(
|
|
stepCtx context.Context,
|
|
options fantasy.PrepareStepFunctionOptions,
|
|
) (context.Context, fantasy.PrepareStepResult, error) {
|
|
return stepCtx, prepareStepResult(
|
|
options.Messages,
|
|
sentinelPrompt,
|
|
opts.ActiveTools,
|
|
applyAnthropicCaching,
|
|
), nil
|
|
}
|
|
streamCall.OnStepStart = func(_ int) error {
|
|
resetStepState()
|
|
return nil
|
|
}
|
|
streamCall.OnTextDelta = func(_ string, text string) error {
|
|
appendDraftText(text)
|
|
publishMessagePart(fantasy.MessageRoleAssistant, codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeText,
|
|
Text: text,
|
|
})
|
|
return nil
|
|
}
|
|
streamCall.OnReasoningDelta = func(id string, text string) error {
|
|
appendDraftReasoning(text)
|
|
setReasoningTitleFromText(id, text)
|
|
stepStateMu.Lock()
|
|
title := streamReasoningTitles[id]
|
|
stepStateMu.Unlock()
|
|
publishMessagePart(fantasy.MessageRoleAssistant, codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeReasoning,
|
|
Text: text,
|
|
Title: title,
|
|
})
|
|
return nil
|
|
}
|
|
streamCall.OnReasoningEnd = func(id string, _ fantasy.ReasoningContent) error {
|
|
stepStateMu.Lock()
|
|
if streamReasoningTitles[id] == "" {
|
|
// At the end of reasoning we have the full text, so we can
|
|
// safely evaluate first-line title format even if no newline
|
|
// ever arrived in deltas.
|
|
streamReasoningTitles[id] = chatprompt.ReasoningTitleFromFirstLine(
|
|
streamReasoningText[id],
|
|
)
|
|
}
|
|
title := streamReasoningTitles[id]
|
|
stepStateMu.Unlock()
|
|
if title != "" {
|
|
// Publish a title-only reasoning part so clients can update the
|
|
// reasoning header when metadata arrives at the end of streaming.
|
|
publishMessagePart(fantasy.MessageRoleAssistant, codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeReasoning,
|
|
Title: title,
|
|
})
|
|
}
|
|
return nil
|
|
}
|
|
streamCall.OnToolInputStart = func(id, toolName string) error {
|
|
upsertDraftToolCall(id, toolName, "", false)
|
|
return nil
|
|
}
|
|
streamCall.OnToolInputDelta = func(id, delta string) error {
|
|
stepStateMu.Lock()
|
|
toolName := streamToolNames[id]
|
|
stepStateMu.Unlock()
|
|
upsertDraftToolCall(id, toolName, delta, true)
|
|
publishMessagePart(fantasy.MessageRoleAssistant, codersdk.ChatMessagePart{
|
|
Type: codersdk.ChatMessagePartTypeToolCall,
|
|
ToolCallID: id,
|
|
ToolName: toolName,
|
|
ArgsDelta: delta,
|
|
})
|
|
return nil
|
|
}
|
|
streamCall.OnToolCall = func(toolCall fantasy.ToolCallContent) error {
|
|
upsertDraftToolCall(toolCall.ToolCallID, toolCall.ToolName, toolCall.Input, false)
|
|
publishMessagePart(
|
|
fantasy.MessageRoleAssistant,
|
|
chatprompt.PartFromContent(toolCall),
|
|
)
|
|
return nil
|
|
}
|
|
streamCall.OnSource = func(source fantasy.SourceContent) error {
|
|
appendDraftSource(source)
|
|
publishMessagePart(
|
|
fantasy.MessageRoleAssistant,
|
|
chatprompt.PartFromContent(source),
|
|
)
|
|
return nil
|
|
}
|
|
streamCall.OnToolResult = func(result fantasy.ToolResultContent) error {
|
|
publishMessagePart(
|
|
fantasy.MessageRoleTool,
|
|
chatprompt.PartFromContent(result),
|
|
)
|
|
|
|
stepStateMu.Lock()
|
|
if result.ToolCallID != "" && strings.TrimSpace(result.ToolName) != "" {
|
|
streamToolNames[result.ToolCallID] = result.ToolName
|
|
}
|
|
stepToolResultContents = append(stepToolResultContents, result)
|
|
stepStateMu.Unlock()
|
|
|
|
return nil
|
|
}
|
|
streamCall.OnStepFinish = func(stepResult fantasy.StepResult) error {
|
|
contextLimit := extractContextLimit(stepResult.ProviderMetadata)
|
|
if !contextLimit.Valid && opts.ContextLimitFallback > 0 {
|
|
contextLimit = sql.NullInt64{
|
|
Int64: opts.ContextLimitFallback,
|
|
Valid: true,
|
|
}
|
|
}
|
|
|
|
return opts.PersistStep(ctx, PersistedStep{
|
|
Content: stepResult.Content,
|
|
Usage: stepResult.Usage,
|
|
ContextLimit: contextLimit,
|
|
})
|
|
}
|
|
|
|
var result *fantasy.AgentResult
|
|
err := chatretry.Retry(ctx, func(retryCtx context.Context) error {
|
|
var streamErr error
|
|
result, streamErr = agent.Stream(retryCtx, streamCall)
|
|
if streamErr != nil {
|
|
// Interrupts are not retryable — propagate them
|
|
// immediately so processChat can set the correct
|
|
// status.
|
|
if errors.Is(streamErr, context.Canceled) &&
|
|
errors.Is(context.Cause(retryCtx), ErrInterrupted) {
|
|
if persistErr := persistInterruptedStep(); persistErr != nil {
|
|
if opts.OnInterruptedPersistError != nil {
|
|
opts.OnInterruptedPersistError(persistErr)
|
|
}
|
|
}
|
|
// Return ErrInterrupted directly so the retry
|
|
// loop sees a non-retryable error and stops.
|
|
return ErrInterrupted
|
|
}
|
|
return streamErr
|
|
}
|
|
return nil
|
|
}, func(attempt int, retryErr error, delay time.Duration) {
|
|
// Reset accumulated draft state from the failed attempt
|
|
// so the next attempt starts clean.
|
|
resetStepState()
|
|
|
|
if opts.OnRetry != nil {
|
|
opts.OnRetry(attempt, retryErr, delay)
|
|
}
|
|
})
|
|
if err != nil {
|
|
if errors.Is(err, ErrInterrupted) {
|
|
return nil, ErrInterrupted
|
|
}
|
|
return nil, xerrors.Errorf("stream response: %w", err)
|
|
}
|
|
if opts.Compaction != nil {
|
|
if err := maybeCompact(ctx, opts, result); err != nil {
|
|
if opts.Compaction.OnError != nil {
|
|
opts.Compaction.OnError(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
//nolint:revive // Boolean controls Anthropic-specific caching behavior.
|
|
func prepareStepResult(
|
|
messages []fantasy.Message,
|
|
sentinel string,
|
|
activeTools []string,
|
|
anthropicCaching bool,
|
|
) fantasy.PrepareStepResult {
|
|
filtered := make([]fantasy.Message, 0, len(messages))
|
|
removed := false
|
|
for _, message := range messages {
|
|
if !removed &&
|
|
message.Role == fantasy.MessageRoleUser &&
|
|
len(message.Content) == 1 {
|
|
textPart, ok := fantasy.AsMessagePart[fantasy.TextPart](message.Content[0])
|
|
if ok && textPart.Text == sentinel {
|
|
removed = true
|
|
continue
|
|
}
|
|
}
|
|
filtered = append(filtered, message)
|
|
}
|
|
|
|
result := fantasy.PrepareStepResult{
|
|
Messages: filtered,
|
|
}
|
|
if anthropicCaching {
|
|
result.Messages = addAnthropicPromptCaching(result.Messages)
|
|
}
|
|
if len(activeTools) > 0 {
|
|
result.ActiveTools = append([]string(nil), activeTools...)
|
|
}
|
|
return result
|
|
}
|
|
|
|
func shouldApplyAnthropicPromptCaching(model fantasy.LanguageModel) bool {
|
|
if model == nil {
|
|
return false
|
|
}
|
|
return model.Provider() == fantasyanthropic.Name
|
|
}
|
|
|
|
func addAnthropicPromptCaching(messages []fantasy.Message) []fantasy.Message {
|
|
for i := range messages {
|
|
messages[i].ProviderOptions = nil
|
|
}
|
|
|
|
providerOption := fantasy.ProviderOptions{
|
|
fantasyanthropic.Name: &fantasyanthropic.ProviderCacheControlOptions{
|
|
CacheControl: fantasyanthropic.CacheControl{Type: "ephemeral"},
|
|
},
|
|
}
|
|
|
|
lastSystemRoleIdx := -1
|
|
systemMessageUpdated := false
|
|
for i, msg := range messages {
|
|
if msg.Role == fantasy.MessageRoleSystem {
|
|
lastSystemRoleIdx = i
|
|
} else if !systemMessageUpdated && lastSystemRoleIdx >= 0 {
|
|
messages[lastSystemRoleIdx].ProviderOptions = providerOption
|
|
systemMessageUpdated = true
|
|
}
|
|
if i > len(messages)-3 {
|
|
messages[i].ProviderOptions = providerOption
|
|
}
|
|
}
|
|
|
|
return messages
|
|
}
|
|
|
|
func extractContextLimit(metadata fantasy.ProviderMetadata) sql.NullInt64 {
|
|
if len(metadata) == 0 {
|
|
return sql.NullInt64{}
|
|
}
|
|
|
|
encoded, err := json.Marshal(metadata)
|
|
if err != nil || len(encoded) == 0 {
|
|
return sql.NullInt64{}
|
|
}
|
|
|
|
var payload any
|
|
if err := json.Unmarshal(encoded, &payload); err != nil {
|
|
return sql.NullInt64{}
|
|
}
|
|
|
|
limit, ok := findContextLimitValue(payload)
|
|
if !ok {
|
|
return sql.NullInt64{}
|
|
}
|
|
|
|
return sql.NullInt64{
|
|
Int64: limit,
|
|
Valid: true,
|
|
}
|
|
}
|
|
|
|
func findContextLimitValue(value any) (int64, bool) {
|
|
var (
|
|
limit int64
|
|
found bool
|
|
)
|
|
|
|
collectContextLimitValues(value, func(candidate int64) {
|
|
if !found || candidate > limit {
|
|
limit = candidate
|
|
found = true
|
|
}
|
|
})
|
|
|
|
return limit, found
|
|
}
|
|
|
|
func collectContextLimitValues(value any, onValue func(int64)) {
|
|
switch typed := value.(type) {
|
|
case map[string]any:
|
|
for key, child := range typed {
|
|
if isContextLimitKey(key) {
|
|
if numeric, ok := numericContextLimitValue(child); ok {
|
|
onValue(numeric)
|
|
}
|
|
}
|
|
collectContextLimitValues(child, onValue)
|
|
}
|
|
case []any:
|
|
for _, child := range typed {
|
|
collectContextLimitValues(child, onValue)
|
|
}
|
|
}
|
|
}
|
|
|
|
func isContextLimitKey(key string) bool {
|
|
normalized := normalizeMetadataKey(key)
|
|
if normalized == "" {
|
|
return false
|
|
}
|
|
|
|
switch normalized {
|
|
case
|
|
"contextlimit",
|
|
"contextwindow",
|
|
"contextlength",
|
|
"maxcontext",
|
|
"maxcontexttokens",
|
|
"maxinputtokens",
|
|
"maxinputtoken",
|
|
"inputtokenlimit":
|
|
return true
|
|
}
|
|
|
|
return strings.Contains(normalized, "context") &&
|
|
(strings.Contains(normalized, "limit") ||
|
|
strings.Contains(normalized, "window") ||
|
|
strings.Contains(normalized, "length") ||
|
|
strings.HasPrefix(normalized, "max"))
|
|
}
|
|
|
|
func normalizeMetadataKey(key string) string {
|
|
var b strings.Builder
|
|
b.Grow(len(key))
|
|
|
|
for _, r := range key {
|
|
switch {
|
|
case r >= 'a' && r <= 'z':
|
|
_, _ = b.WriteRune(r)
|
|
case r >= 'A' && r <= 'Z':
|
|
_, _ = b.WriteRune(r + ('a' - 'A'))
|
|
case r >= '0' && r <= '9':
|
|
_, _ = b.WriteRune(r)
|
|
}
|
|
}
|
|
|
|
return b.String()
|
|
}
|
|
|
|
func numericContextLimitValue(value any) (int64, bool) {
|
|
switch typed := value.(type) {
|
|
case int64:
|
|
return positiveInt64(typed)
|
|
case int32:
|
|
return positiveInt64(int64(typed))
|
|
case int:
|
|
return positiveInt64(int64(typed))
|
|
case float64:
|
|
casted := int64(typed)
|
|
if typed > 0 && float64(casted) == typed {
|
|
return casted, true
|
|
}
|
|
case string:
|
|
parsed, err := strconv.ParseInt(strings.TrimSpace(typed), 10, 64)
|
|
if err == nil {
|
|
return positiveInt64(parsed)
|
|
}
|
|
case json.Number:
|
|
parsed, err := typed.Int64()
|
|
if err == nil {
|
|
return positiveInt64(parsed)
|
|
}
|
|
}
|
|
|
|
return 0, false
|
|
}
|
|
|
|
func positiveInt64(value int64) (int64, bool) {
|
|
if value <= 0 {
|
|
return 0, false
|
|
}
|
|
return value, true
|
|
}
|