mirror of
https://github.com/coder/coder.git
synced 2026-06-04 21:48:22 +00:00
62e9752acd
> Worked on by Mux on Mike's behalf. ## Summary - Disable OpenAI Responses `previous_response_id` chain mode when the prior assistant response has unresolved local tool calls, so the next request can include paired tool outputs instead of sending an incomplete continuation. - Update the fantasy pin to a Responses replay fix that preserves stored reasoning references, only replays web search references when paired with reasoning, and validates local function-call output pairing before send. - Add fake OpenAI Responses input validation for the two production 400 shapes and integration coverage for full-history reasoning plus web search replay. - Add sanitized diagnostics for the OpenAI Responses continuity errors. ## Tests - `go test ./providers/openai -run 'TestResponsesToPrompt_(ReasoningWithStore|ReasoningWithWebSearchCombined|WebSearchRequiresReasoningReference|ReasoningWithFunctionCallCombined|WebSearchProviderExecutedToolResults)|TestPrepareParams_(SkipsProviderExecutedToolReferences|ValidatesFunctionCallOutputPairing)|TestValidateResponsesInput_WebSearchReferenceRequiresReasoning' -count=1` - `go test ./providers/openai -count=1` - `GOWORK=off go test ./coderd/x/chatd/chattest -run TestValidateResponsesAPIInput -count=1` - `GOWORK=off go test ./coderd/x/chatd -run 'TestOpenAIResponses(NoStaleWebSearchReplay|FullReplayPairsReasoningAndWebSearch|ChainModeSkipsWhenLocalCallPending|ChainModeStillFiresForProviderExecutedOnly)$|TestResolveChainMode_' -count=1` - `GOWORK=off go test ./coderd/x/chatd/chatprompt -run 'TestInjectMissingToolResults_' -count=1` - `GOWORK=off go test ./coderd/x/chatd/chaterror -run TestClassify_OpenAIResponsesAPIDiagnostics -count=1` - `GOWORK=off go test ./coderd/x/chatd/... -count=1` - `git diff --check` - `git commit` pre-commit hook
266 lines
7.5 KiB
Go
266 lines
7.5 KiB
Go
package chaterror
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// ClassifiedError is the normalized, user-facing representation of an
// underlying provider or runtime error. Values are produced by Classify
// and passed through normalizeClassification before being returned.
type ClassifiedError struct {
	// Message is the user-facing summary. When left blank,
	// normalizeClassification derives it from the other fields.
	Message string
	// Detail carries optional extra context. Normalization trims it and
	// caps its length.
	Detail string
	// Kind is the error category. Normalization falls back to
	// KindGeneric when it is blank.
	Kind string
	// Provider is the normalized name of the provider the error is
	// attributed to, if any.
	Provider string
	// Retryable reports whether classification considered the failure
	// worth retrying.
	Retryable bool
	// StatusCode is the HTTP-style status associated with the error, or
	// zero when none was found.
	StatusCode int

	// RetryAfter is a normalized minimum retry delay derived from
	// provider response metadata when available.
	RetryAfter time.Duration
}
|
|
|
|
// responsesAPIDiagnosticMessage is the user-facing message Classify
// attaches when an error matches one of the known OpenAI Responses API
// continuity failures.
const responsesAPIDiagnosticMessage = "The chat continuation failed due to an internal state mismatch. " +
	"This is not a configuration or billing issue."
|
|
|
|
// responsesAPIDiagnosticMatch pairs a lowercase provider error fragment
// with the sanitized diagnostic detail reported when that fragment is
// found.
type responsesAPIDiagnosticMatch struct {
	// pattern is the substring searched for in lowercased error text.
	pattern string
	// detail is the safe diagnostic string returned on a match.
	detail string
}

// responsesAPIDiagnosticMatches maps provider error fragments to safe
// diagnostics. Details must not include provider item IDs because they are
// returned to clients and used by operators for grepping.
//
// Entries are checked in order and the first match wins.
var responsesAPIDiagnosticMatches = []responsesAPIDiagnosticMatch{
	{
		// A function call was sent without its paired tool output.
		pattern: "no tool output found for function call",
		detail:  "OpenAI Responses API request continuity diagnostic: match=function_call_output_missing.",
	},
	{
		// An item was replayed without the reasoning item it requires.
		pattern: "was provided without its required 'reasoning' item",
		detail:  "OpenAI Responses API request continuity diagnostic: match=web_search_reasoning_missing.",
	},
}
|
|
|
|
// WithProvider returns a copy of the classification using an explicit
|
|
// provider hint. Explicit provider hints are trusted over provider names
|
|
// heuristically parsed from the error text.
|
|
func (c ClassifiedError) WithProvider(provider string) ClassifiedError {
|
|
hint := normalizeProvider(provider)
|
|
if hint == "" {
|
|
return normalizeClassification(c)
|
|
}
|
|
if c.Provider == hint && strings.TrimSpace(c.Message) != "" {
|
|
return normalizeClassification(c)
|
|
}
|
|
updated := c
|
|
updated.Provider = hint
|
|
updated.Message = ""
|
|
return normalizeClassification(updated)
|
|
}
|
|
|
|
// WithClassification wraps err so future calls to Classify return
|
|
// classified instead of re-deriving it from err.Error().
|
|
func WithClassification(err error, classified ClassifiedError) error {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
return &classifiedError{
|
|
cause: err,
|
|
classified: normalizeClassification(classified),
|
|
}
|
|
}
|
|
|
|
type classifiedError struct {
|
|
cause error
|
|
classified ClassifiedError
|
|
}
|
|
|
|
func (e *classifiedError) Error() string {
|
|
return e.cause.Error()
|
|
}
|
|
|
|
func (e *classifiedError) Unwrap() error {
|
|
return e.cause
|
|
}
|
|
|
|
// Classify normalizes err into a stable, user-facing payload used for
|
|
// retry handling, streamed terminal errors, and persisted last_error
|
|
// values.
|
|
func Classify(err error) ClassifiedError {
|
|
if err == nil {
|
|
return ClassifiedError{}
|
|
}
|
|
|
|
var wrapped *classifiedError
|
|
if errors.As(err, &wrapped) {
|
|
return normalizeClassification(wrapped.classified)
|
|
}
|
|
|
|
structured := extractProviderErrorDetails(err)
|
|
message := strings.TrimSpace(err.Error())
|
|
if message == "" && structured.detail == "" && structured.statusCode == 0 && structured.retryAfter <= 0 {
|
|
return ClassifiedError{}
|
|
}
|
|
|
|
lower := strings.ToLower(message)
|
|
statusCode := structured.statusCode
|
|
if statusCode == 0 {
|
|
statusCode = extractStatusCode(lower)
|
|
}
|
|
provider := detectProvider(lower)
|
|
canceled := errors.Is(err, context.Canceled) || strings.Contains(lower, "context canceled")
|
|
interrupted := containsAny(lower, interruptedPatterns...)
|
|
if canceled || interrupted {
|
|
return normalizeClassification(ClassifiedError{
|
|
Message: "The request was canceled before it completed.",
|
|
Detail: structured.detail,
|
|
Kind: KindGeneric,
|
|
Provider: provider,
|
|
StatusCode: statusCode,
|
|
RetryAfter: structured.retryAfter,
|
|
})
|
|
}
|
|
|
|
if detail, ok := responsesAPIDiagnostic(lower, structured.detail); ok {
|
|
return normalizeClassification(ClassifiedError{
|
|
Message: responsesAPIDiagnosticMessage,
|
|
Detail: detail,
|
|
Kind: KindGeneric,
|
|
Provider: provider,
|
|
StatusCode: statusCode,
|
|
RetryAfter: structured.retryAfter,
|
|
})
|
|
}
|
|
|
|
deadline := errors.Is(err, context.DeadlineExceeded) || strings.Contains(lower, "context deadline exceeded")
|
|
overloadedMatch := statusCode == 529 || containsAny(lower, overloadedPatterns...)
|
|
authStrong := statusCode == 401 || containsAny(lower, authStrongPatterns...)
|
|
configMatch := containsAny(lower, configPatterns...)
|
|
authWeak := statusCode == 403 || containsAny(lower, authWeakPatterns...)
|
|
rateLimitMatch := statusCode == 429 || containsAny(lower, rateLimitPatterns...)
|
|
timeoutMatch := deadline || statusCode == 408 || statusCode == 502 ||
|
|
statusCode == 503 || statusCode == 504 ||
|
|
containsAny(lower, timeoutPatterns...)
|
|
genericRetryableMatch := statusCode == 500 || containsAny(lower, genericRetryablePatterns...)
|
|
|
|
// Config signals should beat ambiguous wrapper signals so
|
|
// transient-looking errors like "503 invalid model" fail fast.
|
|
// Overloaded stays ahead because 529/overloaded is a dedicated
|
|
// provider saturation signal, not a common transport wrapper.
|
|
// Strong auth still stays above config because bad credentials are
|
|
// the root cause when both signals appear.
|
|
rules := []struct {
|
|
match bool
|
|
kind string
|
|
retryable bool
|
|
}{
|
|
{
|
|
match: overloadedMatch,
|
|
kind: KindOverloaded,
|
|
retryable: true,
|
|
},
|
|
{
|
|
match: authStrong,
|
|
kind: KindAuth,
|
|
retryable: false,
|
|
},
|
|
{
|
|
match: authWeak && !configMatch,
|
|
kind: KindAuth,
|
|
retryable: false,
|
|
},
|
|
{
|
|
match: rateLimitMatch && !configMatch,
|
|
kind: KindRateLimit,
|
|
retryable: true,
|
|
},
|
|
{
|
|
match: timeoutMatch && !configMatch,
|
|
kind: KindTimeout,
|
|
retryable: !deadline,
|
|
},
|
|
{
|
|
match: configMatch,
|
|
kind: KindConfig,
|
|
retryable: false,
|
|
},
|
|
{
|
|
match: genericRetryableMatch,
|
|
kind: KindGeneric,
|
|
retryable: true,
|
|
},
|
|
}
|
|
for _, rule := range rules {
|
|
if !rule.match {
|
|
continue
|
|
}
|
|
return normalizeClassification(ClassifiedError{
|
|
Detail: structured.detail,
|
|
Kind: rule.kind,
|
|
Provider: provider,
|
|
Retryable: rule.retryable,
|
|
StatusCode: statusCode,
|
|
RetryAfter: structured.retryAfter,
|
|
})
|
|
}
|
|
|
|
return normalizeClassification(ClassifiedError{
|
|
Detail: structured.detail,
|
|
Kind: KindGeneric,
|
|
Provider: provider,
|
|
StatusCode: statusCode,
|
|
RetryAfter: structured.retryAfter,
|
|
})
|
|
}
|
|
|
|
func responsesAPIDiagnostic(lowerMessage, detail string) (string, bool) {
|
|
lowerDetail := strings.ToLower(detail)
|
|
for _, match := range responsesAPIDiagnosticMatches {
|
|
if strings.Contains(lowerMessage, match.pattern) || strings.Contains(lowerDetail, match.pattern) {
|
|
return match.detail, true
|
|
}
|
|
}
|
|
return "", false
|
|
}
|
|
|
|
func normalizeClassification(classified ClassifiedError) ClassifiedError {
|
|
classified.Message = strings.TrimSpace(classified.Message)
|
|
classified.Detail = normalizeClassificationDetail(classified.Detail)
|
|
classified.Kind = strings.TrimSpace(classified.Kind)
|
|
classified.Provider = normalizeProvider(classified.Provider)
|
|
if classified.RetryAfter < 0 {
|
|
classified.RetryAfter = 0
|
|
}
|
|
if classified.Kind == "" && classified.Message == "" {
|
|
if classified.Detail == "" && classified.StatusCode == 0 &&
|
|
classified.RetryAfter <= 0 {
|
|
return ClassifiedError{}
|
|
}
|
|
classified.Kind = KindGeneric
|
|
}
|
|
if classified.Kind == "" {
|
|
classified.Kind = KindGeneric
|
|
}
|
|
if classified.Message == "" {
|
|
classified.Message = terminalMessage(classified)
|
|
}
|
|
return classified
|
|
}
|
|
|
|
// maxClassificationDetailRunes is the maximum length, in runes, of a
// normalized classification detail, including the trailing ellipsis
// added when the detail is truncated.
const maxClassificationDetailRunes = 500

// normalizeClassificationDetail trims surrounding whitespace from detail
// and limits it to maxClassificationDetailRunes runes. An over-long
// detail is cut to one rune short of the limit and suffixed with "…", so
// the result is exactly maxClassificationDetailRunes runes long.
func normalizeClassificationDetail(detail string) string {
	trimmed := strings.TrimSpace(detail)

	// A string never has more runes than bytes, so anything within the
	// limit by byte length (including the empty string) is returned
	// without decoding.
	if len(trimmed) <= maxClassificationDetailRunes {
		return trimmed
	}

	decoded := []rune(trimmed)
	if len(decoded) <= maxClassificationDetailRunes {
		return trimmed
	}

	kept := decoded[:maxClassificationDetailRunes-1]
	return string(kept) + "…"
}
|