mirror of
https://github.com/coder/coder.git
synced 2026-06-05 05:58:20 +00:00
4d74603045
> **PR Stack**
> 1. **#23351** ← `#23282` *(you are here)*
> 2. #23282 ← `#23275`
> 3. #23275 ← `#23349`
> 4. #23349 ← `main`

---

## Summary

`chatretry.Retry()` used pure exponential backoff (1 s, 2 s, 4 s, …) and never consulted provider `Retry-After` headers. Fantasy's `ProviderError` carries `ResponseHeaders` including `Retry-After`, but `chaterror.Classify()` only parsed error text and silently dropped the structured transport metadata.

This makes `Retry-After` a first-class signal in the classification → retry pipeline.

<img width="853" height="346" alt="image" src="https://github.com/user-attachments/assets/65f012b6-8173-43d2-957e-ab9faddea525" />

## Changes

### `coderd/chatd/chaterror/classify.go`

- Added `RetryAfter time.Duration` field to `ClassifiedError` — a normalized minimum retry delay derived from provider response metadata.
- `Classify()` now calls `extractProviderErrorDetails()` before falling back to text heuristics. Structured `ProviderError.StatusCode` takes priority over regex extraction.
- `normalizeClassification()` preserves and clamps `RetryAfter`.

### `coderd/chatd/chaterror/provider_error.go` (new)

Provider-specific extraction, isolated from the text-based classification logic:

- `extractProviderErrorDetails()` unwraps `*fantasy.ProviderError` from the error chain via `errors.As`.
- `retryAfterFromHeaders()` parses headers in priority order:
  1. `retry-after-ms` (OpenAI-specific, millisecond precision)
  2. `retry-after` (standard HTTP — integer seconds or HTTP-date)
- Case-insensitive header key lookup.

### `coderd/chatd/chatretry/chatretry.go`

- `effectiveDelay(attempt, classified)` computes `max(Delay(attempt), classified.RetryAfter)` — the provider hint acts as a floor without weakening the local exponential backoff.
- `Retry()` now uses `effectiveDelay` and passes the effective delay to both `onRetry(...)` and the sleep timer, so downstream payloads, logs, and the frontend countdown stay aligned automatically.

### Tests

- `classify_test.go`: Structured provider status + `Retry-After` extraction, `retry-after-ms` priority, HTTP-date parsing, invalid header fallback, `WithProvider` preservation.
- `chatretry_test.go`: Retry-after-as-floor semantics — longer hint wins, shorter hint keeps base delay.

## Design notes

- **No SDK/API/frontend changes needed.** `codersdk.ChatStreamRetry` already carries `DelayMs` and `RetryingAt`, and the frontend already consumes them. The fix is purely in the server-side delay computation.
- **Existing retryability rules unchanged.** This fixes *when* we sleep, not *whether* an error is retryable.
- **Provider hint is a floor:** `max(baseDelay, RetryAfter)` ensures we never retry earlier than the provider asks, and never weaken our own backoff curve.
66 lines
1.4 KiB
Go
66 lines
1.4 KiB
Go
package chaterror
|
|
|
|
import (
|
|
"errors"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"charm.land/fantasy"
|
|
)
|
|
|
|
// providerErrorDetails groups the structured transport metadata read from
// a provider error: the HTTP status code and the retry delay hinted by the
// provider's response headers. The zero value means nothing was extracted.
type providerErrorDetails struct {
	// statusCode is the status code reported by the provider error.
	statusCode int
	// retryAfter is the delay derived from the provider's response
	// headers, or 0 when they carry no usable hint.
	retryAfter time.Duration
}
|
|
|
|
func extractProviderErrorDetails(err error) providerErrorDetails {
|
|
var providerErr *fantasy.ProviderError
|
|
if !errors.As(err, &providerErr) {
|
|
return providerErrorDetails{}
|
|
}
|
|
|
|
return providerErrorDetails{
|
|
statusCode: providerErr.StatusCode,
|
|
retryAfter: retryAfterFromHeaders(providerErr.ResponseHeaders),
|
|
}
|
|
}
|
|
|
|
// retryAfterFromHeaders derives a minimum retry delay from provider
// response headers. Header names are matched case-insensitively and values
// are trimmed of surrounding whitespace before parsing.
//
// Precedence:
//  1. retry-after-ms: a positive number of milliseconds (OpenAI convention).
//  2. retry-after: a positive number of seconds, or an HTTP-date that lies
//     in the future.
//
// It returns 0 when headers is empty or no header yields a positive delay.
// Numeric values too large for a time.Duration (including "Inf") saturate
// at the maximum duration instead of overflowing.
func retryAfterFromHeaders(headers map[string]string) time.Duration {
	if len(headers) == 0 {
		return 0
	}

	// Prefer retry-after-ms (OpenAI convention, milliseconds)
	// over the standard retry-after (seconds or HTTP-date).
	for key, value := range headers {
		if !strings.EqualFold(key, "retry-after-ms") {
			continue
		}
		ms, err := strconv.ParseFloat(strings.TrimSpace(value), 64)
		// NaN fails the > 0 comparison and is therefore ignored.
		if err == nil && ms > 0 {
			return saturatingRetryDelay(ms, time.Millisecond)
		}
	}

	for key, value := range headers {
		if !strings.EqualFold(key, "retry-after") {
			continue
		}
		v := strings.TrimSpace(value)
		if seconds, err := strconv.ParseFloat(v, 64); err == nil {
			if seconds > 0 {
				return saturatingRetryDelay(seconds, time.Second)
			}
			// Zero, negative, or NaN: no usable hint in this entry.
			continue
		}
		if retryAt, err := http.ParseTime(v); err == nil {
			if d := time.Until(retryAt); d > 0 {
				return d
			}
		}
		// An unusable value does not end the search: the map may hold
		// another key that differs only in case, and returning here would
		// make the result depend on map iteration order.
	}

	return 0
}

// saturatingRetryDelay converts value, expressed in multiples of unit, to a
// time.Duration, clamping at the largest representable duration. Converting
// an out-of-range float64 to an integer type is implementation-dependent in
// Go, so the range check must happen before the conversion.
func saturatingRetryDelay(value float64, unit time.Duration) time.Duration {
	const maxDuration = time.Duration(1<<63 - 1)

	nanos := value * float64(unit)
	// float64(maxDuration) rounds up to 2^63, which is already out of
	// range for int64, so >= is the correct bound. +Inf lands here too.
	if nanos >= float64(maxDuration) {
		return maxDuration
	}
	return time.Duration(nanos)
}
|