mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
2bdacae5f5
## Summary

Adds automatic retry with exponential backoff for transient LLM errors during chat streaming and title generation. Inspired by [coder/mux](https://github.com/coder/mux)'s retry mechanism.

## Key Behaviors

- **Infinite retries** with exponential backoff: 1s → 2s → 4s → ... → 60s cap
- **Deterministic delays** (no jitter)
- **Error classification**: retryable (429, 5xx, overloaded, rate limit, network errors) vs non-retryable (auth, quota, context exceeded, model not found, canceled)
- **Retry status published to SSE stream** so frontend can show "Retrying in Xs..." UI
- **Title generation** retries silently (best-effort, nil onRetry callback)

## New Package: `coderd/chatd/chatretry/`

| File | Purpose |
|------|---------|
| `classify.go` | `IsRetryable(err)` and `StatusCodeRetryable(code)` |
| `backoff.go` | `Delay(attempt)` — exponential doubling with 60s cap |
| `retry.go` | `Retry(ctx, fn, onRetry)` — infinite loop with context-aware timer |

## Test Helpers: `coderd/chatd/chattest/errors.go`

Anthropic and OpenAI error response builders for use in chattest providers:

- `AnthropicErrorResponse()`, `AnthropicOverloadedResponse()`, `AnthropicRateLimitResponse()`
- `OpenAIErrorResponse()`, `OpenAIRateLimitResponse()`, `OpenAIServerErrorResponse()`

## SDK Changes: `codersdk/chats.go`

- New `ChatStreamEventType: "retry"`
- New `ChatStreamRetry` struct with `Attempt`, `DelayMs`, `Error`, `RetryingAt` fields
- TypeScript types auto-generated

## Changed Files

- `coderd/chatd/chatloop/chatloop.go` — wraps `agent.Stream()` in `chatretry.Retry()`
- `coderd/chatd/chatd.go` — publishes retry events to SSE stream with logging
- `coderd/chatd/title.go` — wraps `model.Generate()` in silent retry
- `coderd/chatd/chattest/anthropic.go` / `openai.go` — error injection support

## Tests

42 tests covering classification (33), backoff (9), and retry scenarios (8).
75 lines
2.4 KiB
Go
75 lines
2.4 KiB
Go
package chattest
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net/http"
|
|
)
|
|
|
|
// ErrorResponse describes an HTTP error that a test server should return
// instead of a normal streaming or JSON response.
type ErrorResponse struct {
	// StatusCode is the HTTP status code written by writeErrorResponse.
	StatusCode int
	// Type is emitted as the "type" field of the JSON "error" object.
	Type string
	// Message is emitted as the "message" field of the JSON "error" object.
	Message string
}
|
|
|
|
// writeErrorResponse writes a JSON error response matching the common
|
|
// provider error format used by both Anthropic and OpenAI.
|
|
func writeErrorResponse(w http.ResponseWriter, errResp *ErrorResponse) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(errResp.StatusCode)
|
|
body := map[string]interface{}{
|
|
"error": map[string]interface{}{
|
|
"type": errResp.Type,
|
|
"message": errResp.Message,
|
|
},
|
|
}
|
|
_ = json.NewEncoder(w).Encode(body)
|
|
}
|
|
|
|
// AnthropicErrorResponse returns an AnthropicResponse that causes the
|
|
// test server to respond with the given HTTP status code and error.
|
|
// This simulates provider errors like 529 Overloaded or 429 Rate Limited.
|
|
func AnthropicErrorResponse(statusCode int, errorType, message string) AnthropicResponse {
|
|
return AnthropicResponse{
|
|
Error: &ErrorResponse{
|
|
StatusCode: statusCode,
|
|
Type: errorType,
|
|
Message: message,
|
|
},
|
|
}
|
|
}
|
|
|
|
// AnthropicOverloadedResponse returns a 529 "overloaded" error matching
|
|
// Anthropic's overloaded response format.
|
|
func AnthropicOverloadedResponse() AnthropicResponse {
|
|
return AnthropicErrorResponse(529, "overloaded_error", "Overloaded")
|
|
}
|
|
|
|
// AnthropicRateLimitResponse returns a 429 rate limit error.
|
|
func AnthropicRateLimitResponse() AnthropicResponse {
|
|
return AnthropicErrorResponse(http.StatusTooManyRequests, "rate_limit_error", "Rate limited")
|
|
}
|
|
|
|
// OpenAIErrorResponse returns an OpenAIResponse that causes the
|
|
// test server to respond with the given HTTP status code and error.
|
|
func OpenAIErrorResponse(statusCode int, errorType, message string) OpenAIResponse {
|
|
return OpenAIResponse{
|
|
Error: &ErrorResponse{
|
|
StatusCode: statusCode,
|
|
Type: errorType,
|
|
Message: message,
|
|
},
|
|
}
|
|
}
|
|
|
|
// OpenAIRateLimitResponse returns a 429 rate limit error.
|
|
func OpenAIRateLimitResponse() OpenAIResponse {
|
|
return OpenAIErrorResponse(http.StatusTooManyRequests, "rate_limit_exceeded", "Rate limit exceeded")
|
|
}
|
|
|
|
// OpenAIServerErrorResponse returns a 500 internal server error.
|
|
func OpenAIServerErrorResponse() OpenAIResponse {
|
|
return OpenAIErrorResponse(http.StatusInternalServerError, "server_error", "Internal server error")
|
|
}
|