Files
coder/aibridge/internal/integrationtest/responses_internal_test.go
T
Ethan c650aabbef chore: standardize on *_internal_test.go for white-box tests (#25601)
My agent added `//nolint:testpackage` to a test file on one of my PRs.
Again. This PR cleans it up across the entire repo and updates the
in-repo conventions so future agents stop doing it.

The repo already has a precedent for white-box tests that need to touch
unexported symbols: `*_internal_test.go` (145+ existing files). The
`testpackage` linter's default `skip-regexp` exempts that filename
suffix, so the `//nolint:testpackage` directive is unnecessary in every
case where someone reached for it. This PR renames 51 such files to
`*_internal_test.go` via `git mv` so blame and history follow, and
strips the dead directive from 2 files that were already correctly named
(`coderd/oauth2provider/authorize_internal_test.go`,
`coderd/x/chatd/advisor_internal_test.go`).

`.claude/docs/TESTING.md` now documents the rule explicitly under *Test
Package Naming*, which is imported into the root `AGENTS.md` via
`@.claude/docs/TESTING.md`. The rule: prefer `package foo_test`; if you
need internal access, rename the file to `*_internal_test.go` rather
than adding a nolint directive.
2026-05-22 20:24:38 +10:00

1101 lines
37 KiB
Go

package integrationtest
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"slices"
"strconv"
"sync"
"testing"
"time"
"github.com/openai/openai-go/v3/responses"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tidwall/sjson"
"github.com/coder/coder/v2/aibridge"
"github.com/coder/coder/v2/aibridge/config"
"github.com/coder/coder/v2/aibridge/fixtures"
"github.com/coder/coder/v2/aibridge/internal/testutil"
"github.com/coder/coder/v2/aibridge/provider"
"github.com/coder/coder/v2/aibridge/recorder"
"github.com/coder/coder/v2/aibridge/utils"
)
// keyVal is a single extra top-level field (name plus arbitrary JSON-encodable
// value) that responsesRequestBytes merges into the request body it builds.
type keyVal struct {
	// key is the JSON field name written into the request body.
	key string
	// val is the value stored under key; it is marshaled with encoding/json.
	val any
}
// TestResponsesOutputMatchesUpstream sends each fixture request through the
// bridge and checks that:
//   - the body returned to the client is exactly the upstream fixture body
//     (streaming or blocking),
//   - exactly one interception is recorded with the expected initiator,
//     provider, model, user agent and detected client,
//   - prompt, tool and token usage records match the per-fixture expectations
//     (or are absent when no expectation is set).
func TestResponsesOutputMatchesUpstream(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name                 string
		fixture              []byte
		streaming            bool
		expectModel          string
		expectPromptRecorded string // empty means no prompt usage must be recorded
		expectToolRecorded   *recorder.ToolUsageRecord
		expectTokenUsage     *recorder.TokenUsageRecord
		userAgent            string
		expectedClient       aibridge.Client
	}{
		{
			name:                 "blocking_simple",
			fixture:              fixtures.OaiResponsesBlockingSimple,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "tell me a joke",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0388c79043df3e3400695f9f83cd6481959062cec6830d8d51",
				Input:  11,
				Output: 18,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     29,
				},
			},
			userAgent:      "claude-cli/2.0.67 (external, cli)",
			expectedClient: aibridge.ClientClaudeCode,
		},
		{
			name:                 "blocking_builtin_tool",
			fixture:              fixtures.OaiResponsesBlockingSingleBuiltinTool,
			expectModel:          "gpt-4.1",
			expectPromptRecorded: "Is 3 + 5 a prime number? Use the add function to calculate the sum.",
			expectToolRecorded: &recorder.ToolUsageRecord{
				MsgID:      "resp_0da6045a8b68fa5200695fa23dcc2c81a19c849f627abf8a31",
				Tool:       "add",
				ToolCallID: "call_CJSaa2u51JG996575oVljuNq",
				Args:       map[string]any{"a": float64(3), "b": float64(5)},
				Injected:   false,
			},
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0da6045a8b68fa5200695fa23dcc2c81a19c849f627abf8a31",
				Input:  58,
				Output: 18,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     76,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "blocking_cached_input_tokens",
			fixture:              fixtures.OaiResponsesBlockingCachedInputTokens,
			expectModel:          "gpt-4.1",
			expectPromptRecorded: "This was a large input...",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:                "resp_0cd5d6b8310055d600696a1776b42c81a199fbb02248a8bfa0",
				Input:                129, // 12033 input - 11904 cached
				Output:               44,
				CacheReadInputTokens: 11904,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     12077,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "blocking_custom_tool",
			fixture:              fixtures.OaiResponsesBlockingCustomTool,
			expectModel:          "gpt-5",
			expectPromptRecorded: "Use the code_exec tool to print hello world to the console.",
			expectToolRecorded: &recorder.ToolUsageRecord{
				MsgID:      "resp_09c614364030cdf000696942589da081a0af07f5859acb7308",
				Tool:       "code_exec",
				ToolCallID: "call_haf8njtwrVZ1754Gm6fjAtuA",
				Args:       "print(\"hello world\")",
				Injected:   false,
			},
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_09c614364030cdf000696942589da081a0af07f5859acb7308",
				Input:  64,
				Output: 148,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 128,
					"total_tokens":     212,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "blocking_conversation",
			fixture:              fixtures.OaiResponsesBlockingConversation,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "explain why this is funny.",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0c9f1f0524a858fa00695fa15fc5a081958f4304aafd3bdec2",
				Input:  48,
				Output: 116,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     164,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "blocking_prev_response_id",
			fixture:              fixtures.OaiResponsesBlockingPrevResponseID,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "explain why this is funny.",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0388c79043df3e3400695f9f86cfa08195af1f015c60117a83",
				Input:  43,
				Output: 129,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     172,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_simple",
			fixture:              fixtures.OaiResponsesStreamingSimple,
			streaming:            true,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "tell me a joke",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000",
				Input:  11,
				Output: 18,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     29,
				},
			},
			userAgent:      "Zed/0.219.4+stable.119.abc123 (macos; aarch64)",
			expectedClient: aibridge.ClientZed,
		},
		{
			name:                 "streaming_codex",
			fixture:              fixtures.OaiResponsesStreamingCodex,
			streaming:            true,
			expectModel:          "gpt-5-codex",
			expectPromptRecorded: "hello",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0e172b76542a9100016964f7e63d888191a2a28cb2ba0ab6d3",
				Input:  4006,
				Output: 13,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     4019,
				},
			},
			userAgent:      "codex_cli_rs/0.87.0 (Mac OS 26.2.0; arm64)",
			expectedClient: aibridge.ClientCodex,
		},
		{
			name:                 "streaming_builtin_tool",
			fixture:              fixtures.OaiResponsesStreamingBuiltinTool,
			streaming:            true,
			expectModel:          "gpt-4.1",
			expectPromptRecorded: "Is 3 + 5 a prime number? Use the add function to calculate the sum.",
			expectToolRecorded: &recorder.ToolUsageRecord{
				MsgID:      "resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458",
				Tool:       "add",
				ToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t",
				Args:       map[string]any{"a": float64(3), "b": float64(5)},
				Injected:   false,
			},
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458",
				Input:  58,
				Output: 18,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     76,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_cached_tokens",
			fixture:              fixtures.OaiResponsesStreamingCachedInputTokens,
			streaming:            true,
			expectModel:          "gpt-5.2-codex",
			expectPromptRecorded: "Test cached input tokens.",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:                "resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35",
				Input:                1165, // 16909 input - 15744 cached
				Output:               54,
				CacheReadInputTokens: 15744,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     16963,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_custom_tool",
			fixture:              fixtures.OaiResponsesStreamingCustomTool,
			streaming:            true,
			expectModel:          "gpt-5",
			expectPromptRecorded: "Use the code_exec tool to print hello world to the console.",
			expectToolRecorded: &recorder.ToolUsageRecord{
				MsgID:      "resp_0c26996bc41c2a0500696942e83634819fb71b2b8ff8a4a76c",
				Tool:       "code_exec",
				ToolCallID: "call_2gSnF58IEhXLwlbnqbm5XKMd",
				Args:       "print(\"hello world\")",
				Injected:   false,
			},
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0c26996bc41c2a0500696942e83634819fb71b2b8ff8a4a76c",
				Input:  64,
				Output: 340,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 320,
					"total_tokens":     404,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_conversation",
			fixture:              fixtures.OaiResponsesStreamingConversation,
			streaming:            true,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "explain why this is funny.",
			expectedClient:       aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_prev_response_id",
			fixture:              fixtures.OaiResponsesStreamingPrevResponseID,
			streaming:            true,
			expectModel:          "gpt-4o-mini",
			expectPromptRecorded: "explain why this is funny.",
			expectTokenUsage: &recorder.TokenUsageRecord{
				MsgID:  "resp_0f9c4b2f224d858000695fa0649b8c8197b38914b15a7add0e",
				Input:  43,
				Output: 182,
				ExtraTokenTypes: map[string]int64{
					"output_reasoning": 0,
					"total_tokens":     225,
				},
			},
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "stream_error",
			fixture:              fixtures.OaiResponsesStreamingStreamError,
			streaming:            true,
			expectModel:          "gpt-6.7",
			expectPromptRecorded: "hello_stream_error",
			expectedClient:       aibridge.ClientUnknown,
		},
		{
			name:                 "stream_failure",
			fixture:              fixtures.OaiResponsesStreamingStreamFailure,
			streaming:            true,
			expectModel:          "gpt-6.7",
			expectPromptRecorded: "hello_stream_failure",
			expectedClient:       aibridge.ClientUnknown,
		},
		// The original status code and body are kept even with a wrong JSON format.
		{
			name:           "blocking_wrong_format",
			fixture:        fixtures.OaiResponsesBlockingWrongResponseFormat,
			expectModel:    "gpt-6.7",
			expectedClient: aibridge.ClientUnknown,
		},
		{
			name:                 "streaming_wrong_format",
			fixture:              fixtures.OaiResponsesStreamingWrongResponseFormat,
			streaming:            true,
			expectModel:          "gpt-6.7",
			expectPromptRecorded: "hello_wrong_format",
			expectedClient:       aibridge.ClientUnknown,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			fix := fixtures.Parse(t, tc.fixture)
			upstream := newMockUpstream(ctx, t, newFixtureResponse(fix))
			bridgeServer := newBridgeTestServer(ctx, t, upstream.URL)

			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request(), http.Header{"User-Agent": {tc.userAgent}})
			require.NoError(t, err)
			defer resp.Body.Close()
			require.Equal(t, http.StatusOK, resp.StatusCode)

			// The client must receive the upstream body byte-for-byte.
			got, err := io.ReadAll(resp.Body)
			require.NoError(t, err)
			if tc.streaming {
				require.Equal(t, string(fix.Streaming()), string(got))
			} else {
				require.Equal(t, string(fix.NonStreaming()), string(got))
			}

			interceptions := bridgeServer.Recorder.RecordedInterceptions()
			require.Len(t, interceptions, 1)
			intc := interceptions[0]
			// require.Equal takes (expected, actual); keep that order so failure
			// output labels the values correctly.
			require.Equal(t, defaultActorID, intc.InitiatorID)
			require.Equal(t, config.ProviderOpenAI, intc.Provider)
			require.Equal(t, tc.expectModel, intc.Model)
			require.Equal(t, tc.userAgent, intc.UserAgent)
			require.Equal(t, string(tc.expectedClient), intc.Client)

			recordedPrompts := bridgeServer.Recorder.RecordedPromptUsages()
			if tc.expectPromptRecorded != "" {
				require.Len(t, recordedPrompts, 1)
				promptEq := func(pur *recorder.PromptUsageRecord) bool { return pur.Prompt == tc.expectPromptRecorded }
				require.Truef(t, slices.ContainsFunc(recordedPrompts, promptEq), "prompt not found, got: %v, want: %v", recordedPrompts, tc.expectPromptRecorded)
			} else {
				require.Empty(t, recordedPrompts)
			}

			recordedTools := bridgeServer.Recorder.RecordedToolUsages()
			if tc.expectToolRecorded != nil {
				require.Len(t, recordedTools, 1)
				recordedTools[0].InterceptionID = tc.expectToolRecorded.InterceptionID // ignore interception id (interception id is not constant and response doesn't contain it)
				recordedTools[0].CreatedAt = tc.expectToolRecorded.CreatedAt           // ignore time
				require.Equal(t, tc.expectToolRecorded, recordedTools[0])
			} else {
				require.Empty(t, recordedTools)
			}

			recordedTokens := bridgeServer.Recorder.RecordedTokenUsages()
			if tc.expectTokenUsage != nil {
				require.Len(t, recordedTokens, 1)
				recordedTokens[0].InterceptionID = tc.expectTokenUsage.InterceptionID // ignore interception id
				recordedTokens[0].CreatedAt = tc.expectTokenUsage.CreatedAt           // ignore time
				require.Equal(t, tc.expectTokenUsage, recordedTokens[0])
			} else {
				require.Empty(t, recordedTokens)
			}
		})
	}
}
// TestResponsesBackgroundModeForbidden checks that a request with
// "background": true is answered with a 501 JSON error by the bridge, for both
// blocking and streaming requests, and that the upstream is never contacted.
func TestResponsesBackgroundModeForbidden(t *testing.T) {
	t.Parallel()

	modes := []struct {
		label  string
		stream bool
	}{
		{label: "blocking", stream: false},
		{label: "streaming", stream: true},
	}

	for _, mode := range modes {
		t.Run(mode.label, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			// Any request that reaches this handler is a test failure: the
			// bridge must reject background mode before going upstream.
			failOnRequest := func(w http.ResponseWriter, r *http.Request) {
				t.Errorf("unexpected request to upstream: %s %s", r.Method, r.URL.Path)
				w.WriteHeader(http.StatusInternalServerError)
			}
			upstream := httptest.NewServer(http.HandlerFunc(failOnRequest))
			t.Cleanup(upstream.Close)

			bridgeServer := newBridgeTestServer(ctx, t, upstream.URL)

			// Build a request that has background mode switched on.
			payload := responsesRequestBytes(t, mode.stream, keyVal{"background", true})
			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, payload)
			require.NoError(t, err)
			defer resp.Body.Close()

			require.Equal(t, "application/json", resp.Header.Get("Content-Type"))
			require.Equal(t, http.StatusNotImplemented, resp.StatusCode)

			raw, err := io.ReadAll(resp.Body)
			require.NoError(t, err)
			requireResponsesError(t, http.StatusNotImplemented, "background requests are currently not supported by AI Bridge", raw)
		})
	}
}
func TestResponsesParallelToolsOverwritten(t *testing.T) {
t.Parallel()
cases := []struct {
name string
fixture [2][]byte // [blocking, streaming] fixture pair.
withInjectedTools bool
initialSetting *bool
expectedSetting *bool // nil = field should not be present, non-nil = expected value.
}{
// With injected tools and builtin tools: parallel_tool_calls should be forced false.
{
name: "with injected and builtin tools: parallel_tool_calls true",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: true,
initialSetting: utils.PtrTo(true),
expectedSetting: utils.PtrTo(false),
},
{
name: "with injected and builtin tools: parallel_tool_calls false",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: true,
initialSetting: utils.PtrTo(false),
expectedSetting: utils.PtrTo(false),
},
{
name: "with injected and builtin tools: parallel_tool_calls unset",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: true,
initialSetting: nil,
expectedSetting: utils.PtrTo(false),
},
// With injected tools but without builtin tools: parallel_tool_calls should be forced false.
{
name: "with injected tools only: parallel_tool_calls true",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: true,
initialSetting: utils.PtrTo(true),
expectedSetting: utils.PtrTo(false),
},
{
name: "with injected tools only: parallel_tool_calls false",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: true,
initialSetting: utils.PtrTo(false),
expectedSetting: utils.PtrTo(false),
},
{
name: "with injected tools only: parallel_tool_calls unset",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: true,
initialSetting: nil,
expectedSetting: utils.PtrTo(false),
},
// With builtin tools but without injected tools: parallel_tool_calls should be preserved.
{
name: "with builtin tools only: parallel_tool_calls true",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: false,
initialSetting: utils.PtrTo(true),
expectedSetting: utils.PtrTo(true),
},
{
name: "with builtin tools only: parallel_tool_calls false",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: false,
initialSetting: utils.PtrTo(false),
expectedSetting: utils.PtrTo(false),
},
{
name: "with builtin tools only: parallel_tool_calls unset",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSingleBuiltinTool, fixtures.OaiResponsesStreamingBuiltinTool},
withInjectedTools: false,
initialSetting: nil,
expectedSetting: nil,
},
// Without any tools: nothing is modified.
{
name: "no tools: parallel_tool_calls true",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: false,
initialSetting: utils.PtrTo(true),
expectedSetting: utils.PtrTo(true),
},
{
name: "no tools: parallel_tool_calls false",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: false,
initialSetting: utils.PtrTo(false),
expectedSetting: utils.PtrTo(false),
},
{
name: "no tools: parallel_tool_calls unset",
fixture: [2][]byte{fixtures.OaiResponsesBlockingSimple, fixtures.OaiResponsesStreamingSimple},
withInjectedTools: false,
initialSetting: nil,
expectedSetting: nil,
},
}
for _, tc := range cases {
for i, streaming := range []bool{false, true} {
t.Run(fmt.Sprintf("%s/streaming=%v", tc.name, streaming), func(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
t.Cleanup(cancel)
fix := fixtures.Parse(t, tc.fixture[i])
upstream := newMockUpstream(ctx, t, newFixtureResponse(fix))
var opts []bridgeOption
if tc.withInjectedTools {
opts = append(opts, withMCP(setupMCPForTest(t, defaultTracer)))
}
bridgeServer := newBridgeTestServer(ctx, t, upstream.URL, opts...)
var (
reqBody = fix.Request()
err error
)
if tc.initialSetting != nil {
reqBody, err = sjson.SetBytes(reqBody, "parallel_tool_calls", *tc.initialSetting)
require.NoError(t, err)
}
resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, reqBody)
require.NoError(t, err)
defer resp.Body.Close()
_, err = io.ReadAll(resp.Body)
require.NoError(t, err)
received := upstream.receivedRequests()
require.Len(t, received, 1)
var upstreamReq map[string]any
require.NoError(t, json.Unmarshal(received[0].Body, &upstreamReq))
ptc, ok := upstreamReq["parallel_tool_calls"].(bool)
require.Equal(t, tc.expectedSetting != nil, ok,
"parallel_tool_calls presence mismatch")
if tc.expectedSetting != nil {
assert.Equal(t, *tc.expectedSetting, ptc)
}
})
}
}
}
// TestClientAndConnectionError points the bridge at an upstream that either
// resets every connection or is not a valid URL, and checks that the client
// gets a 500 JSON error whose message matches the transport failure and that
// no prompt usage is recorded.
func TestClientAndConnectionError(t *testing.T) {
	t.Parallel()

	const (
		// RST error message differs between Linux/macOS|Windows.
		resetPattern     = `connection reset by peer|forcibly closed`
		badSchemePattern = "unsupported protocol scheme"
	)

	scenarios := []struct {
		name        string
		upstreamURL string
		streaming   bool
		wantMessage string // regular expression matched against the error message
	}{
		{
			name:        "blocking_connection_refused",
			upstreamURL: startRejectingListener(t),
			streaming:   false,
			wantMessage: resetPattern,
		},
		{
			name:        "streaming_connection_refused",
			upstreamURL: startRejectingListener(t),
			streaming:   true,
			wantMessage: resetPattern,
		},
		{
			name:        "blocking_bad_url",
			upstreamURL: "not_url",
			streaming:   false,
			wantMessage: badSchemePattern,
		},
		{
			name:        "streaming_bad_url",
			upstreamURL: "not_url",
			streaming:   true,
			wantMessage: badSchemePattern,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			// The upstream URL may be deliberately invalid, so the provider is
			// built explicitly and handed over via withCustomProvider.
			openAI := provider.NewOpenAI(openAICfg(sc.upstreamURL, apiKey))
			bridgeServer := newBridgeTestServer(ctx, t, sc.upstreamURL, withCustomProvider(openAI))

			payload := responsesRequestBytes(t, sc.streaming)
			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, payload)
			require.NoError(t, err)
			defer resp.Body.Close()

			require.Equal(t, "application/json", resp.Header.Get("Content-Type"))
			require.Equal(t, http.StatusInternalServerError, resp.StatusCode)

			raw, err := io.ReadAll(resp.Body)
			require.NoError(t, err)
			requireResponsesError(t, http.StatusInternalServerError, sc.wantMessage, raw)

			require.Empty(t, bridgeServer.Recorder.RecordedPromptUsages())
		})
	}
}
// TestUpstreamError makes the upstream answer with an error status and checks
// that the bridge passes the status code, Content-Type and body through to the
// client unchanged, both for JSON error payloads and for plain-text bodies.
func TestUpstreamError(t *testing.T) {
	t.Parallel()

	responsesError := `{"error":{"message":"Something went wrong","type":"invalid_request_error","param":null,"code":"invalid_request"}}`
	nonResponsesError := `plain text error`

	tests := []struct {
		name        string
		streaming   bool
		statusCode  int
		contentType string
		body        string
	}{
		{
			name:        "blocking_responses_error",
			streaming:   false,
			statusCode:  http.StatusBadRequest,
			contentType: "application/json",
			body:        responsesError,
		},
		{
			name:        "streaming_responses_error",
			streaming:   true,
			statusCode:  http.StatusBadRequest,
			contentType: "application/json",
			body:        responsesError,
		},
		{
			name:        "blocking_non_responses_error",
			streaming:   false,
			statusCode:  http.StatusBadGateway,
			contentType: "text/plain",
			body:        nonResponsesError,
		},
		{
			name:        "streaming_non_responses_error",
			streaming:   true,
			statusCode:  http.StatusBadGateway,
			contentType: "text/plain",
			body:        nonResponsesError,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("Content-Type", tc.contentType)
				w.WriteHeader(tc.statusCode)
				_, err := w.Write([]byte(tc.body))
				// The handler runs on the HTTP server's goroutine, where
				// t.FailNow (used by require) must not be called; assert marks
				// the test failed without FailNow.
				assert.NoError(t, err)
			}))
			t.Cleanup(upstream.Close)

			cfg := openAICfg(upstream.URL, apiKey)
			bridgeServer := newBridgeTestServer(ctx, t, upstream.URL, withCustomProvider(provider.NewOpenAI(cfg)))

			reqBytes := responsesRequestBytes(t, tc.streaming)
			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, reqBytes)
			require.NoError(t, err)
			defer resp.Body.Close()

			require.Equal(t, tc.statusCode, resp.StatusCode)
			require.Equal(t, tc.contentType, resp.Header.Get("Content-Type"))

			body, err := io.ReadAll(resp.Body)
			require.NoError(t, err)
			require.Equal(t, tc.body, string(body))
		})
	}
}
// TestResponsesInjectedTool tests that injected MCP tool calls trigger the inner agentic loop,
// invoke the tool via MCP, and send the result back to the model.
//
// For every fixture it checks that the upstream is called twice, the MCP tool
// is invoked once, the tool usage is recorded as injected (with the invocation
// error when one is configured), the prompt and both token usages are
// recorded, and the client receives the final (post tool call) response.
func TestResponsesInjectedTool(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name              string
		fixture           []byte
		streaming         bool
		mcpToolName       string
		expectToolArgs    map[string]any
		expectToolError   string // If non-empty, MCP tool returns this error.
		expectPrompt      string
		expectTokenUsages []recorder.TokenUsageRecord
	}{
		{
			name:        "blocking_success",
			fixture:     fixtures.OaiResponsesBlockingSingleInjectedTool,
			mcpToolName: "coder_template_version_parameters",
			expectToolArgs: map[string]any{
				"template_version_id": "aa4e30e4-a086-4df6-a364-1343f1458104",
			},
			expectPrompt: "list the template params for version aa4e30e4-a086-4df6-a364-1343f1458104",
			expectTokenUsages: []recorder.TokenUsageRecord{
				{
					MsgID:                "resp_012db006225b0ec700696b5de8a01481a28182ea6885448f93",
					Input:                227, // 6371 input - 6144 cached
					Output:               75,
					CacheReadInputTokens: 6144,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 25,
						"total_tokens":     6446,
					},
				},
				{
					MsgID:                "resp_012db006225b0ec700696b5dec1d4c81a2a6a416e31af39b90",
					Input:                612, // 6756 input - 6144 cached
					Output:               231,
					CacheReadInputTokens: 6144,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 43,
						"total_tokens":     6987,
					},
				},
			},
		},
		{
			name:        "blocking_tool_error",
			fixture:     fixtures.OaiResponsesBlockingSingleInjectedToolError,
			mcpToolName: "coder_delete_template",
			expectToolArgs: map[string]any{
				"template_id": "03cb4fdd-8109-4a22-8e22-bb4975171395",
			},
			expectPrompt:    "delete the template with ID 03cb4fdd-8109-4a22-8e22-bb4975171395, don't ask for confirmation",
			expectToolError: "500 Internal error deleting template: unauthorized: rbac: forbidden",
			expectTokenUsages: []recorder.TokenUsageRecord{
				{
					MsgID:                "resp_06e2afba24b6b2ad00696b774d1df0819eaf1ec802bc8a2ca9",
					Input:                233, // 6377 input - 6144 cached
					Output:               119,
					CacheReadInputTokens: 6144,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 70,
						"total_tokens":     6496,
					},
				},
				{
					MsgID:                "resp_06e2afba24b6b2ad00696b775044e8819ea14840698ef966e2",
					Input:                395, // 6539 input - 6144 cached
					Output:               144,
					CacheReadInputTokens: 6144,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 28,
						"total_tokens":     6683,
					},
				},
			},
		},
		{
			name:           "streaming_success",
			fixture:        fixtures.OaiResponsesStreamingSingleInjectedTool,
			streaming:      true,
			mcpToolName:    "coder_list_templates",
			expectToolArgs: map[string]any{},
			expectPrompt:   "List my coder templates.",
			expectTokenUsages: []recorder.TokenUsageRecord{
				{
					MsgID:  "resp_016595fe42aa62ca0069724419c52081a0b7eb479c6bc8109f",
					Input:  6269, // 6269 input - 0 cached
					Output: 18,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 0,
						"total_tokens":     6287,
					},
				},
				{
					MsgID:                "resp_0bc5f54fce6df69a006972442175908194bb81d31f576e6ca6",
					Input:                319, // 6463 input - 6144 cached
					Output:               182,
					CacheReadInputTokens: 6144,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 0,
						"total_tokens":     6645,
					},
				},
			},
		},
		{
			name:        "streaming_tool_error",
			fixture:     fixtures.OaiResponsesStreamingSingleInjectedToolError,
			streaming:   true,
			mcpToolName: "coder_create_workspace_build",
			expectToolArgs: map[string]any{
				"transition":   "start",
				"workspace_id": "non_existing_id",
			},
			expectPrompt:    "Create a new workspace build for an workspace with id: 'non_existing_id'",
			expectToolError: "workspace_id must be a valid UUID: invalid UUID length: 15",
			expectTokenUsages: []recorder.TokenUsageRecord{
				{
					MsgID:  "resp_0dfed48e1052ad7f0069725ca129f88193b97d6deff1760524",
					Input:  6280, // 6280 input - 0 cached
					Output: 30,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 0,
						"total_tokens":     6310,
					},
				},
				{
					MsgID:  "resp_0dfed48e1052ad7f0069725ca39880819390fcc5b2eb8cf8c6",
					Input:  6346, // 6346 input - 0 cached
					Output: 56,
					ExtraTokenTypes: map[string]int64{
						"output_reasoning": 0,
						"total_tokens":     6402,
					},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			// Setup mock server for multi-turn interaction.
			// First request → tool call response, second → tool response.
			fix := fixtures.Parse(t, tc.fixture)
			upstream := newMockUpstream(ctx, t, newFixtureResponse(fix), newFixtureToolResponse(fix))

			// Setup MCP server proxies (with mock tools).
			mockMCP := setupMCPForTest(t, defaultTracer)
			if tc.expectToolError != "" {
				mockMCP.setToolError(tc.mcpToolName, tc.expectToolError)
			}

			bridgeServer := newBridgeTestServer(ctx, t, upstream.URL, withMCP(mockMCP))

			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request())
			require.NoError(t, err)
			defer resp.Body.Close()
			require.Equal(t, http.StatusOK, resp.StatusCode)

			body, err := io.ReadAll(resp.Body)
			require.NoError(t, err)

			// Wait for both requests to be made (inner agentic loop).
			require.Eventually(t, func() bool {
				return upstream.Calls.Load() == 2
			}, testutil.WaitMedium, testutil.IntervalFast)

			// Verify the injected tool was invoked via MCP.
			invocations := mockMCP.getCallsByTool(tc.mcpToolName)
			require.Len(t, invocations, 1, "expected MCP tool to be invoked once")

			// Verify the injected tool usage was recorded.
			toolUsages := bridgeServer.Recorder.RecordedToolUsages()
			require.Len(t, toolUsages, 1)
			require.Equal(t, tc.mcpToolName, toolUsages[0].Tool)
			require.Equal(t, tc.expectToolArgs, toolUsages[0].Args)
			require.True(t, toolUsages[0].Injected, "injected tool should be marked as injected")
			if tc.expectToolError != "" {
				// Fail with a readable message instead of panicking on a nil
				// error when the invocation error was not recorded.
				require.NotNil(t, toolUsages[0].InvocationError, "expected tool invocation error to be recorded")
				require.Contains(t, toolUsages[0].InvocationError.Error(), tc.expectToolError)
			}

			// Verify prompt was recorded.
			prompts := bridgeServer.Recorder.RecordedPromptUsages()
			require.Len(t, prompts, 1)
			require.Equal(t, tc.expectPrompt, prompts[0].Prompt)

			tokenUsages := bridgeServer.Recorder.RecordedTokenUsages()
			require.Len(t, tokenUsages, len(tc.expectTokenUsages))
			for i := range tokenUsages {
				tokenUsages[i].InterceptionID = "" // ignore interception ID and time creation when comparing
				tokenUsages[i].CreatedAt = time.Time{}
				require.Equal(t, tc.expectTokenUsages[i], *tokenUsages[i])
			}

			// Verify the response is the final tool response (after agentic loop).
			if tc.streaming {
				require.Equal(t, string(fix.StreamingToolCall()), string(body))
			} else {
				require.Equal(t, string(fix.NonStreamingToolCall()), string(body))
			}
		})
	}
}
// TestResponsesModelThoughts replays fixtures that contain reasoning summaries
// and/or commentary and checks, via the test recorder, that the expected model
// thoughts were recorded and that every interception was ended.
func TestResponsesModelThoughts(t *testing.T) {
	t.Parallel()

	// Small constructors keep the table readable; each one only fixes the
	// thought source passed to newModelThought.
	summary := func(text string) recorder.ModelThoughtRecord {
		return newModelThought(text, recorder.ThoughtSourceReasoningSummary)
	}
	commentary := func(text string) recorder.ModelThoughtRecord {
		return newModelThought(text, recorder.ThoughtSourceCommentary)
	}

	scenarios := []struct {
		name    string
		fixture []byte
		want    []recorder.ModelThoughtRecord // model thoughts expected to be recorded for the fixture
	}{
		{
			name:    "single reasoning/blocking",
			fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool,
			want:    []recorder.ModelThoughtRecord{summary("The user wants to add 3 and 5")},
		},
		{
			name:    "single reasoning/streaming",
			fixture: fixtures.OaiResponsesStreamingBuiltinTool,
			want:    []recorder.ModelThoughtRecord{summary("The user wants to add 3 and 5")},
		},
		{
			name:    "multiple reasoning items/blocking",
			fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool,
			want: []recorder.ModelThoughtRecord{
				summary("The user wants to add 3 and 5"),
				summary("After adding, I will check if the result is prime"),
			},
		},
		{
			name:    "multiple reasoning items/streaming",
			fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool,
			want: []recorder.ModelThoughtRecord{
				summary("The user wants to add 3 and 5"),
				summary("After adding, I will check if the result is prime"),
			},
		},
		{
			name:    "commentary/blocking",
			fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool,
			want:    []recorder.ModelThoughtRecord{commentary("Checking whether 3 + 5 is prime by calling the add function first.")},
		},
		{
			name:    "commentary/streaming",
			fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool,
			want:    []recorder.ModelThoughtRecord{commentary("Checking whether 3 + 5 is prime by calling the add function first.")},
		},
		{
			name:    "summary and commentary/blocking",
			fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool,
			want: []recorder.ModelThoughtRecord{
				summary("I need to add 3 and 5 to check primality."),
				commentary("Let me calculate the sum first using the add function."),
			},
		},
		{
			name:    "summary and commentary/streaming",
			fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool,
			want: []recorder.ModelThoughtRecord{
				summary("I need to add 3 and 5 to check primality."),
				commentary("Let me calculate the sum first using the add function."),
			},
		},
		{
			name:    "parallel tool calls/blocking",
			fixture: fixtures.OaiResponsesBlockingSingleBuiltinToolParallel,
			want:    []recorder.ModelThoughtRecord{summary("The user wants two additions")},
		},
		{
			name:    "parallel tool calls/streaming",
			fixture: fixtures.OaiResponsesStreamingSingleBuiltinToolParallel,
			want:    []recorder.ModelThoughtRecord{summary("The user wants two additions")},
		},
		{
			name:    "thoughts without tool calls",
			fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls.
			want:    []recorder.ModelThoughtRecord{summary("Preparing simple response")},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), testutil.WaitLong)
			t.Cleanup(cancel)

			fix := fixtures.Parse(t, sc.fixture)
			upstream := newMockUpstream(ctx, t, newFixtureResponse(fix))
			bridgeServer := newBridgeTestServer(ctx, t, upstream.URL)

			resp, err := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request())
			require.NoError(t, err)
			defer resp.Body.Close()
			require.Equal(t, http.StatusOK, resp.StatusCode)

			// Drain the body so the whole response has been relayed before the
			// recorder is inspected.
			_, err = io.ReadAll(resp.Body)
			require.NoError(t, err)

			rec := bridgeServer.Recorder
			rec.VerifyModelThoughtsRecorded(t, sc.want)
			rec.VerifyAllInterceptionsEnded(t)
		})
	}
}
// requireResponsesError decodes body as a responses.Error and fails the test
// unless its Code equals the decimal string form of code and its Message
// matches the regular expression messagePattern.
func requireResponsesError(t *testing.T, code int, messagePattern string, body []byte) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	var respErr responses.Error
	err := json.Unmarshal(body, &respErr)
	require.NoError(t, err)

	require.Equal(t, strconv.Itoa(code), respErr.Code)
	require.Regexp(t, messagePattern, respErr.Message)
}
// responsesRequestBytes returns the JSON encoding of a minimal Responses API
// request ("input", "model" and "stream" set from streaming). Each entry in
// additionalFields is written as an extra top-level field and overrides a
// default with the same key. The test fails if marshaling fails.
func responsesRequestBytes(t *testing.T, streaming bool, additionalFields ...keyVal) []byte {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	reqBody := map[string]any{
		"input":  "tell me a joke",
		"model":  "gpt-4o-mini",
		"stream": streaming,
	}
	for _, kv := range additionalFields {
		reqBody[kv.key] = kv.val
	}

	reqBytes, err := json.Marshal(reqBody)
	require.NoError(t, err)
	return reqBytes
}
// startRejectingListener starts a TCP listener on a loopback port that accepts
// connections, reads one byte and then closes the connection with SO_LINGER
// set to 0 so the peer sees a reset. It returns the listener address as an
// "http://host:port" URL. The listener is closed, and the accept goroutine is
// waited for, when the test finishes.
func startRejectingListener(t *testing.T) (addr string) {
	t.Helper()

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}

	// Register the goroutine with the WaitGroup before starting it, so the
	// Add cannot race with the Wait in cleanup, and pair it with exactly one
	// Done instead of stacking a deferred Done per accepted connection.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			c, err := ln.Accept()
			if err != nil {
				// When ln.Close() is called, Accept returns an error -> exit.
				return
			}
			// Read at least 1 byte so the client has started writing
			// before we RST, ensuring a consistent "connection reset by peer".
			buf := make([]byte, 1)
			_, _ = c.Read(buf)
			if tc, ok := c.(*net.TCPConn); ok {
				// Linger 0 makes Close discard unsent data and reset the
				// connection instead of closing it gracefully.
				_ = tc.SetLinger(0)
			}
			_ = c.Close()
		}
	}()

	t.Cleanup(func() {
		_ = ln.Close()
		wg.Wait()
	})

	return "http://" + ln.Addr().String()
}