Files
coder/aibridge/intercept/responses/injected_tools.go
T
Paweł Banaszewski e00e85765b chore: move aibridge library code into coder repo (#24190)
This PR merges code from `coder/aibridge` repository into `coder/coder`.
It was split into 4 PRs for easier review but stacked PRs will need to
be merged into this PR so all checks pass.

* https://github.com/coder/coder/pull/24190 -> raw code copy (this PR,
before merging PRs on top of it, it was just 1 commit:
https://github.com/coder/coder/commit/70d33f33200c7e77df910957595715f81f9bec24)
* https://github.com/coder/coder/pull/24570 -> update imports in
`coder/coder` to use copied code
* https://github.com/coder/coder/pull/24586 -> linter fixes and CI
integration (also added README.md)
* https://github.com/coder/coder/pull/24571 -> added exclude to
scripts/check_emdash.sh check

Original PR message (before PR squash):
Moves coder/aibridge code into coder/coder repository.

Omitted files:

- `go.mod`, `go.sum`, `.gitignore`, `.github/workflows/ci.yml`,
`Makefile`, `LICENSE`, `README.md` (modified README.md is added later)
- `.github`, `example`, `buildinfo`, `scripts` directories

Simple verification script (will list omitted files)

```
tmp=$(mktemp -d)
echo "$tmp"
git clone --depth=1 https://github.com/coder/aibridge "$tmp/aibridge"
git clone --depth=1 --branch pb/aibridge-code-move https://github.com/coder/coder "$tmp/coder"
diff -rq --exclude=.git "$tmp/aibridge" "$tmp/coder/aibridge"
# rm -rf "$tmp"
```
2026-04-22 17:01:01 +02:00

269 lines
10 KiB
Go

package responses
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/openai/openai-go/v3"
"github.com/openai/openai-go/v3/responses"
"github.com/openai/openai-go/v3/shared/constant"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/aibridge/recorder"
)
// injectTools adds the tools listed by the MCP proxy to the request payload as
// function tools. It does nothing when no MCP proxy is configured or when
// hasInjectableTools reports false.
//
// Parallel tool calls are disabled first (best-effort). Injection itself is
// also best-effort: if the payload cannot be rewritten, a warning is logged and
// the request payload is left as it was.
func (i *responsesInterceptionBase) injectTools() {
	if i.mcpProxy == nil || !i.hasInjectableTools() {
		return
	}

	i.disableParallelToolCalls()

	// Inject tools.
	var injected []responses.ToolUnionParam
	for _, tool := range i.mcpProxy.ListTools() {
		var params map[string]any
		if tool.Params != nil {
			params = map[string]any{
				"type":       "object",
				"properties": tool.Params,
				// "additionalProperties": false, // Only relevant when strict=true.
			}
		}

		// Only set "required" when it is non-empty; otherwise the request fails
		// with "None is not of type 'array'" if a nil slice is given.
		if len(tool.Required) > 0 {
			// A tool may list required fields without declaring Params, in
			// which case params is still nil here. Writing to a nil map panics,
			// so start from a minimal object schema instead.
			if params == nil {
				params = map[string]any{"type": "object"}
			}
			// Must list ALL properties when strict=true.
			params["required"] = tool.Required
		}

		injected = append(injected, responses.ToolUnionParam{
			OfFunction: &responses.FunctionToolParam{
				Name:        tool.ID,
				Strict:      openai.Bool(false), // TODO: configurable.
				Description: openai.String(tool.Description),
				Parameters:  params,
			},
		})
	}

	updated, err := i.reqPayload.injectTools(injected)
	if err != nil {
		// No request-scoped context is available to this method.
		i.logger.Warn(context.Background(), "failed to inject tools", slog.Error(err))
		return
	}
	i.reqPayload = updated
}
// disableParallelToolCalls switches off parallel tool calls in the request
// payload, which keeps the inner agentic loop simple.
// The change is best-effort: when the payload cannot be updated, a warning is
// logged, the payload is left untouched, and the request is not failed.
// TODO: implement parallel tool calls.
func (i *responsesInterceptionBase) disableParallelToolCalls() {
	payload, err := i.reqPayload.disableParallelToolCalls()
	if err == nil {
		i.reqPayload = payload
		return
	}

	i.logger.Warn(context.Background(), "failed to disable parallel_tool_calls", slog.Error(err))
}
// handleInnerAgenticLoop drives one step of the inner agentic loop: the pending
// injected tool calls are invoked and their results are queued up to be sent
// back to the model. Regular (non-injected) tool calls are not handled here;
// the client deals with those in its own agentic loop.
//
// The Responses API refers to what we call "tools" as "functions", so this
// package keeps that terminology.
// See https://platform.openai.com/docs/guides/function-calling
//
// The returned bool is false when invoking the tools failed or produced no
// results, in which case the flow is complete. It is true once results exist,
// including when preparing the follow-up request fails; that failure is
// reported through the returned error.
func (i *responsesInterceptionBase) handleInnerAgenticLoop(ctx context.Context, pending []responses.ResponseFunctionToolCall, response *responses.Response) (bool, error) {
	toolResults, err := i.handleInjectedToolCalls(ctx, pending, response)
	switch {
	case err != nil:
		return false, xerrors.Errorf("failed to handle injected tool calls: %w", err)
	case len(toolResults) == 0:
		// Nothing was invocable, so there is nothing to send back.
		return false, nil
	}

	// Feed the tool results into the payload of the next request to the model.
	if prepErr := i.prepareRequestForAgenticLoop(ctx, response, toolResults); prepErr != nil {
		return true, prepErr
	}
	return true, nil
}
// handleInjectedToolCalls invokes each pending function call that belongs to
// the inner agentic loop, i.e. functions injected by the MCP proxy, and returns
// one result item per call, in the same order as pending.
//
// A nil response is an error. When no MCP proxy is configured there is no way
// to handle injected functions, so no results and no error are returned.
func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context, pending []responses.ResponseFunctionToolCall, response *responses.Response) ([]responses.ResponseInputItemUnionParam, error) {
	switch {
	case response == nil:
		return nil, xerrors.New("empty response")
	case i.mcpProxy == nil:
		return nil, nil
	}

	var outputs []responses.ResponseInputItemUnionParam
	for idx := range pending {
		outputs = append(outputs, i.invokeInjectedTool(ctx, response.ID, pending[idx]))
	}
	return outputs, nil
}
// prepareRequestForAgenticLoop extends the request payload for the next
// iteration of the inner agentic loop. The output items of the given response
// are converted to input items and appended first, followed by the tool
// results, so that the model sees its own function calls alongside their
// outputs.
//
// Output items that convertOutputToInput cannot convert are left out. On
// failure the error is logged and returned, and the payload is not replaced.
func (i *responsesInterceptionBase) prepareRequestForAgenticLoop(ctx context.Context, response *responses.Response, toolResults []responses.ResponseInputItemUnionParam) error {
	// response.OutputText is also available, but by definition the trigger for
	// a function call is not a simple text response from the model, so the
	// structured output items are used instead.
	var items []responses.ResponseInputItemUnionParam
	for idx := range response.Output {
		converted := i.convertOutputToInput(response.Output[idx])
		if converted == nil {
			continue
		}
		items = append(items, *converted)
	}
	items = append(items, toolResults...)

	payload, err := i.reqPayload.appendInputItems(items)
	if err != nil {
		i.logger.Error(ctx, "failed to rewrite input in inner agentic loop", slog.Error(err))
		return xerrors.Errorf("failed to rewrite input: %w", err)
	}

	i.reqPayload = payload
	return nil
}
// getPendingInjectedToolCalls returns the function calls in the response output
// that name a tool known to the MCP proxy. Output items of any other type are
// ignored, and nothing is returned when no MCP proxy is configured.
//
// Injected functions are defined by MCP, and MCP tools have to have a schema
// for their inputs. The Responses API also supports "Custom Tools":
// https://platform.openai.com/docs/guides/function-calling#custom-tools
// These are like regular functions but their inputs are not schematized, so
// custom tool calls are not considered here.
func (i *responsesInterceptionBase) getPendingInjectedToolCalls(response *responses.Response) []responses.ResponseFunctionToolCall {
	functionCallType := string(constant.ValueOf[constant.FunctionCall]())

	var pending []responses.ResponseFunctionToolCall
	for _, out := range response.Output {
		if out.Type != functionCallType {
			continue
		}

		call := out.AsFunctionCall()
		// Keep only calls to tools managed by our MCP proxy.
		if i.mcpProxy == nil || i.mcpProxy.GetTool(call.Name) == nil {
			continue
		}
		pending = append(pending, call)
	}
	return pending
}
// invokeInjectedTool calls the MCP tool named by the given function call and
// returns a function-call-output input item, keyed by the call's CallID, that
// carries the outcome back to the model.
//
// The output text is one of:
//   - an "unknown injected function" error when the MCP proxy has no tool with
//     the requested name (nothing is invoked or recorded in that case),
//   - an "invocation error" message when the tool call fails,
//   - a "result encode error" message when the result cannot be JSON-encoded,
//   - otherwise the JSON encoding of the tool result.
//
// Every attempted invocation is recorded through the recorder, including its
// error if any. Recording is best-effort: a failure to record is logged and
// does not affect the returned item.
//
// Callers must ensure i.mcpProxy is non-nil.
func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall) responses.ResponseInputItemUnionParam {
	tool := i.mcpProxy.GetTool(fc.Name)
	if tool == nil {
		// Report the name that was looked up. fc.ID identifies the output item,
		// not the function, so it would not tell the model which function is
		// unknown.
		return responses.ResponseInputItemParamOfFunctionCallOutput(fc.CallID, fmt.Sprintf("error: unknown injected function %q", fc.Name))
	}

	args := i.parseFunctionCallJSONArgs(ctx, fc.Arguments)
	res, err := tool.Call(ctx, args, i.tracer)

	recErr := i.recorder.RecordToolUsage(ctx, &recorder.ToolUsageRecord{
		InterceptionID:  i.ID().String(),
		MsgID:           responseID,
		ToolCallID:      fc.CallID,
		ServerURL:       &tool.ServerURL,
		Tool:            tool.Name,
		Args:            args,
		Injected:        true,
		InvocationError: err,
	})
	if recErr != nil {
		// The tool result must still reach the model, so only log the failure.
		i.logger.Warn(ctx, "failed to record tool usage", slog.Error(recErr), slog.F("tool", tool.Name))
	}

	var output string
	if err != nil {
		// Results have no fixed structure; if an error occurs, we can just pass back the error.
		// https://platform.openai.com/docs/guides/function-calling?strict-mode=enabled#formatting-results
		output = fmt.Sprintf("invocation error: %q", err.Error())
	} else {
		var out strings.Builder
		if encErr := json.NewEncoder(&out).Encode(res); encErr != nil {
			i.logger.Warn(ctx, "failed to encode tool response", slog.Error(encErr))
			output = fmt.Sprintf("result encode error: %q", encErr.Error())
		} else {
			output = out.String()
		}
	}

	return responses.ResponseInputItemParamOfFunctionCallOutput(fc.CallID, output)
}
// convertOutputToInput translates a response output item into the equivalent
// request input item and returns a pointer to it. It returns nil for output
// types that are not converted. The request itself is not modified; the caller
// decides what to do with the converted item. This is used in agentic loops,
// where the model's output has to be fed back as input for the next iteration
// (e.g. when processing tool call results).
//
// Where the openai-go library provides a ToParam() method it is used, which
// leverages param.Override() with raw JSON to preserve all fields. For types
// without ToParam(), the ResponseInputItemParamOf* helper functions are used.
func (i *responsesInterceptionBase) convertOutputToInput(item responses.ResponseOutputItemUnion) *responses.ResponseInputItemUnionParam {
	switch item.Type {
	case string(constant.ValueOf[constant.Message]()):
		msg := item.AsMessage().ToParam()
		return &responses.ResponseInputItemUnionParam{OfOutputMessage: &msg}

	case string(constant.ValueOf[constant.FileSearchCall]()):
		search := item.AsFileSearchCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfFileSearchCall: &search}

	case string(constant.ValueOf[constant.FunctionCall]()):
		call := item.AsFunctionCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfFunctionCall: &call}

	case string(constant.ValueOf[constant.WebSearchCall]()):
		search := item.AsWebSearchCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfWebSearchCall: &search}

	case "computer_call": // No constant.ComputerCall type exists
		call := item.AsComputerCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfComputerCall: &call}

	case string(constant.ValueOf[constant.Reasoning]()):
		reasoning := item.AsReasoning().ToParam()
		return &responses.ResponseInputItemUnionParam{OfReasoning: &reasoning}

	case string(constant.ValueOf[constant.Compaction]()):
		compaction := item.AsCompaction()
		converted := responses.ResponseInputItemParamOfCompaction(compaction.EncryptedContent)
		return &converted

	case string(constant.ValueOf[constant.ImageGenerationCall]()):
		image := item.AsImageGenerationCall()
		converted := responses.ResponseInputItemParamOfImageGenerationCall(image.ID, image.Result, image.Status)
		return &converted

	case string(constant.ValueOf[constant.CodeInterpreterCall]()):
		call := item.AsCodeInterpreterCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfCodeInterpreterCall: &call}

	case "custom_tool_call": // No constant.CustomToolCall type exists
		call := item.AsCustomToolCall().ToParam()
		return &responses.ResponseInputItemUnionParam{OfCustomToolCall: &call}
	}

	// Everything else is skipped. These are output-only types that don't have
	// direct input equivalents or are handled separately:
	// - local_shell_call, shell_call, shell_call_output: Shell tool outputs
	// - apply_patch_call, apply_patch_call_output: Apply patch outputs
	// - mcp_call, mcp_list_tools, mcp_approval_request: MCP-specific outputs
	i.logger.Debug(context.Background(), "skipping output item type for input", slog.F("type", item.Type))
	return nil
}