fix: pin fixed anthropic/fantasy forks for streaming token accounting (#24077)

This commit is contained in:
Michael Suchacz
2026-04-08 19:07:39 +02:00
committed by GitHub
parent 543c448b72
commit 590235138f
9 changed files with 237 additions and 23 deletions
+48
View File
@@ -86,6 +86,54 @@ func TestRun_ActiveToolsPrepareBehavior(t *testing.T) {
require.True(t, hasAnthropicEphemeralCacheControl(capturedCall.Prompt[4]))
}
func TestProcessStepStream_AnthropicUsageMatchesFinalDelta(t *testing.T) {
t.Parallel()
model := &loopTestModel{
provider: fantasyanthropic.Name,
streamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
return streamFromParts([]fantasy.StreamPart{
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "cached response"},
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
{
Type: fantasy.StreamPartTypeFinish,
Usage: fantasy.Usage{
InputTokens: 200,
OutputTokens: 75,
TotalTokens: 275,
CacheCreationTokens: 30,
CacheReadTokens: 150,
ReasoningTokens: 0,
},
FinishReason: fantasy.FinishReasonStop,
},
}), nil
},
}
var persistedStep PersistedStep
err := Run(context.Background(), RunOptions{
Model: model,
Messages: []fantasy.Message{
textMessage(fantasy.MessageRoleUser, "hello"),
},
MaxSteps: 1,
ContextLimitFallback: 4096,
PersistStep: func(_ context.Context, step PersistedStep) error {
persistedStep = step
return nil
},
})
require.NoError(t, err)
require.Equal(t, int64(200), persistedStep.Usage.InputTokens)
require.Equal(t, int64(75), persistedStep.Usage.OutputTokens)
require.Equal(t, int64(275), persistedStep.Usage.TotalTokens)
require.Equal(t, int64(30), persistedStep.Usage.CacheCreationTokens)
require.Equal(t, int64(150), persistedStep.Usage.CacheReadTokens)
}
func TestRun_OnRetryEnrichesProvider(t *testing.T) {
t.Parallel()
+89 -7
View File
@@ -53,8 +53,10 @@ type AnthropicMessage struct {
// AnthropicUsage represents usage information in an Anthropic response.
type AnthropicUsage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
}
// AnthropicChunk represents a streaming chunk from Anthropic.
@@ -67,14 +69,16 @@ type AnthropicChunk struct {
StopReason string `json:"stop_reason,omitempty"`
StopSequence *string `json:"stop_sequence,omitempty"`
Usage AnthropicUsage `json:"usage,omitempty"`
UsageMap map[string]int `json:"-"`
}
// AnthropicChunkMessage represents message metadata in a chunk.
type AnthropicChunkMessage struct {
ID string `json:"id"`
Type string `json:"type"`
Role string `json:"role"`
Model string `json:"model"`
ID string `json:"id"`
Type string `json:"type"`
Role string `json:"role"`
Model string `json:"model"`
Usage map[string]int `json:"usage,omitempty"`
}
// AnthropicContentBlock represents a content block in a chunk.
@@ -206,7 +210,11 @@ func (s *anthropicServer) writeStreamingResponse(w http.ResponseWriter, chunks <
"stop_reason": chunk.StopReason,
"stop_sequence": chunk.StopSequence,
}
chunkData["usage"] = chunk.Usage
if chunk.UsageMap != nil {
chunkData["usage"] = chunk.UsageMap
} else {
chunkData["usage"] = chunk.Usage
}
case "message_stop":
// No additional fields
}
@@ -342,6 +350,80 @@ func AnthropicTextChunks(deltas ...string) []AnthropicChunk {
return chunks
}
// AnthropicTextChunksWithCacheUsage builds the chunk sequence for a streamed
// text response with explicit cache token usage. The message_start chunk
// carries input_tokens plus any non-zero cache creation/read counts; the
// closing message_delta chunk carries only output_tokens. Each entry in
// deltas becomes one text_delta chunk at content index 0. It returns nil
// when no deltas are supplied.
func AnthropicTextChunksWithCacheUsage(usage AnthropicUsage, deltas ...string) []AnthropicChunk {
	if len(deltas) == 0 {
		return nil
	}

	// Usage reported on message_start. Zero cache counts are left out of
	// the map entirely rather than written as explicit zeros.
	startUsage := map[string]int{"input_tokens": usage.InputTokens}
	if n := usage.CacheCreationInputTokens; n != 0 {
		startUsage["cache_creation_input_tokens"] = n
	}
	if n := usage.CacheReadInputTokens; n != 0 {
		startUsage["cache_read_input_tokens"] = n
	}

	start := AnthropicChunk{
		Type: "message_start",
		Message: AnthropicChunkMessage{
			ID:    fmt.Sprintf("msg-%s", uuid.New().String()[:8]),
			Type:  "message",
			Role:  "assistant",
			Model: "claude-3-opus-20240229",
			Usage: startUsage,
		},
	}
	blockStart := AnthropicChunk{
		Type:         "content_block_start",
		Index:        0,
		ContentBlock: AnthropicContentBlock{Type: "text", Text: ""},
	}

	// Five fixed chunks (message_start, content_block_start,
	// content_block_stop, message_delta, message_stop) plus one per delta.
	out := make([]AnthropicChunk, 0, len(deltas)+5)
	out = append(out, start, blockStart)
	for i := range deltas {
		out = append(out, AnthropicChunk{
			Type:  "content_block_delta",
			Index: 0,
			Delta: AnthropicDeltaBlock{Type: "text_delta", Text: deltas[i]},
		})
	}

	out = append(out,
		AnthropicChunk{Type: "content_block_stop", Index: 0},
		AnthropicChunk{
			Type:       "message_delta",
			StopReason: "end_turn",
			UsageMap:   map[string]int{"output_tokens": usage.OutputTokens},
		},
		AnthropicChunk{Type: "message_stop"},
	)
	return out
}
// AnthropicToolCallChunks creates a complete streaming response for a tool call.
// Input JSON can be split across multiple deltas, matching Anthropic's
// input_json_delta streaming behavior.
+53
View File
@@ -63,6 +63,59 @@ func TestAnthropic_Streaming(t *testing.T) {
require.Equal(t, len(expectedDeltas), deltaIndex, "Expected %d deltas, got %d. Total parts received: %d", len(expectedDeltas), deltaIndex, len(allParts))
}
// TestAnthropic_StreamingUsageIncludesCacheTokens streams a response from the
// chattest Anthropic server whose chunks are built with explicit input,
// output, and cache token counts, and checks that the finish part produced by
// the fantasy Anthropic client reports all of them, with a total of
// input + output.
func TestAnthropic_StreamingUsageIncludesCacheTokens(t *testing.T) {
	t.Parallel()

	// Usage the fake server encodes into its streamed chunks.
	serverUsage := chattest.AnthropicUsage{
		InputTokens:              200,
		OutputTokens:             75,
		CacheCreationInputTokens: 30,
		CacheReadInputTokens:     150,
	}
	serverURL := chattest.NewAnthropic(t, func(_ *chattest.AnthropicRequest) chattest.AnthropicResponse {
		chunks := chattest.AnthropicTextChunksWithCacheUsage(serverUsage, "cached", " response")
		return chattest.AnthropicStreamingResponse(chunks...)
	})

	client, err := fantasyanthropic.New(
		fantasyanthropic.WithAPIKey("test-key"),
		fantasyanthropic.WithBaseURL(serverURL),
	)
	require.NoError(t, err)

	model, err := client.LanguageModel(context.Background(), "claude-3-opus-20240229")
	require.NoError(t, err)

	userPrompt := fantasy.Message{
		Role:    fantasy.MessageRoleUser,
		Content: []fantasy.MessagePart{fantasy.TextPart{Text: "hello"}},
	}
	stream, err := model.Stream(context.Background(), fantasy.Call{
		Prompt: []fantasy.Message{userPrompt},
	})
	require.NoError(t, err)

	// Consume the whole stream; if several finish parts arrive, the last
	// one is the one that gets checked.
	var (
		finishPart fantasy.StreamPart
		found      bool
	)
	for part := range stream {
		if part.Type == fantasy.StreamPartTypeFinish {
			finishPart, found = part, true
		}
	}
	require.True(t, found)

	got := finishPart.Usage
	require.Equal(t, int64(200), got.InputTokens)
	require.Equal(t, int64(75), got.OutputTokens)
	require.Equal(t, int64(275), got.TotalTokens)
	require.Equal(t, int64(30), got.CacheCreationTokens)
	require.Equal(t, int64(150), got.CacheReadTokens)
}
func TestAnthropic_ToolCalls(t *testing.T) {
t.Parallel()
+24 -8
View File
@@ -180,7 +180,7 @@ func generateTitle(
model fantasy.LanguageModel,
input string,
) (string, error) {
title, _, err := generateStructuredTitle(ctx, model, titleGenerationPrompt, input)
title, err := generateStructuredTitle(ctx, model, titleGenerationPrompt, input)
if err != nil {
return "", err
}
@@ -192,6 +192,24 @@ func generateStructuredTitle(
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, error) {
title, _, err := generateStructuredTitleWithUsage(
ctx,
model,
systemPrompt,
userInput,
)
if err != nil {
return "", err
}
return title, nil
}
func generateStructuredTitleWithUsage(
ctx context.Context,
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, fantasy.Usage, error) {
userInput = strings.TrimSpace(userInput)
if userInput == "" {
@@ -226,8 +244,6 @@ func generateStructuredTitle(
return genErr
}, nil)
if err != nil {
// Extract usage from the error when available so that
// failed attempts are still accounted for in usage tracking.
var usage fantasy.Usage
var noObjErr *fantasy.NoObjectGeneratedError
if errors.As(err, &noObjErr) {
@@ -529,7 +545,7 @@ func generateManualTitle(
userInput = strings.TrimSpace(firstUserText)
}
title, usage, err := generateStructuredTitle(
title, usage, err := generateStructuredTitleWithUsage(
titleCtx,
fallbackModel,
systemPrompt,
@@ -579,7 +595,7 @@ func generatePushSummary(
candidates = append(candidates, fallbackModel)
for _, model := range candidates {
summary, _, err := generateShortText(summaryCtx, model, pushSummaryPrompt, input)
summary, err := generateShortText(summaryCtx, model, pushSummaryPrompt, input)
if err != nil {
logger.Debug(ctx, "push summary model candidate failed",
slog.Error(err),
@@ -601,7 +617,7 @@ func generateShortText(
model fantasy.LanguageModel,
systemPrompt string,
userInput string,
) (string, fantasy.Usage, error) {
) (string, error) {
prompt := []fantasy.Message{
{
Role: fantasy.MessageRoleSystem,
@@ -629,7 +645,7 @@ func generateShortText(
return genErr
}, nil)
if err != nil {
return "", fantasy.Usage{}, xerrors.Errorf("generate short text: %w", err)
return "", xerrors.Errorf("generate short text: %w", err)
}
responseParts := make([]codersdk.ChatMessagePart, 0, len(response.Content))
@@ -639,5 +655,5 @@ func generateShortText(
}
}
text := normalizeShortTextOutput(contentBlocksToText(responseParts))
return text, response.Usage, nil
return text, nil
}
+1 -4
View File
@@ -515,12 +515,9 @@ func Test_generateShortText_NormalizesQuotedOutput(t *testing.T) {
},
}
text, usage, err := generateShortText(context.Background(), model, "system", "user")
text, err := generateShortText(context.Background(), model, "system", "user")
require.NoError(t, err)
require.Equal(t, "Quoted summary", text)
require.Equal(t, int64(3), usage.InputTokens)
require.Equal(t, int64(2), usage.OutputTokens)
require.Equal(t, int64(5), usage.TotalTokens)
}
type stubModel struct {
@@ -83,3 +83,8 @@ Select a user to see:
bar shows current spend relative to the limit.
- **Per-model breakdown** — table of costs and token usage by model.
- **Per-chat breakdown** — table of costs and token usage by chat session.
> [!NOTE]
> Automatic title generation uses lightweight models, such as Claude Haiku or GPT-4o
> Mini. Its token usage is not counted towards usage limits or shown in usage
> summaries.
+6 -1
View File
@@ -82,7 +82,12 @@ replace github.com/spf13/afero => github.com/aslilac/afero v0.0.0-20250403163713
// 3) ibetitsmike/fantasy#4 — skip ephemeral replay items when store=false
replace charm.land/fantasy => github.com/coder/fantasy v0.0.0-20260325145725-112927d9b6d8
replace github.com/charmbracelet/anthropic-sdk-go => github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab
// Uses the coder/anthropic-sdk-go fork (fantasy branch), which adds:
// 1) All kylecarbs changes (fantasy branch merge).
// 2) Explicit usage-field merging in Accumulate using JSON presence checks
// (preserves input and cache tokens from message_start when message_delta
// omits them).
replace github.com/charmbracelet/anthropic-sdk-go => github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a
require (
cdr.dev/slog/v3 v3.0.0
+2 -2
View File
@@ -316,6 +316,8 @@ github.com/coder/aibridge v1.1.1-0.20260331154949-a011104f377d h1:yoDGndlvKP6fiK
github.com/coder/aibridge v1.1.1-0.20260331154949-a011104f377d/go.mod h1:u6WvGLMQQbk3ByeOw+LBdVgDNc/v/ujAtUc6MfvzQb4=
github.com/coder/aisdk-go v0.0.9 h1:Vzo/k2qwVGLTR10ESDeP2Ecek1SdPfZlEjtTfMveiVo=
github.com/coder/aisdk-go v0.0.9/go.mod h1:KF6/Vkono0FJJOtWtveh5j7yfNrSctVTpwgweYWSp5M=
github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a h1:0wjvSIzTI5BkhcrV1oKED3S8MEpPUVJOat19D7ityrw=
github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a/go.mod h1:hqlYqR7uPKOKfnNeicUbZp0Ps0GeYFlKYtwh5HGDCx8=
github.com/coder/boundary v0.8.4-0.20260304164748-566aeea939ab h1:HrlxyTmMQpOHfSKzRU1vf5TxrmV6vL5OiWq+Dvn5qh0=
github.com/coder/boundary v0.8.4-0.20260304164748-566aeea939ab/go.mod h1:BhJhyKW/+zZQzaGZ3vn27if2k0Vx5xLXzq7ZCQx5gPk=
github.com/coder/bubbletea v1.2.2-0.20241212190825-007a1cdb2c41 h1:SBN/DA63+ZHwuWwPHPYoCZ/KLAjHv5g4h2MS4f2/MTI=
@@ -811,8 +813,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab h1:5UMYqr13zFQKfq8YscVuFwE7cCQpLieaPJDtLUPe11E=
github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab/go.mod h1:hqlYqR7uPKOKfnNeicUbZp0Ps0GeYFlKYtwh5HGDCx8=
github.com/kylecarbs/chroma/v2 v2.0.0-20240401211003-9e036e0631f3 h1:Z9/bo5PSeMutpdiKYNt/TTSfGM1Ll0naj3QzYX9VxTc=
github.com/kylecarbs/chroma/v2 v2.0.0-20240401211003-9e036e0631f3/go.mod h1:BUGjjsD+ndS6eX37YgTchSEG+Jg9Jv1GiZs9sqPqztk=
github.com/kylecarbs/openai-go/v3 v3.0.0-20260319113850-9477dcaedcae h1:xlFZNX4nnxpj9Cf6mTwD3pirXGNtBJ/6COsf9iZmsL0=
@@ -1,5 +1,5 @@
import dayjs from "dayjs";
import { TriangleAlertIcon } from "lucide-react";
import { InfoIcon, TriangleAlertIcon } from "lucide-react";
import type { FC } from "react";
import { getErrorMessage } from "#/api/errors";
import type * as TypesGen from "#/api/typesGenerated";
@@ -235,6 +235,14 @@ export const ChatCostSummaryView: FC<ChatCostSummaryViewProps> = ({
</div>
)}
<div className="flex items-start gap-3 p-4 text-sm text-content-secondary">
<InfoIcon className="h-5 w-5 shrink-0" />
<span>
Automatic title generation uses lightweight models and is not counted
towards usage limits.
</span>
</div>
{summary.by_model.length === 0 && summary.by_chat.length === 0 ? (
<p className="py-12 text-center text-content-secondary">
{emptyMessage}