mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix: pin fixed anthropic/fantasy forks for streaming token accounting (#24077)
This commit is contained in:
@@ -86,6 +86,54 @@ func TestRun_ActiveToolsPrepareBehavior(t *testing.T) {
|
||||
require.True(t, hasAnthropicEphemeralCacheControl(capturedCall.Prompt[4]))
|
||||
}
|
||||
|
||||
func TestProcessStepStream_AnthropicUsageMatchesFinalDelta(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
model := &loopTestModel{
|
||||
provider: fantasyanthropic.Name,
|
||||
streamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
|
||||
return streamFromParts([]fantasy.StreamPart{
|
||||
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
|
||||
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "cached response"},
|
||||
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
|
||||
{
|
||||
Type: fantasy.StreamPartTypeFinish,
|
||||
Usage: fantasy.Usage{
|
||||
InputTokens: 200,
|
||||
OutputTokens: 75,
|
||||
TotalTokens: 275,
|
||||
CacheCreationTokens: 30,
|
||||
CacheReadTokens: 150,
|
||||
ReasoningTokens: 0,
|
||||
},
|
||||
FinishReason: fantasy.FinishReasonStop,
|
||||
},
|
||||
}), nil
|
||||
},
|
||||
}
|
||||
|
||||
var persistedStep PersistedStep
|
||||
|
||||
err := Run(context.Background(), RunOptions{
|
||||
Model: model,
|
||||
Messages: []fantasy.Message{
|
||||
textMessage(fantasy.MessageRoleUser, "hello"),
|
||||
},
|
||||
MaxSteps: 1,
|
||||
ContextLimitFallback: 4096,
|
||||
PersistStep: func(_ context.Context, step PersistedStep) error {
|
||||
persistedStep = step
|
||||
return nil
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, int64(200), persistedStep.Usage.InputTokens)
|
||||
require.Equal(t, int64(75), persistedStep.Usage.OutputTokens)
|
||||
require.Equal(t, int64(275), persistedStep.Usage.TotalTokens)
|
||||
require.Equal(t, int64(30), persistedStep.Usage.CacheCreationTokens)
|
||||
require.Equal(t, int64(150), persistedStep.Usage.CacheReadTokens)
|
||||
}
|
||||
|
||||
func TestRun_OnRetryEnrichesProvider(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -53,8 +53,10 @@ type AnthropicMessage struct {
|
||||
|
||||
// AnthropicUsage represents usage information in an Anthropic response.
//
// InputTokens and OutputTokens are always serialized. The two cache
// counters are tagged omitempty, so they are left out of the JSON
// encoding when they are zero.
type AnthropicUsage struct {
	InputTokens              int `json:"input_tokens"`
	OutputTokens             int `json:"output_tokens"`
	CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
	CacheReadInputTokens     int `json:"cache_read_input_tokens,omitempty"`
}
|
||||
|
||||
// AnthropicChunk represents a streaming chunk from Anthropic.
|
||||
@@ -67,14 +69,16 @@ type AnthropicChunk struct {
|
||||
StopReason string `json:"stop_reason,omitempty"`
|
||||
StopSequence *string `json:"stop_sequence,omitempty"`
|
||||
Usage AnthropicUsage `json:"usage,omitempty"`
|
||||
UsageMap map[string]int `json:"-"`
|
||||
}
|
||||
|
||||
// AnthropicChunkMessage represents message metadata in a chunk.
//
// Usage holds usage counters keyed by their JSON field name (for example
// "input_tokens"). It is tagged omitempty, so a nil or empty map is left
// out of the JSON encoding.
type AnthropicChunkMessage struct {
	ID    string         `json:"id"`
	Type  string         `json:"type"`
	Role  string         `json:"role"`
	Model string         `json:"model"`
	Usage map[string]int `json:"usage,omitempty"`
}
|
||||
|
||||
// AnthropicContentBlock represents a content block in a chunk.
|
||||
@@ -206,7 +210,11 @@ func (s *anthropicServer) writeStreamingResponse(w http.ResponseWriter, chunks <
|
||||
"stop_reason": chunk.StopReason,
|
||||
"stop_sequence": chunk.StopSequence,
|
||||
}
|
||||
chunkData["usage"] = chunk.Usage
|
||||
if chunk.UsageMap != nil {
|
||||
chunkData["usage"] = chunk.UsageMap
|
||||
} else {
|
||||
chunkData["usage"] = chunk.Usage
|
||||
}
|
||||
case "message_stop":
|
||||
// No additional fields
|
||||
}
|
||||
@@ -342,6 +350,80 @@ func AnthropicTextChunks(deltas ...string) []AnthropicChunk {
|
||||
return chunks
|
||||
}
|
||||
|
||||
// AnthropicTextChunksWithCacheUsage creates a streaming response with text
|
||||
// deltas and explicit cache token usage. The message_start event carries
|
||||
// the initial input and cache token counts, and the final message_delta
|
||||
// carries the output token count.
|
||||
func AnthropicTextChunksWithCacheUsage(usage AnthropicUsage, deltas ...string) []AnthropicChunk {
|
||||
if len(deltas) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
messageID := fmt.Sprintf("msg-%s", uuid.New().String()[:8])
|
||||
model := "claude-3-opus-20240229"
|
||||
|
||||
messageUsage := map[string]int{
|
||||
"input_tokens": usage.InputTokens,
|
||||
}
|
||||
if usage.CacheCreationInputTokens != 0 {
|
||||
messageUsage["cache_creation_input_tokens"] = usage.CacheCreationInputTokens
|
||||
}
|
||||
if usage.CacheReadInputTokens != 0 {
|
||||
messageUsage["cache_read_input_tokens"] = usage.CacheReadInputTokens
|
||||
}
|
||||
|
||||
chunks := []AnthropicChunk{
|
||||
{
|
||||
Type: "message_start",
|
||||
Message: AnthropicChunkMessage{
|
||||
ID: messageID,
|
||||
Type: "message",
|
||||
Role: "assistant",
|
||||
Model: model,
|
||||
Usage: messageUsage,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "content_block_start",
|
||||
Index: 0,
|
||||
ContentBlock: AnthropicContentBlock{
|
||||
Type: "text",
|
||||
Text: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, delta := range deltas {
|
||||
chunks = append(chunks, AnthropicChunk{
|
||||
Type: "content_block_delta",
|
||||
Index: 0,
|
||||
Delta: AnthropicDeltaBlock{
|
||||
Type: "text_delta",
|
||||
Text: delta,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
chunks = append(chunks,
|
||||
AnthropicChunk{
|
||||
Type: "content_block_stop",
|
||||
Index: 0,
|
||||
},
|
||||
AnthropicChunk{
|
||||
Type: "message_delta",
|
||||
StopReason: "end_turn",
|
||||
UsageMap: map[string]int{
|
||||
"output_tokens": usage.OutputTokens,
|
||||
},
|
||||
},
|
||||
AnthropicChunk{
|
||||
Type: "message_stop",
|
||||
},
|
||||
)
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
// AnthropicToolCallChunks creates a complete streaming response for a tool call.
|
||||
// Input JSON can be split across multiple deltas, matching Anthropic's
|
||||
// input_json_delta streaming behavior.
|
||||
|
||||
@@ -63,6 +63,59 @@ func TestAnthropic_Streaming(t *testing.T) {
|
||||
require.Equal(t, len(expectedDeltas), deltaIndex, "Expected %d deltas, got %d. Total parts received: %d", len(expectedDeltas), deltaIndex, len(allParts))
|
||||
}
|
||||
|
||||
func TestAnthropic_StreamingUsageIncludesCacheTokens(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
serverURL := chattest.NewAnthropic(t, func(req *chattest.AnthropicRequest) chattest.AnthropicResponse {
|
||||
return chattest.AnthropicStreamingResponse(
|
||||
chattest.AnthropicTextChunksWithCacheUsage(chattest.AnthropicUsage{
|
||||
InputTokens: 200,
|
||||
OutputTokens: 75,
|
||||
CacheCreationInputTokens: 30,
|
||||
CacheReadInputTokens: 150,
|
||||
}, "cached", " response")...,
|
||||
)
|
||||
})
|
||||
|
||||
client, err := fantasyanthropic.New(
|
||||
fantasyanthropic.WithAPIKey("test-key"),
|
||||
fantasyanthropic.WithBaseURL(serverURL),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
model, err := client.LanguageModel(context.Background(), "claude-3-opus-20240229")
|
||||
require.NoError(t, err)
|
||||
|
||||
stream, err := model.Stream(context.Background(), fantasy.Call{
|
||||
Prompt: []fantasy.Message{
|
||||
{
|
||||
Role: fantasy.MessageRoleUser,
|
||||
Content: []fantasy.MessagePart{fantasy.TextPart{Text: "hello"}},
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
var (
|
||||
finishPart fantasy.StreamPart
|
||||
found bool
|
||||
)
|
||||
for part := range stream {
|
||||
if part.Type != fantasy.StreamPartTypeFinish {
|
||||
continue
|
||||
}
|
||||
finishPart = part
|
||||
found = true
|
||||
}
|
||||
|
||||
require.True(t, found)
|
||||
require.Equal(t, int64(200), finishPart.Usage.InputTokens)
|
||||
require.Equal(t, int64(75), finishPart.Usage.OutputTokens)
|
||||
require.Equal(t, int64(275), finishPart.Usage.TotalTokens)
|
||||
require.Equal(t, int64(30), finishPart.Usage.CacheCreationTokens)
|
||||
require.Equal(t, int64(150), finishPart.Usage.CacheReadTokens)
|
||||
}
|
||||
|
||||
func TestAnthropic_ToolCalls(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -180,7 +180,7 @@ func generateTitle(
|
||||
model fantasy.LanguageModel,
|
||||
input string,
|
||||
) (string, error) {
|
||||
title, _, err := generateStructuredTitle(ctx, model, titleGenerationPrompt, input)
|
||||
title, err := generateStructuredTitle(ctx, model, titleGenerationPrompt, input)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -192,6 +192,24 @@ func generateStructuredTitle(
|
||||
model fantasy.LanguageModel,
|
||||
systemPrompt string,
|
||||
userInput string,
|
||||
) (string, error) {
|
||||
title, _, err := generateStructuredTitleWithUsage(
|
||||
ctx,
|
||||
model,
|
||||
systemPrompt,
|
||||
userInput,
|
||||
)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return title, nil
|
||||
}
|
||||
|
||||
func generateStructuredTitleWithUsage(
|
||||
ctx context.Context,
|
||||
model fantasy.LanguageModel,
|
||||
systemPrompt string,
|
||||
userInput string,
|
||||
) (string, fantasy.Usage, error) {
|
||||
userInput = strings.TrimSpace(userInput)
|
||||
if userInput == "" {
|
||||
@@ -226,8 +244,6 @@ func generateStructuredTitle(
|
||||
return genErr
|
||||
}, nil)
|
||||
if err != nil {
|
||||
// Extract usage from the error when available so that
|
||||
// failed attempts are still accounted for in usage tracking.
|
||||
var usage fantasy.Usage
|
||||
var noObjErr *fantasy.NoObjectGeneratedError
|
||||
if errors.As(err, &noObjErr) {
|
||||
@@ -529,7 +545,7 @@ func generateManualTitle(
|
||||
userInput = strings.TrimSpace(firstUserText)
|
||||
}
|
||||
|
||||
title, usage, err := generateStructuredTitle(
|
||||
title, usage, err := generateStructuredTitleWithUsage(
|
||||
titleCtx,
|
||||
fallbackModel,
|
||||
systemPrompt,
|
||||
@@ -579,7 +595,7 @@ func generatePushSummary(
|
||||
candidates = append(candidates, fallbackModel)
|
||||
|
||||
for _, model := range candidates {
|
||||
summary, _, err := generateShortText(summaryCtx, model, pushSummaryPrompt, input)
|
||||
summary, err := generateShortText(summaryCtx, model, pushSummaryPrompt, input)
|
||||
if err != nil {
|
||||
logger.Debug(ctx, "push summary model candidate failed",
|
||||
slog.Error(err),
|
||||
@@ -601,7 +617,7 @@ func generateShortText(
|
||||
model fantasy.LanguageModel,
|
||||
systemPrompt string,
|
||||
userInput string,
|
||||
) (string, fantasy.Usage, error) {
|
||||
) (string, error) {
|
||||
prompt := []fantasy.Message{
|
||||
{
|
||||
Role: fantasy.MessageRoleSystem,
|
||||
@@ -629,7 +645,7 @@ func generateShortText(
|
||||
return genErr
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return "", fantasy.Usage{}, xerrors.Errorf("generate short text: %w", err)
|
||||
return "", xerrors.Errorf("generate short text: %w", err)
|
||||
}
|
||||
|
||||
responseParts := make([]codersdk.ChatMessagePart, 0, len(response.Content))
|
||||
@@ -639,5 +655,5 @@ func generateShortText(
|
||||
}
|
||||
}
|
||||
text := normalizeShortTextOutput(contentBlocksToText(responseParts))
|
||||
return text, response.Usage, nil
|
||||
return text, nil
|
||||
}
|
||||
|
||||
@@ -515,12 +515,9 @@ func Test_generateShortText_NormalizesQuotedOutput(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
text, usage, err := generateShortText(context.Background(), model, "system", "user")
|
||||
text, err := generateShortText(context.Background(), model, "system", "user")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "Quoted summary", text)
|
||||
require.Equal(t, int64(3), usage.InputTokens)
|
||||
require.Equal(t, int64(2), usage.OutputTokens)
|
||||
require.Equal(t, int64(5), usage.TotalTokens)
|
||||
}
|
||||
|
||||
type stubModel struct {
|
||||
|
||||
@@ -83,3 +83,8 @@ Select a user to see:
|
||||
bar shows current spend relative to the limit.
|
||||
- **Per-model breakdown** — table of costs and token usage by model.
|
||||
- **Per-chat breakdown** — table of costs and token usage by chat session.
|
||||
|
||||
> [!NOTE]
|
||||
> Automatic title generation uses lightweight models, such as Claude Haiku or GPT-4o
|
||||
> Mini. Its token usage is not counted towards usage limits or shown in usage
|
||||
> summaries.
|
||||
|
||||
@@ -82,7 +82,12 @@ replace github.com/spf13/afero => github.com/aslilac/afero v0.0.0-20250403163713
|
||||
// 3) ibetitsmike/fantasy#4 — skip ephemeral replay items when store=false
|
||||
replace charm.land/fantasy => github.com/coder/fantasy v0.0.0-20260325145725-112927d9b6d8
|
||||
|
||||
replace github.com/charmbracelet/anthropic-sdk-go => github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab
|
||||
// Forked from coder/anthropic-sdk-go (fantasy branch) which adds:
|
||||
// 1) All kylecarbs changes (fantasy branch merge).
|
||||
// 2) Explicit usage-field merging in Accumulate using JSON presence checks
|
||||
// (preserves input and cache tokens from message_start when message_delta
|
||||
// omits them).
|
||||
replace github.com/charmbracelet/anthropic-sdk-go => github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a
|
||||
|
||||
require (
|
||||
cdr.dev/slog/v3 v3.0.0
|
||||
|
||||
@@ -316,6 +316,8 @@ github.com/coder/aibridge v1.1.1-0.20260331154949-a011104f377d h1:yoDGndlvKP6fiK
|
||||
github.com/coder/aibridge v1.1.1-0.20260331154949-a011104f377d/go.mod h1:u6WvGLMQQbk3ByeOw+LBdVgDNc/v/ujAtUc6MfvzQb4=
|
||||
github.com/coder/aisdk-go v0.0.9 h1:Vzo/k2qwVGLTR10ESDeP2Ecek1SdPfZlEjtTfMveiVo=
|
||||
github.com/coder/aisdk-go v0.0.9/go.mod h1:KF6/Vkono0FJJOtWtveh5j7yfNrSctVTpwgweYWSp5M=
|
||||
github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a h1:0wjvSIzTI5BkhcrV1oKED3S8MEpPUVJOat19D7ityrw=
|
||||
github.com/coder/anthropic-sdk-go v0.0.0-20260408163834-8345653c189a/go.mod h1:hqlYqR7uPKOKfnNeicUbZp0Ps0GeYFlKYtwh5HGDCx8=
|
||||
github.com/coder/boundary v0.8.4-0.20260304164748-566aeea939ab h1:HrlxyTmMQpOHfSKzRU1vf5TxrmV6vL5OiWq+Dvn5qh0=
|
||||
github.com/coder/boundary v0.8.4-0.20260304164748-566aeea939ab/go.mod h1:BhJhyKW/+zZQzaGZ3vn27if2k0Vx5xLXzq7ZCQx5gPk=
|
||||
github.com/coder/bubbletea v1.2.2-0.20241212190825-007a1cdb2c41 h1:SBN/DA63+ZHwuWwPHPYoCZ/KLAjHv5g4h2MS4f2/MTI=
|
||||
@@ -811,8 +813,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab h1:5UMYqr13zFQKfq8YscVuFwE7cCQpLieaPJDtLUPe11E=
|
||||
github.com/kylecarbs/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab/go.mod h1:hqlYqR7uPKOKfnNeicUbZp0Ps0GeYFlKYtwh5HGDCx8=
|
||||
github.com/kylecarbs/chroma/v2 v2.0.0-20240401211003-9e036e0631f3 h1:Z9/bo5PSeMutpdiKYNt/TTSfGM1Ll0naj3QzYX9VxTc=
|
||||
github.com/kylecarbs/chroma/v2 v2.0.0-20240401211003-9e036e0631f3/go.mod h1:BUGjjsD+ndS6eX37YgTchSEG+Jg9Jv1GiZs9sqPqztk=
|
||||
github.com/kylecarbs/openai-go/v3 v3.0.0-20260319113850-9477dcaedcae h1:xlFZNX4nnxpj9Cf6mTwD3pirXGNtBJ/6COsf9iZmsL0=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import dayjs from "dayjs";
|
||||
import { TriangleAlertIcon } from "lucide-react";
|
||||
import { InfoIcon, TriangleAlertIcon } from "lucide-react";
|
||||
import type { FC } from "react";
|
||||
import { getErrorMessage } from "#/api/errors";
|
||||
import type * as TypesGen from "#/api/typesGenerated";
|
||||
@@ -235,6 +235,14 @@ export const ChatCostSummaryView: FC<ChatCostSummaryViewProps> = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex items-start gap-3 p-4 text-sm text-content-secondary">
|
||||
<InfoIcon className="h-5 w-5 shrink-0" />
|
||||
<span>
|
||||
Automatic title generation uses lightweight models and is not counted
|
||||
towards usage limits.
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{summary.by_model.length === 0 && summary.by_chat.length === 0 ? (
|
||||
<p className="py-12 text-center text-content-secondary">
|
||||
{emptyMessage}
|
||||
|
||||
Reference in New Issue
Block a user