From 8369fa88fde774fda9af6cea83ad1b03e0c50726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Banaszewski?= Date: Fri, 3 Apr 2026 16:27:31 +0200 Subject: [PATCH] feat: add columns for cached tokens from aibridge (#23832) Two new columns added to aibridge_token_usages: - cache_read_input_tokens (BIGINT, default 0) - cache_write_input_tokens (BIGINT, default 0) Migration backfills existing rows by extracting values from the metadata JSONB column (cache_read_input, input_cached, prompt_cached for reads (max value selected since only 1 should be set), cache_creation_input for writes). All references to data from metadata were updated to reference new columns. No other changes than changing where data is extracted from. Requires aibridge library version bump to include: https://github.com/coder/aibridge/pull/229 Fixes: https://github.com/coder/aibridge/issues/150 --- coderd/apidoc/docs.go | 18 ++++ coderd/apidoc/swagger.json | 18 ++++ coderd/database/db2sdk/db2sdk.go | 44 +++++---- coderd/database/db2sdk/db2sdk_test.go | 14 ++- coderd/database/dbgen/dbgen.go | 16 ++-- coderd/database/dump.sql | 4 +- ...0461_aibridge_cache_token_columns.down.sql | 3 + ...000461_aibridge_cache_token_columns.up.sql | 26 ++++++ coderd/database/modelqueries.go | 2 + coderd/database/models.go | 12 ++- coderd/database/queries.sql.go | 91 ++++++++++--------- coderd/database/queries/aibridge.sql | 27 ++---- coderd/telemetry/telemetry_test.go | 20 ++-- codersdk/aibridge.go | 33 ++++--- docs/reference/api/aibridge.md | 10 ++ docs/reference/api/schemas.md | 72 +++++++++++---- .../aibridged/aibridged_integration_test.go | 1 + enterprise/aibridged/proto/aibridged.pb.go | 39 ++++++-- enterprise/aibridged/proto/aibridged.proto | 2 + enterprise/aibridged/translator.go | 16 ++-- enterprise/aibridgedserver/aibridgedserver.go | 18 ++-- .../aibridgedserver/aibridgedserver_test.go | 52 ++++++----- enterprise/coderd/aibridge_test.go | 66 ++++++++------ site/src/api/typesGenerated.ts | 9 +- 
.../ListSessionsPageView.stories.tsx | 2 + .../ListSessionsRow.stories.tsx | 2 + .../SessionTimeline.stories.tsx | 14 ++- site/src/testHelpers/entities.ts | 4 + 28 files changed, 423 insertions(+), 212 deletions(-) create mode 100644 coderd/database/migrations/000461_aibridge_cache_token_columns.down.sql create mode 100644 coderd/database/migrations/000461_aibridge_cache_token_columns.up.sql diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 74ef34d1be..4de4d52dc1 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -13133,6 +13133,12 @@ const docTemplate = `{ "codersdk.AIBridgeSessionThreadsTokenUsage": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + "cache_write_input_tokens": { + "type": "integer" + }, "input_tokens": { "type": "integer" }, @@ -13148,6 +13154,12 @@ const docTemplate = `{ "codersdk.AIBridgeSessionTokenUsageSummary": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + "cache_write_input_tokens": { + "type": "integer" + }, "input_tokens": { "type": "integer" }, @@ -13194,6 +13206,12 @@ const docTemplate = `{ "codersdk.AIBridgeTokenUsage": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + "cache_write_input_tokens": { + "type": "integer" + }, "created_at": { "type": "string", "format": "date-time" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 38bbf2bc7a..de1619a7e2 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -11711,6 +11711,12 @@ "codersdk.AIBridgeSessionThreadsTokenUsage": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + "cache_write_input_tokens": { + "type": "integer" + }, "input_tokens": { "type": "integer" }, @@ -11726,6 +11732,12 @@ "codersdk.AIBridgeSessionTokenUsageSummary": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + 
"cache_write_input_tokens": { + "type": "integer" + }, "input_tokens": { "type": "integer" }, @@ -11772,6 +11784,12 @@ "codersdk.AIBridgeTokenUsage": { "type": "object", "properties": { + "cache_read_input_tokens": { + "type": "integer" + }, + "cache_write_input_tokens": { + "type": "integer" + }, "created_at": { "type": "string", "format": "date-time" diff --git a/coderd/database/db2sdk/db2sdk.go b/coderd/database/db2sdk/db2sdk.go index dce4bb7531..bf1cf50734 100644 --- a/coderd/database/db2sdk/db2sdk.go +++ b/coderd/database/db2sdk/db2sdk.go @@ -1037,8 +1037,10 @@ func AIBridgeSession(row database.ListAIBridgeSessionsRow) codersdk.AIBridgeSess StartedAt: row.StartedAt, Threads: row.Threads, TokenUsageSummary: codersdk.AIBridgeSessionTokenUsageSummary{ - InputTokens: row.InputTokens, - OutputTokens: row.OutputTokens, + InputTokens: row.InputTokens, + OutputTokens: row.OutputTokens, + CacheReadInputTokens: row.CacheReadInputTokens, + CacheWriteInputTokens: row.CacheWriteInputTokens, }, } // Ensure non-nil slices for JSON serialization. 
@@ -1062,13 +1064,15 @@ func AIBridgeSession(row database.ListAIBridgeSessionsRow) codersdk.AIBridgeSess func AIBridgeTokenUsage(usage database.AIBridgeTokenUsage) codersdk.AIBridgeTokenUsage { return codersdk.AIBridgeTokenUsage{ - ID: usage.ID, - InterceptionID: usage.InterceptionID, - ProviderResponseID: usage.ProviderResponseID, - InputTokens: usage.InputTokens, - OutputTokens: usage.OutputTokens, - Metadata: jsonOrEmptyMap(usage.Metadata), - CreatedAt: usage.CreatedAt, + ID: usage.ID, + InterceptionID: usage.InterceptionID, + ProviderResponseID: usage.ProviderResponseID, + InputTokens: usage.InputTokens, + OutputTokens: usage.OutputTokens, + CacheReadInputTokens: usage.CacheReadInputTokens, + CacheWriteInputTokens: usage.CacheWriteInputTokens, + Metadata: jsonOrEmptyMap(usage.Metadata), + CreatedAt: usage.CreatedAt, } } @@ -1179,9 +1183,11 @@ func AIBridgeSessionThreads( PageStartedAt: pageStartedAt, PageEndedAt: pageEndedAt, TokenUsageSummary: codersdk.AIBridgeSessionThreadsTokenUsage{ - InputTokens: session.InputTokens, - OutputTokens: session.OutputTokens, - Metadata: sessionTokenMeta, + InputTokens: session.InputTokens, + OutputTokens: session.OutputTokens, + CacheReadInputTokens: session.CacheReadInputTokens, + CacheWriteInputTokens: session.CacheWriteInputTokens, + Metadata: sessionTokenMeta, }, Threads: threads, } @@ -1314,17 +1320,19 @@ func buildAIBridgeThread( // aggregateTokenUsage sums token usage rows and aggregates metadata. func aggregateTokenUsage(tokens []database.AIBridgeTokenUsage) codersdk.AIBridgeSessionThreadsTokenUsage { - var inputTokens, outputTokens int64 + var inputTokens, outputTokens, cacheRead, cacheWrite int64 for _, tu := range tokens { inputTokens += tu.InputTokens outputTokens += tu.OutputTokens - // TODO: once https://github.com/coder/aibridge/issues/150 lands we - // should aggregate the other token types. 
+ cacheRead += tu.CacheReadInputTokens + cacheWrite += tu.CacheWriteInputTokens } return codersdk.AIBridgeSessionThreadsTokenUsage{ - InputTokens: inputTokens, - OutputTokens: outputTokens, - Metadata: aggregateTokenMetadata(tokens), + InputTokens: inputTokens, + OutputTokens: outputTokens, + CacheReadInputTokens: cacheRead, + CacheWriteInputTokens: cacheWrite, + Metadata: aggregateTokenMetadata(tokens), } } diff --git a/coderd/database/db2sdk/db2sdk_test.go b/coderd/database/db2sdk/db2sdk_test.go index 4043580f90..2738b5670a 100644 --- a/coderd/database/db2sdk/db2sdk_test.go +++ b/coderd/database/db2sdk/db2sdk_test.go @@ -259,11 +259,13 @@ func TestAIBridgeInterception(t *testing.T) { }, tokenUsages: []database.AIBridgeTokenUsage{ { - ID: uuid.New(), - InterceptionID: interceptionID, - ProviderResponseID: "resp-123", - InputTokens: 100, - OutputTokens: 200, + ID: uuid.New(), + InterceptionID: interceptionID, + ProviderResponseID: "resp-123", + InputTokens: 100, + OutputTokens: 200, + CacheReadInputTokens: 50, + CacheWriteInputTokens: 10, Metadata: pqtype.NullRawMessage{ RawMessage: json.RawMessage(`{"cache":"hit"}`), Valid: true, @@ -413,6 +415,8 @@ func TestAIBridgeInterception(t *testing.T) { require.Equal(t, tu.ProviderResponseID, result.TokenUsages[i].ProviderResponseID) require.Equal(t, tu.InputTokens, result.TokenUsages[i].InputTokens) require.Equal(t, tu.OutputTokens, result.TokenUsages[i].OutputTokens) + require.Equal(t, tu.CacheReadInputTokens, result.TokenUsages[i].CacheReadInputTokens) + require.Equal(t, tu.CacheWriteInputTokens, result.TokenUsages[i].CacheWriteInputTokens) } // Verify user prompts are converted correctly. 
diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index 4662ccd537..1dce62c841 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -1613,13 +1613,15 @@ func AIBridgeInterception(t testing.TB, db database.Store, seed database.InsertA func AIBridgeTokenUsage(t testing.TB, db database.Store, seed database.InsertAIBridgeTokenUsageParams) database.AIBridgeTokenUsage { usage, err := db.InsertAIBridgeTokenUsage(genCtx, database.InsertAIBridgeTokenUsageParams{ - ID: takeFirst(seed.ID, uuid.New()), - InterceptionID: takeFirst(seed.InterceptionID, uuid.New()), - ProviderResponseID: takeFirst(seed.ProviderResponseID, "provider_response_id"), - InputTokens: takeFirst(seed.InputTokens, 100), - OutputTokens: takeFirst(seed.OutputTokens, 100), - Metadata: takeFirstSlice(seed.Metadata, json.RawMessage("{}")), - CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + ID: takeFirst(seed.ID, uuid.New()), + InterceptionID: takeFirst(seed.InterceptionID, uuid.New()), + ProviderResponseID: takeFirst(seed.ProviderResponseID, "provider_response_id"), + InputTokens: takeFirst(seed.InputTokens, 100), + OutputTokens: takeFirst(seed.OutputTokens, 100), + CacheReadInputTokens: seed.CacheReadInputTokens, + CacheWriteInputTokens: seed.CacheWriteInputTokens, + Metadata: takeFirstSlice(seed.Metadata, json.RawMessage("{}")), + CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), }) require.NoError(t, err, "insert aibridge token usage") return usage diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index d37e98a646..2b202d78c5 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -1134,7 +1134,9 @@ CREATE TABLE aibridge_token_usages ( input_tokens bigint NOT NULL, output_tokens bigint NOT NULL, metadata jsonb, - created_at timestamp with time zone NOT NULL + created_at timestamp with time zone NOT NULL, + cache_read_input_tokens bigint DEFAULT 0 NOT NULL, + cache_write_input_tokens bigint DEFAULT 0 NOT NULL ); 
COMMENT ON TABLE aibridge_token_usages IS 'Audit log of tokens used by intercepted requests in AI Bridge'; diff --git a/coderd/database/migrations/000461_aibridge_cache_token_columns.down.sql b/coderd/database/migrations/000461_aibridge_cache_token_columns.down.sql new file mode 100644 index 0000000000..e2d3ef9d6a --- /dev/null +++ b/coderd/database/migrations/000461_aibridge_cache_token_columns.down.sql @@ -0,0 +1,3 @@ +ALTER TABLE aibridge_token_usages + DROP COLUMN cache_read_input_tokens, + DROP COLUMN cache_write_input_tokens; diff --git a/coderd/database/migrations/000461_aibridge_cache_token_columns.up.sql b/coderd/database/migrations/000461_aibridge_cache_token_columns.up.sql new file mode 100644 index 0000000000..c8278ec7e7 --- /dev/null +++ b/coderd/database/migrations/000461_aibridge_cache_token_columns.up.sql @@ -0,0 +1,26 @@ +ALTER TABLE aibridge_token_usages + ADD COLUMN cache_read_input_tokens BIGINT NOT NULL DEFAULT 0, + ADD COLUMN cache_write_input_tokens BIGINT NOT NULL DEFAULT 0; + +-- Backfill from metadata JSONB. Old rows stored cache tokens under +-- provider-specific keys; new rows use the dedicated columns above. +UPDATE aibridge_token_usages +SET + + -- Cache-read metadata keys by provider: + -- Anthropic (/v1/messages): "cache_read_input" + -- OpenAI (/v1/responses): "input_cached" + -- OpenAI (/v1/chat/completions): "prompt_cached" + cache_read_input_tokens = GREATEST( + COALESCE((metadata->>'cache_read_input')::bigint, 0), + COALESCE((metadata->>'input_cached')::bigint, 0), + COALESCE((metadata->>'prompt_cached')::bigint, 0) + ), + + -- Cache-write metadata keys by provider: + -- Anthropic (/v1/messages): "cache_creation_input" + -- OpenAI does not report cache-write tokens. 
+ cache_write_input_tokens = COALESCE((metadata->>'cache_creation_input')::bigint, 0) +WHERE metadata IS NOT NULL + AND cache_read_input_tokens = 0 + AND cache_write_input_tokens = 0; diff --git a/coderd/database/modelqueries.go b/coderd/database/modelqueries.go index ca45a90452..d0d08609ca 100644 --- a/coderd/database/modelqueries.go +++ b/coderd/database/modelqueries.go @@ -1029,6 +1029,8 @@ func (q *sqlQuerier) ListAuthorizedAIBridgeSessions(ctx context.Context, arg Lis &i.Threads, &i.InputTokens, &i.OutputTokens, + &i.CacheReadInputTokens, + &i.CacheWriteInputTokens, &i.LastPrompt, ); err != nil { return nil, err diff --git a/coderd/database/models.go b/coderd/database/models.go index 913ff51bf4..a38d39a21e 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -4055,11 +4055,13 @@ type AIBridgeTokenUsage struct { ID uuid.UUID `db:"id" json:"id"` InterceptionID uuid.UUID `db:"interception_id" json:"interception_id"` // The ID for the response in which the tokens were used, produced by the provider. 
- ProviderResponseID string `db:"provider_response_id" json:"provider_response_id"` - InputTokens int64 `db:"input_tokens" json:"input_tokens"` - OutputTokens int64 `db:"output_tokens" json:"output_tokens"` - Metadata pqtype.NullRawMessage `db:"metadata" json:"metadata"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + ProviderResponseID string `db:"provider_response_id" json:"provider_response_id"` + InputTokens int64 `db:"input_tokens" json:"input_tokens"` + OutputTokens int64 `db:"output_tokens" json:"output_tokens"` + Metadata pqtype.NullRawMessage `db:"metadata" json:"metadata"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + CacheReadInputTokens int64 `db:"cache_read_input_tokens" json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `db:"cache_write_input_tokens" json:"cache_write_input_tokens"` } // Audit log of tool calls in intercepted requests in AI Bridge diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 4d28a5c15d..5a75a6523c 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -148,21 +148,8 @@ token_aggregates AS ( SELECT COALESCE(SUM(tu.input_tokens), 0) AS token_count_input, COALESCE(SUM(tu.output_tokens), 0) AS token_count_output, - -- Cached tokens are stored in metadata JSON, extract if available. - -- Read tokens may be stored in: - -- - cache_read_input (Anthropic) - -- - prompt_cached (OpenAI) - COALESCE(SUM( - COALESCE((tu.metadata->>'cache_read_input')::bigint, 0) + - COALESCE((tu.metadata->>'prompt_cached')::bigint, 0) - ), 0) AS token_count_cached_read, - -- Written tokens may be stored in: - -- - cache_creation_input (Anthropic) - -- Note that cache_ephemeral_5m_input and cache_ephemeral_1h_input on - -- Anthropic are included in the cache_creation_input field. 
- COALESCE(SUM( - COALESCE((tu.metadata->>'cache_creation_input')::bigint, 0) - ), 0) AS token_count_cached_written, + COALESCE(SUM(tu.cache_read_input_tokens), 0) AS token_count_cached_read, + COALESCE(SUM(tu.cache_write_input_tokens), 0) AS token_count_cached_written, COUNT(tu.id) AS token_usages_count FROM interceptions_in_range i @@ -559,7 +546,7 @@ func (q *sqlQuerier) GetAIBridgeInterceptions(ctx context.Context) ([]AIBridgeIn const getAIBridgeTokenUsagesByInterceptionID = `-- name: GetAIBridgeTokenUsagesByInterceptionID :many SELECT - id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at + id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at, cache_read_input_tokens, cache_write_input_tokens FROM aibridge_token_usages WHERE interception_id = $1::uuid ORDER BY @@ -584,6 +571,8 @@ func (q *sqlQuerier) GetAIBridgeTokenUsagesByInterceptionID(ctx context.Context, &i.OutputTokens, &i.Metadata, &i.CreatedAt, + &i.CacheReadInputTokens, + &i.CacheWriteInputTokens, ); err != nil { return nil, err } @@ -781,21 +770,23 @@ func (q *sqlQuerier) InsertAIBridgeModelThought(ctx context.Context, arg InsertA const insertAIBridgeTokenUsage = `-- name: InsertAIBridgeTokenUsage :one INSERT INTO aibridge_token_usages ( - id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at + id, interception_id, provider_response_id, input_tokens, output_tokens, cache_read_input_tokens, cache_write_input_tokens, metadata, created_at ) VALUES ( - $1, $2, $3, $4, $5, COALESCE($6::jsonb, '{}'::jsonb), $7 + $1, $2, $3, $4, $5, $6, $7, COALESCE($8::jsonb, '{}'::jsonb), $9 ) -RETURNING id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at +RETURNING id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at, cache_read_input_tokens, cache_write_input_tokens ` type InsertAIBridgeTokenUsageParams struct { - ID 
uuid.UUID `db:"id" json:"id"` - InterceptionID uuid.UUID `db:"interception_id" json:"interception_id"` - ProviderResponseID string `db:"provider_response_id" json:"provider_response_id"` - InputTokens int64 `db:"input_tokens" json:"input_tokens"` - OutputTokens int64 `db:"output_tokens" json:"output_tokens"` - Metadata json.RawMessage `db:"metadata" json:"metadata"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + ID uuid.UUID `db:"id" json:"id"` + InterceptionID uuid.UUID `db:"interception_id" json:"interception_id"` + ProviderResponseID string `db:"provider_response_id" json:"provider_response_id"` + InputTokens int64 `db:"input_tokens" json:"input_tokens"` + OutputTokens int64 `db:"output_tokens" json:"output_tokens"` + CacheReadInputTokens int64 `db:"cache_read_input_tokens" json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `db:"cache_write_input_tokens" json:"cache_write_input_tokens"` + Metadata json.RawMessage `db:"metadata" json:"metadata"` + CreatedAt time.Time `db:"created_at" json:"created_at"` } func (q *sqlQuerier) InsertAIBridgeTokenUsage(ctx context.Context, arg InsertAIBridgeTokenUsageParams) (AIBridgeTokenUsage, error) { @@ -805,6 +796,8 @@ func (q *sqlQuerier) InsertAIBridgeTokenUsage(ctx context.Context, arg InsertAIB arg.ProviderResponseID, arg.InputTokens, arg.OutputTokens, + arg.CacheReadInputTokens, + arg.CacheWriteInputTokens, arg.Metadata, arg.CreatedAt, ) @@ -817,6 +810,8 @@ func (q *sqlQuerier) InsertAIBridgeTokenUsage(ctx context.Context, arg InsertAIB &i.OutputTokens, &i.Metadata, &i.CreatedAt, + &i.CacheReadInputTokens, + &i.CacheWriteInputTokens, ) return i, err } @@ -1448,6 +1443,8 @@ SELECT sp.threads, COALESCE(st.input_tokens, 0)::bigint AS input_tokens, COALESCE(st.output_tokens, 0)::bigint AS output_tokens, + COALESCE(st.cache_read_input_tokens, 0)::bigint AS cache_read_input_tokens, + COALESCE(st.cache_write_input_tokens, 0)::bigint AS cache_write_input_tokens, COALESCE(slp.prompt, '') AS last_prompt FROM 
session_page sp @@ -1469,7 +1466,9 @@ LEFT JOIN LATERAL ( -- Aggregate tokens only for this session's interceptions. SELECT COALESCE(SUM(tu.input_tokens), 0)::bigint AS input_tokens, - COALESCE(SUM(tu.output_tokens), 0)::bigint AS output_tokens + COALESCE(SUM(tu.output_tokens), 0)::bigint AS output_tokens, + COALESCE(SUM(tu.cache_read_input_tokens), 0)::bigint AS cache_read_input_tokens, + COALESCE(SUM(tu.cache_write_input_tokens), 0)::bigint AS cache_write_input_tokens FROM aibridge_token_usages tu WHERE tu.interception_id = ANY(sr.interception_ids) ) st ON true @@ -1501,21 +1500,23 @@ type ListAIBridgeSessionsParams struct { } type ListAIBridgeSessionsRow struct { - SessionID string `db:"session_id" json:"session_id"` - UserID uuid.UUID `db:"user_id" json:"user_id"` - UserUsername string `db:"user_username" json:"user_username"` - UserName string `db:"user_name" json:"user_name"` - UserAvatarUrl string `db:"user_avatar_url" json:"user_avatar_url"` - Providers []string `db:"providers" json:"providers"` - Models []string `db:"models" json:"models"` - Client string `db:"client" json:"client"` - Metadata json.RawMessage `db:"metadata" json:"metadata"` - StartedAt time.Time `db:"started_at" json:"started_at"` - EndedAt time.Time `db:"ended_at" json:"ended_at"` - Threads int64 `db:"threads" json:"threads"` - InputTokens int64 `db:"input_tokens" json:"input_tokens"` - OutputTokens int64 `db:"output_tokens" json:"output_tokens"` - LastPrompt string `db:"last_prompt" json:"last_prompt"` + SessionID string `db:"session_id" json:"session_id"` + UserID uuid.UUID `db:"user_id" json:"user_id"` + UserUsername string `db:"user_username" json:"user_username"` + UserName string `db:"user_name" json:"user_name"` + UserAvatarUrl string `db:"user_avatar_url" json:"user_avatar_url"` + Providers []string `db:"providers" json:"providers"` + Models []string `db:"models" json:"models"` + Client string `db:"client" json:"client"` + Metadata json.RawMessage `db:"metadata" json:"metadata"` + 
StartedAt time.Time `db:"started_at" json:"started_at"` + EndedAt time.Time `db:"ended_at" json:"ended_at"` + Threads int64 `db:"threads" json:"threads"` + InputTokens int64 `db:"input_tokens" json:"input_tokens"` + OutputTokens int64 `db:"output_tokens" json:"output_tokens"` + CacheReadInputTokens int64 `db:"cache_read_input_tokens" json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `db:"cache_write_input_tokens" json:"cache_write_input_tokens"` + LastPrompt string `db:"last_prompt" json:"last_prompt"` } // Returns paginated sessions with aggregated metadata, token counts, and @@ -1560,6 +1561,8 @@ func (q *sqlQuerier) ListAIBridgeSessions(ctx context.Context, arg ListAIBridgeS &i.Threads, &i.InputTokens, &i.OutputTokens, + &i.CacheReadInputTokens, + &i.CacheWriteInputTokens, &i.LastPrompt, ); err != nil { return nil, err @@ -1577,7 +1580,7 @@ func (q *sqlQuerier) ListAIBridgeSessions(ctx context.Context, arg ListAIBridgeS const listAIBridgeTokenUsagesByInterceptionIDs = `-- name: ListAIBridgeTokenUsagesByInterceptionIDs :many SELECT - id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at + id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, created_at, cache_read_input_tokens, cache_write_input_tokens FROM aibridge_token_usages WHERE @@ -1604,6 +1607,8 @@ func (q *sqlQuerier) ListAIBridgeTokenUsagesByInterceptionIDs(ctx context.Contex &i.OutputTokens, &i.Metadata, &i.CreatedAt, + &i.CacheReadInputTokens, + &i.CacheWriteInputTokens, ); err != nil { return nil, err } diff --git a/coderd/database/queries/aibridge.sql b/coderd/database/queries/aibridge.sql index 900cdab655..69875b1a71 100644 --- a/coderd/database/queries/aibridge.sql +++ b/coderd/database/queries/aibridge.sql @@ -31,9 +31,9 @@ WHERE aibridge_interceptions.id = ( -- name: InsertAIBridgeTokenUsage :one INSERT INTO aibridge_token_usages ( - id, interception_id, provider_response_id, input_tokens, output_tokens, metadata, 
created_at + id, interception_id, provider_response_id, input_tokens, output_tokens, cache_read_input_tokens, cache_write_input_tokens, metadata, created_at ) VALUES ( - @id, @interception_id, @provider_response_id, @input_tokens, @output_tokens, COALESCE(@metadata::jsonb, '{}'::jsonb), @created_at + @id, @interception_id, @provider_response_id, @input_tokens, @output_tokens, @cache_read_input_tokens, @cache_write_input_tokens, COALESCE(@metadata::jsonb, '{}'::jsonb), @created_at ) RETURNING *; @@ -299,21 +299,8 @@ token_aggregates AS ( SELECT COALESCE(SUM(tu.input_tokens), 0) AS token_count_input, COALESCE(SUM(tu.output_tokens), 0) AS token_count_output, - -- Cached tokens are stored in metadata JSON, extract if available. - -- Read tokens may be stored in: - -- - cache_read_input (Anthropic) - -- - prompt_cached (OpenAI) - COALESCE(SUM( - COALESCE((tu.metadata->>'cache_read_input')::bigint, 0) + - COALESCE((tu.metadata->>'prompt_cached')::bigint, 0) - ), 0) AS token_count_cached_read, - -- Written tokens may be stored in: - -- - cache_creation_input (Anthropic) - -- Note that cache_ephemeral_5m_input and cache_ephemeral_1h_input on - -- Anthropic are included in the cache_creation_input field. 
- COALESCE(SUM( - COALESCE((tu.metadata->>'cache_creation_input')::bigint, 0) - ), 0) AS token_count_cached_written, + COALESCE(SUM(tu.cache_read_input_tokens), 0) AS token_count_cached_read, + COALESCE(SUM(tu.cache_write_input_tokens), 0) AS token_count_cached_written, COUNT(tu.id) AS token_usages_count FROM interceptions_in_range i @@ -552,6 +539,8 @@ SELECT sp.threads, COALESCE(st.input_tokens, 0)::bigint AS input_tokens, COALESCE(st.output_tokens, 0)::bigint AS output_tokens, + COALESCE(st.cache_read_input_tokens, 0)::bigint AS cache_read_input_tokens, + COALESCE(st.cache_write_input_tokens, 0)::bigint AS cache_write_input_tokens, COALESCE(slp.prompt, '') AS last_prompt FROM session_page sp @@ -573,7 +562,9 @@ LEFT JOIN LATERAL ( -- Aggregate tokens only for this session's interceptions. SELECT COALESCE(SUM(tu.input_tokens), 0)::bigint AS input_tokens, - COALESCE(SUM(tu.output_tokens), 0)::bigint AS output_tokens + COALESCE(SUM(tu.output_tokens), 0)::bigint AS output_tokens, + COALESCE(SUM(tu.cache_read_input_tokens), 0)::bigint AS cache_read_input_tokens, + COALESCE(SUM(tu.cache_write_input_tokens), 0)::bigint AS cache_write_input_tokens FROM aibridge_token_usages tu WHERE tu.interception_id = ANY(sr.interception_ids) ) st ON true diff --git a/coderd/telemetry/telemetry_test.go b/coderd/telemetry/telemetry_test.go index f679dfee9d..005839cc26 100644 --- a/coderd/telemetry/telemetry_test.go +++ b/coderd/telemetry/telemetry_test.go @@ -223,10 +223,12 @@ func TestTelemetry(t *testing.T) { StartedAt: previousAIBridgeInterceptionPeriod.Add(-30 * time.Minute), }, nil) _ = dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: aiBridgeInterception1.ID, - InputTokens: 100, - OutputTokens: 200, - Metadata: json.RawMessage(`{"cache_read_input":300,"cache_creation_input":400}`), + InterceptionID: aiBridgeInterception1.ID, + InputTokens: 100, + OutputTokens: 200, + CacheReadInputTokens: 300, + CacheWriteInputTokens: 400, + Metadata: 
json.RawMessage(`{"cache_read_input":300,"cache_creation_input":400}`), }) _ = dbgen.AIBridgeUserPrompt(t, db, database.InsertAIBridgeUserPromptParams{ InterceptionID: aiBridgeInterception1.ID, @@ -248,10 +250,12 @@ func TestTelemetry(t *testing.T) { StartedAt: aiBridgeInterception1.StartedAt, }, nil) _ = dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: aiBridgeInterception2.ID, - InputTokens: 100, - OutputTokens: 200, - Metadata: json.RawMessage(`{"cache_read_input":300,"cache_creation_input":400}`), + InterceptionID: aiBridgeInterception2.ID, + InputTokens: 100, + OutputTokens: 200, + CacheReadInputTokens: 300, + CacheWriteInputTokens: 400, + Metadata: json.RawMessage(`{"cache_read_input":300,"cache_creation_input":400}`), }) _ = dbgen.AIBridgeUserPrompt(t, db, database.InsertAIBridgeUserPromptParams{ InterceptionID: aiBridgeInterception2.ID, diff --git a/codersdk/aibridge.go b/codersdk/aibridge.go index 1a6d68569b..4fe742e5ff 100644 --- a/codersdk/aibridge.go +++ b/codersdk/aibridge.go @@ -28,13 +28,15 @@ type AIBridgeInterception struct { } type AIBridgeTokenUsage struct { - ID uuid.UUID `json:"id" format:"uuid"` - InterceptionID uuid.UUID `json:"interception_id" format:"uuid"` - ProviderResponseID string `json:"provider_response_id"` - InputTokens int64 `json:"input_tokens"` - OutputTokens int64 `json:"output_tokens"` - Metadata map[string]any `json:"metadata"` - CreatedAt time.Time `json:"created_at" format:"date-time"` + ID uuid.UUID `json:"id" format:"uuid"` + InterceptionID uuid.UUID `json:"interception_id" format:"uuid"` + ProviderResponseID string `json:"provider_response_id"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + CacheReadInputTokens int64 `json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `json:"cache_write_input_tokens"` + Metadata map[string]any `json:"metadata"` + CreatedAt time.Time `json:"created_at" format:"date-time"` } type AIBridgeUserPrompt 
struct { @@ -79,8 +81,10 @@ type AIBridgeSession struct { } type AIBridgeSessionTokenUsageSummary struct { - InputTokens int64 `json:"input_tokens"` - OutputTokens int64 `json:"output_tokens"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + CacheReadInputTokens int64 `json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `json:"cache_write_input_tokens"` } type AIBridgeListSessionsResponse struct { @@ -107,12 +111,13 @@ type AIBridgeSessionThreadsResponse struct { } // AIBridgeSessionThreadsTokenUsage represents aggregated token usage -// with metadata containing provider-specific fields like -// cache_creation_input, cache_read_input, etc. +// with metadata containing provider-specific fields. type AIBridgeSessionThreadsTokenUsage struct { - InputTokens int64 `json:"input_tokens"` - OutputTokens int64 `json:"output_tokens"` - Metadata map[string]any `json:"metadata"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + CacheReadInputTokens int64 `json:"cache_read_input_tokens"` + CacheWriteInputTokens int64 `json:"cache_write_input_tokens"` + Metadata map[string]any `json:"metadata"` } // AIBridgeThread represents a single thread within a session. 
diff --git a/docs/reference/api/aibridge.md b/docs/reference/api/aibridge.md index e7a29afcf9..cdfff56175 100644 --- a/docs/reference/api/aibridge.md +++ b/docs/reference/api/aibridge.md @@ -84,6 +84,8 @@ curl -X GET http://coder-server:8080/api/v2/aibridge/interceptions \ "started_at": "2019-08-24T14:15:22Z", "token_usages": [ { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "created_at": "2019-08-24T14:15:22Z", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "input_tokens": 0, @@ -226,6 +228,8 @@ curl -X GET http://coder-server:8080/api/v2/aibridge/sessions \ "started_at": "2019-08-24T14:15:22Z", "threads": 0, "token_usage_summary": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "output_tokens": 0 } @@ -303,6 +307,8 @@ curl -X GET http://coder-server:8080/api/v2/aibridge/sessions/{session_id} \ } ], "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -335,6 +341,8 @@ curl -X GET http://coder-server:8080/api/v2/aibridge/sessions/{session_id} \ "provider": "string", "started_at": "2019-08-24T14:15:22Z", "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -345,6 +353,8 @@ curl -X GET http://coder-server:8080/api/v2/aibridge/sessions/{session_id} \ } ], "token_usage_summary": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index 276360250d..c49c941e06 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -350,6 +350,8 @@ } ], "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -504,6 +506,8 @@ "started_at": "2019-08-24T14:15:22Z", "token_usages": [ { + "cache_read_input_tokens": 0, + 
"cache_write_input_tokens": 0, "created_at": "2019-08-24T14:15:22Z", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "input_tokens": 0, @@ -595,6 +599,8 @@ "started_at": "2019-08-24T14:15:22Z", "token_usages": [ { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "created_at": "2019-08-24T14:15:22Z", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "input_tokens": 0, @@ -679,6 +685,8 @@ "started_at": "2019-08-24T14:15:22Z", "threads": 0, "token_usage_summary": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "output_tokens": 0 } @@ -787,6 +795,8 @@ "started_at": "2019-08-24T14:15:22Z", "threads": 0, "token_usage_summary": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "output_tokens": 0 } @@ -847,6 +857,8 @@ } ], "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -879,6 +891,8 @@ "provider": "string", "started_at": "2019-08-24T14:15:22Z", "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -889,6 +903,8 @@ } ], "token_usage_summary": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -921,6 +937,8 @@ ```json { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -932,17 +950,21 @@ ### Properties -| Name | Type | Required | Restrictions | Description | -|--------------------|---------|----------|--------------|-------------| -| `input_tokens` | integer | false | | | -| `metadata` | object | false | | | -| » `[any property]` | any | false | | | -| `output_tokens` | integer | false | | | +| Name | Type | Required | Restrictions | Description | +|----------------------------|---------|----------|--------------|-------------| +| `cache_read_input_tokens` | integer | false | | | +| 
`cache_write_input_tokens` | integer | false | | | +| `input_tokens` | integer | false | | | +| `metadata` | object | false | | | +| » `[any property]` | any | false | | | +| `output_tokens` | integer | false | | | ## codersdk.AIBridgeSessionTokenUsageSummary ```json { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "output_tokens": 0 } @@ -950,10 +972,12 @@ ### Properties -| Name | Type | Required | Restrictions | Description | -|-----------------|---------|----------|--------------|-------------| -| `input_tokens` | integer | false | | | -| `output_tokens` | integer | false | | | +| Name | Type | Required | Restrictions | Description | +|----------------------------|---------|----------|--------------|-------------| +| `cache_read_input_tokens` | integer | false | | | +| `cache_write_input_tokens` | integer | false | | | +| `input_tokens` | integer | false | | | +| `output_tokens` | integer | false | | | ## codersdk.AIBridgeThread @@ -968,6 +992,8 @@ } ], "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -1000,6 +1026,8 @@ "provider": "string", "started_at": "2019-08-24T14:15:22Z", "token_usage": { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "input_tokens": 0, "metadata": { "property1": null, @@ -1027,6 +1055,8 @@ ```json { + "cache_read_input_tokens": 0, + "cache_write_input_tokens": 0, "created_at": "2019-08-24T14:15:22Z", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "input_tokens": 0, @@ -1042,16 +1072,18 @@ ### Properties -| Name | Type | Required | Restrictions | Description | -|------------------------|---------|----------|--------------|-------------| -| `created_at` | string | false | | | -| `id` | string | false | | | -| `input_tokens` | integer | false | | | -| `interception_id` | string | false | | | -| `metadata` | object | false | | | -| » `[any property]` | any | false | | | -| `output_tokens` | integer | 
false | | | -| `provider_response_id` | string | false | | | +| Name | Type | Required | Restrictions | Description | +|----------------------------|---------|----------|--------------|-------------| +| `cache_read_input_tokens` | integer | false | | | +| `cache_write_input_tokens` | integer | false | | | +| `created_at` | string | false | | | +| `id` | string | false | | | +| `input_tokens` | integer | false | | | +| `interception_id` | string | false | | | +| `metadata` | object | false | | | +| » `[any property]` | any | false | | | +| `output_tokens` | integer | false | | | +| `provider_response_id` | string | false | | | ## codersdk.AIBridgeToolCall diff --git a/enterprise/aibridged/aibridged_integration_test.go b/enterprise/aibridged/aibridged_integration_test.go index 108b18dac0..43de8398ad 100644 --- a/enterprise/aibridged/aibridged_integration_test.go +++ b/enterprise/aibridged/aibridged_integration_test.go @@ -270,6 +270,7 @@ func TestIntegration(t *testing.T) { require.EqualValues(t, tokens[0].InputTokens, 45) require.EqualValues(t, tokens[0].OutputTokens, 15) require.EqualValues(t, gjson.Get(string(tokens[0].Metadata.RawMessage), "prompt_cached").Int(), 15) + require.EqualValues(t, 15, tokens[0].CacheReadInputTokens) tools, err := db.GetAIBridgeToolUsagesByInterceptionID(ctx, interceptions[0].ID) require.NoError(t, err) diff --git a/enterprise/aibridged/proto/aibridged.pb.go b/enterprise/aibridged/proto/aibridged.pb.go index 68fc425e65..60290759be 100644 --- a/enterprise/aibridged/proto/aibridged.pb.go +++ b/enterprise/aibridged/proto/aibridged.pb.go @@ -293,12 +293,14 @@ type RecordTokenUsageRequest struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - InterceptionId string `protobuf:"bytes,1,opt,name=interception_id,json=interceptionId,proto3" json:"interception_id,omitempty"` // UUID. - MsgId string `protobuf:"bytes,2,opt,name=msg_id,json=msgId,proto3" json:"msg_id,omitempty"` // ID provided by provider. 
- InputTokens int64 `protobuf:"varint,3,opt,name=input_tokens,json=inputTokens,proto3" json:"input_tokens,omitempty"` - OutputTokens int64 `protobuf:"varint,4,opt,name=output_tokens,json=outputTokens,proto3" json:"output_tokens,omitempty"` - Metadata map[string]*anypb.Any `protobuf:"bytes,5,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - CreatedAt *timestamppb.Timestamp `protobuf:"bytes,6,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` + InterceptionId string `protobuf:"bytes,1,opt,name=interception_id,json=interceptionId,proto3" json:"interception_id,omitempty"` // UUID. + MsgId string `protobuf:"bytes,2,opt,name=msg_id,json=msgId,proto3" json:"msg_id,omitempty"` // ID provided by provider. + InputTokens int64 `protobuf:"varint,3,opt,name=input_tokens,json=inputTokens,proto3" json:"input_tokens,omitempty"` + OutputTokens int64 `protobuf:"varint,4,opt,name=output_tokens,json=outputTokens,proto3" json:"output_tokens,omitempty"` + Metadata map[string]*anypb.Any `protobuf:"bytes,5,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + CreatedAt *timestamppb.Timestamp `protobuf:"bytes,6,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` + CacheReadInputTokens int64 `protobuf:"varint,7,opt,name=cache_read_input_tokens,json=cacheReadInputTokens,proto3" json:"cache_read_input_tokens,omitempty"` + CacheWriteInputTokens int64 `protobuf:"varint,8,opt,name=cache_write_input_tokens,json=cacheWriteInputTokens,proto3" json:"cache_write_input_tokens,omitempty"` } func (x *RecordTokenUsageRequest) Reset() { @@ -375,6 +377,20 @@ func (x *RecordTokenUsageRequest) GetCreatedAt() *timestamppb.Timestamp { return nil } +func (x *RecordTokenUsageRequest) GetCacheReadInputTokens() int64 { + if x != nil { + return x.CacheReadInputTokens + } + return 0 +} + +func (x 
*RecordTokenUsageRequest) GetCacheWriteInputTokens() int64 { + if x != nil { + return x.CacheWriteInputTokens + } + return 0 +} + type RecordTokenUsageResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1251,7 +1267,7 @@ var file_enterprise_aibridged_proto_aibridged_proto_rawDesc = []byte{ 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x07, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x41, 0x74, 0x22, 0x21, 0x0a, 0x1f, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0xf9, 0x02, 0x0a, 0x17, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x54, 0x6f, 0x6b, + 0x73, 0x65, 0x22, 0xe9, 0x03, 0x0a, 0x17, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x63, 0x65, @@ -1269,7 +1285,14 @@ var file_enterprise_aibridged_proto_aibridged_proto_rawDesc = []byte{ 0x12, 0x39, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x1a, 0x51, 0x0a, 0x0d, 0x4d, + 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x12, 0x35, 0x0a, 0x17, 0x63, + 0x61, 0x63, 0x68, 0x65, 0x5f, 0x72, 0x65, 0x61, 0x64, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, 0x63, 0x61, + 0x63, 0x68, 0x65, 0x52, 0x65, 0x61, 0x64, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x12, 0x37, 0x0a, 
0x18, 0x63, 0x61, 0x63, 0x68, 0x65, 0x5f, 0x77, 0x72, 0x69, 0x74, + 0x65, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x08, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x15, 0x63, 0x61, 0x63, 0x68, 0x65, 0x57, 0x72, 0x69, 0x74, 0x65, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x1a, 0x51, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2a, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, diff --git a/enterprise/aibridged/proto/aibridged.proto b/enterprise/aibridged/proto/aibridged.proto index fb81300ab2..a260ae8036 100644 --- a/enterprise/aibridged/proto/aibridged.proto +++ b/enterprise/aibridged/proto/aibridged.proto @@ -67,6 +67,8 @@ message RecordTokenUsageRequest { int64 output_tokens = 4; map metadata = 5; google.protobuf.Timestamp created_at = 6; + int64 cache_read_input_tokens = 7; + int64 cache_write_input_tokens = 8; } message RecordTokenUsageResponse {} diff --git a/enterprise/aibridged/translator.go b/enterprise/aibridged/translator.go index 352c0dcd5d..f60416ae3e 100644 --- a/enterprise/aibridged/translator.go +++ b/enterprise/aibridged/translator.go @@ -66,18 +66,20 @@ func (t *recorderTranslation) RecordTokenUsage(ctx context.Context, req *aibridg merged = aibridge.Metadata{} } - // Merge the token usage values into metadata; later we might want to store some of these in their own fields. + // Merge remaining extra token types into metadata. 
for k, v := range req.ExtraTokenTypes { merged[k] = v } _, err := t.client.RecordTokenUsage(ctx, &proto.RecordTokenUsageRequest{ - InterceptionId: req.InterceptionID, - MsgId: req.MsgID, - InputTokens: req.Input, - OutputTokens: req.Output, - Metadata: marshalForProto(merged), - CreatedAt: timestamppb.New(req.CreatedAt), + InterceptionId: req.InterceptionID, + MsgId: req.MsgID, + InputTokens: req.Input, + OutputTokens: req.Output, + CacheReadInputTokens: req.CacheReadInputTokens, + CacheWriteInputTokens: req.CacheWriteInputTokens, + Metadata: marshalForProto(merged), + CreatedAt: timestamppb.New(req.CreatedAt), }) return err } diff --git a/enterprise/aibridgedserver/aibridgedserver.go b/enterprise/aibridgedserver/aibridgedserver.go index b7b9a8dd93..f06be60b3d 100644 --- a/enterprise/aibridgedserver/aibridgedserver.go +++ b/enterprise/aibridgedserver/aibridgedserver.go @@ -247,6 +247,8 @@ func (s *Server) RecordTokenUsage(ctx context.Context, in *proto.RecordTokenUsag slog.F("msg_id", in.GetMsgId()), slog.F("input_tokens", in.GetInputTokens()), slog.F("output_tokens", in.GetOutputTokens()), + slog.F("cache_read_input_tokens", in.GetCacheReadInputTokens()), + slog.F("cache_write_input_tokens", in.GetCacheWriteInputTokens()), slog.F("created_at", in.GetCreatedAt().AsTime()), slog.F("metadata", metadata), ) @@ -258,13 +260,15 @@ func (s *Server) RecordTokenUsage(ctx context.Context, in *proto.RecordTokenUsag } _, err = s.store.InsertAIBridgeTokenUsage(ctx, database.InsertAIBridgeTokenUsageParams{ - ID: uuid.New(), - InterceptionID: intcID, - ProviderResponseID: in.GetMsgId(), - InputTokens: in.GetInputTokens(), - OutputTokens: in.GetOutputTokens(), - Metadata: out, - CreatedAt: in.GetCreatedAt().AsTime(), + ID: uuid.New(), + InterceptionID: intcID, + ProviderResponseID: in.GetMsgId(), + InputTokens: in.GetInputTokens(), + OutputTokens: in.GetOutputTokens(), + CacheReadInputTokens: in.GetCacheReadInputTokens(), + CacheWriteInputTokens: in.GetCacheWriteInputTokens(), + 
Metadata: out, + CreatedAt: in.GetCreatedAt().AsTime(), }) if err != nil { return nil, xerrors.Errorf("insert token usage: %w", err) diff --git a/enterprise/aibridgedserver/aibridgedserver_test.go b/enterprise/aibridgedserver/aibridgedserver_test.go index bb148443b8..e6d621a42f 100644 --- a/enterprise/aibridgedserver/aibridgedserver_test.go +++ b/enterprise/aibridgedserver/aibridgedserver_test.go @@ -852,12 +852,14 @@ func TestRecordTokenUsage(t *testing.T) { { name: "valid token usage", request: &proto.RecordTokenUsageRequest{ - InterceptionId: uuid.NewString(), - MsgId: "msg_123", - InputTokens: 100, - OutputTokens: 200, - Metadata: metadataProto, - CreatedAt: timestamppb.Now(), + InterceptionId: uuid.NewString(), + MsgId: "msg_123", + InputTokens: 100, + OutputTokens: 200, + CacheReadInputTokens: 50, + CacheWriteInputTokens: 10, + Metadata: metadataProto, + CreatedAt: timestamppb.Now(), }, setupMocks: func(t *testing.T, db *dbmock.MockStore, req *proto.RecordTokenUsageRequest) { interceptionID, err := uuid.Parse(req.GetInterceptionId()) @@ -869,17 +871,21 @@ func TestRecordTokenUsage(t *testing.T) { !assert.Equal(t, req.GetMsgId(), p.ProviderResponseID, "provider response ID") || !assert.Equal(t, req.GetInputTokens(), p.InputTokens, "input tokens") || !assert.Equal(t, req.GetOutputTokens(), p.OutputTokens, "output tokens") || + !assert.Equal(t, req.GetCacheReadInputTokens(), p.CacheReadInputTokens, "cache read input tokens") || + !assert.Equal(t, req.GetCacheWriteInputTokens(), p.CacheWriteInputTokens, "cache write input tokens") || !assert.JSONEq(t, metadataJSON, string(p.Metadata), "metadata") || !assert.WithinDuration(t, req.GetCreatedAt().AsTime(), p.CreatedAt, time.Second, "created at") { return false } return true })).Return(database.AIBridgeTokenUsage{ - ID: uuid.New(), - InterceptionID: interceptionID, - ProviderResponseID: req.GetMsgId(), - InputTokens: req.GetInputTokens(), - OutputTokens: req.GetOutputTokens(), + ID: uuid.New(), + InterceptionID: 
interceptionID, + ProviderResponseID: req.GetMsgId(), + InputTokens: req.GetInputTokens(), + OutputTokens: req.GetOutputTokens(), + CacheReadInputTokens: req.GetCacheReadInputTokens(), + CacheWriteInputTokens: req.GetCacheWriteInputTokens(), Metadata: pqtype.NullRawMessage{ RawMessage: json.RawMessage(metadataJSON), Valid: true, @@ -1401,20 +1407,24 @@ func TestStructuredLogging(t *testing.T) { }, recordFn: func(srv *aibridgedserver.Server, ctx context.Context, intcID uuid.UUID) error { _, err := srv.RecordTokenUsage(ctx, &proto.RecordTokenUsageRequest{ - InterceptionId: intcID.String(), - MsgId: "msg_123", - InputTokens: 100, - OutputTokens: 200, - Metadata: metadataProto, - CreatedAt: timestamppb.Now(), + InterceptionId: intcID.String(), + MsgId: "msg_123", + InputTokens: 100, + OutputTokens: 200, + CacheReadInputTokens: 50, + CacheWriteInputTokens: 10, + Metadata: metadataProto, + CreatedAt: timestamppb.Now(), }) return err }, expectedFields: map[string]any{ - "record_type": "token_usage", - "interception_id": interceptionID.String(), - "input_tokens": float64(100), // JSON numbers are float64. - "output_tokens": float64(200), + "record_type": "token_usage", + "interception_id": interceptionID.String(), + "input_tokens": float64(100), // JSON numbers are float64. + "output_tokens": float64(200), + "cache_read_input_tokens": float64(50), + "cache_write_input_tokens": float64(10), }, }, { diff --git a/enterprise/coderd/aibridge_test.go b/enterprise/coderd/aibridge_test.go index 8f088c3689..718dd4695c 100644 --- a/enterprise/coderd/aibridge_test.go +++ b/enterprise/coderd/aibridge_test.go @@ -1888,12 +1888,14 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { // Add token usage on root with metadata. 
providerRespID := "resp-1" dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: root.ID, - ProviderResponseID: providerRespID, - InputTokens: 100, - OutputTokens: 50, - Metadata: json.RawMessage(`{"cache_read_input": 20, "cache_creation_input": 10}`), - CreatedAt: now, + InterceptionID: root.ID, + ProviderResponseID: providerRespID, + InputTokens: 100, + OutputTokens: 50, + CacheReadInputTokens: 20, + CacheWriteInputTokens: 10, + Metadata: json.RawMessage(`{"cache_read_input": 20, "cache_creation_input": 10}`), + CreatedAt: now, }) // Add two tool usages on root (demonstrates multiple tools per action). @@ -1921,12 +1923,13 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { // Add token usage on child. dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: child.ID, - ProviderResponseID: "resp-2", - InputTokens: 200, - OutputTokens: 100, - Metadata: json.RawMessage(`{"cache_read_input": 30}`), - CreatedAt: now.Add(time.Minute), + InterceptionID: child.ID, + ProviderResponseID: "resp-2", + InputTokens: 200, + OutputTokens: 100, + CacheReadInputTokens: 30, + Metadata: json.RawMessage(`{"cache_read_input": 30}`), + CreatedAt: now.Add(time.Minute), }) // Add another tool usage on child. @@ -1956,9 +1959,11 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { require.Equal(t, "claude-4", thread.Model) require.Equal(t, "anthropic", thread.Provider) - // Thread-level token aggregation. 
+ // Thread-level token aggregation require.EqualValues(t, 300, thread.TokenUsage.InputTokens) require.EqualValues(t, 150, thread.TokenUsage.OutputTokens) + require.EqualValues(t, 50, thread.TokenUsage.CacheReadInputTokens) + require.EqualValues(t, 10, thread.TokenUsage.CacheWriteInputTokens) require.NotEmpty(t, thread.TokenUsage.Metadata) require.EqualValues(t, int64(50), thread.TokenUsage.Metadata["cache_read_input"]) require.EqualValues(t, int64(10), thread.TokenUsage.Metadata["cache_creation_input"]) @@ -2124,14 +2129,16 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { firstThreadID = root.ID } - // Token usage on root: 100 input, 50 output, with cache metadata. + // Token usage on root: 100 input, 50 output, 20 cache read, 5 cache write. dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: root.ID, - ProviderResponseID: "resp-root", - InputTokens: 100, - OutputTokens: 50, - Metadata: json.RawMessage(`{"cache_read_input": 20, "cache_creation_input": 5}`), - CreatedAt: now.Add(offset), + InterceptionID: root.ID, + ProviderResponseID: "resp-root", + InputTokens: 100, + OutputTokens: 50, + CacheReadInputTokens: 20, + CacheWriteInputTokens: 5, + Metadata: json.RawMessage(`{"cache_read_input": 20, "cache_creation_input": 5}`), + CreatedAt: now.Add(offset), }) // Add a child interception with its own token usage. @@ -2146,14 +2153,15 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { ThreadParentInterceptionID: uuid.NullUUID{UUID: root.ID, Valid: true}, }, &childEndedAt) - // Token usage on child: 200 input, 100 output, with cache metadata. + // Token usage on child: 200 input, 100 output, 30 cache read. 
dbgen.AIBridgeTokenUsage(t, db, database.InsertAIBridgeTokenUsageParams{ - InterceptionID: child.ID, - ProviderResponseID: "resp-child", - InputTokens: 200, - OutputTokens: 100, - Metadata: json.RawMessage(`{"cache_read_input": 30}`), - CreatedAt: now.Add(offset + 15*time.Minute), + InterceptionID: child.ID, + ProviderResponseID: "resp-child", + InputTokens: 200, + OutputTokens: 100, + CacheReadInputTokens: 30, + Metadata: json.RawMessage(`{"cache_read_input": 30}`), + CreatedAt: now.Add(offset + 15*time.Minute), }) } @@ -2173,6 +2181,10 @@ func TestAIBridgeGetSessionThreads(t *testing.T) { require.EqualValues(t, 900, res.TokenUsageSummary.InputTokens) require.EqualValues(t, 450, res.TokenUsageSummary.OutputTokens) + // Session-level cache tokens: 3 * (root 20 + child 30) = 150 read, + // 3 * root 5 = 15 write. + require.EqualValues(t, 150, res.TokenUsageSummary.CacheReadInputTokens) + require.EqualValues(t, 15, res.TokenUsageSummary.CacheWriteInputTokens) // Session-level metadata must aggregate across all 3 threads: // cache_read_input: 3 * (root 20 + child 30) = 150 // cache_creation_input: 3 * (root 5) = 15 diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 9e8b8c3a77..ba5ad299a1 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -164,12 +164,13 @@ export interface AIBridgeSessionThreadsResponse { // From codersdk/aibridge.go /** * AIBridgeSessionThreadsTokenUsage represents aggregated token usage - * with metadata containing provider-specific fields like - * cache_creation_input, cache_read_input, etc. + * with metadata containing provider-specific fields. 
*/ export interface AIBridgeSessionThreadsTokenUsage { readonly input_tokens: number; readonly output_tokens: number; + readonly cache_read_input_tokens: number; + readonly cache_write_input_tokens: number; // empty interface{} type, falling back to unknown readonly metadata: Record; } @@ -178,6 +179,8 @@ export interface AIBridgeSessionThreadsTokenUsage { export interface AIBridgeSessionTokenUsageSummary { readonly input_tokens: number; readonly output_tokens: number; + readonly cache_read_input_tokens: number; + readonly cache_write_input_tokens: number; } // From codersdk/aibridge.go @@ -203,6 +206,8 @@ export interface AIBridgeTokenUsage { readonly provider_response_id: string; readonly input_tokens: number; readonly output_tokens: number; + readonly cache_read_input_tokens: number; + readonly cache_write_input_tokens: number; // empty interface{} type, falling back to unknown readonly metadata: Record; readonly created_at: string; diff --git a/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsPageView.stories.tsx b/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsPageView.stories.tsx index d4c6eeb40e..0c5f021969 100644 --- a/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsPageView.stories.tsx +++ b/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsPageView.stories.tsx @@ -102,6 +102,8 @@ export const MultipleSessions: Story = { token_usage_summary: { input_tokens: 1000 * (i + 1), output_tokens: 300 * (i + 1), + cache_read_input_tokens: 800 * (i + 1), + cache_write_input_tokens: 50 * (i + 1), }, })), }, diff --git a/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsRow.stories.tsx b/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsRow.stories.tsx index 3d8f6298ab..1407977398 100644 --- a/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsRow.stories.tsx +++ b/site/src/pages/AIBridgePage/ListSessionsPage/ListSessionsRow.stories.tsx @@ -74,6 +74,8 @@ export const LargeTokenCounts: Story = { token_usage_summary: 
{ input_tokens: 198_000, output_tokens: 32_000, + cache_read_input_tokens: 150_000, + cache_write_input_tokens: 12_000, }, }, }, diff --git a/site/src/pages/AIBridgePage/SessionThreadsPage/SessionTimeline/SessionTimeline.stories.tsx b/site/src/pages/AIBridgePage/SessionThreadsPage/SessionTimeline/SessionTimeline.stories.tsx index e07d543d3b..ed858fd8a8 100644 --- a/site/src/pages/AIBridgePage/SessionThreadsPage/SessionTimeline/SessionTimeline.stories.tsx +++ b/site/src/pages/AIBridgePage/SessionThreadsPage/SessionTimeline/SessionTimeline.stories.tsx @@ -15,6 +15,8 @@ const mockThread: AIBridgeThread = { token_usage: { input_tokens: 1240, output_tokens: 320, + cache_read_input_tokens: 900, + cache_write_input_tokens: 140, metadata: { cache_read_input_tokens: 900 }, }, agentic_actions: [ @@ -23,6 +25,8 @@ const mockThread: AIBridgeThread = { token_usage: { input_tokens: 620, output_tokens: 160, + cache_read_input_tokens: 450, + cache_write_input_tokens: 70, metadata: {}, }, thinking: [ @@ -59,6 +63,8 @@ const mockThreadLong: AIBridgeThread = { token_usage: { input_tokens: 8500, output_tokens: 3200, + cache_read_input_tokens: 6000, + cache_write_input_tokens: 2000, metadata: { cache_read_input_tokens: 6000, cache_creation_input_tokens: 2000, @@ -67,7 +73,13 @@ const mockThreadLong: AIBridgeThread = { agentic_actions: [ { model: "claude-opus-4-6", - token_usage: { input_tokens: 2800, output_tokens: 1100, metadata: {} }, + token_usage: { + input_tokens: 2800, + output_tokens: 1100, + cache_read_input_tokens: 1800, + cache_write_input_tokens: 500, + metadata: {}, + }, thinking: [], tool_calls: [ { diff --git a/site/src/testHelpers/entities.ts b/site/src/testHelpers/entities.ts index b0b141465d..b441585e1c 100644 --- a/site/src/testHelpers/entities.ts +++ b/site/src/testHelpers/entities.ts @@ -5234,6 +5234,8 @@ export const MockInterception: TypesGen.AIBridgeInterception = { provider_response_id: "res_1234567890", input_tokens: 5, output_tokens: 1, + 
cache_read_input_tokens: 3, + cache_write_input_tokens: 1, metadata: {}, created_at: "2022-05-17T17:39:01.382927298Z", }, @@ -5310,6 +5312,8 @@ export const MockSession: TypesGen.AIBridgeSession = { token_usage_summary: { input_tokens: 1234, output_tokens: 4321, + cache_read_input_tokens: 980, + cache_write_input_tokens: 120, }, last_prompt: "But *can* I really fix it?", };