mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: add ai_model_prices table (#24932)
# Summary Implements https://linear.app/codercom/issue/AIGOV-282/add-ai-model-price-table-and-seed-generator This PR lays the groundwork for AI Bridge cost controls (per the AI Governance RFC). It adds the foundation needed for future cost tracking: a place to store per-model token prices, a way to keep those prices in sync with upstream pricing data, and a startup mechanism that ensures every deployment has prices loaded before AI Bridge starts processing requests. The price data comes from [models.dev](https://models.dev/), a community-maintained catalogue of AI provider pricing. A generator script fetches the latest prices, filters to Anthropic and OpenAI for now, and produces a seed file checked into the repository. On every server startup the seed is applied to the database, so new releases automatically pick up any price corrections that landed since the previous one. Existing rows are overwritten with the latest prices; rows for models no longer in the seed are left untouched. # Batching the AI model price seed: three approaches Context: at server startup we seed the `ai_model_prices` table from an embedded JSON price book (~70 rows today, will grow as we add providers, potentially 4000+). Each row is: ```text (provider, model, input_price, output_price, cache_read_price, cache_write_price) ``` Any of the four price columns can be: - `NULL` → “price unknown for this dimension” - explicit `0` → “free” The batch must be an UPSERT so re-running is idempotent and existing rows pick up new prices. We considered three implementations. --- ## Approach 1 — Per-row UPSERT in a Go loop ```go for _, row := range rows { if err := db.UpsertAIModelPrice(ctx, database.UpsertAIModelPriceParams{ Provider: row.Provider, Model: row.Model, InputPrice: nullInt64(row.InputPrice), // ... }); err != nil { return err } } ``` ### Pros - Trivial. - NULL handling falls out naturally from `sql.NullInt64`. ### Cons - `N` round-trips per seed. 
- With ~70 rows that means ~70 statement executions on every startup, even inside a transaction. - Doesn't scale gracefully as the price book grows, potentially 4000+. --- ## Approach 2 — `UNNEST` with parallel arrays Pass each column as a separate Go slice. Postgres unnests them in parallel into a virtual table, then `INSERT ... SELECT`. ```sql INSERT INTO ai_model_prices ( provider, model, input_price, output_price, cache_read_price, cache_write_price ) SELECT UNNEST(@providers::text[]), UNNEST(@models::text[]), NULLIF(UNNEST(@input_prices::bigint[]), -1), NULLIF(UNNEST(@output_prices::bigint[]), -1), NULLIF(UNNEST(@cache_read_prices::bigint[]), -1), NULLIF(UNNEST(@cache_write_prices::bigint[]), -1) ON CONFLICT (provider, model) DO UPDATE SET input_price = EXCLUDED.input_price, output_price = EXCLUDED.output_price, cache_read_price = EXCLUDED.cache_read_price, cache_write_price = EXCLUDED.cache_write_price, updated_at = NOW(); ``` Go side: flatten rows into six parallel slices. Use a sentinel (`-1`) for “missing”, since `lib/pq` can't encode `NULL` into a `bigint[]` element. ```go providers := make([]string, len(rows)) models := make([]string, len(rows)) inputs := make([]int64, len(rows)) outputs := make([]int64, len(rows)) cacheR := make([]int64, len(rows)) cacheW := make([]int64, len(rows)) for i, r := range rows { providers[i] = r.Provider models[i] = r.Model inputs[i] = -1 if r.InputPrice != nil { inputs[i] = *r.InputPrice } outputs[i] = -1 if r.OutputPrice != nil { outputs[i] = *r.OutputPrice } cacheR[i] = -1 if r.CacheReadPrice != nil { cacheR[i] = *r.CacheReadPrice } cacheW[i] = -1 if r.CacheWritePrice != nil { cacheW[i] = *r.CacheWritePrice } } return db.UpsertAIModelPrices(ctx, database.UpsertAIModelPricesParams{ Providers: providers, Models: models, InputPrices: inputs, OutputPrices: outputs, CacheReadPrices: cacheR, CacheWritePrices: cacheW, }) ``` ### Pros - Single round-trip. 
### Cons - The generated `sqlc` params become plain `[]int64`, which can't represent `NULL`. --- ## Approach 3 — `jsonb_array_elements` over a single `@seed::jsonb` (chosen) Pass the raw seed JSON as one parameter; let Postgres expand and parse it. ```sql INSERT INTO ai_model_prices ( provider, model, input_price, output_price, cache_read_price, cache_write_price ) SELECT elem->>'provider', elem->>'model', (elem->>'input_price')::bigint, (elem->>'output_price')::bigint, (elem->>'cache_read_price')::bigint, (elem->>'cache_write_price')::bigint FROM jsonb_array_elements(@seed::jsonb) AS elem ON CONFLICT (provider, model) DO UPDATE SET input_price = EXCLUDED.input_price, output_price = EXCLUDED.output_price, cache_read_price = EXCLUDED.cache_read_price, cache_write_price = EXCLUDED.cache_write_price, updated_at = NOW(); ``` Go side reduces to: ```go return db.UpsertAIModelPrices(ctx, seedJSON) ``` ### Pros - Single round-trip. - NULLs fall out naturally: - `(elem->>'cache_write_price')::bigint` becomes `NULL` - no sentinels - The seed is already JSON: - Existing precedent: - `jsonb_array_elements` is already used elsewhere in the codebase ### Cons - Less type-safe at the SQL boundary than `UNNEST` - Slightly less standard than `UNNEST` - Readers need familiarity with: - `jsonb_array_elements` - `->>` extraction syntax - Postgres pays JSON parse cost - negligible at our scale --- --- # Decision We picked Approach 3. It collapses the round-trips like `UNNEST` does, but without: - nullable-array workarounds - sentinel values
This commit is contained in:
committed by
GitHub
parent
638e2220e9
commit
4124d1137d
@@ -0,0 +1,209 @@
|
||||
// aibridgepricesgen fetches model pricing from models.dev and writes a JSON
|
||||
// seed file consumable by the AI Bridge cost-control loader. Output is sorted
|
||||
// by (provider, model) so regenerations produce minimal diffs.
|
||||
//
|
||||
// Run via the gen/aibridge-prices Make target. Kept out of `make gen` because
|
||||
// the output depends on live upstream data; refreshing prices should land in
|
||||
// dedicated, reviewable commits rather than appearing as drift on unrelated
|
||||
// gen runs.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// sourceURL is the models.dev endpoint the generator downloads prices from.
const sourceURL = "https://models.dev/api.json"

// fetchTimeout is the deadline placed on the context used for the upstream
// request.
const fetchTimeout = 30 * time.Second

// maxBodyBytes caps how much of the upstream body is read. The current
// api.json is ~2 MiB, so 100 MiB is purely defensive: it stops a misbehaving
// upstream from consuming arbitrary memory on developer or CI machines.
// io.LimitReader truncates silently at the cap, so an oversized body shows up
// as a JSON parse error rather than a dedicated "too large" error.
const maxBodyBytes = 100 * (1 << 20)

// supportedProviders is the set of provider IDs whose prices are emitted.
// Appending an ID here is all that is needed for the next regeneration to
// include that provider.
var supportedProviders = []string{
	"anthropic",
	"openai",
}
|
||||
|
||||
// upstreamProvider is the subset of a models.dev per-provider entry we read.
|
||||
type upstreamProvider struct {
|
||||
Models map[string]upstreamModel `json:"models"`
|
||||
}
|
||||
|
||||
type upstreamModel struct {
|
||||
Cost *upstreamCost `json:"cost"`
|
||||
}
|
||||
|
||||
// upstreamCost mirrors a model's `cost` object. Every field is a pointer so
// that a key upstream left out (nil) remains distinguishable from a key that
// is present with the value zero.
type upstreamCost struct {
	Input      *float64 `json:"input"`
	Output     *float64 `json:"output"`
	CacheRead  *float64 `json:"cache_read"`
	CacheWrite *float64 `json:"cache_write"`
}

// hasPricing reports whether at least one of the four prices is populated.
// It is safe to call on a nil receiver (the result is false), so callers can
// pass m.Cost straight through without checking it first.
func (c *upstreamCost) hasPricing() bool {
	if c == nil {
		return false
	}
	for _, p := range [...]*float64{c.Input, c.Output, c.CacheRead, c.CacheWrite} {
		if p != nil {
			return true
		}
	}
	return false
}
|
||||
|
||||
// priceRow is one entry of the generated seed file. The four price fields are
// pointers so the encoded JSON keeps "upstream did not provide this" (null)
// separate from "explicitly zero" (0).
//
// NOTE: the JSON contract for the price seed lives in three places that must
// stay in sync: the tags here, the corresponding struct in the price seeder,
// and the column extraction in the batch SQL upsert.
type priceRow struct {
	// Provider and Model together identify the row; output is sorted by them.
	Provider string `json:"provider"`
	Model    string `json:"model"`

	// Prices in integer micro-units as produced by toMicros; nil encodes as
	// JSON null.
	InputPrice      *int64 `json:"input_price"`
	OutputPrice     *int64 `json:"output_price"`
	CacheReadPrice  *int64 `json:"cache_read_price"`
	CacheWritePrice *int64 `json:"cache_write_price"`
}
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
_, _ = fmt.Fprintf(os.Stderr, "aibridgepricesgen: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run() error {
|
||||
upstream, err := fetch()
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch %s: %w", sourceURL, err)
|
||||
}
|
||||
rows, err := convert(upstream, supportedProviders)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validate(rows); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := write(os.Stdout, rows); err != nil {
|
||||
return err
|
||||
}
|
||||
_, _ = fmt.Fprintf(os.Stderr, "aibridgepricesgen: wrote %d prices for %d provider(s)\n", len(rows), len(supportedProviders))
|
||||
return nil
|
||||
}
|
||||
|
||||
func fetch() (map[string]upstreamProvider, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), fetchTimeout)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, sourceURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, xerrors.Errorf("status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var data map[string]upstreamProvider
|
||||
if err := json.NewDecoder(io.LimitReader(resp.Body, maxBodyBytes)).Decode(&data); err != nil {
|
||||
return nil, xerrors.Errorf("parse: %w", err)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// convert flattens the upstream map into table-shaped rows for the configured
|
||||
// providers. If any configured provider is absent from the upstream payload,
|
||||
// every missing provider is reported and the function returns an error so the
|
||||
// caller doesn't ship an incomplete seed.
|
||||
func convert(upstream map[string]upstreamProvider, providers []string) ([]priceRow, error) {
|
||||
var (
|
||||
rows []priceRow
|
||||
missing []string
|
||||
)
|
||||
for _, providerID := range providers {
|
||||
provider, ok := upstream[providerID]
|
||||
if !ok || len(provider.Models) == 0 {
|
||||
missing = append(missing, providerID)
|
||||
continue
|
||||
}
|
||||
for modelID, m := range provider.Models {
|
||||
if !m.Cost.hasPricing() {
|
||||
continue
|
||||
}
|
||||
rows = append(rows, priceRow{
|
||||
Provider: providerID,
|
||||
Model: modelID,
|
||||
InputPrice: toMicros(m.Cost.Input),
|
||||
OutputPrice: toMicros(m.Cost.Output),
|
||||
CacheReadPrice: toMicros(m.Cost.CacheRead),
|
||||
CacheWritePrice: toMicros(m.Cost.CacheWrite),
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(missing) > 0 {
|
||||
return nil, xerrors.Errorf("providers missing or empty in upstream: %v", missing)
|
||||
}
|
||||
|
||||
sort.Slice(rows, func(i, j int) bool {
|
||||
if rows[i].Provider != rows[j].Provider {
|
||||
return rows[i].Provider < rows[j].Provider
|
||||
}
|
||||
return rows[i].Model < rows[j].Model
|
||||
})
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
// validate checks invariants on the converted rows. Catches upstream
|
||||
// changes that produce structurally valid but semantically broken seed
|
||||
// data, e.g. a renamed `cost` key that leaves every row with all-null
|
||||
// prices.
|
||||
func validate(rows []priceRow) error {
|
||||
for _, r := range rows {
|
||||
if r.InputPrice != nil || r.OutputPrice != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return xerrors.New("converted rows have no pricing data; upstream schema may have changed")
|
||||
}
|
||||
|
||||
// toMicros scales a price into integer micro-units (1 unit = 1,000,000
// micros). The product is rounded to the nearest integer rather than
// truncated, so binary floating-point error in the multiplication cannot
// shave a micro off the result.
//
// It returns nil when price is nil. A negative price is treated as missing:
// a warning is written to stderr and nil is returned.
func toMicros(price *float64) *int64 {
	if price == nil {
		return nil
	}
	if *price < 0 {
		_, _ = fmt.Fprintf(os.Stderr, "warning: negative price %f, treating as missing\n", *price)
		return nil
	}
	micros := int64(math.Round(*price * 1_000_000))
	return &micros
}
|
||||
|
||||
func write(w io.Writer, rows []priceRow) error {
|
||||
enc := json.NewEncoder(w)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(rows); err != nil {
|
||||
return xerrors.Errorf("encode: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestToMicros(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
in *float64
|
||||
want *int64
|
||||
}{
|
||||
{"missing", nil, nil},
|
||||
{"zero", floatPtr(0), int64Ptr(0)},
|
||||
{"whole", floatPtr(3), int64Ptr(3_000_000)},
|
||||
{"fractional", floatPtr(0.075), int64Ptr(75_000)},
|
||||
{"negative", floatPtr(-1), nil},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := toMicros(tc.in)
|
||||
if tc.want == nil {
|
||||
require.Nil(t, got)
|
||||
return
|
||||
}
|
||||
require.NotNil(t, got)
|
||||
require.Equal(t, *tc.want, *got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvert(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
const upstreamJSON = `{
|
||||
"anthropic": {
|
||||
"models": {
|
||||
"claude-sonnet-4-7": {
|
||||
"cost": {"input": 3, "output": 15, "cache_read": 0.3, "cache_write": 3.75}
|
||||
},
|
||||
"claude-haiku": {
|
||||
"cost": {"input": 0.8, "output": 4}
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai": {
|
||||
"models": {
|
||||
"gpt-4o": {"cost": {"input": 2.5, "output": 10, "cache_read": 1.25}},
|
||||
"gpt-no-prices": {}
|
||||
}
|
||||
},
|
||||
"alibaba": {
|
||||
"models": {
|
||||
"should-be-ignored": {"cost": {"input": 1, "output": 1}}
|
||||
}
|
||||
}
|
||||
}`
|
||||
|
||||
var upstream map[string]upstreamProvider
|
||||
require.NoError(t, json.Unmarshal([]byte(upstreamJSON), &upstream))
|
||||
|
||||
rows, err := convert(upstream, []string{"anthropic", "openai"})
|
||||
require.NoError(t, err)
|
||||
|
||||
// alibaba is dropped (not a supported provider) and gpt-no-prices is
|
||||
// dropped (no per-token pricing), leaving three priced rows.
|
||||
require.Len(t, rows, 3)
|
||||
|
||||
// Sorted (provider, model).
|
||||
require.Equal(t, "anthropic", rows[0].Provider)
|
||||
require.Equal(t, "claude-haiku", rows[0].Model)
|
||||
require.Equal(t, "anthropic", rows[1].Provider)
|
||||
require.Equal(t, "claude-sonnet-4-7", rows[1].Model)
|
||||
require.Equal(t, "openai", rows[2].Provider)
|
||||
require.Equal(t, "gpt-4o", rows[2].Model)
|
||||
|
||||
// All four prices populated for Anthropic Sonnet.
|
||||
sonnet := rows[1]
|
||||
require.Equal(t, int64(3_000_000), *sonnet.InputPrice)
|
||||
require.Equal(t, int64(15_000_000), *sonnet.OutputPrice)
|
||||
require.Equal(t, int64(300_000), *sonnet.CacheReadPrice)
|
||||
require.Equal(t, int64(3_750_000), *sonnet.CacheWritePrice)
|
||||
|
||||
// Missing keys stay nil for OpenAI gpt-4o.
|
||||
gpt := rows[2]
|
||||
require.Equal(t, int64(2_500_000), *gpt.InputPrice)
|
||||
require.Equal(t, int64(10_000_000), *gpt.OutputPrice)
|
||||
require.Equal(t, int64(1_250_000), *gpt.CacheReadPrice)
|
||||
require.Nil(t, gpt.CacheWritePrice)
|
||||
}
|
||||
|
||||
// TestConvertMissingProvider covers both shapes of "configured provider has
|
||||
// no usable data": the provider's key is absent from upstream, or the key
|
||||
// exists but its Models map is empty. Both should fail loud so we never
|
||||
// ship a partial seed.
|
||||
func TestConvertMissingProvider(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("Absent", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
upstream := map[string]upstreamProvider{
|
||||
"openai": {Models: map[string]upstreamModel{
|
||||
"gpt-4o": {Cost: &upstreamCost{Input: floatPtr(2.5)}},
|
||||
}},
|
||||
}
|
||||
rows, err := convert(upstream, []string{"anthropic", "openai"})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "anthropic")
|
||||
require.Nil(t, rows)
|
||||
})
|
||||
|
||||
t.Run("EmptyModels", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
upstream := map[string]upstreamProvider{
|
||||
"anthropic": {Models: map[string]upstreamModel{}},
|
||||
"openai": {Models: map[string]upstreamModel{
|
||||
"gpt-4o": {Cost: &upstreamCost{Input: floatPtr(2.5)}},
|
||||
}},
|
||||
}
|
||||
rows, err := convert(upstream, []string{"anthropic", "openai"})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "anthropic")
|
||||
require.Nil(t, rows)
|
||||
})
|
||||
}
|
||||
|
||||
func TestValidate(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("PassesWhenAnyRowHasPricing", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
rows := []priceRow{
|
||||
{Provider: "openai", Model: "no-prices"},
|
||||
{Provider: "anthropic", Model: "claude", InputPrice: int64Ptr(3_000_000)},
|
||||
}
|
||||
require.NoError(t, validate(rows))
|
||||
})
|
||||
|
||||
t.Run("FailsWhenNoRowHasPricing", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Mirrors what would happen if upstream renamed the `cost` key:
|
||||
// Go's decoder silently drops it, every row gets all-null prices,
|
||||
// and convert returns syntactically valid rows with no pricing.
|
||||
rows := []priceRow{
|
||||
{Provider: "anthropic", Model: "claude-x"},
|
||||
{Provider: "openai", Model: "gpt-x"},
|
||||
}
|
||||
err := validate(rows)
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "converted rows have no pricing data")
|
||||
})
|
||||
}
|
||||
|
||||
// floatPtr returns a pointer to a fresh copy of v.
func floatPtr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}

// int64Ptr returns a pointer to a fresh copy of v.
func int64Ptr(v int64) *int64 {
	p := new(int64)
	*p = v
	return p
}
|
||||
Reference in New Issue
Block a user