feat: expose aibridged metrics (#20865)

Upgrades `coder/aibridge` to v0.2.0, which includes https://github.com/coder/aibridge/pull/62. Creates a `prometheus.Registerer` with the prefix `coder_aibridged_` and passes it along to coder/aibridge, which is what actually exposes the metrics. Also includes a side effect of a change described in https://github.com/coder/aibridge/pull/62#discussion_r2550017470.

---------

Signed-off-by: Danny Kopping <danny@coder.com>
2026-06-02 20:48:20 +00:00 · 2025-11-24 18:16:06 +02:00
parent 6882c43b39
commit c6631e1e50
12 changed files with 295 additions and 124 deletions
@@ -104,90 +104,97 @@ deployment. They will always be available from the agent.
 <!-- Code generated by 'make docs/admin/integrations/prometheus.md'. DO NOT EDIT -->
-| Name                                                          | Type      | Description                                                                                                                      | Labels                                                                               |
+| Name                                                          | Type      | Description                                                                                                                                                                                                                   | Labels                                                                               |
-|---------------------------------------------------------------|-----------|----------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|
+|---------------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|
-| `agent_scripts_executed_total`                                | counter   | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts.                                            | `agent_name` `success` `template_name` `username` `workspace_name`                   |
+| `agent_scripts_executed_total`                                | counter   | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts.                                                                                                                                         | `agent_name` `success` `template_name` `username` `workspace_name`                   |
-| `coderd_agents_apps`                                          | gauge     | Agent applications with statuses.                                                                                                | `agent_name` `app_name` `health` `username` `workspace_name`                         |
+| `coder_aibridged_injected_tool_invocations_total`             | counter   | The number of times an injected MCP tool was invoked by aibridge.                                                                                                                                                             | `model` `name` `provider` `server`                                                   |
-| `coderd_agents_connection_latencies_seconds`                  | gauge     | Agent connection latencies in seconds.                                                                                           | `agent_name` `derp_region` `preferred` `username` `workspace_name`                   |
+| `coder_aibridged_interceptions_duration_seconds`              | histogram | The total duration of intercepted requests, in seconds. The majority of this time will be the upstream processing of the request. aibridge has no control over upstream processing time, so it's just an illustrative metric. | `model` `provider`                                                                   |
-| `coderd_agents_connections`                                   | gauge     | Agent connections with statuses.                                                                                                 | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name`   |
+| `coder_aibridged_interceptions_inflight`                      | gauge     | The number of intercepted requests which are being processed.                                                                                                                                                                 | `model` `provider` `route`                                                           |
-| `coderd_agents_up`                                            | gauge     | The number of active agents per workspace.                                                                                       | `template_name` `username` `workspace_name`                                          |
+| `coder_aibridged_interceptions_total`                         | counter   | The count of intercepted requests.                                                                                                                                                                                            | `initiator_id` `method` `model` `provider` `route` `status`                          |
-| `coderd_agentstats_connection_count`                          | gauge     | The number of established connections by agent                                                                                   | `agent_name` `username` `workspace_name`                                             |
+| `coder_aibridged_non_injected_tool_selections_total`          | counter   | The number of times an AI model selected a tool to be invoked by the client.                                                                                                                                                  | `model` `name` `provider`                                                            |
-| `coderd_agentstats_connection_median_latency_seconds`         | gauge     | The median agent connection latency                                                                                              | `agent_name` `username` `workspace_name`                                             |
+| `coder_aibridged_prompts_total`                               | counter   | The number of prompts issued by users (initiators).                                                                                                                                                                           | `initiator_id` `model` `provider`                                                    |
-| `coderd_agentstats_currently_reachable_peers`                 | gauge     | The number of peers (e.g. clients) that are currently reachable over the encrypted network.                                      | `agent_name` `connection_type` `template_name` `username` `workspace_name`           |
+| `coder_aibridged_tokens_total`                                | counter   | The number of tokens used by intercepted requests.                                                                                                                                                                            | `initiator_id` `model` `provider` `type`                                             |
-| `coderd_agentstats_rx_bytes`                                  | gauge     | Agent Rx bytes                                                                                                                   | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agents_apps`                                          | gauge     | Agent applications with statuses.                                                                                                                                                                                             | `agent_name` `app_name` `health` `username` `workspace_name`                         |
-| `coderd_agentstats_session_count_jetbrains`                   | gauge     | The number of session established by JetBrains                                                                                   | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agents_connection_latencies_seconds`                  | gauge     | Agent connection latencies in seconds.                                                                                                                                                                                        | `agent_name` `derp_region` `preferred` `username` `workspace_name`                   |
-| `coderd_agentstats_session_count_reconnecting_pty`            | gauge     | The number of session established by reconnecting PTY                                                                            | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agents_connections`                                   | gauge     | Agent connections with statuses.                                                                                                                                                                                              | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name`   |
-| `coderd_agentstats_session_count_ssh`                         | gauge     | The number of session established by SSH                                                                                         | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agents_up`                                            | gauge     | The number of active agents per workspace.                                                                                                                                                                                    | `template_name` `username` `workspace_name`                                          |
-| `coderd_agentstats_session_count_vscode`                      | gauge     | The number of session established by VSCode                                                                                      | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agentstats_connection_count`                          | gauge     | The number of established connections by agent                                                                                                                                                                                | `agent_name` `username` `workspace_name`                                             |
-| `coderd_agentstats_startup_script_seconds`                    | gauge     | The number of seconds the startup script took to execute.                                                                        | `agent_name` `success` `template_name` `username` `workspace_name`                   |
+| `coderd_agentstats_connection_median_latency_seconds`         | gauge     | The median agent connection latency                                                                                                                                                                                           | `agent_name` `username` `workspace_name`                                             |
-| `coderd_agentstats_tx_bytes`                                  | gauge     | Agent Tx bytes                                                                                                                   | `agent_name` `username` `workspace_name`                                             |
+| `coderd_agentstats_currently_reachable_peers`                 | gauge     | The number of peers (e.g. clients) that are currently reachable over the encrypted network.                                                                                                                                   | `agent_name` `connection_type` `template_name` `username` `workspace_name`           |
-| `coderd_api_active_users_duration_hour`                       | gauge     | The number of users that have been active within the last hour.                                                                  |                                                                                      |
+| `coderd_agentstats_rx_bytes`                                  | gauge     | Agent Rx bytes                                                                                                                                                                                                                | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_concurrent_requests`                              | gauge     | The number of concurrent API requests.                                                                                           |                                                                                      |
+| `coderd_agentstats_session_count_jetbrains`                   | gauge     | The number of session established by JetBrains                                                                                                                                                                                | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_concurrent_websockets`                            | gauge     | The total number of concurrent API websockets.                                                                                   |                                                                                      |
+| `coderd_agentstats_session_count_reconnecting_pty`            | gauge     | The number of session established by reconnecting PTY                                                                                                                                                                         | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_request_latencies_seconds`                        | histogram | Latency distribution of requests in seconds.                                                                                     | `method` `path`                                                                      |
+| `coderd_agentstats_session_count_ssh`                         | gauge     | The number of session established by SSH                                                                                                                                                                                      | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_requests_processed_total`                         | counter   | The total number of processed API requests                                                                                       | `code` `method` `path`                                                               |
+| `coderd_agentstats_session_count_vscode`                      | gauge     | The number of session established by VSCode                                                                                                                                                                                   | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_websocket_durations_seconds`                      | histogram | Websocket duration distribution of requests in seconds.                                                                          | `path`                                                                               |
+| `coderd_agentstats_startup_script_seconds`                    | gauge     | The number of seconds the startup script took to execute.                                                                                                                                                                     | `agent_name` `success` `template_name` `username` `workspace_name`                   |
-| `coderd_api_workspace_latest_build`                           | gauge     | The latest workspace builds with a status.                                                                                       | `status`                                                                             |
+| `coderd_agentstats_tx_bytes`                                  | gauge     | Agent Tx bytes                                                                                                                                                                                                                | `agent_name` `username` `workspace_name`                                             |
-| `coderd_api_workspace_latest_build_total`                     | gauge     | DEPRECATED: use coderd_api_workspace_latest_build instead                                                                        | `status`                                                                             |
+| `coderd_api_active_users_duration_hour`                       | gauge     | The number of users that have been active within the last hour.                                                                                                                                                               |                                                                                      |
-| `coderd_insights_applications_usage_seconds`                  | gauge     | The application usage per template.                                                                                              | `application_name` `slug` `template_name`                                            |
+| `coderd_api_concurrent_requests`                              | gauge     | The number of concurrent API requests.                                                                                                                                                                                        |                                                                                      |
-| `coderd_insights_parameters`                                  | gauge     | The parameter usage per template.                                                                                                | `parameter_name` `parameter_type` `parameter_value` `template_name`                  |
+| `coderd_api_concurrent_websockets`                            | gauge     | The total number of concurrent API websockets.                                                                                                                                                                                |                                                                                      |
-| `coderd_insights_templates_active_users`                      | gauge     | The number of active users of the template.                                                                                      | `template_name`                                                                      |
+| `coderd_api_request_latencies_seconds`                        | histogram | Latency distribution of requests in seconds.                                                                                                                                                                                  | `method` `path`                                                                      |
-| `coderd_license_active_users`                                 | gauge     | The number of active users.                                                                                                      |                                                                                      |
+| `coderd_api_requests_processed_total`                         | counter   | The total number of processed API requests                                                                                                                                                                                    | `code` `method` `path`                                                               |
-| `coderd_license_limit_users`                                  | gauge     | The user seats limit based on the active Coder license.                                                                          |                                                                                      |
+| `coderd_api_websocket_durations_seconds`                      | histogram | Websocket duration distribution of requests in seconds.                                                                                                                                                                       | `path`                                                                               |
-| `coderd_license_user_limit_enabled`                           | gauge     | Returns 1 if the current license enforces the user limit.                                                                        |                                                                                      |
+| `coderd_api_workspace_latest_build`                           | gauge     | The latest workspace builds with a status.                                                                                                                                                                                    | `status`                                                                             |
-| `coderd_metrics_collector_agents_execution_seconds`           | histogram | Histogram for duration of agents metrics collection in seconds.                                                                  |                                                                                      |
+| `coderd_api_workspace_latest_build_total`                     | gauge     | DEPRECATED: use coderd_api_workspace_latest_build instead                                                                                                                                                                     | `status`                                                                             |
-| `coderd_oauth2_external_requests_rate_limit`                  | gauge     | The total number of allowed requests per interval.                                                                               | `name` `resource`                                                                    |
+| `coderd_insights_applications_usage_seconds`                  | gauge     | The application usage per template.                                                                                                                                                                                           | `application_name` `slug` `template_name`                                            |
-| `coderd_oauth2_external_requests_rate_limit_next_reset_unix`  | gauge     | Unix timestamp of the next interval                                                                                              | `name` `resource`                                                                    |
+| `coderd_insights_parameters`                                  | gauge     | The parameter usage per template.                                                                                                                                                                                             | `parameter_name` `parameter_type` `parameter_value` `template_name`                  |
-| `coderd_oauth2_external_requests_rate_limit_remaining`        | gauge     | The remaining number of allowed requests in this interval.                                                                       | `name` `resource`                                                                    |
+| `coderd_insights_templates_active_users`                      | gauge     | The number of active users of the template.                                                                                                                                                                                   | `template_name`                                                                      |
-| `coderd_oauth2_external_requests_rate_limit_reset_in_seconds` | gauge     | Seconds until the next interval                                                                                                  | `name` `resource`                                                                    |
+| `coderd_license_active_users`                                 | gauge     | The number of active users.                                                                                                                                                                                                   |                                                                                      |
-| `coderd_oauth2_external_requests_rate_limit_total`            | gauge     | DEPRECATED: use coderd_oauth2_external_requests_rate_limit instead                                                               | `name` `resource`                                                                    |
+| `coderd_license_limit_users`                                  | gauge     | The user seats limit based on the active Coder license.                                                                                                                                                                       |                                                                                      |
-| `coderd_oauth2_external_requests_rate_limit_used`             | gauge     | The number of requests made in this interval.                                                                                    | `name` `resource`                                                                    |
+| `coderd_license_user_limit_enabled`                           | gauge     | Returns 1 if the current license enforces the user limit.                                                                                                                                                                     |                                                                                      |
-| `coderd_oauth2_external_requests_total`                       | counter   | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code`                                                        |
+| `coderd_metrics_collector_agents_execution_seconds`           | histogram | Histogram for duration of agents metrics collection in seconds.                                                                                                                                                               |                                                                                      |
-| `coderd_prebuilt_workspace_claim_duration_seconds`            | histogram | Time to claim a prebuilt workspace by organization, template, and preset.                                                        | `organization_name` `preset_name` `template_name`                                    |
+| `coderd_oauth2_external_requests_rate_limit`                  | gauge     | The total number of allowed requests per interval.                                                                                                                                                                            | `name` `resource`                                                                    |
-| `coderd_provisionerd_job_timings_seconds`                     | histogram | The provisioner job time duration in seconds.                                                                                    | `provisioner` `status`                                                               |
+| `coderd_oauth2_external_requests_rate_limit_next_reset_unix`  | gauge     | Unix timestamp of the next interval                                                                                                                                                                                           | `name` `resource`                                                                    |
-| `coderd_provisionerd_jobs_current`                            | gauge     | The number of currently running provisioner jobs.                                                                                | `provisioner`                                                                        |
+| `coderd_oauth2_external_requests_rate_limit_remaining`        | gauge     | The remaining number of allowed requests in this interval.                                                                                                                                                                    | `name` `resource`                                                                    |
-| `coderd_provisionerd_num_daemons`                             | gauge     | The number of provisioner daemons.                                                                                               |                                                                                      |
+| `coderd_oauth2_external_requests_rate_limit_reset_in_seconds` | gauge     | Seconds until the next interval                                                                                                                                                                                               | `name` `resource`                                                                    |
-| `coderd_provisionerd_workspace_build_timings_seconds`         | histogram | The time taken for a workspace to build.                                                                                         | `status` `template_name` `template_version` `workspace_transition`                   |
+| `coderd_oauth2_external_requests_rate_limit_total`            | gauge     | DEPRECATED: use coderd_oauth2_external_requests_rate_limit instead                                                                                                                                                            | `name` `resource`                                                                    |
-| `coderd_workspace_builds_total`                               | counter   | The number of workspaces started, updated, or deleted.                                                                           | `action` `owner_email` `status` `template_name` `template_version` `workspace_name`  |
+| `coderd_oauth2_external_requests_rate_limit_used`             | gauge     | The number of requests made in this interval.                                                                                                                                                                                 | `name` `resource`                                                                    |
-| `coderd_workspace_creation_duration_seconds`                  | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild).                                    | `organization_name` `preset_name` `template_name` `type`                             |
+| `coderd_oauth2_external_requests_total`                       | counter   | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response.                                                                                              | `name` `source` `status_code`                                                        |
-| `coderd_workspace_creation_total`                             | counter   | Total regular (non-prebuilt) workspace creations by organization, template, and preset.                                          | `organization_name` `preset_name` `template_name`                                    |
+| `coderd_prebuilt_workspace_claim_duration_seconds`            | histogram | Time to claim a prebuilt workspace by organization, template, and preset.                                                                                                                                                     | `organization_name` `preset_name` `template_name`                                    |
-| `coderd_workspace_latest_build_status`                        | gauge     | The current workspace statuses by template, transition, and owner.                                                               | `status` `template_name` `template_version` `workspace_owner` `workspace_transition` |
+| `coderd_provisionerd_job_timings_seconds`                     | histogram | The provisioner job time duration in seconds.                                                                                                                                                                                 | `provisioner` `status`                                                               |
-| `go_gc_duration_seconds`                                      | summary   | A summary of the pause duration of garbage collection cycles.                                                                    |                                                                                      |
+| `coderd_provisionerd_jobs_current`                            | gauge     | The number of currently running provisioner jobs.                                                                                                                                                                             | `provisioner`                                                                        |
-| `go_goroutines`                                               | gauge     | Number of goroutines that currently exist.                                                                                       |                                                                                      |
+| `coderd_provisionerd_num_daemons`                             | gauge     | The number of provisioner daemons.                                                                                                                                                                                            |                                                                                      |
-| `go_info`                                                     | gauge     | Information about the Go environment.                                                                                            | `version`                                                                            |
+| `coderd_provisionerd_workspace_build_timings_seconds`         | histogram | The time taken for a workspace to build.                                                                                                                                                                                      | `status` `template_name` `template_version` `workspace_transition`                   |
-| `go_memstats_alloc_bytes`                                     | gauge     | Number of bytes allocated and still in use.                                                                                      |                                                                                      |
+| `coderd_workspace_builds_total`                               | counter   | The number of workspaces started, updated, or deleted.                                                                                                                                                                        | `action` `owner_email` `status` `template_name` `template_version` `workspace_name`  |
-| `go_memstats_alloc_bytes_total`                               | counter   | Total number of bytes allocated, even if freed.                                                                                  |                                                                                      |
+| `coderd_workspace_creation_duration_seconds`                  | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild).                                                                                                                                 | `organization_name` `preset_name` `template_name` `type`                             |
-| `go_memstats_buck_hash_sys_bytes`                             | gauge     | Number of bytes used by the profiling bucket hash table.                                                                         |                                                                                      |
+| `coderd_workspace_creation_total`                             | counter   | Total regular (non-prebuilt) workspace creations by organization, template, and preset.                                                                                                                                       | `organization_name` `preset_name` `template_name`                                    |
-| `go_memstats_frees_total`                                     | counter   | Total number of frees.                                                                                                           |                                                                                      |
+| `coderd_workspace_latest_build_status`                        | gauge     | The current workspace statuses by template, transition, and owner.                                                                                                                                                            | `status` `template_name` `template_version` `workspace_owner` `workspace_transition` |
-| `go_memstats_gc_sys_bytes`                                    | gauge     | Number of bytes used for garbage collection system metadata.                                                                     |                                                                                      |
+| `go_gc_duration_seconds`                                      | summary   | A summary of the pause duration of garbage collection cycles.                                                                                                                                                                 |                                                                                      |
-| `go_memstats_heap_alloc_bytes`                                | gauge     | Number of heap bytes allocated and still in use.                                                                                 |                                                                                      |
+| `go_goroutines`                                               | gauge     | Number of goroutines that currently exist.                                                                                                                                                                                    |                                                                                      |
-| `go_memstats_heap_idle_bytes`                                 | gauge     | Number of heap bytes waiting to be used.                                                                                         |                                                                                      |
+| `go_info`                                                     | gauge     | Information about the Go environment.                                                                                                                                                                                         | `version`                                                                            |
-| `go_memstats_heap_inuse_bytes`                                | gauge     | Number of heap bytes that are in use.                                                                                            |                                                                                      |
+| `go_memstats_alloc_bytes`                                     | gauge     | Number of bytes allocated and still in use.                                                                                                                                                                                   |                                                                                      |
-| `go_memstats_heap_objects`                                    | gauge     | Number of allocated objects.                                                                                                     |                                                                                      |
+| `go_memstats_alloc_bytes_total`                               | counter   | Total number of bytes allocated, even if freed.                                                                                                                                                                               |                                                                                      |
-| `go_memstats_heap_released_bytes`                             | gauge     | Number of heap bytes released to OS.                                                                                             |                                                                                      |
+| `go_memstats_buck_hash_sys_bytes`                             | gauge     | Number of bytes used by the profiling bucket hash table.                                                                                                                                                                      |                                                                                      |
-| `go_memstats_heap_sys_bytes`                                  | gauge     | Number of heap bytes obtained from system.                                                                                       |                                                                                      |
+| `go_memstats_frees_total`                                     | counter   | Total number of frees.                                                                                                                                                                                                        |                                                                                      |
-| `go_memstats_last_gc_time_seconds`                            | gauge     | Number of seconds since 1970 of last garbage collection.                                                                         |                                                                                      |
+| `go_memstats_gc_sys_bytes`                                    | gauge     | Number of bytes used for garbage collection system metadata.                                                                                                                                                                  |                                                                                      |
-| `go_memstats_lookups_total`                                   | counter   | Total number of pointer lookups.                                                                                                 |                                                                                      |
+| `go_memstats_heap_alloc_bytes`                                | gauge     | Number of heap bytes allocated and still in use.                                                                                                                                                                              |                                                                                      |
-| `go_memstats_mallocs_total`                                   | counter   | Total number of mallocs.                                                                                                         |                                                                                      |
+| `go_memstats_heap_idle_bytes`                                 | gauge     | Number of heap bytes waiting to be used.                                                                                                                                                                                      |                                                                                      |
-| `go_memstats_mcache_inuse_bytes`                              | gauge     | Number of bytes in use by mcache structures.                                                                                     |                                                                                      |
+| `go_memstats_heap_inuse_bytes`                                | gauge     | Number of heap bytes that are in use.                                                                                                                                                                                         |                                                                                      |
-| `go_memstats_mcache_sys_bytes`                                | gauge     | Number of bytes used for mcache structures obtained from system.                                                                 |                                                                                      |
+| `go_memstats_heap_objects`                                    | gauge     | Number of allocated objects.                                                                                                                                                                                                  |                                                                                      |
-| `go_memstats_mspan_inuse_bytes`                               | gauge     | Number of bytes in use by mspan structures.                                                                                      |                                                                                      |
+| `go_memstats_heap_released_bytes`                             | gauge     | Number of heap bytes released to OS.                                                                                                                                                                                          |                                                                                      |
-| `go_memstats_mspan_sys_bytes`                                 | gauge     | Number of bytes used for mspan structures obtained from system.                                                                  |                                                                                      |
+| `go_memstats_heap_sys_bytes`                                  | gauge     | Number of heap bytes obtained from system.                                                                                                                                                                                    |                                                                                      |
-| `go_memstats_next_gc_bytes`                                   | gauge     | Number of heap bytes when next garbage collection will take place.                                                               |                                                                                      |
+| `go_memstats_last_gc_time_seconds`                            | gauge     | Number of seconds since 1970 of last garbage collection.                                                                                                                                                                      |                                                                                      |
-| `go_memstats_other_sys_bytes`                                 | gauge     | Number of bytes used for other system allocations.                                                                               |                                                                                      |
+| `go_memstats_lookups_total`                                   | counter   | Total number of pointer lookups.                                                                                                                                                                                              |                                                                                      |
-| `go_memstats_stack_inuse_bytes`                               | gauge     | Number of bytes in use by the stack allocator.                                                                                   |                                                                                      |
+| `go_memstats_mallocs_total`                                   | counter   | Total number of mallocs.                                                                                                                                                                                                      |                                                                                      |
-| `go_memstats_stack_sys_bytes`                                 | gauge     | Number of bytes obtained from system for stack allocator.                                                                        |                                                                                      |
+| `go_memstats_mcache_inuse_bytes`                              | gauge     | Number of bytes in use by mcache structures.                                                                                                                                                                                  |                                                                                      |
-| `go_memstats_sys_bytes`                                       | gauge     | Number of bytes obtained from system.                                                                                            |                                                                                      |
+| `go_memstats_mcache_sys_bytes`                                | gauge     | Number of bytes used for mcache structures obtained from system.                                                                                                                                                              |                                                                                      |
-| `go_threads`                                                  | gauge     | Number of OS threads created.                                                                                                    |                                                                                      |
+| `go_memstats_mspan_inuse_bytes`                               | gauge     | Number of bytes in use by mspan structures.                                                                                                                                                                                   |                                                                                      |
-| `process_cpu_seconds_total`                                   | counter   | Total user and system CPU time spent in seconds.                                                                                 |                                                                                      |
+| `go_memstats_mspan_sys_bytes`                                 | gauge     | Number of bytes used for mspan structures obtained from system.                                                                                                                                                               |                                                                                      |
-| `process_max_fds`                                             | gauge     | Maximum number of open file descriptors.                                                                                         |                                                                                      |
+| `go_memstats_next_gc_bytes`                                   | gauge     | Number of heap bytes when next garbage collection will take place.                                                                                                                                                            |                                                                                      |
-| `process_open_fds`                                            | gauge     | Number of open file descriptors.                                                                                                 |                                                                                      |
+| `go_memstats_other_sys_bytes`                                 | gauge     | Number of bytes used for other system allocations.                                                                                                                                                                            |                                                                                      |
-| `process_resident_memory_bytes`                               | gauge     | Resident memory size in bytes.                                                                                                   |                                                                                      |
+| `go_memstats_stack_inuse_bytes`                               | gauge     | Number of bytes in use by the stack allocator.                                                                                                                                                                                |                                                                                      |
-| `process_start_time_seconds`                                  | gauge     | Start time of the process since unix epoch in seconds.                                                                           |                                                                                      |
+| `go_memstats_stack_sys_bytes`                                 | gauge     | Number of bytes obtained from system for stack allocator.                                                                                                                                                                     |                                                                                      |
-| `process_virtual_memory_bytes`                                | gauge     | Virtual memory size in bytes.                                                                                                    |                                                                                      |
+| `go_memstats_sys_bytes`                                       | gauge     | Number of bytes obtained from system.                                                                                                                                                                                         |                                                                                      |
-| `process_virtual_memory_max_bytes`                            | gauge     | Maximum amount of virtual memory available in bytes.                                                                             |                                                                                      |
+| `go_threads`                                                  | gauge     | Number of OS threads created.                                                                                                                                                                                                 |                                                                                      |
-| `promhttp_metric_handler_requests_in_flight`                  | gauge     | Current number of scrapes being served.                                                                                          |                                                                                      |
+| `process_cpu_seconds_total`                                   | counter   | Total user and system CPU time spent in seconds.                                                                                                                                                                              |                                                                                      |
-| `promhttp_metric_handler_requests_total`                      | counter   | Total number of scrapes by HTTP status code.                                                                                     | `code`                                                                               |
+| `process_max_fds`                                             | gauge     | Maximum number of open file descriptors.                                                                                                                                                                                      |                                                                                      |
 | `process_open_fds`                                            | gauge     | Number of open file descriptors.                                                                                                                                                                                              |                                                                                      |
 | `process_resident_memory_bytes`                               | gauge     | Resident memory size in bytes.                                                                                                                                                                                                |                                                                                      |
 | `process_start_time_seconds`                                  | gauge     | Start time of the process since unix epoch in seconds.                                                                                                                                                                        |                                                                                      |
 | `process_virtual_memory_bytes`                                | gauge     | Virtual memory size in bytes.                                                                                                                                                                                                 |                                                                                      |
 | `process_virtual_memory_max_bytes`                            | gauge     | Maximum amount of virtual memory available in bytes.                                                                                                                                                                          |                                                                                      |
 | `promhttp_metric_handler_requests_in_flight`                  | gauge     | Current number of scrapes being served.                                                                                                                                                                                       |                                                                                      |
 | `promhttp_metric_handler_requests_total`                      | counter   | Total number of scrapes by HTTP status code.                                                                                                                                                                                  | `code`                                                                               |
 <!-- End generated by 'make docs/admin/integrations/prometheus.md'. -->
@@ -55,13 +55,14 @@ func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger)
 	ctx, cancel := context.WithCancel(ctx)
 	daemon := &Server{
-		logger:            logger,
+		logger:           logger,
-		clientDialer:      rpcDialer,
+		clientDialer:     rpcDialer,
 		clientCh:         make(chan DRPCClient),
 		lifecycleCtx:     ctx,
 		cancelFn:         cancel,
 		initConnectionCh: make(chan struct{}),
 		requestBridgePool: pool,
 		clientCh:          make(chan DRPCClient),
 		lifecycleCtx:      ctx,
 		cancelFn:          cancel,
 		initConnectionCh:  make(chan struct{}),
 	}
 	daemon.wg.Add(1)
@@ -9,6 +9,8 @@ import (
 	"testing"
 	"time"
 	"github.com/prometheus/client_golang/prometheus"
 	promtest "github.com/prometheus/client_golang/prometheus/testutil"
 	"github.com/stretchr/testify/require"
 	"github.com/coder/aibridge"
@@ -166,7 +168,7 @@ func TestIntegration(t *testing.T) {
 	logger := testutil.Logger(t)
 	providers := []aibridge.Provider{aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: mockOpenAI.URL})}
-	pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger)
+	pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, nil, logger)
 	require.NoError(t, err)
 	// Given: aibridged is started.
@@ -253,3 +255,109 @@ func TestIntegration(t *testing.T) {
 	// Then: the MCP server was initialized.
 	require.Contains(t, mcpTokenReceived, authLink.OAuthAccessToken, "mock MCP server not requested")
 }
 // TestIntegrationWithMetrics validates that Prometheus metrics are correctly incremented
 // when requests are processed through aibridged.
 //
 // The test wires together a mock OpenAI upstream, a coderd instance with an in-memory
 // AI Bridge server, and a bridge pool constructed with an aibridge.Metrics instance. It then
 // sends a single chat completion request through the aibridged server and asserts that the
 // interception counter reaches 1.
 func TestIntegrationWithMetrics(t *testing.T) {
 	t.Parallel()
 	ctx := testutil.Context(t, testutil.WaitLong)
 	// Create prometheus registry and metrics.
 	// The registry is freshly created here, so the metrics are local to this test.
 	registry := prometheus.NewRegistry()
 	metrics := aibridge.NewMetrics(registry)
 	// Set up mock OpenAI server.
 	// The handler ignores the incoming request and always replies 200 with the same
 	// canned chat completion JSON body.
 	mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
 		_, _ = w.Write([]byte(`{
  "id": "chatcmpl-test",
  "object": "chat.completion",
  "created": 1753343279,
  "model": "gpt-4.1",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "test response"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 5,
    "total_tokens": 15
  }
 }`))
 	}))
 	t.Cleanup(mockOpenAI.Close)
 	// Database and coderd setup.
 	db, ps := dbtestutil.NewDB(t)
 	client, _, api, firstUser := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
 		Options: &coderdtest.Options{
 			Database: db,
 			Pubsub:   ps,
 		},
 	})
 	// A second user is created; its client is the one that issues the API token below.
 	userClient, _ := coderdtest.CreateAnotherUser(t, client, firstUser.OrganizationID)
 	// Create an API token for the user.
 	// The token name embeds the current time in nanoseconds.
 	apiKey, err := userClient.CreateToken(ctx, "me", codersdk.CreateTokenRequest{
 		TokenName: fmt.Sprintf("test-key-%d", time.Now().UnixNano()),
 		Lifetime:  time.Hour,
 		Scope:     codersdk.APIKeyScopeCoderAll,
 	})
 	require.NoError(t, err)
 	// Create aibridge client.
 	aiBridgeClient, err := api.CreateInMemoryAIBridgeServer(ctx)
 	require.NoError(t, err)
 	logger := testutil.Logger(t)
 	// The only configured provider is OpenAI, pointed at the mock server above.
 	providers := []aibridge.Provider{aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: mockOpenAI.URL})}
 	// Create pool with metrics.
 	pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, metrics, logger)
 	require.NoError(t, err)
 	// Given: aibridged is started.
 	// The dialer always hands back the in-memory client created above.
 	srv, err := aibridged.New(ctx, pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
 		return aiBridgeClient, nil
 	}, logger)
 	require.NoError(t, err, "create new aibridged")
 	t.Cleanup(func() {
 		// The shutdown error is deliberately discarded during cleanup.
 		_ = srv.Shutdown(ctx)
 	})
 	// When: a request is made to aibridged.
 	// A path-only URL is used because the request is passed straight to srv.ServeHTTP
 	// below rather than being sent by an HTTP client.
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "/openai/v1/chat/completions", bytes.NewBufferString(`{
  "messages": [
    {
      "role": "user",
      "content": "test message"
    }
  ],
  "model": "gpt-4.1"
 }`))
 	require.NoError(t, err, "make request to test server")
 	// Authenticate with the API token created for the test user.
 	req.Header.Add("Authorization", "Bearer "+apiKey.Key)
 	req.Header.Add("Accept", "application/json")
 	// When: aibridged handles the request.
 	rec := httptest.NewRecorder()
 	srv.ServeHTTP(rec, req)
 	require.Equal(t, http.StatusOK, rec.Code)
 	// Then: the interceptions metric should increase to 1.
 	// This is not exhaustively checking the available metrics; just an indicative one to prove
 	// the plumbing is working.
 	// NOTE(review): the counter is polled rather than read once, presumably because it may be
 	// updated after the response has been written — confirm against aibridge.
 	require.Eventually(t, func() bool {
 		count := promtest.ToFloat64(metrics.InterceptionCount)
 		return count == 1
 	}, testutil.WaitShort, testutil.IntervalFast, "interceptions_total metric should be 1")
 }
@@ -41,8 +41,7 @@ func newTestServer(t *testing.T) (*aibridged.Server, *mock.MockDRPCClient, *mock
 		pool,
 		func(ctx context.Context) (aibridged.DRPCClient, error) {
 			return client, nil
-		},
+		}, logger)
-		logger)
 	require.NoError(t, err, "create new aibridged")
 	t.Cleanup(func() {
 		srv.Shutdown(context.Background())
@@ -291,7 +290,7 @@ func TestRouting(t *testing.T) {
 				aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: openaiSrv.URL}),
 				aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{BaseURL: antSrv.URL}, nil),
 			}
-			pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger)
+			pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, nil, logger)
 			require.NoError(t, err)
 			conn := &mockDRPCConn{}
 			client.EXPECT().DRPCConn().AnyTimes().Return(conn)
@@ -51,11 +51,13 @@ type CachedBridgePool struct {
 	singleflight *singleflight.Group[string, *aibridge.RequestBridge]
 	metrics *aibridge.Metrics
 	shutDownOnce   sync.Once
 	shuttingDownCh chan struct{}
 }
-func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, logger slog.Logger) (*CachedBridgePool, error) {
+func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, metrics *aibridge.Metrics, logger slog.Logger) (*CachedBridgePool, error) {
 	cache, err := ristretto.NewCache(&ristretto.Config[string, *aibridge.RequestBridge]{
 		NumCounters:        options.MaxItems * 10,        // Docs suggest setting this 10x number of keys.
 		MaxCost:            options.MaxItems * cacheCost, // Up to n instances.
@@ -88,6 +90,8 @@ func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, log
 		singleflight: &singleflight.Group[string, *aibridge.RequestBridge]{},
 		metrics: metrics,
 		shuttingDownCh: make(chan struct{}),
 	}, nil
 }
@@ -154,7 +158,7 @@ func (p *CachedBridgePool) Acquire(ctx context.Context, req Request, clientFn Cl
 			}
 		}
-		bridge, err := aibridge.NewRequestBridge(ctx, p.providers, p.logger, recorder, mcpServers)
+		bridge, err := aibridge.NewRequestBridge(ctx, p.providers, recorder, mcpServers, p.metrics, p.logger)
 		if err != nil {
 			return nil, xerrors.Errorf("create new request bridge: %w", err)
 		}
@@ -167,7 +171,7 @@ func (p *CachedBridgePool) Acquire(ctx context.Context, req Request, clientFn Cl
 	return instance, err
 }
-func (p *CachedBridgePool) Metrics() PoolMetrics {
+func (p *CachedBridgePool) CacheMetrics() PoolMetrics {
 	if p.cache == nil {
 		return nil
 	}
@@ -30,7 +30,7 @@ func TestPool(t *testing.T) {
 	mcpProxy := mcpmock.NewMockServerProxier(ctrl)
 	opts := aibridged.PoolOptions{MaxItems: 1, TTL: time.Second}
-	pool, err := aibridged.NewCachedBridgePool(opts, nil, logger)
+	pool, err := aibridged.NewCachedBridgePool(opts, nil, nil, logger)
 	require.NoError(t, err)
 	t.Cleanup(func() { pool.Shutdown(context.Background()) })
@@ -63,11 +63,11 @@ func TestPool(t *testing.T) {
 	require.NoError(t, err, "acquire pool instance")
 	require.Same(t, inst, instB)
-	metrics := pool.Metrics()
+	cacheMetrics := pool.CacheMetrics()
-	require.EqualValues(t, 1, metrics.KeysAdded())
+	require.EqualValues(t, 1, cacheMetrics.KeysAdded())
-	require.EqualValues(t, 0, metrics.KeysEvicted())
+	require.EqualValues(t, 0, cacheMetrics.KeysEvicted())
-	require.EqualValues(t, 1, metrics.Hits())
+	require.EqualValues(t, 1, cacheMetrics.Hits())
-	require.EqualValues(t, 1, metrics.Misses())
+	require.EqualValues(t, 1, cacheMetrics.Misses())
 	// This will get called again because a new instance will be created.
 	mcpProxy.EXPECT().Init(gomock.Any()).Times(1).Return(nil)
@@ -81,11 +81,11 @@ func TestPool(t *testing.T) {
 	require.NoError(t, err, "acquire pool instance")
 	require.NotSame(t, inst, inst2)
-	metrics = pool.Metrics()
+	cacheMetrics = pool.CacheMetrics()
-	require.EqualValues(t, 2, metrics.KeysAdded())
+	require.EqualValues(t, 2, cacheMetrics.KeysAdded())
-	require.EqualValues(t, 1, metrics.KeysEvicted())
+	require.EqualValues(t, 1, cacheMetrics.KeysEvicted())
-	require.EqualValues(t, 1, metrics.Hits())
+	require.EqualValues(t, 1, cacheMetrics.Hits())
-	require.EqualValues(t, 2, metrics.Misses())
+	require.EqualValues(t, 2, cacheMetrics.Misses())
 	// This will get called again because a new instance will be created.
 	mcpProxy.EXPECT().Init(gomock.Any()).Times(1).Return(nil)
@@ -99,11 +99,11 @@ func TestPool(t *testing.T) {
 	require.NoError(t, err, "acquire pool instance 2B")
 	require.NotSame(t, inst2, inst2B)
-	metrics = pool.Metrics()
+	cacheMetrics = pool.CacheMetrics()
-	require.EqualValues(t, 3, metrics.KeysAdded())
+	require.EqualValues(t, 3, cacheMetrics.KeysAdded())
-	require.EqualValues(t, 2, metrics.KeysEvicted())
+	require.EqualValues(t, 2, cacheMetrics.KeysEvicted())
-	require.EqualValues(t, 1, metrics.Hits())
+	require.EqualValues(t, 1, cacheMetrics.Hits())
-	require.EqualValues(t, 3, metrics.Misses())
+	require.EqualValues(t, 3, cacheMetrics.Misses())
 	// TODO: add test for expiry.
 	// This requires Go 1.25's [synctest](https://pkg.go.dev/testing/synctest) since the
@@ -57,6 +57,16 @@ func (t *recorderTranslation) RecordPromptUsage(ctx context.Context, req *aibrid
 }
 func (t *recorderTranslation) RecordTokenUsage(ctx context.Context, req *aibridge.TokenUsageRecord) error {
 	merged := req.Metadata
 	if merged == nil {
 		merged = aibridge.Metadata{}
 	}
 	// Merge the token usage values into metadata; later we might want to store some of these in their own fields.
 	for k, v := range req.ExtraTokenTypes {
 		merged[k] = v
 	}
 	_, err := t.client.RecordTokenUsage(ctx, &proto.RecordTokenUsageRequest{
 		InterceptionId: req.InterceptionID,
 		MsgId:          req.MsgID,
@@ -7,6 +7,8 @@ import (
 	"golang.org/x/xerrors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/coder/aibridge"
 	"github.com/coder/coder/v2/codersdk"
 	"github.com/coder/coder/v2/enterprise/aibridged"
@@ -31,8 +33,11 @@ func newAIBridgeDaemon(coderAPI *coderd.API) (*aibridged.Server, error) {
 		}, getBedrockConfig(coderAPI.DeploymentValues.AI.BridgeConfig.Bedrock)),
 	}
 	reg := prometheus.WrapRegistererWithPrefix("coder_aibridged_", coderAPI.PrometheusRegistry)
 	metrics := aibridge.NewMetrics(reg)
 	// Create pool for reusable stateful [aibridge.RequestBridge] instances (one per user).
-	pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger.Named("pool")) // TODO: configurable.
+	pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, metrics, logger.Named("pool")) // TODO: configurable size.
 	if err != nil {
 		return nil, xerrors.Errorf("create request pool: %w", err)
 	}
@@ -165,9 +165,9 @@ require (
 	github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e
 	github.com/pkg/sftp v1.13.7
 	github.com/prometheus-community/pro-bing v0.7.0
-	github.com/prometheus/client_golang v1.23.0
+	github.com/prometheus/client_golang v1.23.2
 	github.com/prometheus/client_model v0.6.2
-	github.com/prometheus/common v0.65.0
+	github.com/prometheus/common v0.66.1
 	github.com/quasilyte/go-ruleguard/dsl v0.3.22
 	github.com/robfig/cron/v3 v3.0.1
 	github.com/shirou/gopsutil/v4 v4.25.5
@@ -476,7 +476,7 @@ require (
 	github.com/anthropics/anthropic-sdk-go v1.18.0
 	github.com/brianvoe/gofakeit/v7 v7.9.0
 	github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225
-	github.com/coder/aibridge v0.1.7
+	github.com/coder/aibridge v0.2.0
 	github.com/coder/aisdk-go v0.0.9
 	github.com/coder/boundary v1.0.1-0.20250925154134-55a44f2a7945
 	github.com/coder/preview v1.0.4
@@ -919,8 +919,8 @@ github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv
 github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225 h1:tRIViZ5JRmzdOEo5wUWngaGEFBG8OaE1o2GIHN5ujJ8=
 github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225/go.mod h1:rNLVpYgEVeu1Zk29K64z6Od8RBP9DwqCu9OfCzh8MR4=
-github.com/coder/aibridge v0.1.7 h1:GTAM8nHawXMeb/pxAIwvzr76dyVGu9hw9qV6Gvpc7nw=
+github.com/coder/aibridge v0.2.0 h1:kAWhHD6fsmDLH1WxIwXPu9Ineijj+lVniko45C003Vo=
-github.com/coder/aibridge v0.1.7/go.mod h1:7GhrLbzf6uM3sCA7OPaDzvq9QNrCjNuzMy+WgipYwfQ=
+github.com/coder/aibridge v0.2.0/go.mod h1:2T0RSnIX1WTqFajzXsaNsoNe6mmNsNeCTxiHBWEsFnE=
 github.com/coder/aisdk-go v0.0.9 h1:Vzo/k2qwVGLTR10ESDeP2Ecek1SdPfZlEjtTfMveiVo=
 github.com/coder/aisdk-go v0.0.9/go.mod h1:KF6/Vkono0FJJOtWtveh5j7yfNrSctVTpwgweYWSp5M=
 github.com/coder/boundary v1.0.1-0.20250925154134-55a44f2a7945 h1:hDUf02kTX8EGR3+5B+v5KdYvORs4YNfDPci0zCs+pC0=
@@ -1718,15 +1718,15 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
 github.com/prometheus-community/pro-bing v0.7.0 h1:KFYFbxC2f2Fp6c+TyxbCOEarf7rbnzr9Gw8eIb0RfZA=
 github.com/prometheus-community/pro-bing v0.7.0/go.mod h1:Moob9dvlY50Bfq6i88xIwfyw7xLFHH69LUgx9n5zqCE=
-github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
-github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
 github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
 github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
-github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
-github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
 github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
 github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
 github.com/puzpuzpuz/xsync/v3 v3.5.1 h1:GJYJZwO6IdxN/IKbneznS6yPkVC+c3zyY/j19c++5Fg=
@@ -64,7 +64,7 @@ func readMetrics() ([]*dto.MetricFamily, error) {
 	var metrics []*dto.MetricFamily
-	decoder := expfmt.NewDecoder(f, expfmt.NewFormat(expfmt.TypeProtoText))
+	decoder := expfmt.NewDecoder(f, expfmt.NewFormat(expfmt.TypeTextPlain))
 	for {
 		var m dto.MetricFamily
 		err = decoder.Decode(&m)
@@ -878,3 +878,40 @@ promhttp_metric_handler_requests_in_flight 1
 promhttp_metric_handler_requests_total{code="200"} 2
 promhttp_metric_handler_requests_total{code="500"} 0
 promhttp_metric_handler_requests_total{code="503"} 0
 # HELP coder_aibridged_injected_tool_invocations_total The number of times an injected MCP tool was invoked by aibridge.
 # TYPE coder_aibridged_injected_tool_invocations_total counter
 coder_aibridged_injected_tool_invocations_total{model="gpt-5-nano",name="coder_list_templates",provider="openai",server="https://xxx.pit-1.try.coder.app/api/experimental/mcp/http"} 1
 # HELP coder_aibridged_interceptions_duration_seconds The total duration of intercepted requests, in seconds. The majority of this time will be the upstream processing of the request. aibridge has no control over upstream processing time, so it's just an illustrative metric.
 # TYPE coder_aibridged_interceptions_duration_seconds histogram
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="0.5"} 0
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="2"} 0
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="5"} 3
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="15"} 6
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="30"} 6
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="60"} 6
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="120"} 6
 coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="+Inf"} 6
 coder_aibridged_interceptions_duration_seconds_sum{model="gpt-5-nano",provider="openai"} 34.120188692
 coder_aibridged_interceptions_duration_seconds_count{model="gpt-5-nano",provider="openai"} 6
 # HELP coder_aibridged_interceptions_inflight The number of intercepted requests which are being processed.
 # TYPE coder_aibridged_interceptions_inflight gauge
 coder_aibridged_interceptions_inflight{model="gpt-5-nano",provider="openai",route="/v1/chat/completions"} 0
 # HELP coder_aibridged_interceptions_total The count of intercepted requests.
 # TYPE coder_aibridged_interceptions_total counter
 coder_aibridged_interceptions_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",method="POST",model="gpt-5-nano",provider="openai",route="/v1/chat/completions",status="completed"} 6
 # HELP coder_aibridged_non_injected_tool_selections_total The number of times an AI model selected a tool to be invoked by the client.
 # TYPE coder_aibridged_non_injected_tool_selections_total counter
 coder_aibridged_non_injected_tool_selections_total{model="gpt-5-nano",name="read_file",provider="openai"} 2
 # HELP coder_aibridged_prompts_total The number of prompts issued by users (initiators).
 # TYPE coder_aibridged_prompts_total counter
 coder_aibridged_prompts_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai"} 4
 # HELP coder_aibridged_tokens_total The number of tokens used by intercepted requests.
 # TYPE coder_aibridged_tokens_total counter
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_accepted_prediction"} 0
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_audio"} 0
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_reasoning"} 1664
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_rejected_prediction"} 0
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="input"} 13823
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="output"} 2014
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="prompt_audio"} 0
 coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="prompt_cached"} 31872