feat: add ai provider status and reload freshness metrics (#25770)

Add metrics for the provider statuses of `aibridged` and `aibridgeproxyd`. AI providers can be modified, and possibly misconfigured, at runtime. These metrics help operators understand the state of the provider definitions when unexpected behaviour is observed.
This commit is contained in:
Danny Kopping
2026-05-28 14:57:33 +02:00
committed by GitHub
parent 637855e276
commit 12520ee964
18 changed files with 704 additions and 125 deletions
+18
View File
@@ -208,3 +208,21 @@ coder_aibridgeproxyd_mitm_requests_total{provider=""} 0
# HELP coder_aibridgeproxyd_mitm_responses_total Total number of MITM responses by HTTP status code class.
# TYPE coder_aibridgeproxyd_mitm_responses_total counter
coder_aibridgeproxyd_mitm_responses_total{code="",provider=""} 0
# HELP coder_aibridged_provider_info One series per configured AI provider. Value is always 1; the status label (enabled, disabled, error) carries the alertable signal.
# TYPE coder_aibridged_provider_info gauge
coder_aibridged_provider_info{provider_name="",provider_type="",status=""} 0
# HELP coder_aibridged_providers_last_reload_timestamp_seconds Unix timestamp of the last provider reload attempt, success or failure.
# TYPE coder_aibridged_providers_last_reload_timestamp_seconds gauge
coder_aibridged_providers_last_reload_timestamp_seconds 0
# HELP coder_aibridged_providers_last_reload_success_timestamp_seconds Unix timestamp of the last provider reload that successfully refreshed the pool. A gap against coder_aibridged_providers_last_reload_timestamp_seconds means the loop is firing but the refresh function is failing.
# TYPE coder_aibridged_providers_last_reload_success_timestamp_seconds gauge
coder_aibridged_providers_last_reload_success_timestamp_seconds 0
# HELP coder_aibridgeproxyd_provider_info One series per configured AI provider. Value is always 1; the status label (enabled, disabled, error) carries the alertable signal.
# TYPE coder_aibridgeproxyd_provider_info gauge
coder_aibridgeproxyd_provider_info{provider_name="",provider_type="",status=""} 0
# HELP coder_aibridgeproxyd_providers_last_reload_timestamp_seconds Unix timestamp of the last provider reload attempt, success or failure.
# TYPE coder_aibridgeproxyd_providers_last_reload_timestamp_seconds gauge
coder_aibridgeproxyd_providers_last_reload_timestamp_seconds 0
# HELP coder_aibridgeproxyd_providers_last_reload_success_timestamp_seconds Unix timestamp of the last provider reload that successfully refreshed the router. A gap against coder_aibridgeproxyd_providers_last_reload_timestamp_seconds means the loop is firing but the refresh function is failing.
# TYPE coder_aibridgeproxyd_providers_last_reload_success_timestamp_seconds gauge
coder_aibridgeproxyd_providers_last_reload_success_timestamp_seconds 0
+1
View File
@@ -40,6 +40,7 @@ var scanDirs = []string{
//
// eliminate the need for this skip list.
var skipPaths = []string{
	// metrics.go of the aibridged package under coderd.
	"coderd/aibridged/metrics.go",
	// metrics.go of the aibridgeproxyd package under enterprise.
	"enterprise/aibridgeproxyd/metrics.go",
}