mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
perf(coderd): stop inline-resolving diff status on every GetChat call (#23901)
## Problem
Every `GET /api/experimental/chats/{chatID}` call was blocking for
200-800ms because the `getChat` handler called `resolveChatDiffStatus`,
which unconditionally hit the git provider API (e.g. GitHub's `GET
/repos/{owner}/{repo}/pulls?head=...`) via `ResolveBranchPullRequest` —
even when the cached diff status was fresh.
This made every chat page load at `/agents/{id}` noticeably slow.
## Root cause
The call chain was:
1. `getChat` → `resolveChatDiffStatus`
2. `resolveChatDiffStatus` → `resolveChatDiffReference` →
`gp.ResolveBranchPullRequest(...)` **(external HTTP call)**
3. Only **after** the external call: `chatDiffStatusIsStale(status,
now)` check
The staleness check happened after the expensive work, so every request
paid the cost regardless of cache freshness.
## Fix
`getChat` now returns the cached `chat_diff_statuses` row directly from
the database. The background `gitsync` worker already keeps these rows
fresh (every `DiffStatusTTL = 120s`), so inline resolution was
redundant.
The `resolveChatDiffContents` endpoint (which fetches actual diff
content) still uses the full resolution path since it needs to make
provider API calls by design.
## Changes
- `getChat` reads cached diff status from DB instead of calling
`resolveChatDiffStatus`
- Remove `resolveChatDiffStatus` (dead code — no production callers)
- Remove `chatDiffStatusIsStale` and `chatDiffStatusTTL` (dead code)
- Remove `RefreshesStaleStatusWithExternalAuth` test (tested the removed
inline refresh path)
<details><summary>Decision log</summary>
- **Why not just add a staleness gate?** The background worker already
handles refreshes on the same schedule. Adding an early-return-if-fresh
would work but leaves dead code for the stale path that's never
exercised in production (the worker gets there first). Removing the
inline path entirely is simpler and eliminates the external API
dependency from the read path.
- **Why keep `resolveChatDiffContents` unchanged?** That endpoint's job
is to fetch the actual diff content from the provider, so external API
calls are inherent to its purpose.
</details>
This commit is contained in:
+12
-74
@@ -56,7 +56,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
chatDiffStatusTTL = gitsync.DiffStatusTTL
|
||||
chatStreamBatchSize = 256
|
||||
|
||||
chatContextLimitModelConfigKey = "context_limit"
|
||||
@@ -1243,10 +1242,18 @@ func (api *API) getChat(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
chat := httpmw.ChatParam(r)
|
||||
|
||||
diffStatus, err := api.resolveChatDiffStatus(ctx, chat)
|
||||
if err != nil {
|
||||
// Log but don't fail - diff status is supplementary.
|
||||
api.Logger.Error(ctx, "failed to resolve chat diff status",
|
||||
// Use the cached diff status from the database rather than
|
||||
// resolving it inline. Inline resolution calls out to the
|
||||
// git provider API (e.g. GitHub) on every request which
|
||||
// blocks the response for 200-800ms. The background gitsync
|
||||
// worker keeps the cached status fresh.
|
||||
var diffStatus *database.ChatDiffStatus
|
||||
status, err := api.Database.GetChatDiffStatusByChatID(ctx, chat.ID)
|
||||
switch {
|
||||
case err == nil:
|
||||
diffStatus = &status
|
||||
case !xerrors.Is(err, sql.ErrNoRows):
|
||||
api.Logger.Error(ctx, "failed to get cached chat diff status",
|
||||
slog.F("chat_id", chat.ID),
|
||||
slog.Error(err),
|
||||
)
|
||||
@@ -2358,68 +2365,6 @@ func chatWorkspaceAuditStatus(err error) int {
|
||||
return http.StatusInternalServerError
|
||||
}
|
||||
|
||||
func (api *API) resolveChatDiffStatus(
|
||||
ctx context.Context,
|
||||
chat database.Chat,
|
||||
) (*database.ChatDiffStatus, error) {
|
||||
status, found, err := api.getCachedChatDiffStatus(ctx, chat.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
|
||||
reference, err := api.resolveChatDiffReference(ctx, chat, found, status)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if reference.PullRequestURL != "" {
|
||||
if !found || !strings.EqualFold(strings.TrimSpace(status.Url.String), reference.PullRequestURL) {
|
||||
status, err = api.upsertChatDiffStatusReference(ctx, chat.ID, reference.PullRequestURL, now.Add(-time.Second))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
found = true
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return nil, nil //nolint:nilnil // Callers handle nil status explicitly.
|
||||
}
|
||||
if !chatDiffStatusIsStale(status, now) {
|
||||
return &status, nil
|
||||
}
|
||||
|
||||
// Use the same refresh pipeline as the background worker
|
||||
// so both paths share identical provider/token resolution.
|
||||
refreshed, err := api.gitSyncWorker.RefreshChat(
|
||||
ctx, status, chat.OwnerID,
|
||||
)
|
||||
if err == nil && refreshed != nil {
|
||||
return refreshed, nil
|
||||
}
|
||||
if err == nil {
|
||||
// No PR exists yet; return what we have.
|
||||
return &status, nil
|
||||
}
|
||||
|
||||
api.Logger.Warn(ctx, "failed to refresh chat diff status",
|
||||
slog.F("chat_id", chat.ID),
|
||||
slog.Error(err),
|
||||
)
|
||||
|
||||
backoffStatus, backoffErr := api.upsertChatDiffStatusReference(ctx, chat.ID, reference.PullRequestURL, now.Add(chatDiffStatusTTL))
|
||||
if backoffErr != nil {
|
||||
api.Logger.Warn(ctx, "failed to extend chat diff status stale timestamp",
|
||||
slog.F("chat_id", chat.ID),
|
||||
slog.Error(backoffErr),
|
||||
)
|
||||
return &status, nil
|
||||
}
|
||||
|
||||
return &backoffStatus, nil
|
||||
}
|
||||
|
||||
func (api *API) resolveChatDiffContents(
|
||||
ctx context.Context,
|
||||
chat database.Chat,
|
||||
@@ -2686,13 +2631,6 @@ func (api *API) resolveGitProvider(origin string) gitprovider.Provider {
|
||||
return gp
|
||||
}
|
||||
|
||||
func chatDiffStatusIsStale(status database.ChatDiffStatus, now time.Time) bool {
|
||||
if !status.RefreshedAt.Valid {
|
||||
return true
|
||||
}
|
||||
return !status.StaleAt.After(now)
|
||||
}
|
||||
|
||||
func (api *API) resolveChatGitAccessToken(
|
||||
ctx context.Context,
|
||||
userID uuid.UUID,
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"fmt"
|
||||
"mime"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
@@ -22,7 +21,6 @@ import (
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||
"github.com/coder/coder/v2/coderd/coderdtest/oidctest"
|
||||
"github.com/coder/coder/v2/coderd/database"
|
||||
"github.com/coder/coder/v2/coderd/database/db2sdk"
|
||||
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
||||
@@ -4100,133 +4098,6 @@ func TestGetChatDiffStatus(t *testing.T) {
|
||||
_, err = otherClient.GetChat(ctx, createdChat.ID)
|
||||
requireSDKError(t, err, http.StatusNotFound)
|
||||
})
|
||||
|
||||
// Integration test: exercises the full GetChat handler refresh
|
||||
// path with a real DB, dbauthz, a mock GitHub API, and an
|
||||
// external-auth-linked user. Verifies that a stale chat diff
|
||||
// status is refreshed end-to-end via the gitsync worker's
|
||||
// Refresh pipeline (provider resolution, token acquisition
|
||||
// through external auth, and PR status fetch).
|
||||
t.Run("RefreshesStaleStatusWithExternalAuth", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
// Mock GitHub API over TLS so the git provider's URL patterns
|
||||
// (which require https://) match our PR URLs.
|
||||
ghAPI := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch {
|
||||
// PR status: GET /repos/{owner}/{repo}/pulls/{number}
|
||||
case r.URL.Path == "/repos/testorg/testrepo/pulls/42" && r.URL.Query().Get("per_page") == "":
|
||||
_, _ = w.Write([]byte(`{
|
||||
"state": "open",
|
||||
"merged": false,
|
||||
"draft": false,
|
||||
"additions": 25,
|
||||
"deletions": 7,
|
||||
"changed_files": 4,
|
||||
"head": {"sha": "abc123"}
|
||||
}`))
|
||||
// PR reviews: GET /repos/{owner}/{repo}/pulls/{number}/reviews
|
||||
case strings.HasSuffix(r.URL.Path, "/reviews"):
|
||||
_, _ = w.Write([]byte(`[]`))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
t.Cleanup(ghAPI.Close)
|
||||
|
||||
// The git provider derives webBaseURL from apiBaseURL.
|
||||
// For a TLS server at https://127.0.0.1:PORT, webBaseURL
|
||||
// is the same, and PR URL patterns match
|
||||
// https://127.0.0.1:PORT/{owner}/{repo}/pull/{number}.
|
||||
ghWebHost := strings.TrimPrefix(ghAPI.URL, "https://")
|
||||
prURL := fmt.Sprintf("https://%s/testorg/testrepo/pull/42", ghWebHost)
|
||||
remoteOrigin := fmt.Sprintf("https://%s/testorg/testrepo.git", ghWebHost)
|
||||
|
||||
// Set up a fake OIDC IDP for external auth login.
|
||||
const providerID = "test-github"
|
||||
fake := oidctest.NewFakeIDP(t, oidctest.WithServing())
|
||||
|
||||
rawClient, _, api := coderdtest.NewWithAPI(t, &coderdtest.Options{
|
||||
DeploymentValues: chatDeploymentValues(t),
|
||||
ExternalAuthConfigs: []*externalauth.Config{
|
||||
fake.ExternalAuthConfig(t, providerID, nil, func(cfg *externalauth.Config) {
|
||||
cfg.Type = codersdk.EnhancedExternalAuthProviderGitHub.String()
|
||||
// Point the git provider at our mock API server.
|
||||
cfg.APIBaseURL = ghAPI.URL
|
||||
// Match the remote origin (127.0.0.1 host).
|
||||
cfg.Regex = regexp.MustCompile(regexp.QuoteMeta(ghWebHost))
|
||||
}),
|
||||
},
|
||||
})
|
||||
client := codersdk.NewExperimentalClient(rawClient)
|
||||
db := api.Database
|
||||
|
||||
// Use the TLS mock server's HTTP client (which trusts its
|
||||
// self-signed cert) for git provider API calls.
|
||||
api.HTTPClient = ghAPI.Client()
|
||||
user := coderdtest.CreateFirstUser(t, client.Client)
|
||||
modelConfig := createChatModelConfig(t, client)
|
||||
|
||||
// Log in to the external auth provider so the user has an
|
||||
// ExternalAuthLink row in the DB. This is what
|
||||
// resolveChatGitAccessToken reads via GetExternalAuthLink.
|
||||
fake.ExternalLogin(t, client.Client)
|
||||
|
||||
// Insert a chat owned by the user.
|
||||
chat, err := db.InsertChat(dbauthz.AsSystemRestricted(ctx), database.InsertChatParams{
|
||||
OwnerID: user.UserID,
|
||||
LastModelConfigID: modelConfig.ID,
|
||||
Title: "rbac integration test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Store a pre-resolved PR URL so the refresh path uses
|
||||
// ParsePullRequestURL directly (skipping branch-to-PR
|
||||
// resolution, which isn't what we're testing). The status
|
||||
// is stale (stale_at in the past) so the handler triggers
|
||||
// a full refresh through RefreshChat.
|
||||
_, err = db.UpsertChatDiffStatusReference(
|
||||
dbauthz.AsSystemRestricted(ctx),
|
||||
database.UpsertChatDiffStatusReferenceParams{
|
||||
ChatID: chat.ID,
|
||||
Url: sql.NullString{String: prURL, Valid: true},
|
||||
GitBranch: "feature/rbac-fix",
|
||||
GitRemoteOrigin: remoteOrigin,
|
||||
StaleAt: time.Now().Add(-time.Minute),
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Call GetChat which now resolves diff status inline.
|
||||
// This exercises the full code path:
|
||||
// resolveChatDiffStatus -> RefreshChat (with
|
||||
// AsSystemRestricted) -> Refresher.Refresh ->
|
||||
// resolveChatGitAccessToken (GetExternalAuthLink with
|
||||
// AsSystemRestricted) -> FetchPullRequestStatus (mock).
|
||||
//
|
||||
// Without the AsSystemRestricted fix, GetExternalAuthLink
|
||||
// would fail under the chatd RBAC context (missing
|
||||
// ActionReadPersonal), causing ErrNoTokenAvailable and a
|
||||
// refresh failure that silently returns stale data.
|
||||
result, err := client.GetChat(ctx, chat.ID)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, result.DiffStatus)
|
||||
status := result.DiffStatus
|
||||
|
||||
// The mock GitHub API returned PR #42 with 25 additions,
|
||||
// 7 deletions, 4 changed files, state "open".
|
||||
require.NotNil(t, status.RefreshedAt, "status should have been refreshed")
|
||||
require.NotNil(t, status.PullRequestState)
|
||||
require.Equal(t, "open", *status.PullRequestState)
|
||||
require.EqualValues(t, 25, status.Additions)
|
||||
require.EqualValues(t, 7, status.Deletions)
|
||||
require.EqualValues(t, 4, status.ChangedFiles)
|
||||
require.NotNil(t, status.URL)
|
||||
require.Contains(t, *status.URL, "pull/42")
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetChatDiffContents(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user