mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix(agent): make manifest-driven resync asynchronous and ctx-aware
The handleManifest Resync call was synchronous; on slow filesystems (CI macOS and Windows runners walking a real $HOME) the resolver walk could outlive the agent's graceful shutdown window, leaking a goroutine inside filepath.WalkDir and tripping goleak in the agent test suite. Two changes break the leak: - New Manager.Trigger() queues an asynchronous re-resolve on the trigger channel. handleManifest now calls Trigger instead of Resync, so manifest handling returns immediately and the background walk is owned by the Manager.Run goroutine that already respects gracefulCtx cancellation. - Resolver.walkDir threads ctx into the filepath.WalkDir callback, so when ctx cancels the walk aborts after the current directory read instead of running to completion.
This commit is contained in:
+6
-6
@@ -1384,13 +1384,13 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
|
||||
|
||||
// Manifest just landed; the agentcontext manager now has
|
||||
// a working directory to scan and a known set of scan
|
||||
// roots. Trigger a resync so the snapshot reflects the
|
||||
// workspace immediately instead of waiting for the next
|
||||
// filesystem event.
|
||||
// roots. Queue an asynchronous re-resolve so the snapshot
|
||||
// reflects the workspace immediately instead of waiting
|
||||
// for the next filesystem event. The result is handled
|
||||
// by the Manager.Run loop, which respects gracefulCtx
|
||||
// cancellation during shutdown.
|
||||
if a.contextManager != nil {
|
||||
if _, resyncErr := a.contextManager.Resync(ctx); resyncErr != nil {
|
||||
a.logger.Debug(ctx, "agentcontext resync after manifest failed", slog.Error(resyncErr))
|
||||
}
|
||||
a.contextManager.Trigger()
|
||||
}
|
||||
|
||||
// Write secret files after signaling manifest readiness so that network
|
||||
|
||||
@@ -439,6 +439,17 @@ func (m *Manager) signal() {
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger queues an asynchronous re-resolve. Trigger returns
|
||||
// immediately; the Run goroutine performs the filesystem walk
|
||||
// in the background and broadcasts when it finishes. Use
|
||||
// Trigger when the caller wants the watcher to pick up an
|
||||
// updated working directory or scan-root set but does not need
|
||||
// the new Snapshot synchronously. Trigger is a no-op when Run
|
||||
// has not started or the Manager is closed.
|
||||
func (m *Manager) Trigger() {
|
||||
m.signal()
|
||||
}
|
||||
|
||||
// scanRootsLocked returns the list of ScanRoots to feed the
|
||||
// resolver and watcher. The Manager's mutex must be held.
|
||||
func (m *Manager) scanRootsLocked() []ScanRoot {
|
||||
|
||||
@@ -242,7 +242,7 @@ func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Reso
|
||||
}
|
||||
continue
|
||||
}
|
||||
walkErr := r.walkDir(root, &resources, seenID)
|
||||
walkErr := r.walkDir(ctx, root, &resources, seenID)
|
||||
if walkErr != nil {
|
||||
snapErrs = append(snapErrs, fmt.Sprintf("walk %q: %s", root.Path, walkErr))
|
||||
}
|
||||
@@ -251,12 +251,17 @@ func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Reso
|
||||
}
|
||||
|
||||
// walkDir performs the recursive descent for a single scan
|
||||
// directory. It honors r.MaxDepth and skipDirNames.
|
||||
func (r *Resolver) walkDir(root ScanRoot, out *[]Resource, seenID map[string]struct{}) error {
|
||||
// directory. It honors r.MaxDepth and skipDirNames. The ctx is
|
||||
// checked inside the WalkDir callback so cancellation
|
||||
// terminates the walk even mid-root.
|
||||
func (r *Resolver) walkDir(ctx context.Context, root ScanRoot, out *[]Resource, seenID map[string]struct{}) error {
|
||||
rootDepth := strings.Count(filepath.Clean(root.Path), string(os.PathSeparator))
|
||||
maxDepth := rootDepth + r.MaxDepth
|
||||
|
||||
return filepath.WalkDir(root.Path, func(path string, d fs.DirEntry, err error) error {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
if err != nil {
|
||||
// Surface the error as Unreadable when we can
|
||||
// associate it with a single recognized file;
|
||||
|
||||
Reference in New Issue
Block a user