mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix(agent/agentcontext): address coder-agents-review round 4
CRF-40: ensure runDoneCh is closed even if NewWatcher fails so Close does not deadlock waiting on a goroutine that already exited. CRF-41: gate snapshot publishes with a monotonic resolveEpoch counter. Resync and resolveAndBroadcast drop m.mu around the filesystem walk; under concurrency a stale walk could overwrite a fresher one at a higher version number. Each pass now captures its epoch under the lock and skips the publish if a newer pass has started. Resync returns the currently published Snapshot in the stale case, which is guaranteed to be at least as fresh as the discarded result. CRF-42: hash the (capped) prefix of oversize SKILL.md files so an edit that keeps the file oversize still shifts the aggregate hash, matching readFileResource and preserving the change-detection contract.
This commit is contained in:
@@ -78,6 +78,14 @@ type Manager struct {
|
|||||||
snapshot Snapshot
|
snapshot Snapshot
|
||||||
// version monotonically increases per resolve pass.
|
// version monotonically increases per resolve pass.
|
||||||
version uint64
|
version uint64
|
||||||
|
// resolveEpoch increments at the start of every resolver
|
||||||
|
// pass that drops m.mu around the filesystem walk. Each
|
||||||
|
// pass captures the epoch it claimed; at publish time it
|
||||||
|
// compares its captured epoch against the current epoch and
|
||||||
|
// skips the publish if a newer pass has started, preventing
|
||||||
|
// an old walk's stale result from overwriting a newer one's
|
||||||
|
// fresh result at a higher version number.
|
||||||
|
resolveEpoch uint64
|
||||||
|
|
||||||
// subscribers receive a non-blocking signal whenever the
|
// subscribers receive a non-blocking signal whenever the
|
||||||
// snapshot changes. Subscribers must drain their channel
|
// snapshot changes. Subscribers must drain their channel
|
||||||
@@ -182,6 +190,10 @@ func (m *Manager) Run(ctx context.Context) error {
|
|||||||
m.running = true
|
m.running = true
|
||||||
close(m.runStartedCh)
|
close(m.runStartedCh)
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
|
// Close any early-exit path so Close does not block on
|
||||||
|
// runDoneCh after Run already set running=true. The deferred
|
||||||
|
// close runs even when NewWatcher fails.
|
||||||
|
defer close(m.runDoneCh)
|
||||||
|
|
||||||
watcher, err := NewWatcher(WatcherOptions{
|
watcher, err := NewWatcher(WatcherOptions{
|
||||||
Logger: m.logger.Named("watcher"),
|
Logger: m.logger.Named("watcher"),
|
||||||
@@ -202,7 +214,6 @@ func (m *Manager) Run(ctx context.Context) error {
|
|||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
watcher.Sync(ctx, roots)
|
watcher.Sync(ctx, roots)
|
||||||
|
|
||||||
defer close(m.runDoneCh)
|
|
||||||
defer watcher.Close()
|
defer watcher.Close()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
@@ -386,6 +397,8 @@ func (m *Manager) Resync(ctx context.Context) (Snapshot, error) {
|
|||||||
resolver := m.resolver
|
resolver := m.resolver
|
||||||
watcher := m.watcher
|
watcher := m.watcher
|
||||||
schemaVersion := m.schemaVersion
|
schemaVersion := m.schemaVersion
|
||||||
|
m.resolveEpoch++
|
||||||
|
myEpoch := m.resolveEpoch
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
|
|
||||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||||
@@ -404,6 +417,20 @@ func (m *Manager) Resync(ctx context.Context) (Snapshot, error) {
|
|||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
return m.Snapshot(), ErrManagerClosed
|
return m.Snapshot(), ErrManagerClosed
|
||||||
}
|
}
|
||||||
|
if m.resolveEpoch != myEpoch {
|
||||||
|
// A newer resolve pass started while this one was
|
||||||
|
// walking the filesystem. The newer pass's data
|
||||||
|
// strictly supersedes ours, so skip the publish to
|
||||||
|
// avoid overwriting a fresher Snapshot at a higher
|
||||||
|
// version. Return the currently published Snapshot,
|
||||||
|
// which is at least as fresh as ours.
|
||||||
|
published := m.snapshot
|
||||||
|
m.mu.Unlock()
|
||||||
|
if watcher != nil {
|
||||||
|
watcher.Sync(ctx, roots)
|
||||||
|
}
|
||||||
|
return published, nil
|
||||||
|
}
|
||||||
m.version++
|
m.version++
|
||||||
snap.Version = m.version
|
snap.Version = m.version
|
||||||
m.snapshot = snap
|
m.snapshot = snap
|
||||||
@@ -510,6 +537,8 @@ func (m *Manager) resolveAndBroadcast(ctx context.Context) {
|
|||||||
resolver := m.resolver
|
resolver := m.resolver
|
||||||
watcher := m.watcher
|
watcher := m.watcher
|
||||||
schemaVersion := m.schemaVersion
|
schemaVersion := m.schemaVersion
|
||||||
|
m.resolveEpoch++
|
||||||
|
myEpoch := m.resolveEpoch
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
|
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
@@ -526,6 +555,15 @@ func (m *Manager) resolveAndBroadcast(ctx context.Context) {
|
|||||||
snap.SchemaVersion = schemaVersion
|
snap.SchemaVersion = schemaVersion
|
||||||
|
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
|
if m.resolveEpoch != myEpoch {
|
||||||
|
// A newer resolve pass started while this one was
|
||||||
|
// walking the filesystem. Skip the publish so a
|
||||||
|
// stale-epoch result does not overwrite a fresher
|
||||||
|
// Snapshot at a higher version number. The newer
|
||||||
|
// pass will broadcast its own result.
|
||||||
|
m.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
m.version++
|
m.version++
|
||||||
snap.Version = m.version
|
snap.Version = m.version
|
||||||
m.snapshot = snap
|
m.snapshot = snap
|
||||||
|
|||||||
@@ -443,6 +443,13 @@ func (r *Resolver) readSkillMeta(path string, info fs.FileInfo, userSource strin
|
|||||||
if safeUint64(info.Size()) > r.MaxResourceBytes {
|
if safeUint64(info.Size()) > r.MaxResourceBytes {
|
||||||
res.Status = StatusOversize
|
res.Status = StatusOversize
|
||||||
res.Error = fmt.Sprintf("file size %d exceeds per-resource cap of %d bytes", info.Size(), r.MaxResourceBytes)
|
res.Error = fmt.Sprintf("file size %d exceeds per-resource cap of %d bytes", info.Size(), r.MaxResourceBytes)
|
||||||
|
// Hash the (capped) prefix so an edit that keeps
|
||||||
|
// the file oversize still shifts the aggregate
|
||||||
|
// hash and triggers a re-broadcast. Mirrors the
|
||||||
|
// behavior in readFileResource.
|
||||||
|
if data, err := readFileCapped(path, safeInt64(r.MaxResourceBytes)); err == nil {
|
||||||
|
res.ContentHash = sha256.Sum256(data)
|
||||||
|
}
|
||||||
return res, true
|
return res, true
|
||||||
}
|
}
|
||||||
data, err := os.ReadFile(path)
|
data, err := os.ReadFile(path)
|
||||||
|
|||||||
Reference in New Issue
Block a user