diff --git a/agent/agent.go b/agent/agent.go index 33c7d3b6f6..49816a16db 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -459,7 +459,7 @@ func (a *agent) init() { Logger: a.logger.Named("agentcontext"), Clock: a.clock, WorkingDir: workingDirFn, - BuiltinRoots: defaultContextRoots(a.contextConfig, workingDirFn), + BuiltinRoots: defaultContextRoots(), InitialSources: initialContextSources(a.contextConfig, workingDirFn), AllowedRoots: defaultContextAllowedRoots(workingDirFn), }) diff --git a/agent/agentcontext/manager.go b/agent/agentcontext/manager.go index f9ece33068..fb74ecb424 100644 --- a/agent/agentcontext/manager.go +++ b/agent/agentcontext/manager.go @@ -90,10 +90,11 @@ type Manager struct { trigger chan struct{} // running tracks Run lifetime. - running bool - closed bool - closedCh chan struct{} - runDoneCh chan struct{} + running bool + closed bool + closedCh chan struct{} + runDoneCh chan struct{} + runStartedCh chan struct{} watcher *Watcher } @@ -135,6 +136,7 @@ func NewManager(opts ManagerOptions) (*Manager, error) { trigger: make(chan struct{}, 1), closedCh: make(chan struct{}), runDoneCh: make(chan struct{}), + runStartedCh: make(chan struct{}), } for _, s := range opts.InitialSources { @@ -179,6 +181,7 @@ func (m *Manager) Run(ctx context.Context) error { return xerrors.New("agentcontext: Manager already closed") } m.running = true + close(m.runStartedCh) m.mu.Unlock() watcher, err := NewWatcher(WatcherOptions{ @@ -218,6 +221,14 @@ func (m *Manager) Run(ctx context.Context) error { } } +// Started returns a channel that is closed once Run has +// claimed the running flag. Callers waiting to coordinate with +// the watcher loop can select on it; a closed channel never +// blocks, so this is safe to call repeatedly. +func (m *Manager) Started() <-chan struct{} { + return m.runStartedCh +} + // Close stops the Manager. Close is idempotent; subsequent // calls block until Run exits. func (m *Manager) Close() error { diff --git a/agent/agentcontext/manager_test.go b/agent/agentcontext/manager_test.go index 64a4540a7f..21384deb50 100644 --- a/agent/agentcontext/manager_test.go +++ b/agent/agentcontext/manager_test.go @@ -223,8 +223,15 @@ func TestManager_RunOnce(t *testing.T) { ctx, cancel := context.WithCancel(testutil.Context(t, testutil.WaitShort)) defer cancel() go func() { _ = m.Run(ctx) }() - // Brief wait so Run has a chance to set running=true. - time.Sleep(50 * time.Millisecond) + + // Wait for Run to claim the running flag, then verify the + // second call rejects with a deterministic error rather than + // racing the scheduler. + select { + case <-m.Started(): + case <-ctx.Done(): + t.Fatalf("manager never started: %v", ctx.Err()) + } err := m.Run(ctx) require.Error(t, err) diff --git a/agent/agentcontext/push.go b/agent/agentcontext/push.go index 8f43066f0d..613dd6c56f 100644 --- a/agent/agentcontext/push.go +++ b/agent/agentcontext/push.go @@ -8,6 +8,7 @@ import ( "golang.org/x/xerrors" "cdr.dev/slog/v3" + "github.com/coder/quartz" ) // PushRequest is the wire-format-independent payload the @@ -58,6 +59,10 @@ type PushOptions struct { InitialBackoff time.Duration // MaxBackoff caps the retry wait. Default 30s. MaxBackoff time.Duration + // Clock is the time source for retry backoffs. Optional; + // defaults to the Manager's clock so tests can trap waits + // with quartz instead of real sleeps. + Clock quartz.Clock } // RunPush ships the current snapshot to the Pusher, then ships @@ -81,6 +86,10 @@ func (m *Manager) RunPush(ctx context.Context, p Pusher, opts PushOptions) error if maxBackoff <= 0 { maxBackoff = 30 * time.Second } + clock := opts.Clock + if clock == nil { + clock = m.clock + } changes, unsub := m.SubscribeChanges() defer unsub() @@ -91,7 +100,7 @@ func (m *Manager) RunPush(ctx context.Context, p Pusher, opts PushOptions) error snap := m.Snapshot() req := snapshotToPushRequest(snap, initial) - err := pushWithRetry(ctx, p, req, initialBackoff, maxBackoff, logger) + err := pushWithRetry(ctx, p, req, initialBackoff, maxBackoff, clock, logger) switch { case err == nil: initial = false @@ -130,6 +139,7 @@ func pushWithRetry( p Pusher, req *PushRequest, initialBackoff, maxBackoff time.Duration, + clock quartz.Clock, logger slog.Logger, ) error { backoff := initialBackoff @@ -155,10 +165,12 @@ func pushWithRetry( slog.F("version", req.Version), slog.F("backoff", backoff), slog.Error(err)) + timer := clock.NewTimer(backoff) select { case <-ctx.Done(): + timer.Stop() return ctx.Err() - case <-time.After(backoff): + case <-timer.C: } backoff *= 2 if backoff > maxBackoff { diff --git a/agent/agentcontext/push_test.go b/agent/agentcontext/push_test.go index f533879e1f..85141f6b26 100644 --- a/agent/agentcontext/push_test.go +++ b/agent/agentcontext/push_test.go @@ -13,6 +13,7 @@ import ( "github.com/coder/coder/v2/agent/agentcontext" "github.com/coder/coder/v2/testutil" + "github.com/coder/quartz" ) // fakePusher records every push and lets the test control the @@ -156,6 +157,10 @@ func TestRunPush_StopsOnUnimplemented(t *testing.T) { func TestRunPush_RetriesTransientError(t *testing.T) { t.Parallel() + mClock := quartz.NewMock(t) + trap := mClock.Trap().NewTimer() + defer trap.Close() + m := newTestManager(t, agentcontext.ManagerOptions{ WorkingDir: func() string { return t.TempDir() }, }) @@ -169,11 +174,17 @@ func TestRunPush_RetriesTransientError(t *testing.T) { go func() { pushDone <- m.RunPush(ctx, p, agentcontext.PushOptions{ Logger: testutil.Logger(t).Named("push"), - InitialBackoff: 10 * time.Millisecond, + InitialBackoff: time.Second, + Clock: mClock, }) }() - // First push hits transient, second succeeds. + // First push hits transient and arms the retry timer. Wait for + // the timer creation, then advance the clock past the backoff. + call := trap.MustWait(ctx) + call.MustRelease(ctx) + mClock.Advance(time.Second).MustWait(ctx) + select { case <-p.signal: case <-time.After(testutil.WaitShort): diff --git a/agent/agentcontext/resolve.go b/agent/agentcontext/resolve.go index 80eb12fcfd..b60ed53422 100644 --- a/agent/agentcontext/resolve.go +++ b/agent/agentcontext/resolve.go @@ -70,20 +70,6 @@ var skipDirNames = map[string]struct{}{ "__pycache__": {}, } -// skillsParentNames are directory basenames that signal a -// skills container; their immediate children are scanned for -// SKILL.md files. -var skillsParentNames = map[string]struct{}{ - "skills": {}, - ".agents": {}, // covers ".agents/skills/" - "agents": {}, - "plugins": {}, // claude code plugin cache layout - "cache": {}, - ".coder": {}, - ".claude": {}, - "skills-dir": {}, -} - // recognizedInstructionFile reports whether name is one of the // instruction-file conventions, case-insensitively. func recognizedInstructionFile(name string) bool { @@ -541,18 +527,11 @@ func excluded(r Resource, reason string) Resource { // isSkillsContainer reports whether dir is a recognized skills // container directory whose immediate children carry SKILL.md -// files. +// files. Both bare "skills" and nested "/skills" +// directories qualify (e.g. ".agents/skills", +// "plugins/foo/skills"). func isSkillsContainer(dir string) bool { - base := filepath.Base(dir) - _, ok := skillsParentNames[base] - if ok && base == "skills" { - return true - } - // "/skills" form (e.g. ".agents/skills", "plugins/foo/skills"). - if strings.HasSuffix(filepath.ToSlash(dir), "/skills") { - return true - } - return false + return filepath.Base(dir) == "skills" } // resourceID builds a stable resource ID. Kind plus canonical diff --git a/agent/agentcontext/types.go b/agent/agentcontext/types.go index cf90a12b46..8c66706d57 100644 --- a/agent/agentcontext/types.go +++ b/agent/agentcontext/types.go @@ -2,7 +2,6 @@ package agentcontext import ( "crypto/sha256" - "encoding/hex" "sort" "strconv" ) @@ -207,12 +206,6 @@ func ComputeAggregateHash(resources []Resource) [32]byte { return out } -// AggregateHashHex returns the hex-encoded aggregate hash. -// Convenience for log lines and HTTP responses. -func (s Snapshot) AggregateHashHex() string { - return hex.EncodeToString(s.AggregateHash[:]) -} - // writeLengthPrefixed writes a uvarint length prefix followed // by the raw bytes of s. func writeLengthPrefixed(h interface{ Write([]byte) (int, error) }, s string) { diff --git a/agent/agentcontext/types_test.go b/agent/agentcontext/types_test.go index 585dac0c69..32f83d46dc 100644 --- a/agent/agentcontext/types_test.go +++ b/agent/agentcontext/types_test.go @@ -87,13 +87,3 @@ func TestComputeAggregateHash_ChangesOnContent(t *testing.T) { hash3 := agentcontext.ComputeAggregateHash([]agentcontext.Resource{withStatus}) require.NotEqual(t, hash1, hash3) } - -func TestSnapshotAggregateHashHex(t *testing.T) { - t.Parallel() - snap := agentcontext.Snapshot{ - AggregateHash: [32]byte{0xde, 0xad, 0xbe, 0xef}, - } - require.Equal(t, - "deadbeef0000000000000000000000000000000000000000000000000000000000000000"[:64], - snap.AggregateHashHex()) -} diff --git a/agent/agentcontext/watch_test.go b/agent/agentcontext/watch_test.go index ba7d7968e3..1797907380 100644 --- a/agent/agentcontext/watch_test.go +++ b/agent/agentcontext/watch_test.go @@ -33,11 +33,11 @@ func TestWatcher_FiresOnAgentsMdEdit(t *testing.T) { ctx := testutil.Context(t, testutil.WaitShort) w.Sync(ctx, []agentcontext.ScanRoot{{Path: dir}}) - // Edit the file. Use a slight delay so fsnotify is ready. - time.Sleep(50 * time.Millisecond) - require.NoError(t, os.WriteFile(filepath.Join(dir, "AGENTS.md"), []byte("v2"), 0o600)) - + // Rewrite the file inside Eventually so the test does not race + // fsnotify's watch-setup window. As soon as the watch is live, + // the next write fires the debounce timer. require.Eventually(t, func() bool { + _ = os.WriteFile(filepath.Join(dir, "AGENTS.md"), []byte("v2"), 0o600) return atomic.LoadInt32(&fires) >= 1 }, testutil.WaitShort, testutil.IntervalFast, "expected at least one fire after AGENTS.md edit") } @@ -60,12 +60,14 @@ func TestWatcher_FiresOnNewSkillFile(t *testing.T) { ctx := testutil.Context(t, testutil.WaitShort) w.Sync(ctx, []agentcontext.ScanRoot{{Path: dir}}) - time.Sleep(50 * time.Millisecond) + // Create SKILL.md inside Eventually so the test does not race + // fsnotify's watch-setup window. The Manager pre-creates the + // skill dir, then rewrites SKILL.md each tick until the watcher + // fires at least once. skillDir := filepath.Join(skillsRoot, "foo") require.NoError(t, os.MkdirAll(skillDir, 0o755)) - require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("---\nname: foo\ndescription: bar\n---\nbody"), 0o600)) - require.Eventually(t, func() bool { + _ = os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("---\nname: foo\ndescription: bar\n---\nbody"), 0o600) return atomic.LoadInt32(&fires) >= 1 }, testutil.WaitShort, testutil.IntervalFast, "expected fire after SKILL.md create") } diff --git a/agent/agentcontext_seed.go b/agent/agentcontext_seed.go index 0fb93bcce0..6af8624216 100644 --- a/agent/agentcontext_seed.go +++ b/agent/agentcontext_seed.go @@ -13,7 +13,7 @@ import ( // The slice is intentionally tolerant of missing entries; the // resolver silently skips canonicalization failures and // non-existent paths. -func defaultContextRoots(_ agentcontextconfig.Config, _ func() string) []string { +func defaultContextRoots() []string { roots := make([]string, 0, 8) // Working directory is added by the manager itself via the