mirror of
https://github.com/coder/coder.git
synced 2026-06-04 13:38:21 +00:00
68e4155fed
Adds an in-memory trigram-indexed file finder package at `agent/filefinder`, designed to power a future `FindFiles` HTTP handler on the WorkspaceAgent.

## What it does

Fast fuzzy file search with VS Code-quality matching across millions of files. Sub-millisecond search latency at 100K files.

## Architecture

- **Index**: append-only docs slice with trigram + prefix posting lists
- **Snapshot**: lock-free reader view via frozen slice headers + shallow-copied deleted set
- **Search pipeline**: trigram intersection → fuzzy fallback (prefix bucket + subsequence) → brute-force scan (capped at 5K docs)
- **Scoring**: subsequence match, basename prefix, boundary hits, contiguous runs, depth/length penalties
- **Engine**: multi-root with fsnotify watcher (50ms batch coalescing), atomic snapshot publishing

## Benchmarks (10K files)

| Query Type | Latency |
|---|---|
| exact_basename (`handler.go`) | ~43µs |
| short_query (`ha`) | ~7µs |
| fuzzy_basename (`hndlr`) | ~50µs |
| path_structured (`internal/handler`) | ~29µs |
| multi_token (`api handler`) | ~15µs |

## File inventory (11 files, 3273 lines)

| File | Lines | Purpose |
|---|---|---|
| `text.go` | 264 | Normalization, trigram extraction, scoring |
| `delta.go` | 128 | Index, Snapshot, CRUD operations |
| `query.go` | 272 | Query planning, search strategies, top-K merge |
| `engine.go` | 323 | Multi-root engine, watcher integration |
| `watcher_fs.go` | 201 | fsnotify wrapper with batch coalescing |
| `*_test.go` | 2085 | Unit tests, integration tests, benchmarks |

---------

Co-authored-by: Coder <coder@users.noreply.github.com>
86 lines
2.6 KiB
Go
86 lines
2.6 KiB
Go
package filefinder
|
|
|
|
// Test helpers that need internal access.
|
|
|
|
// MakeTestSnapshot builds a Snapshot from a list of paths. Useful for
|
|
// query-level tests that don't need a real filesystem.
|
|
func MakeTestSnapshot(paths []string) *Snapshot {
|
|
idx := NewIndex()
|
|
for _, p := range paths {
|
|
idx.Add(p, 0)
|
|
}
|
|
return idx.Snapshot()
|
|
}
|
|
|
|
// BuildTestIndex walks root and returns a populated Index, the same
|
|
// way Engine.AddRoot does but without starting a watcher.
|
|
func BuildTestIndex(root string) (*Index, error) {
|
|
return walkRoot(root)
|
|
}
|
|
|
|
// IndexIsDeleted reports whether the document at id is tombstoned.
|
|
func IndexIsDeleted(idx *Index, id uint32) bool {
|
|
return idx.deleted[id]
|
|
}
|
|
|
|
// IndexByGramLen returns the number of entries in the trigram index.
|
|
func IndexByGramLen(idx *Index) int {
|
|
return len(idx.byGram)
|
|
}
|
|
|
|
// IndexByPrefix1Len returns the number of posting-list entries for
|
|
// the given single-byte prefix.
|
|
func IndexByPrefix1Len(idx *Index, b byte) int {
|
|
return len(idx.byPrefix1[b])
|
|
}
|
|
|
|
// SnapshotCount returns the number of documents in a Snapshot.
|
|
func SnapshotCount(snap *Snapshot) int {
|
|
return len(snap.docs)
|
|
}
|
|
|
|
// EngineSnapLen returns the number of root snapshots currently held
|
|
// by the engine, or -1 if the pointer is nil.
|
|
func EngineSnapLen(eng *Engine) int {
|
|
p := eng.snap.Load()
|
|
if p == nil {
|
|
return -1
|
|
}
|
|
return len(*p)
|
|
}
|
|
|
|
// DefaultScoreParamsForTest exposes defaultScoreParams for tests.
|
|
var DefaultScoreParamsForTest = defaultScoreParams
|
|
|
|
// ScoreParamsForTest is a type alias for scoreParams.
|
|
type ScoreParamsForTest = scoreParams
|
|
|
|
// Exported aliases for internal functions used in tests.
|
|
var (
|
|
NewQueryPlanForTest = newQueryPlan
|
|
SearchSnapshotForTest = searchSnapshot
|
|
IntersectSortedForTest = intersectSorted
|
|
IntersectAllForTest = intersectAll
|
|
MergeAndScoreForTest = mergeAndScore
|
|
NormalizeQueryForTest = normalizeQuery
|
|
NormalizePathBytesForTest = normalizePathBytes
|
|
ExtractTrigramsForTest = extractTrigrams
|
|
ExtractBasenameForTest = extractBasename
|
|
ExtractSegmentsForTest = extractSegments
|
|
Prefix1ForTest = prefix1
|
|
Prefix2ForTest = prefix2
|
|
IsSubsequenceForTest = isSubsequence
|
|
LongestContiguousMatchForTest = longestContiguousMatch
|
|
IsBoundaryForTest = isBoundary
|
|
CountBoundaryHitsForTest = countBoundaryHits
|
|
EqualFoldASCIIForTest = equalFoldASCII
|
|
ScorePathForTest = scorePath
|
|
PackTrigramForTest = packTrigram
|
|
)
|
|
|
|
// Type aliases for internal types used in tests.
|
|
type (
|
|
CandidateForTest = candidate
|
|
QueryPlanForTest = queryPlan
|
|
)
|