Files
coder/site/bin.go
T
Spike Curtis 6b1adb8b12 chore: refactor site handler to take cache dir (#21918)
relates to: https://github.com/coder/internal/issues/1300

Refactors the options to the site handler to take the cache directory, rather than expecting the caller to call `ExtractOrReadBinFS` and pass the results.

This is important in this stack because we need direct access to the cache directory for compressed file caching.
2026-02-06 10:56:48 +04:00

454 lines
12 KiB
Go

package site
import (
"archive/tar"
"bytes"
"crypto/sha1" // nolint: gosec // not used for cryptography
"encoding/hex"
"errors"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path"
"path/filepath"
"slices"
"strings"
"sync"
"github.com/klauspost/compress/zstd"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/singleflight"
"golang.org/x/xerrors"
)
// errHashMismatch is a sentinel error used in verifyBinSha1IsCurrent. The
// per-file hashing goroutines return it when an on-disk binary is missing or
// its SHA1 differs from the expected hash; verifyBinSha1IsCurrent translates
// it into a (false, nil) result rather than surfacing it to the caller.
var errHashMismatch = xerrors.New("hash mismatch")
// binHandler is the http.Handler that serves files from binFS for request
// paths under /bin/.
type binHandler struct {
// metadataCache supplies the size and SHA1 hash that ServeHTTP puts into the
// X-Original-Content-Length and ETag response headers.
metadataCache *binMetadataCache
// binFS is the file system the files (and the directory listing) are served
// from.
binFS http.FileSystem
}
// ServeHTTP serves requests under /bin/ from binFS.
//
// The /bin prefix is stripped and underscores in the path are rewritten to
// hyphens before lookup. A request for the directory root is answered with a
// directory listing; any other request must name a file at the root of binFS.
// For files, the X-Original-Content-Length and ETag headers are populated from
// the metadata cache before delegating to http.FileServer.
func (h *binHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
	if !strings.HasPrefix(r.URL.Path, "/bin/") {
		rw.WriteHeader(http.StatusNotFound)
		_, _ = rw.Write([]byte("not found"))
		return
	}

	// Strip the prefix, then convert underscores in the filename to hyphens.
	// We eventually want to change our hyphen-based filenames to underscores,
	// but both spellings need to be supported for now.
	trimmed := strings.TrimPrefix(r.URL.Path, "/bin")
	r.URL.Path = strings.ReplaceAll(trimmed, "_", "-")

	fileServer := http.FileServer(h.binFS)

	name := filePath(r.URL.Path)
	switch {
	case name == "", name == "/":
		// Directory listings are intentionally allowed: this file system
		// should not contain anything sensitive.
		fileServer.ServeHTTP(rw, r)
		return
	case strings.Contains(name, "/"):
		// Only files at the root of the directory are served, so any
		// remaining slash in the name is rejected outright.
		http.NotFound(rw, r)
		return
	}

	metadata, err := h.metadataCache.getMetadata(name)
	switch {
	case xerrors.Is(err, os.ErrNotExist):
		http.NotFound(rw, r)
		return
	case err != nil:
		http.Error(rw, err.Error(), http.StatusInternalServerError)
		return
	}

	headers := rw.Header()

	// http.FileServer does not set Content-Length when chunked transfer
	// encoding is in use, which is the case for large files such as our
	// binaries so that stream compression can be applied.
	//
	// Clients like IDE extensions and the desktop apps compare this header
	// against the number of bytes written to disk after decompression in
	// order to show progress; without it they would have to disable
	// compression to do so.
	//
	// There is no real spec for a header carrying the "inner" content size,
	// but some nginx modules use this one.
	headers.Set("X-Original-Content-Length", fmt.Sprintf("%d", metadata.sizeBytes))

	// The ETag is the SHA1 hash of the file contents and must be quoted.
	// http.FileServer sees the header and handles If-Match / If-None-Match
	// on the request accordingly.
	headers.Set("ETag", fmt.Sprintf("%q", metadata.sha1Hash))

	fileServer.ServeHTTP(rw, r)
}
// newBinHandler builds a binHandler from the cache directory and site file
// system carried in options. It resolves the file system to serve from via
// ExtractOrReadBinFS and seeds the metadata cache with the hashes it returns.
func newBinHandler(options *Options) (*binHandler, error) {
	fileSystem, hashes, err := ExtractOrReadBinFS(options.CacheDir, options.SiteFS)
	if err != nil {
		return nil, xerrors.Errorf("extract or read bin filesystem: %w", err)
	}

	handler := &binHandler{binFS: fileSystem}
	handler.metadataCache = newBinMetadataCache(fileSystem, hashes)
	return handler, nil
}
// ExtractOrReadBinFS decides where coder binaries are served from.
//
// Resolution order:
//  1. If dest is empty, siteFS's bin directory is served directly.
//  2. If siteFS contains bin/coder.tar.zst, the archive is extracted into
//     dest/bin (unless the binaries already there match bin/coder.sha1) and
//     dest/bin is served.
//  3. If siteFS has a bin directory containing anything other than GITKEEP,
//     that directory is served.
//  4. Otherwise dest/bin is created and served as-is, allowing binaries to be
//     manually placed there (usually ${CODER_CACHE_DIRECTORY}/site/bin).
//
// Returns a http.FileSystem that serves unpacked binaries, and a map of binary
// name to SHA1 hash. The returned hash map may be nil, empty, incomplete, or
// contain hashes for missing files.
func ExtractOrReadBinFS(dest string, siteFS fs.FS) (http.FileSystem, map[string]string, error) {
	if dest == "" {
		// No destination on disk, so the embedded fs is the only option.
		embedded, err := fs.Sub(siteFS, "bin")
		if err != nil {
			return nil, nil, xerrors.Errorf("cache path is empty and embedded fs does not have /bin: %w", err)
		}
		return http.FS(embedded), nil, nil
	}

	dest = filepath.Join(dest, "bin")

	// ensureDest creates dest if necessary and exposes it as a file system.
	ensureDest := func() (http.FileSystem, error) {
		if err := os.MkdirAll(dest, 0o700); err != nil {
			return nil, xerrors.Errorf("mkdir failed: %w", err)
		}
		return http.Dir(dest), nil
	}

	// serveCacheDir is the fallback of serving dest without extracting
	// anything into it, paired with an empty hash map.
	serveCacheDir := func() (http.FileSystem, map[string]string, error) {
		cacheFS, err := ensureDest()
		if err != nil {
			return nil, nil, xerrors.Errorf("mkdest failed: %w", err)
		}
		return cacheFS, map[string]string{}, nil
	}

	archive, err := siteFS.Open("bin/coder.tar.zst")
	if err != nil {
		if !xerrors.Is(err, fs.ErrNotExist) {
			return nil, nil, xerrors.Errorf("open coder binary archive failed: %w", err)
		}

		// No archive is present; inspect the bin directory instead.
		entries, err := fs.ReadDir(siteFS, "bin")
		switch {
		case err == nil:
		case xerrors.Is(err, fs.ErrNotExist):
			// The given fs has no bin directory at all.
			return serveCacheDir()
		default:
			return nil, nil, xerrors.Errorf("site fs read dir failed: %w", err)
		}

		if len(filterFiles(entries, "GITKEEP")) == 0 {
			// Nothing besides bin/GITKEEP: serve the cache directory so
			// binaries can be placed there.
			return serveCacheDir()
		}

		// There are real files in bin; serve them straight from siteFS.
		embedded, err := fs.Sub(siteFS, "bin")
		if err != nil {
			return nil, nil, xerrors.Errorf("site fs sub dir failed: %w", err)
		}
		return http.FS(embedded), nil, nil
	}
	defer archive.Close()

	cacheFS, err := ensureDest()
	if err != nil {
		return nil, nil, err
	}

	shaFiles, err := parseSHA1(siteFS)
	if err != nil {
		return nil, nil, xerrors.Errorf("parse sha1 file failed: %w", err)
	}

	current, err := verifyBinSha1IsCurrent(dest, siteFS, shaFiles)
	if err != nil {
		return nil, nil, xerrors.Errorf("verify coder binaries sha1 failed: %w", err)
	}
	if current {
		// The binaries already on disk match; skip extraction.
		return cacheFS, shaFiles, nil
	}

	extracted, err := extractBin(dest, archive)
	if err != nil {
		return nil, nil, xerrors.Errorf("extract coder binaries failed: %w", err)
	}
	if extracted == 0 {
		return nil, nil, xerrors.New("no files were extracted from coder binaries archive")
	}
	return cacheFS, shaFiles, nil
}
// extractBin reads a zstd-compressed tar stream from r and writes every entry
// into dest, flattened to its base name. Entries named "." or whose name
// contains ".." are skipped. It returns the number of files written; on error
// the count reflects the files fully written before the failure.
func extractBin(dest string, r io.Reader) (numExtracted int, err error) {
	decoder, err := zstd.NewReader(r,
		// Concurrency doesn't help when decoding the tar and can actually
		// slow things down.
		zstd.WithDecoderConcurrency(1),
		// Ignoring checksums could give a slight performance boost, but
		// that's probably not worth the reduced safety.
		zstd.IgnoreChecksum(false),
		// Letting the decoder use more memory yields a 2-3x performance
		// boost.
		zstd.WithDecoderLowmem(false),
	)
	if err != nil {
		return 0, xerrors.Errorf("open zstd archive failed: %w", err)
	}
	defer decoder.Close()

	entries := tar.NewReader(decoder)
	for {
		header, nextErr := entries.Next()
		if nextErr != nil {
			if errors.Is(nextErr, io.EOF) {
				// End of archive: everything was extracted.
				return numExtracted, nil
			}
			return numExtracted, xerrors.Errorf("read tar archive failed: %w", nextErr)
		}

		skip := header.Name == "." || strings.Contains(header.Name, "..")
		if skip {
			continue
		}

		// Only the base name is used, so every file lands directly in dest.
		target := filepath.Join(dest, filepath.Base(header.Name))
		out, createErr := os.Create(target)
		if createErr != nil {
			return numExtracted, xerrors.Errorf("create file failed: %w", createErr)
		}

		//#nosec // We created this tar, no risk of decompression bomb.
		_, copyErr := io.Copy(out, entries)
		// The file is closed in either case; the close error only matters
		// when the copy itself succeeded.
		closeErr := out.Close()
		if copyErr != nil {
			return numExtracted, xerrors.Errorf("write file contents failed: %w", copyErr)
		}
		if closeErr != nil {
			return numExtracted, xerrors.Errorf("close file failed: %w", closeErr)
		}
		numExtracted++
	}
}
// binMetadata is the per-file information kept by binMetadataCache and used
// by binHandler to populate response headers.
type binMetadata struct {
// sizeBytes is the file size; getMetadata fills it from the file's Stat.
// NOTE(review): this field was previously annotated "-1 if not known yet",
// but nothing in this file assigns -1 — confirm whether that sentinel is
// still used anywhere.
sizeBytes int64
// SHA1 was chosen because it's fast to compute and reasonable for
// determining if a file has changed. The ETag is not used as a security
// measure.
sha1Hash string // always set if in the cache
}
// binMetadataCache lazily computes and caches binMetadata for files in binFS.
// Entries are populated on first request by getMetadata.
type binMetadataCache struct {
// binFS is the file system the files are opened from.
binFS http.FileSystem
// originalHashes holds the SHA1 hashes supplied at construction time. When a
// file has an entry here, getMetadata uses it instead of hashing the file.
originalHashes map[string]string
// metadata is the cache itself, keyed by file name and guarded by mut.
metadata map[string]binMetadata
mut sync.RWMutex
// sf ensures the work for a given file name is only done once at a time.
sf singleflight.Group
// sem bounds how many metadata computations run concurrently.
sem chan struct{}
}
// newBinMetadataCache returns an empty metadata cache for binFS that allows at
// most four metadata computations at once.
//
// binSha1Hashes is copied rather than loaded into the cache directly: entries
// also carry information read from the file itself (such as its size), so the
// known hashes are kept aside and only used to populate the cache on the
// first request for each file.
func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache {
	hashCount := len(binSha1Hashes)

	knownHashes := make(map[string]string, hashCount)
	for name, hash := range binSha1Hashes {
		knownHashes[name] = hash
	}

	// The mutex and singleflight group are usable as zero values.
	return &binMetadataCache{
		binFS:          binFS,
		originalHashes: knownHashes,
		metadata:       make(map[string]binMetadata, hashCount),
		sem:            make(chan struct{}, 4),
	}
}
// getMetadata returns the size and SHA1 hash for the named file in binFS,
// computing and caching them on first use.
//
// name must be a plain base name; anything else yields os.ErrNotExist without
// touching the file system. Errors from opening, stat-ing or reading the file
// are returned as-is and nothing is cached in that case.
func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) {
	b.mut.RLock()
	cached, hit := b.metadata[name]
	b.mut.RUnlock()
	if hit {
		return cached, nil
	}

	// Avoid DOS by using a pool, and only doing work once per file.
	result, err, _ := b.sf.Do(name, func() (any, error) {
		b.sem <- struct{}{}
		defer func() { <-b.sem }()

		// Only valid, plain base names are allowed past this point; anything
		// else is rejected before touching the filesystem.
		isBaseName := name != "" &&
			name != "." &&
			!strings.Contains(name, "/") &&
			!strings.Contains(name, "\\") &&
			fs.ValidPath(name) &&
			path.Base(name) == name
		if !isBaseName {
			return binMetadata{}, os.ErrNotExist
		}

		file, err := b.binFS.Open(name)
		if err != nil {
			return binMetadata{}, err
		}
		defer file.Close()

		info, err := file.Stat()
		if err != nil {
			return binMetadata{}, err
		}
		entry := binMetadata{sizeBytes: info.Size()}

		known, haveHash := b.originalHashes[name]
		if haveHash {
			// A hash was supplied up front; no need to read the file.
			entry.sha1Hash = known
		} else {
			digest := sha1.New() //#nosec // Not used for cryptography.
			if _, err := io.Copy(digest, file); err != nil {
				return binMetadata{}, err
			}
			entry.sha1Hash = hex.EncodeToString(digest.Sum(nil))
		}

		b.mut.Lock()
		b.metadata[name] = entry
		b.mut.Unlock()
		return entry, nil
	})
	if err != nil {
		return binMetadata{}, err
	}

	//nolint:forcetypeassert
	return result.(binMetadata), nil
}
// filterFiles returns the entries of files whose name is not listed in names,
// preserving their order. The input slice is not modified. The result is nil
// when no entry remains.
func filterFiles(files []fs.DirEntry, names ...string) []fs.DirEntry {
	var kept []fs.DirEntry
	for i := range files {
		if entry := files[i]; !slices.Contains(names, entry.Name()) {
			kept = append(kept, entry)
		}
	}
	return kept
}
// verifyBinSha1IsCurrent reports whether the binaries in dest match the
// hashes shipped in siteFS.
//
// It first compares bin/coder.sha1 from siteFS with dest/coder.sha1 as a cheap
// early exit, then hashes every file listed in shaFiles and compares it
// (case-insensitively) against the expected value. A missing coder.sha1, a
// missing binary, or a differing hash yields (false, nil); any other failure
// is returned as an error.
func verifyBinSha1IsCurrent(dest string, siteFS fs.FS, shaFiles map[string]string) (ok bool, err error) {
	embeddedSums, err := fs.ReadFile(siteFS, "bin/coder.sha1")
	if err != nil {
		return false, xerrors.Errorf("read coder sha1 from embedded fs failed: %w", err)
	}

	cachedSums, err := os.ReadFile(filepath.Join(dest, "coder.sha1"))
	switch {
	case err == nil:
	case xerrors.Is(err, fs.ErrNotExist):
		return false, nil
	default:
		return false, xerrors.Errorf("read coder sha1 failed: %w", err)
	}

	// Differing shasum files mean there's no point hashing anything.
	if !bytes.Equal(embeddedSums, cachedSums) {
		return false, nil
	}

	// Speed up startup by verifying files concurrently. Concurrency is
	// limited to save resources / early-exit. Early-exit speed could be
	// improved by using a context aware io.Reader and passing the context
	// from errgroup.WithContext.
	var workers errgroup.Group
	workers.SetLimit(3)

	// Verify the hash of each on-disk binary.
	for file, want := range shaFiles {
		workers.Go(func() error {
			got, err := sha1HashFile(filepath.Join(dest, file))
			switch {
			case err == nil:
			case xerrors.Is(err, fs.ErrNotExist):
				return errHashMismatch
			default:
				return xerrors.Errorf("hash file failed: %w", err)
			}
			if strings.EqualFold(want, got) {
				return nil
			}
			return errHashMismatch
		})
	}

	switch err := workers.Wait(); {
	case err == nil:
		return true, nil
	case xerrors.Is(err, errHashMismatch):
		return false, nil
	default:
		return false, err
	}
}
// sha1HashFile streams the named file through SHA1 and returns the digest as
// a lowercase hex string. Errors from opening or reading the file are returned
// unwrapped, together with an empty string.
func sha1HashFile(name string) (string, error) {
	file, err := os.Open(name)
	if err != nil {
		return "", err
	}
	defer file.Close()

	//#nosec // Not used for cryptography.
	digest := sha1.New()
	if _, err := io.Copy(digest, file); err != nil {
		return "", err
	}
	return hex.EncodeToString(digest.Sum(nil)), nil
}
// parseSHA1 reads bin/coder.sha1 from siteFS and returns a map of file name
// to lowercase hex SHA1 hash.
//
// Leading and trailing whitespace of the file is ignored. Every remaining
// line must have the form "<hash> *<name>" with a non-empty hash and name.
// An error is returned when the file cannot be read, is empty, or contains a
// line that does not match that form.
func parseSHA1(siteFS fs.FS) (map[string]string, error) {
	b, err := fs.ReadFile(siteFS, "bin/coder.sha1")
	if err != nil {
		return nil, xerrors.Errorf("read coder sha1 from embedded fs failed: %w", err)
	}

	// Check for an empty file up front: splitting empty content still yields
	// one (empty) line, which would otherwise be reported as malformed.
	content := bytes.TrimSpace(b)
	if len(content) == 0 {
		return nil, xerrors.New("empty sha1 file")
	}

	lines := bytes.Split(content, []byte{'\n'})
	shaFiles := make(map[string]string, len(lines))
	for _, line := range lines {
		parts := bytes.Split(line, []byte{' ', '*'})
		if len(parts) != 2 || len(parts[0]) == 0 || len(parts[1]) == 0 {
			// There is no underlying error to wrap here, so report the
			// offending line instead.
			return nil, xerrors.Errorf("malformed sha1 file: invalid line %q", line)
		}
		shaFiles[string(parts[1])] = strings.ToLower(string(parts[0]))
	}
	return shaFiles, nil
}