mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
15885f8b36
Relates to: https://github.com/coder/internal/issues/1300 Adds the `cachecompress.Compressor` to the binary handler.
481 lines
13 KiB
Go
481 lines
13 KiB
Go
package site
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bytes"
|
|
"crypto/sha1" // nolint: gosec // not used for cryptography
|
|
"encoding/hex"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"net/http"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/klauspost/compress/zstd"
|
|
"golang.org/x/sync/errgroup"
|
|
"golang.org/x/sync/singleflight"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/cachecompress"
|
|
)
|
|
|
|
const CompressionLevel = 5
|
|
|
|
// errHashMismatch is a sentinel error used in verifyBinSha1IsCurrent.
|
|
var errHashMismatch = xerrors.New("hash mismatch")
|
|
|
|
type binHandler struct {
|
|
metadataCache *binMetadataCache
|
|
handler http.Handler
|
|
}
|
|
|
|
func (h *binHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
|
|
if !strings.HasPrefix(r.URL.Path, "/bin/") {
|
|
rw.WriteHeader(http.StatusNotFound)
|
|
_, _ = rw.Write([]byte("not found"))
|
|
return
|
|
}
|
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/bin")
|
|
// Convert underscores in the filename to hyphens. We eventually want to
|
|
// change our hyphen-based filenames to underscores, but we need to
|
|
// support both for now.
|
|
r.URL.Path = strings.ReplaceAll(r.URL.Path, "_", "-")
|
|
|
|
// Set ETag header to the SHA1 hash of the file contents.
|
|
name := filePath(r.URL.Path)
|
|
if name == "" || name == "/" {
|
|
// Serve the directory listing. This intentionally allows directory listings to
|
|
// be served. This file system should not contain anything sensitive.
|
|
h.handler.ServeHTTP(rw, r)
|
|
return
|
|
}
|
|
if strings.Contains(name, "/") {
|
|
// We only serve files from the root of this directory, so avoid any
|
|
// shenanigans by blocking slashes in the URL path.
|
|
http.NotFound(rw, r)
|
|
return
|
|
}
|
|
|
|
metadata, err := h.metadataCache.getMetadata(name)
|
|
if xerrors.Is(err, os.ErrNotExist) {
|
|
http.NotFound(rw, r)
|
|
return
|
|
}
|
|
if err != nil {
|
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// http.FileServer will not set Content-Length when performing chunked
|
|
// transport encoding, which is used for large files like our binaries
|
|
// so stream compression can be used.
|
|
//
|
|
// Clients like IDE extensions and the desktop apps can compare the
|
|
// value of this header with the amount of bytes written to disk after
|
|
// decompression to show progress. Without this, they cannot show
|
|
// progress without disabling compression.
|
|
//
|
|
// There isn't really a spec for a length header for the "inner" content
|
|
// size, but some nginx modules use this header.
|
|
rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", metadata.sizeBytes))
|
|
|
|
// Get and set ETag header. Must be quoted.
|
|
rw.Header().Set("ETag", fmt.Sprintf(`%q`, metadata.sha1Hash))
|
|
|
|
// http.FileServer will see the ETag header and automatically handle
|
|
// If-Match and If-None-Match headers on the request properly.
|
|
h.handler.ServeHTTP(rw, r)
|
|
}
|
|
|
|
func newBinHandler(options *Options) (*binHandler, error) {
|
|
cacheDir := options.CacheDir
|
|
compressedCacheDir := ""
|
|
if cacheDir != "" {
|
|
// split the cache dir into ./compressed and ./orig containing the compressed files and the original
|
|
// uncompressed files respectively.
|
|
compressedCacheDir = filepath.Join(cacheDir, "compressed")
|
|
err := os.MkdirAll(compressedCacheDir, 0o700)
|
|
if err != nil {
|
|
// cached dir was provided, but we can't write to it
|
|
return nil, xerrors.Errorf("failed to create compressed directory in cache dir: %w", err)
|
|
}
|
|
cacheDir = filepath.Join(cacheDir, "orig")
|
|
err = os.MkdirAll(cacheDir, 0o700)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("failed to create orig directory in cache dir: %w", err)
|
|
}
|
|
}
|
|
// note that ExtractOrReadBinFS handles an empty cacheDir; this often arises in testing.
|
|
binFS, binHashes, err := ExtractOrReadBinFS(cacheDir, options.SiteFS)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("extract or read bin filesystem: %w", err)
|
|
}
|
|
h := &binHandler{
|
|
metadataCache: newBinMetadataCache(binFS, binHashes),
|
|
}
|
|
if compressedCacheDir != "" {
|
|
h.handler = cachecompress.NewCompressor(options.Logger, CompressionLevel, compressedCacheDir, binFS)
|
|
} else {
|
|
h.handler = http.FileServer(binFS)
|
|
}
|
|
return h, nil
|
|
}
|
|
|
|
// ExtractOrReadBinFS checks the provided fs for compressed coder binaries and
|
|
// extracts them into dest/bin if found. As a fallback, the provided FS is
|
|
// checked for a /bin directory, if it is non-empty it is returned. Finally
|
|
// dest/bin is returned as a fallback allowing binaries to be manually placed in
|
|
// dest (usually ${CODER_CACHE_DIRECTORY}/site/orig/bin).
|
|
//
|
|
// Returns a http.FileSystem that serves unpacked binaries, and a map of binary
|
|
// name to SHA1 hash. The returned hash map may be incomplete or contain hashes
|
|
// for missing files.
|
|
func ExtractOrReadBinFS(dest string, siteFS fs.FS) (http.FileSystem, map[string]string, error) {
|
|
if dest == "" {
|
|
// No destination on fs, embedded fs is the only option.
|
|
binFS, err := fs.Sub(siteFS, "bin")
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("cache path is empty and embedded fs does not have /bin: %w", err)
|
|
}
|
|
return http.FS(binFS), nil, nil
|
|
}
|
|
|
|
dest = filepath.Join(dest, "bin")
|
|
mkdest := func() (http.FileSystem, error) {
|
|
err := os.MkdirAll(dest, 0o700)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("mkdir failed: %w", err)
|
|
}
|
|
return http.Dir(dest), nil
|
|
}
|
|
|
|
archive, err := siteFS.Open("bin/coder.tar.zst")
|
|
if err != nil {
|
|
if xerrors.Is(err, fs.ErrNotExist) {
|
|
files, err := fs.ReadDir(siteFS, "bin")
|
|
if err != nil {
|
|
if xerrors.Is(err, fs.ErrNotExist) {
|
|
// Given fs does not have a bin directory, serve from cache
|
|
// directory without extracting anything.
|
|
binFS, err := mkdest()
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("mkdest failed: %w", err)
|
|
}
|
|
return binFS, map[string]string{}, nil
|
|
}
|
|
return nil, nil, xerrors.Errorf("site fs read dir failed: %w", err)
|
|
}
|
|
|
|
if len(filterFiles(files, "GITKEEP")) > 0 {
|
|
// If there are other files than bin/GITKEEP, serve the files.
|
|
binFS, err := fs.Sub(siteFS, "bin")
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("site fs sub dir failed: %w", err)
|
|
}
|
|
return http.FS(binFS), nil, nil
|
|
}
|
|
|
|
// Nothing we can do, serve the cache directory, thus allowing
|
|
// binaries to be placed there.
|
|
binFS, err := mkdest()
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("mkdest failed: %w", err)
|
|
}
|
|
return binFS, map[string]string{}, nil
|
|
}
|
|
return nil, nil, xerrors.Errorf("open coder binary archive failed: %w", err)
|
|
}
|
|
defer archive.Close()
|
|
|
|
binFS, err := mkdest()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
shaFiles, err := parseSHA1(siteFS)
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("parse sha1 file failed: %w", err)
|
|
}
|
|
|
|
ok, err := verifyBinSha1IsCurrent(dest, siteFS, shaFiles)
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("verify coder binaries sha1 failed: %w", err)
|
|
}
|
|
if !ok {
|
|
n, err := extractBin(dest, archive)
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("extract coder binaries failed: %w", err)
|
|
}
|
|
if n == 0 {
|
|
return nil, nil, xerrors.New("no files were extracted from coder binaries archive")
|
|
}
|
|
}
|
|
|
|
return binFS, shaFiles, nil
|
|
}
|
|
|
|
func extractBin(dest string, r io.Reader) (numExtracted int, err error) {
|
|
opts := []zstd.DOption{
|
|
// Concurrency doesn't help us when decoding the tar and
|
|
// can actually slow us down.
|
|
zstd.WithDecoderConcurrency(1),
|
|
// Ignoring checksums can give a slight performance
|
|
// boost but it's probably not worth the reduced safety.
|
|
zstd.IgnoreChecksum(false),
|
|
// Allow the decoder to use more memory giving us a 2-3x
|
|
// performance boost.
|
|
zstd.WithDecoderLowmem(false),
|
|
}
|
|
zr, err := zstd.NewReader(r, opts...)
|
|
if err != nil {
|
|
return 0, xerrors.Errorf("open zstd archive failed: %w", err)
|
|
}
|
|
defer zr.Close()
|
|
|
|
tr := tar.NewReader(zr)
|
|
n := 0
|
|
for {
|
|
h, err := tr.Next()
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
return n, nil
|
|
}
|
|
return n, xerrors.Errorf("read tar archive failed: %w", err)
|
|
}
|
|
if h.Name == "." || strings.Contains(h.Name, "..") {
|
|
continue
|
|
}
|
|
|
|
name := filepath.Join(dest, filepath.Base(h.Name))
|
|
f, err := os.Create(name)
|
|
if err != nil {
|
|
return n, xerrors.Errorf("create file failed: %w", err)
|
|
}
|
|
//#nosec // We created this tar, no risk of decompression bomb.
|
|
_, err = io.Copy(f, tr)
|
|
if err != nil {
|
|
_ = f.Close()
|
|
return n, xerrors.Errorf("write file contents failed: %w", err)
|
|
}
|
|
err = f.Close()
|
|
if err != nil {
|
|
return n, xerrors.Errorf("close file failed: %w", err)
|
|
}
|
|
|
|
n++
|
|
}
|
|
}
|
|
|
|
type binMetadata struct {
|
|
sizeBytes int64 // -1 if not known yet
|
|
// SHA1 was chosen because it's fast to compute and reasonable for
|
|
// determining if a file has changed. The ETag is not used a security
|
|
// measure.
|
|
sha1Hash string // always set if in the cache
|
|
}
|
|
|
|
type binMetadataCache struct {
|
|
binFS http.FileSystem
|
|
originalHashes map[string]string
|
|
|
|
metadata map[string]binMetadata
|
|
mut sync.RWMutex
|
|
sf singleflight.Group
|
|
sem chan struct{}
|
|
}
|
|
|
|
func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache {
|
|
b := &binMetadataCache{
|
|
binFS: binFS,
|
|
originalHashes: make(map[string]string, len(binSha1Hashes)),
|
|
|
|
metadata: make(map[string]binMetadata, len(binSha1Hashes)),
|
|
mut: sync.RWMutex{},
|
|
sf: singleflight.Group{},
|
|
sem: make(chan struct{}, 4),
|
|
}
|
|
|
|
// Previously we copied binSha1Hashes to the cache immediately. Since we now
|
|
// read other information like size from the file, we can't do that. Instead
|
|
// we copy the hashes to a different map that will be used to populate the
|
|
// cache on the first request.
|
|
for k, v := range binSha1Hashes {
|
|
b.originalHashes[k] = v
|
|
}
|
|
|
|
return b
|
|
}
|
|
|
|
func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) {
|
|
b.mut.RLock()
|
|
metadata, ok := b.metadata[name]
|
|
b.mut.RUnlock()
|
|
if ok {
|
|
return metadata, nil
|
|
}
|
|
|
|
// Avoid DOS by using a pool, and only doing work once per file.
|
|
v, err, _ := b.sf.Do(name, func() (any, error) {
|
|
b.sem <- struct{}{}
|
|
defer func() { <-b.sem }()
|
|
|
|
// Reject any invalid or non-basename paths before touching the filesystem.
|
|
if name == "" ||
|
|
name == "." ||
|
|
strings.Contains(name, "/") ||
|
|
strings.Contains(name, "\\") ||
|
|
!fs.ValidPath(name) ||
|
|
path.Base(name) != name {
|
|
return binMetadata{}, os.ErrNotExist
|
|
}
|
|
|
|
f, err := b.binFS.Open(name)
|
|
if err != nil {
|
|
return binMetadata{}, err
|
|
}
|
|
defer f.Close()
|
|
|
|
var metadata binMetadata
|
|
|
|
stat, err := f.Stat()
|
|
if err != nil {
|
|
return binMetadata{}, err
|
|
}
|
|
metadata.sizeBytes = stat.Size()
|
|
|
|
if hash, ok := b.originalHashes[name]; ok {
|
|
metadata.sha1Hash = hash
|
|
} else {
|
|
h := sha1.New() //#nosec // Not used for cryptography.
|
|
_, err := io.Copy(h, f)
|
|
if err != nil {
|
|
return binMetadata{}, err
|
|
}
|
|
metadata.sha1Hash = hex.EncodeToString(h.Sum(nil))
|
|
}
|
|
|
|
b.mut.Lock()
|
|
b.metadata[name] = metadata
|
|
b.mut.Unlock()
|
|
return metadata, nil
|
|
})
|
|
if err != nil {
|
|
return binMetadata{}, err
|
|
}
|
|
|
|
//nolint:forcetypeassert
|
|
return v.(binMetadata), nil
|
|
}
|
|
|
|
func filterFiles(files []fs.DirEntry, names ...string) []fs.DirEntry {
|
|
var filtered []fs.DirEntry
|
|
for _, f := range files {
|
|
if slices.Contains(names, f.Name()) {
|
|
continue
|
|
}
|
|
filtered = append(filtered, f)
|
|
}
|
|
return filtered
|
|
}
|
|
|
|
func verifyBinSha1IsCurrent(dest string, siteFS fs.FS, shaFiles map[string]string) (ok bool, err error) {
|
|
b1, err := fs.ReadFile(siteFS, "bin/coder.sha1")
|
|
if err != nil {
|
|
return false, xerrors.Errorf("read coder sha1 from embedded fs failed: %w", err)
|
|
}
|
|
b2, err := os.ReadFile(filepath.Join(dest, "coder.sha1"))
|
|
if err != nil {
|
|
if xerrors.Is(err, fs.ErrNotExist) {
|
|
return false, nil
|
|
}
|
|
return false, xerrors.Errorf("read coder sha1 failed: %w", err)
|
|
}
|
|
|
|
// Check shasum files for equality for early-exit.
|
|
if !bytes.Equal(b1, b2) {
|
|
return false, nil
|
|
}
|
|
|
|
var eg errgroup.Group
|
|
// Speed up startup by verifying files concurrently. Concurrency
|
|
// is limited to save resources / early-exit. Early-exit speed
|
|
// could be improved by using a context aware io.Reader and
|
|
// passing the context from errgroup.WithContext.
|
|
eg.SetLimit(3)
|
|
|
|
// Verify the hash of each on-disk binary.
|
|
for file, hash1 := range shaFiles {
|
|
eg.Go(func() error {
|
|
hash2, err := sha1HashFile(filepath.Join(dest, file))
|
|
if err != nil {
|
|
if xerrors.Is(err, fs.ErrNotExist) {
|
|
return errHashMismatch
|
|
}
|
|
return xerrors.Errorf("hash file failed: %w", err)
|
|
}
|
|
if !strings.EqualFold(hash1, hash2) {
|
|
return errHashMismatch
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
err = eg.Wait()
|
|
if err != nil {
|
|
if xerrors.Is(err, errHashMismatch) {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// sha1HashFile computes a SHA1 hash of the file, returning the hex
|
|
// representation.
|
|
func sha1HashFile(name string) (string, error) {
|
|
//#nosec // Not used for cryptography.
|
|
hash := sha1.New()
|
|
f, err := os.Open(name)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer f.Close()
|
|
|
|
_, err = io.Copy(hash, f)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
b := make([]byte, hash.Size())
|
|
hash.Sum(b[:0])
|
|
|
|
return hex.EncodeToString(b), nil
|
|
}
|
|
|
|
func parseSHA1(siteFS fs.FS) (map[string]string, error) {
|
|
b, err := fs.ReadFile(siteFS, "bin/coder.sha1")
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("read coder sha1 from embedded fs failed: %w", err)
|
|
}
|
|
|
|
shaFiles := make(map[string]string)
|
|
for _, line := range bytes.Split(bytes.TrimSpace(b), []byte{'\n'}) {
|
|
parts := bytes.Split(line, []byte{' ', '*'})
|
|
if len(parts) != 2 {
|
|
return nil, xerrors.Errorf("malformed sha1 file: %w", err)
|
|
}
|
|
shaFiles[string(parts[1])] = strings.ToLower(string(parts[0]))
|
|
}
|
|
if len(shaFiles) == 0 {
|
|
return nil, xerrors.Errorf("empty sha1 file: %w", err)
|
|
}
|
|
|
|
return shaFiles, nil
|
|
}
|