mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
2b70122e4a
## Summary - avoid duplicating preset headers when cachecompress serves compressed `/bin/*` responses - add a cachecompress regression test for preset `X-Original-Content-Length` and `ETag` headers - strengthen site binary tests to assert those headers stay single-valued ## Problem `site/bin.go` sets `X-Original-Content-Length` and `ETag` on the real response writer before delegating. `cachecompress` then snapshotted those headers and replayed them with `Header().Add(...)`, which duplicated them on compressed responses. For `coder-desktop-macos`, duplicate `X-Original-Content-Length` values can collapse into a comma-separated string and fail `Int64` parsing, causing the file size to show as `Unknown`. ## Testing - `/usr/local/go/bin/go test ./coderd/cachecompress -run 'TestCompressorPresetHeaders|TestCompressorHeadings' -count=1` - `/usr/local/go/bin/go test ./site -run TestServingBin -count=1` - `PATH=/usr/local/go/bin:$PATH make lint/go` ## Notes - Skipped full `make pre-commit` with explicit approval because local environment/tooling blocked it (Node version/path interaction in generated site targets, plus missing local tools before setup).
439 lines
14 KiB
Go
439 lines
14 KiB
Go
// Package cachecompress creates a compressed cache of static files based on an http.FS. It is modified from
|
|
// https://github.com/go-chi/chi Compressor middleware. See the LICENSE file in this directory for copyright
|
|
// information.
|
|
package cachecompress
|
|
|
|
import (
|
|
"compress/flate"
|
|
"compress/gzip"
|
|
"context"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3"
|
|
)
|
|
|
|
// cacheKey identifies one compressed artifact: a URL path rendered with a
// particular content encoding.
type cacheKey struct {
	encoding string
	urlPath  string
}

// filePath returns where the artifact for this key lives underneath cacheDir,
// laid out as <cacheDir>/<encoding>/<encoded URL path>.
func (c cacheKey) filePath(cacheDir string) string {
	// URL paths contain slashes and other bytes the file system would
	// interpret, so the whole path is flattened into a single URL-safe base64
	// file name.
	raw := []byte(c.urlPath)
	name := make([]byte, base64.URLEncoding.EncodedLen(len(raw)))
	base64.URLEncoding.Encode(name, raw)
	return filepath.Join(cacheDir, c.encoding, string(name))
}
|
|
|
|
func getCacheKey(encoding string, r *http.Request) cacheKey {
|
|
return cacheKey{
|
|
encoding: encoding,
|
|
urlPath: r.URL.Path,
|
|
}
|
|
}
|
|
|
|
// ref is a handle on one cache entry whose compressed file may still be in the
// process of being written.
type ref struct {
	// key identifies the entry and determines its cache file path.
	key cacheKey
	// done is closed by compress once the cache file has been written
	// successfully.
	done chan struct{}
	// err is closed by compress when compression failed. In this file nothing
	// is ever sent on it; only the close is used as a signal.
	err chan error
}
|
|
|
|
// Compressor represents a set of encoding configurations. It serves files from
// orig and keeps compressed copies of them on disk under cacheDir.
type Compressor struct {
	logger slog.Logger
	// The mapping of encoder names to encoder functions.
	encoders map[string]EncoderFunc
	// The mapping of pooled encoders to pools.
	pooledEncoders map[string]*sync.Pool
	// The list of encoders in order of decreasing precedence.
	encodingPrecedence []string
	level              int // The compression level.
	// cacheDir is the directory under which compressed files are written.
	cacheDir string
	// orig is the file system whose files are served and compressed.
	orig http.FileSystem

	// mu guards cache.
	mu sync.Mutex
	// cache tracks, per key, the compression that is in flight or finished.
	cache map[cacheKey]ref
}
|
|
|
|
// NewCompressor creates a new Compressor that will handle encoding responses.
|
|
//
|
|
// The level should be one of the ones defined in the flate package.
|
|
// The types are the content types that are allowed to be compressed.
|
|
func NewCompressor(logger slog.Logger, level int, cacheDir string, orig http.FileSystem) *Compressor {
|
|
c := &Compressor{
|
|
logger: logger.Named("cachecompress"),
|
|
level: level,
|
|
encoders: make(map[string]EncoderFunc),
|
|
pooledEncoders: make(map[string]*sync.Pool),
|
|
cacheDir: cacheDir,
|
|
orig: orig,
|
|
cache: make(map[cacheKey]ref),
|
|
}
|
|
|
|
// Set the default encoders. The precedence order uses the reverse
|
|
// ordering that the encoders were added. This means adding new encoders
|
|
// will move them to the front of the order.
|
|
//
|
|
// TODO:
|
|
// lzma: Opera.
|
|
// sdch: Chrome, Android. Gzip output + dictionary header.
|
|
// br: Brotli, see https://github.com/go-chi/chi/pull/326
|
|
|
|
// HTTP 1.1 "deflate" (RFC 2616) stands for DEFLATE data (RFC 1951)
|
|
// wrapped with zlib (RFC 1950). The zlib wrapper uses Adler-32
|
|
// checksum compared to CRC-32 used in "gzip" and thus is faster.
|
|
//
|
|
// But.. some old browsers (MSIE, Safari 5.1) incorrectly expect
|
|
// raw DEFLATE data only, without the mentioned zlib wrapper.
|
|
// Because of this major confusion, most modern browsers try it
|
|
// both ways, first looking for zlib headers.
|
|
// Quote by Mark Adler: http://stackoverflow.com/a/9186091/385548
|
|
//
|
|
// The list of browsers having problems is quite big, see:
|
|
// http://zoompf.com/blog/2012/02/lose-the-wait-http-compression
|
|
// https://web.archive.org/web/20120321182910/http://www.vervestudios.co/projects/compression-tests/results
|
|
//
|
|
// That's why we prefer gzip over deflate. It's just more reliable
|
|
// and not significantly slower than deflate.
|
|
c.SetEncoder("deflate", encoderDeflate)
|
|
|
|
// TODO: Exception for old MSIE browsers that can't handle non-HTML?
|
|
// https://zoompf.com/blog/2012/02/lose-the-wait-http-compression
|
|
c.SetEncoder("gzip", encoderGzip)
|
|
|
|
// NOTE: Not implemented, intentionally:
|
|
// case "compress": // LZW. Deprecated.
|
|
// case "bzip2": // Too slow on-the-fly.
|
|
// case "zopfli": // Too slow on-the-fly.
|
|
// case "xz": // Too slow on-the-fly.
|
|
return c
|
|
}
|
|
|
|
// SetEncoder can be used to set the implementation of a compression algorithm.
|
|
//
|
|
// The encoding should be a standardized identifier. See:
|
|
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
|
//
|
|
// For example, add the Brotli algorithm:
|
|
//
|
|
// import brotli_enc "gopkg.in/kothar/brotli-go.v0/enc"
|
|
//
|
|
// compressor := middleware.NewCompressor(5, "text/html")
|
|
// compressor.SetEncoder("br", func(w io.Writer, level int) io.Writer {
|
|
// params := brotli_enc.NewBrotliParams()
|
|
// params.SetQuality(level)
|
|
// return brotli_enc.NewBrotliWriter(params, w)
|
|
// })
|
|
func (c *Compressor) SetEncoder(encoding string, fn EncoderFunc) {
|
|
encoding = strings.ToLower(encoding)
|
|
if encoding == "" {
|
|
panic("the encoding can not be empty")
|
|
}
|
|
if fn == nil {
|
|
panic("attempted to set a nil encoder function")
|
|
}
|
|
|
|
// If we are adding a new encoder that is already registered, we have to
|
|
// clear that one out first.
|
|
delete(c.pooledEncoders, encoding)
|
|
delete(c.encoders, encoding)
|
|
|
|
// If the encoder supports Resetting (IoReseterWriter), then it can be pooled.
|
|
encoder := fn(io.Discard, c.level)
|
|
if _, ok := encoder.(ioResetterWriter); ok {
|
|
pool := &sync.Pool{
|
|
New: func() interface{} {
|
|
return fn(io.Discard, c.level)
|
|
},
|
|
}
|
|
c.pooledEncoders[encoding] = pool
|
|
}
|
|
// If the encoder is not in the pooledEncoders, add it to the normal encoders.
|
|
if _, ok := c.pooledEncoders[encoding]; !ok {
|
|
c.encoders[encoding] = fn
|
|
}
|
|
|
|
for i, v := range c.encodingPrecedence {
|
|
if v == encoding {
|
|
c.encodingPrecedence = append(c.encodingPrecedence[:i], c.encodingPrecedence[i+1:]...)
|
|
}
|
|
}
|
|
|
|
c.encodingPrecedence = append([]string{encoding}, c.encodingPrecedence...)
|
|
}
|
|
|
|
// ServeHTTP returns the response from the orig file system, compressed if possible.
|
|
func (c *Compressor) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
encoding := c.selectEncoder(r.Header)
|
|
|
|
// we can only serve a cached response if all the following:
|
|
// 1. they requested an encoding we support
|
|
// 2. they are requesting the whole file, not a range
|
|
// 3. the method is GET
|
|
if encoding == "" || r.Header.Get("Range") != "" || r.Method != "GET" {
|
|
http.FileServer(c.orig).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
// Whether we should serve a cached response also depends in a fairly complex way on the path and request
|
|
// headers. In particular, we don't need a cached response for non-existing files/directories, and should not serve
|
|
// a cached response if the correct Etag for the file is provided. This logic is all handled by the http.FileServer,
|
|
// and we don't want to reimplement it here. So, what we'll do is send a HEAD request to the http.FileServer to see
|
|
// what it would do.
|
|
headReq := r.Clone(r.Context())
|
|
headReq.Method = http.MethodHead
|
|
headRW := &compressResponseWriter{
|
|
w: io.Discard,
|
|
headers: make(http.Header),
|
|
}
|
|
// deep-copy the headers already set on the response. This includes things like ETags.
|
|
for key, values := range w.Header() {
|
|
for _, value := range values {
|
|
headRW.headers.Add(key, value)
|
|
}
|
|
}
|
|
http.FileServer(c.orig).ServeHTTP(headRW, headReq)
|
|
if headRW.code != http.StatusOK {
|
|
// again, fall back to the file server. This is often a 404 Not Found, or a 304 Not Modified if they provided
|
|
// the correct ETag.
|
|
http.FileServer(c.orig).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
cref := c.getRef(encoding, r)
|
|
c.serveRef(w, r, headRW.headers, cref)
|
|
}
|
|
|
|
func (c *Compressor) serveRef(w http.ResponseWriter, r *http.Request, headers http.Header, cref ref) {
|
|
select {
|
|
case <-r.Context().Done():
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
|
return
|
|
case <-cref.done:
|
|
cachePath := cref.key.filePath(c.cacheDir)
|
|
cacheFile, err := os.Open(cachePath)
|
|
if err != nil {
|
|
c.logger.Error(context.Background(), "failed to open compressed cache file",
|
|
slog.F("cache_path", cachePath), slog.F("url_path", cref.key.urlPath), slog.Error(err))
|
|
// fall back to uncompressed
|
|
http.FileServer(c.orig).ServeHTTP(w, r)
|
|
}
|
|
defer cacheFile.Close()
|
|
|
|
// we need to remove or modify the Content-Length, if any, set by the FileServer because it will be for
|
|
// uncompressed data and wrong.
|
|
info, err := cacheFile.Stat()
|
|
if err != nil {
|
|
c.logger.Error(context.Background(), "failed to stat compressed cache file",
|
|
slog.F("cache_path", cachePath), slog.F("url_path", cref.key.urlPath), slog.Error(err))
|
|
headers.Del("Content-Length")
|
|
} else {
|
|
headers.Set("Content-Length", fmt.Sprintf("%d", info.Size()))
|
|
}
|
|
|
|
for key, values := range headers {
|
|
w.Header()[key] = values
|
|
}
|
|
w.Header().Set("Content-Encoding", cref.key.encoding)
|
|
w.Header().Add("Vary", "Accept-Encoding")
|
|
w.WriteHeader(http.StatusOK)
|
|
_, err = io.Copy(w, cacheFile)
|
|
if err != nil {
|
|
// most commonly, the writer will hang up before we are done.
|
|
c.logger.Debug(context.Background(), "failed to write compressed cache file", slog.Error(err))
|
|
}
|
|
return
|
|
case <-cref.err:
|
|
// fall back to uncompressed
|
|
http.FileServer(c.orig).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
}
|
|
|
|
func (c *Compressor) getRef(encoding string, r *http.Request) ref {
|
|
ck := getCacheKey(encoding, r)
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
cref, ok := c.cache[ck]
|
|
if ok {
|
|
return cref
|
|
}
|
|
// we are the first to encode
|
|
cref = ref{
|
|
key: ck,
|
|
|
|
done: make(chan struct{}),
|
|
err: make(chan error),
|
|
}
|
|
c.cache[ck] = cref
|
|
go c.compress(context.Background(), encoding, cref, r)
|
|
return cref
|
|
}
|
|
|
|
func (c *Compressor) compress(ctx context.Context, encoding string, cref ref, r *http.Request) {
|
|
cachePath := cref.key.filePath(c.cacheDir)
|
|
var err error
|
|
// we want to handle closing either cref.done or cref.err in a defer at the bottom of the stack so that the encoder
|
|
// and cache file are both closed first (higher in the defer stack). This prevents data races where waiting HTTP
|
|
// handlers start reading the file before all the data has been flushed.
|
|
defer func() {
|
|
if err != nil {
|
|
if rErr := os.Remove(cachePath); rErr != nil {
|
|
// nolint: gocritic // best effort, just debug log any errors
|
|
c.logger.Debug(ctx, "failed to remove cache file",
|
|
slog.F("main_err", err), slog.F("remove_err", rErr), slog.F("cache_path", cachePath))
|
|
}
|
|
c.mu.Lock()
|
|
delete(c.cache, cref.key)
|
|
c.mu.Unlock()
|
|
close(cref.err)
|
|
return
|
|
}
|
|
close(cref.done)
|
|
}()
|
|
|
|
cacheDir := filepath.Dir(cachePath)
|
|
err = os.MkdirAll(cacheDir, 0o700)
|
|
if err != nil {
|
|
c.logger.Error(ctx, "failed to create cache directory", slog.F("cache_dir", cacheDir))
|
|
return
|
|
}
|
|
|
|
// We will truncate and overwrite any existing files. This is important in the case that we get restarted
|
|
// with the same cache dir, possibly with different source files.
|
|
cacheFile, err := os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
|
|
if err != nil {
|
|
c.logger.Error(ctx, "failed to open compression cache file",
|
|
slog.F("path", cachePath), slog.Error(err))
|
|
return
|
|
}
|
|
defer cacheFile.Close()
|
|
encoder, cleanup := c.getEncoder(encoding, cacheFile)
|
|
if encoder == nil {
|
|
// can only hit this if there is a programming error
|
|
c.logger.Critical(ctx, "got nil encoder", slog.F("encoding", encoding))
|
|
err = xerrors.New("nil encoder")
|
|
return
|
|
}
|
|
defer cleanup()
|
|
defer encoder.Close() // ensures we flush, needs to be called before cleanup(), so we defer after it.
|
|
|
|
cw := &compressResponseWriter{
|
|
w: encoder,
|
|
headers: make(http.Header), // ignored
|
|
}
|
|
http.FileServer(c.orig).ServeHTTP(cw, r)
|
|
if cw.code != http.StatusOK {
|
|
// log at debug because this is likely just a 404
|
|
c.logger.Debug(ctx, "file server failed to serve",
|
|
slog.F("encoding", encoding), slog.F("url_path", cref.key.urlPath), slog.F("http_code", cw.code))
|
|
// mark the error so that we clean up correctly
|
|
err = xerrors.New("file server failed to serve")
|
|
return
|
|
}
|
|
// success!
|
|
}
|
|
|
|
// selectEncoder returns the name of the encoder
|
|
func (c *Compressor) selectEncoder(h http.Header) string {
|
|
header := h.Get("Accept-Encoding")
|
|
|
|
// Parse the names of all accepted algorithms from the header.
|
|
accepted := strings.Split(strings.ToLower(header), ",")
|
|
|
|
// Find supported encoder by accepted list by precedence
|
|
for _, name := range c.encodingPrecedence {
|
|
if matchAcceptEncoding(accepted, name) {
|
|
return name
|
|
}
|
|
}
|
|
|
|
// No encoder found to match the accepted encoding
|
|
return ""
|
|
}
|
|
|
|
// getEncoder returns a writer that encodes and writes to the provided writer, and a cleanup func.
|
|
func (c *Compressor) getEncoder(name string, w io.Writer) (io.WriteCloser, func()) {
|
|
if pool, ok := c.pooledEncoders[name]; ok {
|
|
encoder, typeOK := pool.Get().(ioResetterWriter)
|
|
if !typeOK {
|
|
return nil, nil
|
|
}
|
|
cleanup := func() {
|
|
pool.Put(encoder)
|
|
}
|
|
encoder.Reset(w)
|
|
return encoder, cleanup
|
|
}
|
|
if fn, ok := c.encoders[name]; ok {
|
|
return fn(w, c.level), func() {}
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
// matchAcceptEncoding reports whether encoding is acceptable according to
// accepted, the comma-separated entries of an Accept-Encoding header.
//
// An entry matches when its coding name (the part before any ";") contains
// encoding, so aliases such as "x-gzip" keep matching "gzip". An entry whose
// quality value is zero ("gzip;q=0") is an explicit refusal and does not match.
func matchAcceptEncoding(accepted []string, encoding string) bool {
	for _, v := range accepted {
		parts := strings.SplitN(v, ";", 2)
		if !strings.Contains(parts[0], encoding) {
			continue
		}
		if len(parts) == 2 && hasZeroQValue(parts[1]) {
			// The client listed the coding only to reject it.
			continue
		}
		return true
	}
	return false
}

// hasZeroQValue reports whether params, the ";"-separated parameters of an
// Accept-Encoding entry, carries a "q" weight equal to zero.
func hasZeroQValue(params string) bool {
	for _, p := range strings.Split(params, ";") {
		p = strings.ToLower(strings.TrimSpace(p))
		if !strings.HasPrefix(p, "q=") {
			continue
		}
		weight := strings.TrimSpace(strings.TrimPrefix(p, "q="))
		if !strings.HasPrefix(weight, "0") {
			continue
		}
		// "0", "0.", "0.0", "0.00", ... reduce to "" or "." once the zeros
		// are stripped; anything else has a non-zero digit.
		if rest := strings.Trim(weight, "0"); rest == "" || rest == "." {
			return true
		}
	}
	return false
}
|
|
|
|
// An EncoderFunc is a function that wraps the provided io.Writer with a
// streaming compression algorithm and returns it. The level argument is the
// compression level to use.
//
// In case of failure, the function should return nil.
type EncoderFunc func(w io.Writer, level int) io.WriteCloser
|
|
|
|
// ioResetterWriter is the interface for encoders that can be pointed at a new
// destination io.Writer via Reset. Encoders that satisfy it are kept in a
// sync.Pool and reused instead of being created for every use.
type ioResetterWriter interface {
	io.WriteCloser
	Reset(w io.Writer)
}
|
|
|
|
// encoderGzip is the EncoderFunc for "gzip". It returns a gzip writer on top of
// w at the given level, or nil when the level is rejected by the gzip package.
func encoderGzip(w io.Writer, level int) io.WriteCloser {
	if zw, err := gzip.NewWriterLevel(w, level); err == nil {
		return zw
	}
	// Return an untyped nil so callers can compare the result against nil.
	return nil
}
|
|
|
|
// encoderDeflate is the EncoderFunc for "deflate". It returns a flate writer on
// top of w at the given level, or nil when the level is rejected by the flate
// package.
func encoderDeflate(w io.Writer, level int) io.WriteCloser {
	if fw, err := flate.NewWriter(w, level); err == nil {
		return fw
	}
	// Return an untyped nil so callers can compare the result against nil.
	return nil
}
|
|
|
|
// compressResponseWriter is a minimal http.ResponseWriter stand-in: it keeps
// the headers and status code in memory and forwards body bytes to w.
type compressResponseWriter struct {
	w       io.Writer
	headers http.Header
	code    int
}

// Header returns the in-memory header map.
func (cw *compressResponseWriter) Header() http.Header {
	return cw.headers
}

// WriteHeader records the status code; nothing is sent anywhere.
func (cw *compressResponseWriter) WriteHeader(code int) {
	cw.code = code
}

// Write forwards p to the underlying writer. A write that arrives before any
// status code was recorded implies 200 OK.
func (cw *compressResponseWriter) Write(p []byte) (int, error) {
	if cw.code == 0 {
		cw.WriteHeader(http.StatusOK)
	}
	return cw.w.Write(p)
}
|