diff --git a/archive/archive.go b/archive/archive.go index db78b8c700..54b6f31b24 100644 --- a/archive/archive.go +++ b/archive/archive.go @@ -6,43 +6,153 @@ import ( "bytes" "errors" "io" - "log" + "math" "strings" + + "golang.org/x/xerrors" ) +// Ref: +// https://github.com/golang/go/blob/go1.24.0/src/archive/tar/format.go +// https://github.com/golang/go/blob/go1.24.0/src/archive/tar/writer.go +const ( + tarBlockSize = 512 + tarEndBlockBytes = 2 * tarBlockSize +) + +// ErrArchiveTooLarge reports that archive expansion would exceed the +// configured limit. +var ErrArchiveTooLarge = xerrors.New("archive exceeds maximum size") + +// ErrInvalidZipContent reports that a ZIP entry is malformed or its +// contents fail validation during conversion. +var ErrInvalidZipContent = xerrors.New("invalid zip content") + // CreateTarFromZip converts the given zipReader to a tar archive. +// maxSize limits the total tar output, including tar metadata. func CreateTarFromZip(zipReader *zip.Reader, maxSize int64) ([]byte, error) { + err := validateZipArchiveSize(zipReader, maxSize) + if err != nil { + return nil, err + } + var tarBuffer bytes.Buffer - err := writeTarArchive(&tarBuffer, zipReader, maxSize) + err = writeTarArchive(&tarBuffer, zipReader, maxSize) if err != nil { return nil, err } return tarBuffer.Bytes(), nil } -func writeTarArchive(w io.Writer, zipReader *zip.Reader, maxSize int64) error { - tarWriter := tar.NewWriter(w) - defer tarWriter.Close() +// validateZipArchiveSize performs a metadata-based preflight size +// check before conversion. The actual tar output limit will still be +// enforced while streaming. +func validateZipArchiveSize(zipReader *zip.Reader, maxSize int64) error { + if maxSize < 0 { + return ErrArchiveTooLarge + } + + maxBytes := uint64(maxSize) + totalBytes := uint64(tarEndBlockBytes) + if totalBytes > maxBytes { + return ErrArchiveTooLarge + } for _, file := range zipReader.File { - err := processFileInZipArchive(file, tarWriter, maxSize) + entrySize, err := projectedTarEntrySize(file) + if err != nil { + return err + } + if entrySize > maxBytes-totalBytes { + return ErrArchiveTooLarge + } + totalBytes += entrySize + } + + return nil +} + +func projectedTarEntrySize(file *zip.File) (uint64, error) { + // Each tar entry contributes one header block plus its data + // rounded up to the next tar block boundary. + size := file.UncompressedSize64 + if remainder := size % tarBlockSize; remainder != 0 { + padding := tarBlockSize - remainder + if size > math.MaxUint64-padding { + return 0, ErrArchiveTooLarge + } + size += padding + } + + if size > math.MaxUint64-tarBlockSize { + return 0, ErrArchiveTooLarge + } + + return tarBlockSize + size, nil +} + +type limitedWriter struct { + w io.Writer + remaining int64 +} + +func (w *limitedWriter) Write(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + if w.remaining <= 0 { + return 0, ErrArchiveTooLarge + } + + origLen := len(p) + if int64(origLen) > w.remaining { + p = p[:int(w.remaining)] + } + + n, err := w.w.Write(p) + // io.Writer may report both written bytes and an error, so + // account for any accepted bytes before returning the error. + w.remaining -= int64(n) + if err != nil { + return n, err + } + if n < origLen { + return n, ErrArchiveTooLarge + } + return n, nil +} + +func writeTarArchive(w io.Writer, zipReader *zip.Reader, maxSize int64) error { + tarWriter := tar.NewWriter(&limitedWriter{ + w: w, + remaining: maxSize, + }) + + for _, file := range zipReader.File { + err := processFileInZipArchive(file, tarWriter) if err != nil { return err } } - return nil + + return tarWriter.Close() } -func processFileInZipArchive(file *zip.File, tarWriter *tar.Writer, maxSize int64) error { +func processFileInZipArchive(file *zip.File, tarWriter *tar.Writer) error { fileReader, err := file.Open() if err != nil { return err } defer fileReader.Close() + size := file.FileInfo().Size() + if size < 0 { + return ErrArchiveTooLarge + } + err = tarWriter.WriteHeader(&tar.Header{ Name: file.Name, - Size: file.FileInfo().Size(), + Size: size, Mode: int64(file.Mode()), ModTime: file.Modified, // Note: Zip archives do not store ownership information. @@ -53,12 +163,17 @@ func processFileInZipArchive(file *zip.File, tarWriter *tar.Writer, maxSize int6 return err } - n, err := io.CopyN(tarWriter, fileReader, maxSize) - log.Println(file.Name, n, err) - if errors.Is(err, io.EOF) { - err = nil + _, err = io.CopyN(tarWriter, fileReader, size) + switch { + case errors.Is(err, io.EOF), errors.Is(err, io.ErrUnexpectedEOF): + return ErrInvalidZipContent + case errors.Is(err, zip.ErrChecksum), errors.Is(err, zip.ErrFormat): + return ErrInvalidZipContent + case err != nil: + return err + default: + return nil } - return err } // CreateZipFromTar converts the given tarReader to a zip archive. diff --git a/archive/archive_test.go b/archive/archive_test.go index c10d103622..79f3d894e3 100644 --- a/archive/archive_test.go +++ b/archive/archive_test.go @@ -4,6 +4,7 @@ import ( "archive/tar" "archive/zip" "bytes" + "encoding/binary" "io/fs" "os" "os/exec" @@ -35,14 +36,15 @@ func TestCreateTarFromZip(t *testing.T) { zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes))) require.NoError(t, err, "failed to parse sample zip file") - tarBytes, err := archive.CreateTarFromZip(zr, int64(len(zipBytes))) + wantTar := archivetest.TestTarFileBytes() + gotTar, err := archive.CreateTarFromZip(zr, int64(len(wantTar))) require.NoError(t, err, "failed to convert zip to tar") - archivetest.AssertSampleTarFile(t, tarBytes) + archivetest.AssertSampleTarFile(t, gotTar) tempDir := t.TempDir() tempFilePath := filepath.Join(tempDir, "test.tar") - err = os.WriteFile(tempFilePath, tarBytes, 0o600) + err = os.WriteFile(tempFilePath, gotTar, 0o600) require.NoError(t, err, "failed to write converted tar file") cmd := exec.CommandContext(ctx, "tar", "--extract", "--verbose", "--file", tempFilePath, "--directory", tempDir) @@ -50,6 +52,97 @@ func TestCreateTarFromZip(t *testing.T) { assertExtractedFiles(t, tempDir, true) } +func buildTestZip(t *testing.T, files map[string]string) []byte { + t.Helper() + + var zipBytes bytes.Buffer + zw := zip.NewWriter(&zipBytes) + for name, contents := range files { + w, err := zw.Create(name) + require.NoError(t, err) + + _, err = w.Write([]byte(contents)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + + return zipBytes.Bytes() +} + +func TestCreateTarFromZip_RejectsOversizedAggregateExpansion(t *testing.T) { + t.Parallel() + + zipBytes := buildTestZip(t, map[string]string{ + "a.txt": strings.Repeat("a", 600), + "b.txt": strings.Repeat("b", 600), + "c.txt": strings.Repeat("c", 600), + }) + + zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes))) + require.NoError(t, err) + + tarBytes, err := archive.CreateTarFromZip(zr, 1024) + require.Error(t, err) + require.Nil(t, tarBytes) +} + +func TestCreateTarFromZip_RejectsInvalidZipMetadata(t *testing.T) { + t.Parallel() + + // Ref: https://github.com/golang/go/blob/go1.24.0/src/archive/zip/struct.go + corruptZipUncompressedSize := func(t *testing.T, zipBytes []byte, size uint32) []byte { + t.Helper() + + const ( + directoryHeaderSignature = "PK\x01\x02" + uncompressedSizeOffset = 24 + ) + hdrOffset := bytes.Index(zipBytes, []byte(directoryHeaderSignature)) + require.NotEqual(t, -1, hdrOffset, "missing ZIP central directory header") + corrupted := bytes.Clone(zipBytes) + sizeBytes := corrupted[hdrOffset+uncompressedSizeOffset : hdrOffset+uncompressedSizeOffset+4] + binary.LittleEndian.PutUint32(sizeBytes, size) + + return corrupted + } + + zipBytes := buildTestZip(t, map[string]string{ + "hello.txt": "hello", + }) + zipBytes = corruptZipUncompressedSize(t, zipBytes, 6) + + zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes))) + require.NoError(t, err) + + // Keep the size limit large so this test exercises the invalid + // ZIP metadata path rather than the tar output limit. + maxSize := int64(4096) + tarBytes, err := archive.CreateTarFromZip(zr, maxSize) + require.ErrorIs(t, err, archive.ErrInvalidZipContent) + require.Nil(t, tarBytes) +} + +func TestCreateTarFromZip_RejectsOversizedTarOverhead(t *testing.T) { + t.Parallel() + + // Empty files keep the ZIP payload tiny while still forcing tar + // headers and end-of-archive blocks to consume output budget. + zipBytes := buildTestZip(t, map[string]string{ + "empty-a.txt": "", + "empty-b.txt": "", + }) + + zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes))) + require.NoError(t, err) + + // Two empty tar entries still need 2 header blocks plus the 2 + // end-of-archive blocks, so the output is 2048 bytes and must + // exceed this limit. + tarBytes, err := archive.CreateTarFromZip(zr, 2047) + require.Error(t, err) + require.Nil(t, tarBytes) +} + func TestCreateZipFromTar(t *testing.T) { t.Parallel() if runtime.GOOS != "linux" { diff --git a/coderd/files.go b/coderd/files.go index b77bd81375..07040b20fe 100644 --- a/coderd/files.go +++ b/coderd/files.go @@ -80,11 +80,24 @@ func (api *API) postFile(rw http.ResponseWriter, r *http.Request) { data, err = archive.CreateTarFromZip(zipReader, HTTPFileMaxBytes) if err != nil { - httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ - Message: "Internal error processing .zip archive.", - Detail: err.Error(), - }) - return + switch { + case errors.Is(err, archive.ErrArchiveTooLarge): + httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{ + Message: "Expanded .zip archive exceeds maximum size.", + }) + return + case errors.Is(err, archive.ErrInvalidZipContent): + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Invalid .zip archive contents.", + }) + return + default: + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Internal error processing .zip archive.", + Detail: err.Error(), + }) + return + } } contentType = tarMimeType } diff --git a/coderd/files_test.go b/coderd/files_test.go index e1a87aad29..1f6a7e94f8 100644 --- a/coderd/files_test.go +++ b/coderd/files_test.go @@ -2,8 +2,11 @@ package coderd_test import ( "archive/tar" + "archive/zip" "bytes" "context" + "encoding/binary" + "io" "net/http" "sync" "testing" @@ -14,6 +17,7 @@ import ( "github.com/coder/coder/v2/archive" "github.com/coder/coder/v2/archive/archivetest" + "github.com/coder/coder/v2/coderd" "github.com/coder/coder/v2/coderd/coderdtest" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/testutil" @@ -22,6 +26,19 @@ import ( func TestPostFiles(t *testing.T) { t.Parallel() + buildZipWithFile := func(t *testing.T, name string, writeContents func(w io.Writer) error) []byte { + t.Helper() + + var zipBytes bytes.Buffer + zw := zip.NewWriter(&zipBytes) + w, err := zw.Create(name) + require.NoError(t, err) + require.NoError(t, writeContents(w)) + require.NoError(t, zw.Close()) + + return zipBytes.Bytes() + } + // Single instance shared across all sub-tests. Each sub-test // creates independent resources with unique IDs so parallel // execution is safe. @@ -65,6 +82,39 @@ func TestPostFiles(t *testing.T) { _, err = client.Upload(ctx, codersdk.ContentTypeTar, bytes.NewReader(data)) require.NoError(t, err) }) + t.Run("InvalidZipMetadata", func(t *testing.T) { + t.Parallel() + + corruptZipUncompressedSize := func(t *testing.T, zipBytes []byte, size uint32) []byte { + t.Helper() + + const ( + directoryHeaderSignature = "PK\x01\x02" + uncompressedSizeOffset = 24 + ) + hdrOffset := bytes.Index(zipBytes, []byte(directoryHeaderSignature)) + require.NotEqual(t, -1, hdrOffset, "missing ZIP central directory header") + corrupted := bytes.Clone(zipBytes) + sizeBytes := corrupted[hdrOffset+uncompressedSizeOffset : hdrOffset+uncompressedSizeOffset+4] + binary.LittleEndian.PutUint32(sizeBytes, size) + + return corrupted + } + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + zipBytes := buildZipWithFile(t, "hello.txt", func(w io.Writer) error { + _, err := w.Write([]byte("hello")) + return err + }) + zipBytes = corruptZipUncompressedSize(t, zipBytes, 6) + + _, err := client.Upload(ctx, codersdk.ContentTypeZip, bytes.NewReader(zipBytes)) + var apiErr *codersdk.Error + require.ErrorAs(t, err, &apiErr) + require.Equal(t, http.StatusBadRequest, apiErr.StatusCode()) + }) t.Run("InsertConcurrent", func(t *testing.T) { t.Parallel() ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) @@ -86,6 +136,43 @@ func TestPostFiles(t *testing.T) { wg.Done() end.Wait() }) + //nolint:paralleltest // This subtest is intentionally serial to + // avoid extra memory pressure. + t.Run("OversizedZipExpansion", func(t *testing.T) { + buildZipWithSizedFile := func(t *testing.T, name string, size int64) []byte { + return buildZipWithFile(t, name, func(w io.Writer) error { + chunk := bytes.Repeat([]byte("a"), 32*1024) + for written := int64(0); written < size; { + n := len(chunk) + if remaining := size - written; int64(n) > remaining { + n = int(remaining) + } + + _, err := w.Write(chunk[:n]) + if err != nil { + return err + } + written += int64(n) + } + + return nil + }) + } + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Leave only enough room for the tar trailer. The single + // entry header then pushes the converted tar output over the + // file size limit. + size := int64(coderd.HTTPFileMaxBytes - 1024) + zipBytes := buildZipWithSizedFile(t, "oversized.txt", size) + + _, err := client.Upload(ctx, codersdk.ContentTypeZip, bytes.NewReader(zipBytes)) + var apiErr *codersdk.Error + require.ErrorAs(t, err, &apiErr) + require.Equal(t, http.StatusRequestEntityTooLarge, apiErr.StatusCode()) + }) } func TestDownload(t *testing.T) {