fix(site/src/pages/AgentsPage): sanitize chat upload filenames client-side (#25421)

Filenames from the OS (e.g. `Screen Shot 2025-01-01 at 10.00.00 AM.png`
or `My Report (final).pdf`) flow unchanged through the chat-attach hooks
into chip labels, the persisted-attachment localStorage records, the
upload `Content-Disposition` header, and downstream LLM prompts.
Characters such as parentheses, brackets, quotes, shell or URL or path
metacharacters, whitespace, and control codes are all valid in HTTP
transport today but tend to break things further down the line (LLM tool
calls that quote the name, audit logs, any future S3/path interpolation,
shell-quoted tooling).

Sanitize at the boundary in `useFileAttachments.handleAttach` and
`useChatDraftAttachments.handleAttach` by mapping each incoming `File`
through a new `renameChatFileForUpload` helper. The helper replaces
`()[]{}<>'\"\`;,:*?|&#$\\/`, whitespace, and control characters with
`_`, collapses adjacent underscores, trims leading or trailing `_`, `.`,
or whitespace, and falls back to `"file"` if the result is empty. ASCII
alphanumerics, `.`, `-`, `_`, and all other Unicode letters and symbols
(CJK, emoji, accented Latin) are preserved so localized names remain
readable. Already-safe names return the same `File` reference; the chat
UI keys preview-URL, upload-state, and text-content Maps on the `File`
object, so identity must be stable.

The server's existing `chatfiles.NormalizeStoredFileName` (control-char
strip plus 255-byte truncate) is untouched. This is a client-only
hardening pass.
This commit is contained in:
Dean Sheather
2026-05-18 17:07:31 +10:00
committed by GitHub
parent e75bd3aca4
commit 5cc655806f
4 changed files with 131 additions and 5 deletions
@@ -2,7 +2,10 @@ import { useEffect, useRef, useState } from "react";
import { API } from "#/api/api";
import { MaxChatFileSizeBytes } from "#/api/typesGenerated";
import type { UploadState } from "../components/AgentChatInput";
import { getChatFileURL } from "../utils/chatAttachments";
import {
getChatFileURL,
renameChatFileForUpload,
} from "../utils/chatAttachments";
import {
clearChatDraftAttachmentRecords,
fileToDataURL,
@@ -715,7 +718,12 @@ export function useChatDraftAttachments(
beginUpload(entry);
};
const handleAttach = (files: File[]) => {
const handleAttach = (incomingFiles: File[]) => {
// Sanitize filenames at the boundary so chip labels, the
// chat-draft localStorage record, the upload header, and any
// downstream LLM prompt all see safe names. Already-safe
// names return the same File by reference.
const files = incomingFiles.map(renameChatFileForUpload);
const scopeKey = getDraftScopeKey(organizationId, chatId);
// Snapshot provider + budget so a mid-resize switch
// can't relabel the error with the new provider.
@@ -9,7 +9,10 @@ import {
import { API } from "#/api/api";
import { MaxChatFileSizeBytes } from "#/api/typesGenerated";
import type { UploadState } from "../components/AgentChatInput";
import { getChatFileURL } from "../utils/chatAttachments";
import {
getChatFileURL,
renameChatFileForUpload,
} from "../utils/chatAttachments";
import {
formatAgentAttachmentTooLargeError,
formatAgentAttachmentUploadError,
@@ -362,7 +365,14 @@ export function useFileAttachments(
}
};
const handleAttach = (files: File[]) => {
const handleAttach = (incomingFiles: File[]) => {
// Sanitize filenames at the boundary so chip labels, the
// persisted-attachment localStorage record, the upload
// header, and any downstream LLM prompt all see safe names.
// Already-safe names return the same File by reference; the
// File identity is used as a Map key below (previewUrls,
// uploadStates, textContents).
const files = incomingFiles.map(renameChatFileForUpload);
// Originals enter state with a "processing" status so the
// send gate blocks dispatch until processResizes finishes.
// Snapshot provider + budget so a mid-resize switch can't
@@ -1,5 +1,9 @@
import { describe, expect, it } from "vitest";
import { isChatAttachmentFile } from "./chatAttachments";
import {
isChatAttachmentFile,
renameChatFileForUpload,
sanitizeChatFileName,
} from "./chatAttachments";
describe("isChatAttachmentFile", () => {
it("accepts allowlisted MIME types", () => {
@@ -30,3 +34,60 @@ describe("isChatAttachmentFile", () => {
expect(isChatAttachmentFile(file)).toBe(false);
});
});
describe("sanitizeChatFileName", () => {
it.each([
// Already safe.
["clean.pdf", "clean.pdf"],
// Spaces, parens collapsed into a single underscore each.
["My Report (final).pdf", "My_Report_final_.pdf"],
// `!` is kept; only `&` and the space become underscores.
["weird & stuff!.txt", "weird_stuff!.txt"],
// Path separators (forward and backslash) become underscores.
["path/with\\slash.png", "path_with_slash.png"],
// Leading dots/spaces/underscores are trimmed.
[" .leading.dots.txt", "leading.dots.txt"],
// Non-ASCII letters survive.
["日本語のファイル.txt", "日本語のファイル.txt"],
// Emoji survive.
["🔥emoji🔥.png", "🔥emoji🔥.png"],
// Control characters are stripped (replaced and trimmed).
["\u0000\u0001\tcontrol.bin", "control.bin"],
// Underscore-only collapses to empty then falls back to "file".
["___", "file"],
// Empty input falls back to "file".
["", "file"],
// Trailing problem characters are also trimmed.
["foo!.pdf ", "foo!.pdf"],
])("sanitizes %j to %j", (input, expected) => {
expect(sanitizeChatFileName(input)).toBe(expected);
});
});
describe("renameChatFileForUpload", () => {
it("returns the same File reference when the name is already safe", () => {
const file = new File(["png"], "clean.png", { type: "image/png" });
// Identity matters: useFileAttachments keys preview-URL,
// upload-state, and text-content Maps on the File object.
expect(renameChatFileForUpload(file)).toBe(file);
});
it("returns a new File with a sanitized name when needed", () => {
const file = new File(["pdf-bytes"], "My Report (final).pdf", {
type: "application/pdf",
lastModified: 1_700_000_000_000,
});
const renamed = renameChatFileForUpload(file);
expect(renamed).not.toBe(file);
expect(renamed.name).toBe("My_Report_final_.pdf");
expect(renamed.type).toBe("application/pdf");
expect(renamed.lastModified).toBe(1_700_000_000_000);
// File size preserved; byte content is covered transitively by
// the File constructor, and jsdom's Blob backing in this
// project is not reliable enough for an explicit text() probe.
expect(renamed.size).toBe(file.size);
});
});
@@ -98,3 +98,50 @@ export const isChatAttachmentFile = (file: File): boolean => {
}
return ChatAttachmentMediaTypes.some((mediaType) => mediaType === file.type);
};
// Matches characters that commonly cause trouble downstream: bracketing
// punctuation, quotes, shell or URL or path metacharacters, path
// separators, any whitespace, and control characters. ASCII alphanumerics,
// `.`, `-`, `_`, and all other Unicode letters and symbols (CJK, emoji,
// accented Latin) are preserved so localized filenames remain readable.
const unsafeChatFileNameChars = /[()[\]{}<>'"`;,:*?|&#$\\/\s\p{Cc}]/gu;
/**
* Replaces characters that commonly cause trouble downstream (shells,
* LLM prompts, audit logs, path interpolation) with underscores. Keeps
* dots, dashes, underscores, ASCII alphanumerics, and non-ASCII letters
* so localized names remain readable. The server still applies its own
* normalization (control-char strip plus 255-byte truncate) on top of this.
*
* If the sanitized name is empty after trimming leading or trailing `_`,
* `.`, or whitespace, falls back to `"file"` so the server's
* "filename required" contract still holds.
*/
export const sanitizeChatFileName = (name: string): string => {
const replaced = name.replace(unsafeChatFileNameChars, "_");
// Collapse runs of underscores introduced by replacement into a single
// underscore so `foo (final).pdf` becomes `foo_final_.pdf` rather than
// `foo__final_.pdf`. Pre-existing `__` in the original name is also
// collapsed; acceptable tradeoff for tidier names.
const collapsed = replaced.replace(/_+/g, "_");
const trimmed = collapsed.replace(/^[_.\s]+|[_.\s]+$/g, "");
return trimmed === "" ? "file" : trimmed;
};
/**
* Returns a new File whose `name` is sanitized via `sanitizeChatFileName`.
* If the sanitized name is identical to the original, returns the input
* File unchanged to preserve referential equality. The chat UI keys
* preview-URL, upload-state, and text-content Maps on the File object,
* so identity must be stable for already-safe names.
*/
export const renameChatFileForUpload = (file: File): File => {
const sanitized = sanitizeChatFileName(file.name);
if (sanitized === file.name) {
return file;
}
return new File([file], sanitized, {
type: file.type,
lastModified: file.lastModified,
});
};