From 623e72d72dcf8e3dfceeedc2ceedee4b4efc1bf4 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 21 Apr 2026 13:55:24 +0300 Subject: [PATCH] chore: add no-emdash/endash rule to agent instructions and CI lint (#24375) Add a lint check that prevents introduction of Unicode emdash (U+2014) and endash (U+2013) characters. These are almost exclusively introduced by AI agents and conflict with the project writing style. The lint script (scripts/check_emdash.sh) checks only added lines in the current diff by default, so existing violations do not block CI. Pass --all to scan the entire repo for auditing. Agent instructions in AGENTS.md, site/AGENTS.md, and the docs style guide now explicitly ban emdash, endash, and " -- " as punctuation, with guidance to use commas, semicolons, or periods instead. --- .claude/docs/DOCS_STYLE_GUIDE.md | 7 ++ AGENTS.md | 16 +++++ Makefile | 8 ++- scripts/check_emdash.sh | 113 +++++++++++++++++++++++++++++++ site/AGENTS.md | 5 +- 5 files changed, 146 insertions(+), 3 deletions(-) create mode 100755 scripts/check_emdash.sh diff --git a/.claude/docs/DOCS_STYLE_GUIDE.md b/.claude/docs/DOCS_STYLE_GUIDE.md index 00ee7758f8..70ffdb0b68 100644 --- a/.claude/docs/DOCS_STYLE_GUIDE.md +++ b/.claude/docs/DOCS_STYLE_GUIDE.md @@ -150,6 +150,13 @@ Then ask: "Could you provide a screenshot of the Template Insights page? I've ad - Inline: `` `coder server` `` - Blocks: Use triple backticks with language identifier +### Punctuation + +- Do not use emdash (U+2014), endash (U+2013), or ` -- ` as punctuation + in code, comments, string literals, or documentation. Use commas, + semicolons, or periods instead. Restructure the sentence if needed. + For numeric ranges, use a plain hyphen (e.g., `0-100`). 
+ ### Instructions - **Numbered lists** for sequential steps diff --git a/AGENTS.md b/AGENTS.md index 3428b7a315..0aa58f7c57 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -286,6 +286,22 @@ ctx, cancel := context.WithTimeout(ctx, 5*time.Minute) ctx, cancel := context.WithTimeout(ctx, 5*time.Minute) ``` +### No Emdash or Endash + +Do not use emdash (U+2014), endash (U+2013), or ` -- ` as punctuation +in code, comments, string literals, or documentation. Use commas, +semicolons, or periods instead. Restructure the sentence if needed. +Do not replace an emdash with ` -- `. Unicode emdash and endash are +caught by `make lint/emdash`. + +```go +// Good: uses a period to separate the clauses. +// This is slow. We should cache it. + +// Good: uses a comma to join related clauses. +// This is slow, so we should cache it. +``` + ### Avoid Unnecessary Changes When fixing a bug or adding a feature, don't modify code unrelated to your diff --git a/Makefile b/Makefile index c711d2c62f..6a8feb0478 100644 --- a/Makefile +++ b/Makefile @@ -699,11 +699,11 @@ endif # GitHub Actions linters are run in a separate CI job (lint-actions) that only # triggers when workflow files change, so we skip them here when CI=true. LINT_ACTIONS_TARGETS := $(if $(CI),,lint/actions/actionlint) -lint: lint/shellcheck lint/go lint/ts lint/examples lint/helm lint/site-icons lint/markdown lint/check-scopes lint/migrations lint/bootstrap $(LINT_ACTIONS_TARGETS) +lint: lint/shellcheck lint/go lint/ts lint/examples lint/helm lint/site-icons lint/markdown lint/check-scopes lint/migrations lint/bootstrap lint/emdash $(LINT_ACTIONS_TARGETS) .PHONY: lint # Subset of lint that does not require Go or Node toolchains. 
-lint-light: lint/shellcheck lint/markdown lint/helm lint/bootstrap lint/migrations lint/actions/actionlint lint/typos
+lint-light: lint/shellcheck lint/markdown lint/helm lint/bootstrap lint/migrations lint/actions/actionlint lint/typos lint/emdash
 .PHONY: lint-light
 
 lint/site-icons:
@@ -738,6 +738,10 @@ lint/bootstrap:
 	bash scripts/check_bootstrap_quotes.sh
 .PHONY: lint/bootstrap
 
+lint/emdash:
+	bash scripts/check_emdash.sh
+.PHONY: lint/emdash
+
 lint/helm:
 	cd helm/
diff --git a/scripts/check_emdash.sh b/scripts/check_emdash.sh
new file mode 100755
index 0000000000..c41bf34111
--- /dev/null
+++ b/scripts/check_emdash.sh
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+# Lint check that rejects Unicode emdash (U+2014) and endash (U+2013).
+#
+# Usage: check_emdash.sh [--all]
+#
+# By default only lines added relative to the base ref are checked, so
+# existing violations do not fail the run. Pass --all to scan every
+# tracked file instead. Exits 1 when a violation is found, or when the
+# base ref cannot be resolved or diffed.
+set -euo pipefail
+# shellcheck source=scripts/lib.sh
+source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
+cdroot
+
+echo "--- check for emdash/endash characters"
+
+mode="changed"
+for arg in "$@"; do
+	if [[ "$arg" == "--all" ]]; then
+		mode="all"
+	fi
+done
+
+# Build the pattern from raw bytes so the script itself does not
+# contain literal emdash/endash characters (which would trigger
+# the check when the script is in the diff).
+emdash=$'\xE2\x80\x94'
+endash=$'\xE2\x80\x93'
+pattern="${emdash}|${endash}"
+
+# Set to 1 by whichever scan below reports a violation.
+found=0
+
+# Print every match in the tracked files as file:line:text and record
+# in $found whether there were any.
+scan_all_files() {
+	local output
+	# -H keeps the file name prefix even when xargs hands grep a
+	# single file, and "--" stops a path that starts with "-" from
+	# being parsed as an option. grep's stderr and exit status are
+	# discarded, so "no match" and "grep failed" both yield no output.
+	output=$(git ls-files -z | xargs -0 grep -IEnH -e "$pattern" -- 2>/dev/null || true)
+	if [[ -n "$output" ]]; then
+		echo "$output"
+		found=1
+	else
+		found=0
+	fi
+}
+
+if [[ "$mode" == "all" ]]; then
+	scan_all_files
+else
+	base=""
+	if [[ -n "${GITHUB_BASE_REF:-}" ]]; then
+		base="origin/${GITHUB_BASE_REF}"
+	elif git rev-parse --verify origin/main >/dev/null 2>&1; then
+		base="origin/main"
+	fi
+
+	if [[ -z "$base" ]]; then
+		echo "WARNING: no base ref found, scanning all tracked files."
+		scan_all_files
+	else
+		# Ensure the base ref is fetchable. CI shallow clones
+		# (fetch-depth: 1) may not have the base branch available.
+		if ! git rev-parse --verify "$base" >/dev/null 2>&1; then
+			ref="${base#origin/}"
+			echo "Base ref $base not found locally, fetching $ref..."
+			git fetch origin "$ref" --depth=1 2>/dev/null || true
+			if ! git rev-parse --verify "$base" >/dev/null 2>&1; then
+				echo "ERROR: could not fetch base ref $base."
+				exit 1
+			fi
+		fi
+
+		# Diff from the merge base rather than the base tip, so lines
+		# the base branch changed after this branch forked are not
+		# reported as additions. When merge-base fails (for example
+		# in a shallow clone) fall back to the base ref itself.
+		diff_base=$(git merge-base HEAD "$base" 2>/dev/null || echo "$base")
+
+		if ! diff_output=$(git diff "$diff_base" -U0 -- . 2>&1); then
+			echo "ERROR: git diff against $base failed:"
+			echo "$diff_output"
+			exit 1
+		fi
+
+		if [[ -z "$diff_output" ]]; then
+			echo "OK: no changes to check."
+			exit 0
+		fi
+
+		# Parse the diff to check only added lines for emdash/endash.
+		# in_hunk separates each file's header block, where
+		# "+++ b/path" names the file, from hunk bodies, where a
+		# leading "+" marks an added line. That way added content
+		# that itself begins with "++ " is still scanned and counted.
+		current_file=""
+		current_line=0
+		in_hunk=0
+		while IFS= read -r diff_line; do
+			if [[ "$diff_line" == "diff --git "* ]]; then
+				in_hunk=0
+				continue
+			fi
+			# Anchored to hunk header structure to avoid matching
+			# digits from trailing function context.
+			if [[ "$diff_line" =~ ^@@\ -[0-9,]+\ \+([0-9]+) ]]; then
+				current_line=${BASH_REMATCH[1]}
+				in_hunk=1
+				continue
+			fi
+			if [[ "$in_hunk" -eq 0 ]]; then
+				if [[ "$diff_line" =~ ^\+\+\+\ b/(.*) ]]; then
+					current_file="${BASH_REMATCH[1]}"
+				fi
+				continue
+			fi
+			if [[ "$diff_line" == "+"* ]]; then
+				# Substring tests in bash avoid forking echo and
+				# grep once per added line.
+				if [[ "$diff_line" == *"$emdash"* || "$diff_line" == *"$endash"* ]]; then
+					echo "${current_file}:${current_line}:${diff_line:1}"
+					found=1
+				fi
+				current_line=$((current_line + 1))
+			fi
+		done <<<"$diff_output"
+	fi
+fi
+
+if [[ "$found" -ne 0 ]]; then
+	echo ""
+	echo "ERROR: Found emdash (U+2014) or endash (U+2013) characters."
+	echo ""
+	echo " Do not use emdash or endash in code, comments, string literals,"
+	echo " or documentation. Use commas, semicolons, or periods instead."
+	echo " Restructure the sentence if needed. Do not replace them with"
+	echo " ' -- ' either."
+	echo ""
+	echo " Example:"
+	echo " Bad: This is slow [emdash] we should cache it."
+	echo " Good: This is slow. We should cache it."
+	echo " Good: This is slow, so we should cache it."
+	echo ""
+	exit 1
+fi
+
+echo "OK: no emdash or endash characters found."
diff --git a/site/AGENTS.md b/site/AGENTS.md index f201a52cf4..8f81b92f66 100644 --- a/site/AGENTS.md +++ b/site/AGENTS.md @@ -71,8 +71,11 @@ When investigating or editing TypeScript/React code, always use the TypeScript l If sibling components initialize state with `useMemo`, don't switch to `useState(initialFn)` in the same file without reason. - Match errors by error code or HTTP status, never by comparing error - message strings. String matching is brittle — messages change, get + message strings. String matching is brittle; messages change, get localized, or get reformatted. +- Do not use emdash (U+2014), endash (U+2013), or ` -- ` as punctuation + in code, comments, string literals, or documentation. Use commas, + semicolons, or periods instead. Restructure the sentence if needed. ## TypeScript Type Safety