fix(coderd): strip markdown code fences from Anthropic task name responses (#23024)

- Adds `extractJSON()` to strip markdown code fences before JSON parsing and wire into the `json.Unmarshal` call in `generateFromAnthropic`.
- Accepts variadic `RequestOption` in `generateFromAnthropic` so tests can inject a mock Anthropic server via `WithBaseURL`.
- Adds table-driven cases covering bare JSON, fenced with/without language tag, surrounding whitespace, and multiline JSON.
- Adds end-to-end cases using `httptest.NewServer` to serve fake Anthropic SSE streams with bare and fenced responses.
This commit is contained in:
Cian Johnston
2026-03-13 17:35:26 +00:00
committed by GitHub
parent efe114119f
commit 03d0fc4f4c
2 changed files with 177 additions and 3 deletions
+27 -3
View File
@@ -96,6 +96,27 @@ var (
ErrNoNameGenerated = xerrors.New("no task name generated")
)
// extractJSON strips optional markdown code fences (```json or
// ```) that LLMs sometimes wrap around JSON output, returning
// only the inner JSON string. Only well-formed fences with a
// newline after the opening backticks are stripped; malformed
// fences are left untouched so that json.Unmarshal fails
// cleanly and the caller can fall back to other strategies.
func extractJSON(s string) string {
s = strings.TrimSpace(s)
if strings.HasPrefix(s, "```") {
// Only strip when there is a newline separating the
// fence line from the body. Without one we cannot
// reliably tell the fence from the content.
if idx := strings.Index(s, "\n"); idx != -1 {
s = s[idx+1:]
s = strings.TrimSuffix(s, "```")
s = strings.TrimSpace(s)
}
}
return s
}
type TaskName struct {
Name string `json:"task_name"`
DisplayName string `json:"display_name"`
@@ -188,7 +209,7 @@ func generateFromPrompt(prompt string) (TaskName, error) {
// generateFromAnthropic uses Claude (Anthropic) to generate semantic task and display names from a user prompt.
// It sends the prompt to Claude with a structured system prompt requesting JSON output containing both names.
// Returns an error if the API call fails, the response is invalid, or Claude returns an "unnamed" placeholder.
func generateFromAnthropic(ctx context.Context, prompt string, apiKey string, model anthropic.Model) (TaskName, error) {
func generateFromAnthropic(ctx context.Context, prompt string, apiKey string, model anthropic.Model, opts ...anthropicoption.RequestOption) (TaskName, error) {
anthropicModel := model
if anthropicModel == "" {
anthropicModel = defaultModel
@@ -216,6 +237,7 @@ func generateFromAnthropic(ctx context.Context, prompt string, apiKey string, mo
anthropicOptions := anthropic.DefaultClientOptions()
anthropicOptions = append(anthropicOptions, anthropicoption.WithAPIKey(apiKey))
anthropicOptions = append(anthropicOptions, opts...)
anthropicClient := anthropic.NewClient(anthropicOptions...)
stream, err := anthropicDataStream(ctx, anthropicClient, anthropicModel, conversation)
@@ -234,9 +256,11 @@ func generateFromAnthropic(ctx context.Context, prompt string, apiKey string, mo
return TaskName{}, ErrNoNameGenerated
}
// Parse the JSON response
// Parse the JSON response. LLMs sometimes wrap JSON in
// markdown code fences (```json ... ```), so we strip
// those before unmarshalling.
var taskNameResponse TaskName
if err := json.Unmarshal([]byte(acc.Messages()[0].Content), &taskNameResponse); err != nil {
if err := json.Unmarshal([]byte(extractJSON(acc.Messages()[0].Content)), &taskNameResponse); err != nil {
return TaskName{}, xerrors.Errorf("failed to parse anthropic response: %w", err)
}
+150
View File
@@ -1,10 +1,15 @@
package taskname
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/anthropics/anthropic-sdk-go"
anthropicoption "github.com/anthropics/anthropic-sdk-go/option"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/codersdk"
@@ -113,6 +118,151 @@ func TestGenerateFromPrompt(t *testing.T) {
}
}
func TestExtractJSON(t *testing.T) {
t.Parallel()
tests := []struct {
name string
input string
expected string
}{
{
name: "BareJSON",
input: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
expected: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
},
{
name: "FencedJSON",
input: "```json\n{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}\n```",
expected: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
},
{
name: "FencedNoLanguage",
input: "```\n{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}\n```",
expected: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
},
{
name: "FencedWithSurroundingWhitespace",
input: " \n```json\n{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}\n```\n ",
expected: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
},
{
name: "BareJSONWithWhitespace",
input: " \n{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}\n ",
expected: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
},
{
name: "FencedMultilineJSON",
input: "```json\n{\n \"display_name\": \"Fix bug\",\n \"task_name\": \"fix-bug\"\n}\n```",
expected: "{\n \"display_name\": \"Fix bug\",\n \"task_name\": \"fix-bug\"\n}",
},
{
name: "FencedNoNewlinePassthrough",
input: "```json{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}```",
expected: "```json{\"display_name\": \"Fix bug\", \"task_name\": \"fix-bug\"}```",
},
{
name: "NonJSONFencedContent",
input: "```foo: {}, bar: {}```",
expected: "```foo: {}, bar: {}```",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := extractJSON(tc.input)
require.Equal(t, tc.expected, got)
})
}
}
// fakeAnthropicSSE builds a minimal Anthropic Messages SSE stream
// whose sole text content is the provided string.
func fakeAnthropicSSE(t *testing.T, text string) string {
t.Helper()
// Use json.Marshal to produce a correctly escaped JSON
// string value, then strip the surrounding quotes.
escapedBytes, err := json.Marshal(text)
require.NoError(t, err)
escaped := string(escapedBytes[1 : len(escapedBytes)-1])
return fmt.Sprintf(`event: message_start
data: {"type":"message_start","message":{"id":"msg_test","type":"message","role":"assistant","model":"claude-haiku-4-5-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":1}}}
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"%s"}}
event: content_block_stop
data: {"type":"content_block_stop","index":0}
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20}}
event: message_stop
data: {"type":"message_stop"}
`, escaped)
}
func TestGenerateFromAnthropicMock(t *testing.T) {
t.Parallel()
tests := []struct {
name string
responseText string
expectedDisplayName string
expectedNamePrefix string
}{
{
name: "BareJSON",
responseText: `{"display_name": "Fix bug", "task_name": "fix-bug"}`,
expectedDisplayName: "Fix bug",
expectedNamePrefix: "fix-bug-",
},
{
name: "FencedJSON",
responseText: "```json\n{\"display_name\": \"Debug auth\", \"task_name\": \"debug-auth\"}\n```",
expectedDisplayName: "Debug auth",
expectedNamePrefix: "debug-auth-",
},
{
name: "FencedNoLanguage",
responseText: "```\n{\"display_name\": \"Setup CI\", \"task_name\": \"setup-ci\"}\n```",
expectedDisplayName: "Setup CI",
expectedNamePrefix: "setup-ci-",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Content-Type", "text/event-stream")
_, _ = w.Write([]byte(fakeAnthropicSSE(t, tc.responseText)))
}))
t.Cleanup(srv.Close)
ctx := testutil.Context(t, testutil.WaitShort)
taskName, err := generateFromAnthropic(
ctx, "test prompt", "fake-key",
anthropic.ModelClaudeHaiku4_5,
anthropicoption.WithBaseURL(srv.URL),
)
require.NoError(t, err)
require.NoError(t, codersdk.NameValid(taskName.Name))
require.True(t, strings.HasPrefix(taskName.Name, tc.expectedNamePrefix),
"expected name %q to have prefix %q", taskName.Name, tc.expectedNamePrefix)
require.Equal(t, tc.expectedDisplayName, taskName.DisplayName)
})
}
}
func TestGenerateFromAnthropic(t *testing.T) {
t.Parallel()