From 8e7ec7885ac080d6f89cf0b6df0d0ab04f6a3aa4 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Wed, 25 Mar 2026 21:35:09 -0500 Subject: [PATCH 1/5] fix(search): enforce hard search budget and survive context compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Native search: use monotonic high-water mark (Math.max) instead of overwriting sessionSearchCount from history. Prevents budget reset when context compaction removes web_search_tool_result blocks. - Custom search tool: add MAX_SEARCHES_PER_SESSION=15 hard cap across all queries (not just consecutive duplicates). Returns budget_exhausted error when limit reached. - Tighten MAX_CONSECUTIVE_DUPES from 3 to 1 — block on the 2nd identical search since cached results make repeats pointless. - Add tests for compaction-safe high-water mark, session budget enforcement, and budget reset on session_start. Closes #2583 --- .../search-the-web/native-search.ts | 11 +- .../extensions/search-the-web/tool-search.ts | 24 +++- src/tests/native-search.test.ts | 45 ++++++ src/tests/search-loop-guard.test.ts | 131 +++++++++++++----- 4 files changed, 173 insertions(+), 38 deletions(-) diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index a153f8cc3..0f7805528 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ); payload.tools = tools; - // ── Session-level search budget (#1309) ────────────────────────────── + // ── Session-level search budget (#1309, #compaction-safe) ───────────── // Count web_search_tool_result blocks in the conversation history to // determine how many native searches have already been used this session. // The Anthropic API's max_uses resets per request, so without this guard, // pause_turn → resubmit cycles allow unlimited total searches. + // + // Use the monotonic high-water mark: take the max of the history count + // and the running counter. This prevents budget resets when context + // compaction removes web_search_tool_result blocks from history. if (Array.isArray(messages)) { let historySearchCount = 0; for (const msg of messages) { @@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: } } } - // Sync counter from history (handles session restore / context replay) - sessionSearchCount = historySearchCount; + // High-water mark: never decrease the counter, even if compaction + // removes web_search_tool_result blocks from the visible history. + sessionSearchCount = Math.max(sessionSearchCount, historySearchCount); } const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount); diff --git a/src/resources/extensions/search-the-web/tool-search.ts b/src/resources/extensions/search-the-web/tool-search.ts index 399a399df..e645a502f 100644 --- a/src/resources/extensions/search-the-web/tool-search.ts +++ b/src/resources/extensions/search-the-web/tool-search.ts @@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000); // Consecutive duplicate search guard (#949) // Tracks recent query keys to detect and break search loops. -const MAX_CONSECUTIVE_DUPES = 3; +const MAX_CONSECUTIVE_DUPES = 1; let lastSearchKey = ""; let consecutiveDupeCount = 0; -/** Reset session-scoped duplicate-search guard state. */ +// Session-level total search budget (all queries, not just duplicates). +// Prevents unbounded search accumulation across varied queries. +const MAX_SEARCHES_PER_SESSION = 15; +let sessionTotalSearches = 0; + +/** Reset session-scoped search guard state (both duplicate and budget). */ export function resetSearchLoopGuardState(): void { lastSearchKey = ""; consecutiveDupeCount = 0; + sessionTotalSearches = 0; } // Summarizer responses: max 50 entries, 15-minute TTL @@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) { }; } + // ------------------------------------------------------------------ + // Session-level search budget + // ------------------------------------------------------------------ + if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) { + return { + content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }], + isError: true, + details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial, + }; + } + const count = params.count ?? 5; const wantSummary = params.summary ?? false; @@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) { consecutiveDupeCount = 1; } + // Count every search that passes the guards toward the session budget. + sessionTotalSearches++; + const cached = searchCache.get(cacheKey); if (cached) { diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts index 55c964f79..c6ff41310 100644 --- a/src/tests/native-search.test.ts +++ b/src/tests/native-search.test.ts @@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => { assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)"); }); +test("session search budget: survives context compaction (high-water mark)", async () => { + const pi = createMockPI(); + registerNativeSearchHooks(pi); + + await pi.fire("model_select", { + type: "model_select", + model: { provider: "anthropic", name: "claude-sonnet-4-6" }, + previousModel: undefined, + source: "set", + }); + + // First request: history has 12 web_search_tool_result blocks + const searchBlocks = Array.from({ length: 12 }, (_, i) => ({ + type: "web_search_tool_result", + tool_use_id: `ws${i}`, + content: [], + })); + + let payload: Record = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + let tools = payload.tools as any[]; + let nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used"); + assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)"); + + // Second request: context was compacted — search blocks gone from history. + // Without high-water mark, the budget would reset to 15. + payload = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: "compacted context — no search blocks" }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + tools = payload.tools as any[]; + nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)"); + assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining"); +}); + // ─── stripThinkingFromHistory tests ───────────────────────────────────────── test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => { diff --git a/src/tests/search-loop-guard.test.ts b/src/tests/search-loop-guard.test.ts index be4c7023a..c80ff4796 100644 --- a/src/tests/search-loop-guard.test.ts +++ b/src/tests/search-loop-guard.test.ts @@ -11,7 +11,7 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts"; +import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts"; import searchExtension from "../resources/extensions/search-the-web/index.ts"; const ORIGINAL_ENV = { @@ -72,6 +72,8 @@ function createMockPI() { const toolsByName = new Map(); let registeredTool: any = null; + let activeTools: string[] = []; + const pi = { on(event: string, handler: (...args: any[]) => unknown) { handlers.push({ event, handler }); @@ -91,6 +93,8 @@ function createMockPI() { getRegisteredTool(name = "search-the-web") { return toolsByName.get(name) ?? registeredTool; }, + getActiveTools() { return activeTools; }, + setActiveTools(tools: string[]) { activeTools = tools; }, writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt", }; @@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t) const execute = tool.execute.bind(tool); - // Calls 1–3: below threshold, should return search results (not an error) - for (let i = 1; i <= 3; i++) { - const result = await callSearch(execute, "loop test query", `call-${i}`); - assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`); - } + // Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1) + const result1 = await callSearch(execute, "loop test query", "call-1"); + assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard"); - // Call 4: hits the threshold — guard fires - const result4 = await callSearch(execute, "loop test query", "call-4"); - assert.equal(result4.isError, true, "call 4 should trigger the loop guard"); - assert.equal(result4.details?.errorKind, "search_loop"); + // Call 2: identical query — guard fires immediately (threshold = 1) + const result2 = await callSearch(execute, "loop test query", "call-2"); + assert.equal(result2.isError, true, "call 2 should trigger the loop guard"); + assert.equal(result2.details?.errorKind, "search_loop"); assert.ok( - result4.content[0].text.includes("Search loop detected"), + result2.content[0].text.includes("Search loop detected"), "error message should mention search loop" ); }); @@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => { assert.ok(tool, "search tool should be registered"); const execute = tool.execute.bind(tool); - // Trigger guard in session 1 - for (let i = 1; i <= 4; i++) { - await callSearch(execute, query, `s1-call-${i}`); - } - const guardResult = await callSearch(execute, query, "s1-call-5"); + // Trigger guard in session 1 (call 1 succeeds, call 2 fires guard) + await callSearch(execute, query, "s1-call-1"); + const guardResult = await callSearch(execute, query, "s1-call-2"); assert.equal(guardResult.isError, true, "session 1 should be guarded"); assert.equal(guardResult.details?.errorKind, "search_loop"); @@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Exhaust the initial window (calls 1–3 succeed, call 4 fires guard) - for (let i = 1; i <= 3; i++) { - await callSearch(execute, query, `call-${i}`); - } - const guardFirst = await callSearch(execute, query, "call-4"); - assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard"); + // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1) + await callSearch(execute, query, "call-1"); + const guardFirst = await callSearch(execute, query, "call-2"); + assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard"); - // Key regression test: call 5 (and beyond) must ALSO trigger the guard. - // The original bug reset state on trigger, so call 5 was treated as a fresh + // Key regression test: call 3 (and beyond) must ALSO trigger the guard. + // The original bug reset state on trigger, so call 3 was treated as a fresh // first search and the loop restarted. - const guardSecond = await callSearch(execute, query, "call-5"); + const guardSecond = await callSearch(execute, query, "call-3"); assert.equal( guardSecond.isError, true, - "call 5 should STILL trigger the loop guard (guard must stay armed after firing)" + "call 3 should STILL trigger the loop guard (guard must stay armed after firing)" ); assert.equal(guardSecond.details?.errorKind, "search_loop"); - // Call 6 as well — guard should keep firing - const guardThird = await callSearch(execute, query, "call-6"); + // Call 4 as well — guard should keep firing + const guardThird = await callSearch(execute, query, "call-4"); assert.equal( guardThird.isError, true, - "call 6 should STILL trigger the loop guard" + "call 4 should STILL trigger the loop guard" ); }); @@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Trigger guard for queryA - for (let i = 1; i <= 4; i++) { - await callSearch(execute, queryA, `call-a-${i}`); - } + // Trigger guard for queryA (call 1 succeeds, call 2 fires guard) + await callSearch(execute, queryA, "call-a-1"); + await callSearch(execute, queryA, "call-a-2"); // Issue a different query — should succeed (resets the duplicate counter) const resultB = await callSearch(execute, queryB, "call-b-1"); @@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async "a different query after guard should not be treated as a loop" ); }); + +test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset guard state (including session budget) and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + assert.ok(tool, "search tool should be registered"); + const execute = tool.execute.bind(tool); + + // Issue 15 unique queries — all should succeed (budget = 15) + for (let i = 1; i <= 15; i++) { + const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`); + assert.notEqual(result.isError, true, `query ${i} should succeed within budget`); + } + + // Query 16: budget exhausted — should be blocked + const blocked = await callSearch(execute, "one more query", "budget-16"); + assert.equal(blocked.isError, true, "query 16 should be blocked by budget"); + assert.equal(blocked.details?.errorKind, "budget_exhausted"); + assert.ok( + blocked.content[0].text.includes("Search budget exhausted"), + "error message should mention budget" + ); +}); + +test("session search budget resets via resetSearchLoopGuardState", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget-reset"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + const execute = tool.execute.bind(tool); + + // Exhaust budget + for (let i = 1; i <= 15; i++) { + await callSearch(execute, `budget reset query ${i}`, `br-${i}`); + } + const exhausted = await callSearch(execute, "exhausted query", "br-exhausted"); + assert.equal(exhausted.isError, true, "budget should be exhausted"); + + // Reset simulates new session + resetSearchLoopGuardState(); + const fresh = await callSearch(execute, "fresh session query", "br-fresh"); + assert.notEqual(fresh.isError, true, "first query after reset should succeed"); +}); From fde0be6979197e2561db49966e1b49ae7b4e6218 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:18:05 -0600 Subject: [PATCH 2/5] fix(headless): disable overall timeout for auto-mode, fix lock-guard auto-select (#2586) Auto-mode sessions are long-running (minutes to hours) with their own internal per-unit timeout via auto-supervisor. The 300s overall timeout was killing active sessions mid-execution, triggering wasteful restart cycles. Changes: - Disable overall timeout for auto-mode when using the default 300s (user can still set --timeout explicitly, including --timeout 0) - Guard timeout timer creation for null when timeout is 0 - Cancel overall timeout when new-milestone --auto chains into auto-mode - Fix headless auto-responder to pick "Force start" for lock-guard prompts instead of "View status" (which silently blocked auto-mode) - Allow --timeout 0 to explicitly disable timeout for any command Co-Authored-By: Claude Opus 4.6 (1M context) --- src/headless-ui.ts | 13 +++++++++++-- src/headless.ts | 38 +++++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/src/headless-ui.ts b/src/headless-ui.ts index 5b7453aac..387be26ca 100644 --- a/src/headless-ui.ts +++ b/src/headless-ui.ts @@ -40,9 +40,18 @@ export function handleExtensionUIRequest( let response: Record switch (method) { - case 'select': - response = { type: 'extension_ui_response', id, value: event.options?.[0] ?? '' } + case 'select': { + // Lock-guard prompts list "View status" first, but headless needs "Force start" + // to proceed. Detect by title and pick the force option. + const title = String(event.title ?? '') + let selected = event.options?.[0] ?? '' + if (title.includes('Auto-mode is running') && event.options) { + const forceOption = event.options.find(o => o.toLowerCase().includes('force start')) + if (forceOption) selected = forceOption + } + response = { type: 'extension_ui_response', id, value: selected } break + } case 'confirm': response = { type: 'extension_ui_response', id, confirmed: true } break diff --git a/src/headless.ts b/src/headless.ts index b14922271..29e9614f2 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -90,8 +90,8 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { if (!positionalStarted && arg.startsWith('--')) { if (arg === '--timeout' && i + 1 < args.length) { options.timeout = parseInt(args[++i], 10) - if (Number.isNaN(options.timeout) || options.timeout <= 0) { - process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n') + if (Number.isNaN(options.timeout) || options.timeout < 0) { + process.stderr.write('[headless] Error: --timeout must be a non-negative integer (milliseconds, 0 to disable)\n') process.exit(1) } } else if (arg === '--json') { @@ -183,6 +183,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): options.timeout = 600_000 // 10 minutes } + // auto-mode sessions are long-running (minutes to hours) with their own internal + // per-unit timeout via auto-supervisor. Disable the overall timeout unless the + // user explicitly set --timeout. + const isAutoMode = options.command === 'auto' + if (isAutoMode && options.timeout === 300_000) { + options.timeout = 0 + } + // Supervised mode cannot share stdin with --context - if (options.supervised && options.context === '-') { process.stderr.write('[headless] Error: --supervised cannot be used with --context - (both require stdin)\n') @@ -337,12 +345,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): // Precompute supervised response timeout const responseTimeout = options.responseTimeout ?? 30_000 - // Overall timeout - const timeoutTimer = setTimeout(() => { - process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`) - exitCode = 1 - resolveCompletion() - }, options.timeout) + // Overall timeout (disabled when options.timeout === 0, e.g. auto-mode) + const timeoutTimer = options.timeout > 0 + ? setTimeout(() => { + process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`) + exitCode = 1 + resolveCompletion() + }, options.timeout) + : null // Event handler client.onEvent((event) => { @@ -434,7 +444,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): interrupted = true exitCode = 1 client.stop().finally(() => { - clearTimeout(timeoutTimer) + if (timeoutTimer) clearTimeout(timeoutTimer) if (idleTimer) clearTimeout(idleTimer) process.exit(exitCode) }) @@ -447,7 +457,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): await client.start() } catch (err) { process.stderr.write(`[headless] Error: Failed to start RPC session: ${err instanceof Error ? err.message : String(err)}\n`) - clearTimeout(timeoutTimer) + if (timeoutTimer) clearTimeout(timeoutTimer) process.exit(1) } @@ -456,7 +466,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): if (!internalProcess?.stdin) { process.stderr.write('[headless] Error: Cannot access child process stdin\n') await client.stop() - clearTimeout(timeoutTimer) + if (timeoutTimer) clearTimeout(timeoutTimer) process.exit(1) } @@ -511,7 +521,9 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): process.stderr.write('[headless] Milestone ready — chaining into auto-mode...\n') } - // Reset completion state for the auto-mode phase + // Reset completion state for the auto-mode phase. + // Disable the overall timeout — auto-mode has its own internal supervisor. + if (timeoutTimer) clearTimeout(timeoutTimer) completed = false milestoneReady = false blocked = false @@ -532,7 +544,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): } // Cleanup - clearTimeout(timeoutTimer) + if (timeoutTimer) clearTimeout(timeoutTimer) if (idleTimer) clearTimeout(idleTimer) pendingResponseTimers.forEach((timer) => clearTimeout(timer)) pendingResponseTimers.clear() From ebb5afbd571c7e4daeb46a5666c8cccfccba040b Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:18:26 -0600 Subject: [PATCH 3/5] fix: use GitHub Issue Types via GraphQL instead of classification labels The forensics prompt and gh skill used --label "bug" / --label "type:feature" for issue classification, polluting the label taxonomy and leaving the Type field unset. gh issue create has no --type flag, so issue types must be set via GraphQL mutation after creation. Closes #2579 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extensions/gsd/prompts/forensics.md | 13 ++++++++--- .../github-workflows/references/gh/SKILL.md | 23 ++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md index f576d17c4..9112a773f 100644 --- a/src/resources/extensions/gsd/prompts/forensics.md +++ b/src/resources/extensions/gsd/prompts/forensics.md @@ -142,9 +142,10 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu If yes, create using the `bash` tool: ```bash -gh issue create --repo gsd-build/gsd-2 \ +# Step 1: Create issue (use labels for metadata, NOT for classification — type is set via GraphQL) +ISSUE_URL=$(gh issue create --repo gsd-build/gsd-2 \ --title "..." \ - --label "bug" --label "auto-generated" \ + --label "auto-generated" \ --body "$(cat <<'EOF' ## Problem [1-2 sentence summary] @@ -169,7 +170,13 @@ gh issue create --repo gsd-build/gsd-2 \ --- *Auto-generated by `/gsd forensics`* EOF -)" +)") + +# Step 2: Set issue type via GraphQL (gh issue create has no --type flag) +ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$') +ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id') +TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id') +gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }' ``` ### Redaction Rules (CRITICAL) diff --git a/src/resources/skills/github-workflows/references/gh/SKILL.md b/src/resources/skills/github-workflows/references/gh/SKILL.md index 2d1f4a53d..05d40f337 100644 --- a/src/resources/skills/github-workflows/references/gh/SKILL.md +++ b/src/resources/skills/github-workflows/references/gh/SKILL.md @@ -103,9 +103,12 @@ gh issue list -R gsd-build/gsd-2 gh issue list -R gsd-build/gsd-2 --label "priority:p1" --state open # Create issue with labels and milestone +# NOTE: Do NOT use labels for issue classification (bug, feature, etc.) +# Use labels for metadata (priority, status, auto-generated) only. +# Issue classification uses GitHub Issue Types, set via GraphQL after creation. gh issue create -R gsd-build/gsd-2 \ --title "feat: add feature X" \ - --label "priority:p1" --label "type:feature" \ + --label "priority:p1" \ --milestone "v1.0" # View issue @@ -120,6 +123,24 @@ gh issue edit -R gsd-build/gsd-2 \ --remove-label "status:needs-grooming" ``` +### Issue Types (Classification) + +`gh issue create` has no `--type` flag. Issue types (Bug, Feature Request, etc.) are set via GraphQL after creation: + +```bash +# Step 1: Create the issue (returns URL) +ISSUE_URL=$(gh issue create -R gsd-build/gsd-2 \ + --title "..." --body "...") + +# Step 2: Set the issue type via GraphQL +ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$') +ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id') +TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id') +gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }' +``` + +Replace `"Bug"` with the appropriate type name (`"Feature Request"`, `"Task"`, etc.). + ### Labels ```bash From 36ff7ac4fedd9fbc05d6020f8a61916ee5144315 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:19:53 -0600 Subject: [PATCH 4/5] Fix complete-milestone prompt with structured parameter definitions Replace the free-form parameter listing in step 7 of complete-milestone.md with structured, typed parameter definitions that match the tool schema in db-tools.ts. Parameters are grouped into required and optional sections with explicit types (marking arrays as arrays, booleans as booleans) to prevent LLM validation failures when calling gsd_complete_milestone. Fixes #2581 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../gsd/prompts/complete-milestone.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/prompts/complete-milestone.md b/src/resources/extensions/gsd/prompts/complete-milestone.md index 0ce59eeb7..4e11e80a6 100644 --- a/src/resources/extensions/gsd/prompts/complete-milestone.md +++ b/src/resources/extensions/gsd/prompts/complete-milestone.md @@ -35,7 +35,24 @@ Then: **Success path** (all verifications passed — continue with steps 7–11): -7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. +7. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. + + **Required parameters:** + - `milestoneId` (string) — Milestone ID (e.g. M001) + - `title` (string) — Milestone title + - `oneLiner` (string) — One-sentence summary of what the milestone achieved + - `narrative` (string) — Detailed narrative of what happened during the milestone + - `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met + - `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met + - `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence + - `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone + - `keyFiles` (array of strings) — Key files created or modified during the milestone + - `lessonsLearned` (array of strings) — Lessons learned during the milestone + - `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion + + **Optional parameters:** + - `followUps` (string) — Follow-up items for future milestones + - `deviations` (string) — Deviations from the original plan 8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically. 9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state. 10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`. From ac4e3ac392cf92171a47e20f54e595d36e6ddfa1 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:26:59 -0600 Subject: [PATCH 5/5] fix(tests): replace undefined assertTrue/assertEq with assert.ok/assert.equal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doctor-environment and doctor-git tests used assertTrue and assertEq which are not defined — they should be assert.ok and assert.equal from the imported node:assert/strict module. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../gsd/tests/doctor-environment.test.ts | 16 ++++++++-------- .../extensions/gsd/tests/doctor-git.test.ts | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/resources/extensions/gsd/tests/doctor-environment.test.ts b/src/resources/extensions/gsd/tests/doctor-environment.test.ts index 59263f2b7..af55c2f66 100644 --- a/src/resources/extensions/gsd/tests/doctor-environment.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-environment.test.ts @@ -143,8 +143,8 @@ describe('doctor-environment', async () => { cleanups.push(dir); const results = runEnvironmentChecks(dir); const depsCheck = results.find(r => r.name === "dependencies"); - assertTrue(depsCheck !== undefined, "dependencies check runs"); - assertEq(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale"); + assert.ok(depsCheck !== undefined, "dependencies check runs"); + assert.equal(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale"); } console.log("\n=== env: yarn marker file newer than lockfile → ok (#1974) ==="); @@ -167,8 +167,8 @@ describe('doctor-environment', async () => { cleanups.push(dir); const results = runEnvironmentChecks(dir); const depsCheck = results.find(r => r.name === "dependencies"); - assertTrue(depsCheck !== undefined, "dependencies check runs"); - assertEq(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale"); + assert.ok(depsCheck !== undefined, "dependencies check runs"); + assert.equal(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale"); } console.log("\n=== env: pnpm marker file newer than lockfile → ok (#1974) ==="); @@ -191,8 +191,8 @@ describe('doctor-environment', async () => { cleanups.push(dir); const results = runEnvironmentChecks(dir); const depsCheck = results.find(r => r.name === "dependencies"); - assertTrue(depsCheck !== undefined, "dependencies check runs"); - assertEq(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale"); + assert.ok(depsCheck !== undefined, "dependencies check runs"); + assert.equal(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale"); } console.log("\n=== env: no marker file falls back to dir mtime → stale warning (#1974) ==="); @@ -212,8 +212,8 @@ describe('doctor-environment', async () => { cleanups.push(dir); const results = runEnvironmentChecks(dir); const depsCheck = results.find(r => r.name === "dependencies"); - assertTrue(depsCheck !== undefined, "dependencies check runs"); - assertEq(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning"); + assert.ok(depsCheck !== undefined, "dependencies check runs"); + assert.equal(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning"); } // ── Env File Check ───────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/tests/doctor-git.test.ts b/src/resources/extensions/gsd/tests/doctor-git.test.ts index eabb2daf5..cdffe17ae 100644 --- a/src/resources/extensions/gsd/tests/doctor-git.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-git.test.ts @@ -167,22 +167,22 @@ describe('doctor-git', async () => { const fixed = await runGSDDoctor(dir, { fix: true, isolationMode: "worktree" }); // The fix must NOT skip removal — it should chdir out and remove - assertTrue( + assert.ok( !fixed.fixesApplied.some(f => f.includes("skipped removing worktree")), "does NOT skip removal when cwd is inside worktree", ); - assertTrue( + assert.ok( fixed.fixesApplied.some(f => f.includes("removed orphaned worktree")), "removes orphaned worktree even when cwd was inside it", ); // Verify worktree is gone const wtList = run("git worktree list", dir); - assertTrue(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside"); + assert.ok(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside"); // Verify cwd was moved out (should be basePath, not still inside worktree) const newCwd = process.cwd(); - assertTrue( + assert.ok( !newCwd.startsWith(wtPath), "cwd moved out of worktree after fix", );