From 8e7ec7885ac080d6f89cf0b6df0d0ab04f6a3aa4 Mon Sep 17 00:00:00 2001
From: Jeremy McSpadden <jeremy@fluxlabs.net>
Date: Wed, 25 Mar 2026 21:35:09 -0500
Subject: [PATCH 1/5] fix(search): enforce hard search budget and survive
 context compaction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Native search: use monotonic high-water mark (Math.max) instead of
  overwriting sessionSearchCount from history. Prevents budget reset
  when context compaction removes web_search_tool_result blocks.
- Custom search tool: add MAX_SEARCHES_PER_SESSION=15 hard cap across
  all queries (not just consecutive duplicates). Returns budget_exhausted
  error when limit reached.
- Tighten MAX_CONSECUTIVE_DUPES from 3 to 1 — block on the 2nd identical
  search since cached results make repeats pointless.
- Add tests for compaction-safe high-water mark, session budget
  enforcement, and budget reset via resetSearchLoopGuardState
  (simulating a new session).

Closes #2583
---
 .../search-the-web/native-search.ts           |  11 +-
 .../extensions/search-the-web/tool-search.ts  |  24 +++-
 src/tests/native-search.test.ts               |  45 ++++++
 src/tests/search-loop-guard.test.ts           | 131 +++++++++++++-----
 4 files changed, 173 insertions(+), 38 deletions(-)

diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts
index a153f8cc3..0f7805528 100644
--- a/src/resources/extensions/search-the-web/native-search.ts
+++ b/src/resources/extensions/search-the-web/native-search.ts
@@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
     );
     payload.tools = tools;
 
-    // ── Session-level search budget (#1309) ──────────────────────────────
+    // ── Session-level search budget (#1309; compaction-safe per #2583) ────
     // Count web_search_tool_result blocks in the conversation history to
     // determine how many native searches have already been used this session.
     // The Anthropic API's max_uses resets per request, so without this guard,
     // pause_turn → resubmit cycles allow unlimited total searches.
+    //
+    // Use the monotonic high-water mark: take the max of the history count
+    // and the running counter. This prevents budget resets when context
+    // compaction removes web_search_tool_result blocks from history.
     if (Array.isArray(messages)) {
       let historySearchCount = 0;
       for (const msg of messages) {
@@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
           }
         }
       }
-      // Sync counter from history (handles session restore / context replay)
-      sessionSearchCount = historySearchCount;
+      // High-water mark: never decrease the counter, even if compaction
+      // removes web_search_tool_result blocks from the visible history.
+      sessionSearchCount = Math.max(sessionSearchCount, historySearchCount);
     }
 
     const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
diff --git a/src/resources/extensions/search-the-web/tool-search.ts b/src/resources/extensions/search-the-web/tool-search.ts
index 399a399df..e645a502f 100644
--- a/src/resources/extensions/search-the-web/tool-search.ts
+++ b/src/resources/extensions/search-the-web/tool-search.ts
@@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000);
 
 // Consecutive duplicate search guard (#949)
 // Tracks recent query keys to detect and break search loops.
-const MAX_CONSECUTIVE_DUPES = 3;
+const MAX_CONSECUTIVE_DUPES = 1;
 let lastSearchKey = "";
 let consecutiveDupeCount = 0;
 
-/** Reset session-scoped duplicate-search guard state. */
+// Session-level total search budget (all queries, not just duplicates).
+// Prevents unbounded search accumulation across varied queries.
+const MAX_SEARCHES_PER_SESSION = 15;
+let sessionTotalSearches = 0;
+
+/** Reset session-scoped search guard state (both duplicate and budget). */
 export function resetSearchLoopGuardState(): void {
   lastSearchKey = "";
   consecutiveDupeCount = 0;
+  sessionTotalSearches = 0;
 }
 
 // Summarizer responses: max 50 entries, 15-minute TTL
@@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) {
         };
       }
 
+      // ------------------------------------------------------------------
+      // Session-level search budget
+      // ------------------------------------------------------------------
+      if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
+        return {
+          content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }],
+          isError: true,
+          details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial<SearchDetails>,
+        };
+      }
+
       const count = params.count ?? 5;
       const wantSummary = params.summary ?? false;
 
@@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) {
         consecutiveDupeCount = 1;
       }
 
+      // Count every search that passes the guards toward the session budget.
+      sessionTotalSearches++;
+
       const cached = searchCache.get(cacheKey);
 
       if (cached) {
diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts
index 55c964f79..c6ff41310 100644
--- a/src/tests/native-search.test.ts
+++ b/src/tests/native-search.test.ts
@@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => {
   assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)");
 });
 
+test("session search budget: survives context compaction (high-water mark)", async () => {
+  const pi = createMockPI();
+  registerNativeSearchHooks(pi);
+
+  await pi.fire("model_select", {
+    type: "model_select",
+    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
+    previousModel: undefined,
+    source: "set",
+  });
+
+  // First request: history has 12 web_search_tool_result blocks
+  const searchBlocks = Array.from({ length: 12 }, (_, i) => ({
+    type: "web_search_tool_result",
+    tool_use_id: `ws${i}`,
+    content: [],
+  }));
+
+  let payload: Record<string, unknown> = {
+    model: "claude-sonnet-4-6-20250514",
+    tools: [{ name: "bash", type: "custom" }],
+    messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }],
+  };
+
+  await pi.fire("before_provider_request", { type: "before_provider_request", payload });
+  let tools = payload.tools as any[];
+  let nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
+  assert.ok(nativeTool, "Should still inject web_search with 12/15 used");
+  assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)");
+
+  // Second request: context was compacted — search blocks gone from history.
+  // Without high-water mark, the budget would reset to 15.
+  payload = {
+    model: "claude-sonnet-4-6-20250514",
+    tools: [{ name: "bash", type: "custom" }],
+    messages: [{ role: "user", content: "compacted context — no search blocks" }],
+  };
+
+  await pi.fire("before_provider_request", { type: "before_provider_request", payload });
+  tools = payload.tools as any[];
+  nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
+  assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)");
+  assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining");
+});
+
 // ─── stripThinkingFromHistory tests ─────────────────────────────────────────
 
 test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {
diff --git a/src/tests/search-loop-guard.test.ts b/src/tests/search-loop-guard.test.ts
index be4c7023a..c80ff4796 100644
--- a/src/tests/search-loop-guard.test.ts
+++ b/src/tests/search-loop-guard.test.ts
@@ -11,7 +11,7 @@
 
 import test from "node:test";
 import assert from "node:assert/strict";
-import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts";
+import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts";
 import searchExtension from "../resources/extensions/search-the-web/index.ts";
 
 const ORIGINAL_ENV = {
@@ -72,6 +72,8 @@ function createMockPI() {
   const toolsByName = new Map<string, any>();
   let registeredTool: any = null;
 
+  let activeTools: string[] = [];
+
   const pi = {
     on(event: string, handler: (...args: any[]) => unknown) {
       handlers.push({ event, handler });
@@ -91,6 +93,8 @@ function createMockPI() {
     getRegisteredTool(name = "search-the-web") {
       return toolsByName.get(name) ?? registeredTool;
     },
+    getActiveTools() { return activeTools; },
+    setActiveTools(tools: string[]) { activeTools = tools; },
     writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt",
   };
 
@@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t)
 
   const execute = tool.execute.bind(tool);
 
-  // Calls 1–3: below threshold, should return search results (not an error)
-  for (let i = 1; i <= 3; i++) {
-    const result = await callSearch(execute, "loop test query", `call-${i}`);
-    assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`);
-  }
+  // Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1)
+  const result1 = await callSearch(execute, "loop test query", "call-1");
+  assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard");
 
-  // Call 4: hits the threshold — guard fires
-  const result4 = await callSearch(execute, "loop test query", "call-4");
-  assert.equal(result4.isError, true, "call 4 should trigger the loop guard");
-  assert.equal(result4.details?.errorKind, "search_loop");
+  // Call 2: identical query — guard fires immediately (threshold = 1)
+  const result2 = await callSearch(execute, "loop test query", "call-2");
+  assert.equal(result2.isError, true, "call 2 should trigger the loop guard");
+  assert.equal(result2.details?.errorKind, "search_loop");
   assert.ok(
-    result4.content[0].text.includes("Search loop detected"),
+    result2.content[0].text.includes("Search loop detected"),
     "error message should mention search loop"
   );
 });
@@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => {
   assert.ok(tool, "search tool should be registered");
   const execute = tool.execute.bind(tool);
 
-  // Trigger guard in session 1
-  for (let i = 1; i <= 4; i++) {
-    await callSearch(execute, query, `s1-call-${i}`);
-  }
-  const guardResult = await callSearch(execute, query, "s1-call-5");
+  // Trigger guard in session 1 (call 1 succeeds, call 2 fires guard)
+  await callSearch(execute, query, "s1-call-1");
+  const guardResult = await callSearch(execute, query, "s1-call-2");
   assert.equal(guardResult.isError, true, "session 1 should be guarded");
   assert.equal(guardResult.details?.errorKind, "search_loop");
 
@@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed
   const tool = pi.getRegisteredTool();
   const execute = tool.execute.bind(tool);
 
-  // Exhaust the initial window (calls 1–3 succeed, call 4 fires guard)
-  for (let i = 1; i <= 3; i++) {
-    await callSearch(execute, query, `call-${i}`);
-  }
-  const guardFirst = await callSearch(execute, query, "call-4");
-  assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard");
+  // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1)
+  await callSearch(execute, query, "call-1");
+  const guardFirst = await callSearch(execute, query, "call-2");
+  assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard");
 
-  // Key regression test: call 5 (and beyond) must ALSO trigger the guard.
-  // The original bug reset state on trigger, so call 5 was treated as a fresh
+  // Key regression test: call 3 (and beyond) must ALSO trigger the guard.
+  // The original bug reset state on trigger, so call 3 was treated as a fresh
   // first search and the loop restarted.
-  const guardSecond = await callSearch(execute, query, "call-5");
+  const guardSecond = await callSearch(execute, query, "call-3");
   assert.equal(
     guardSecond.isError, true,
-    "call 5 should STILL trigger the loop guard (guard must stay armed after firing)"
+    "call 3 should STILL trigger the loop guard (guard must stay armed after firing)"
   );
   assert.equal(guardSecond.details?.errorKind, "search_loop");
 
-  // Call 6 as well — guard should keep firing
-  const guardThird = await callSearch(execute, query, "call-6");
+  // Call 4 as well — guard should keep firing
+  const guardThird = await callSearch(execute, query, "call-4");
   assert.equal(
     guardThird.isError, true,
-    "call 6 should STILL trigger the loop guard"
+    "call 4 should STILL trigger the loop guard"
   );
 });
 
@@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async
   const tool = pi.getRegisteredTool();
   const execute = tool.execute.bind(tool);
 
-  // Trigger guard for queryA
-  for (let i = 1; i <= 4; i++) {
-    await callSearch(execute, queryA, `call-a-${i}`);
-  }
+  // Trigger guard for queryA (call 1 succeeds, call 2 fires guard)
+  await callSearch(execute, queryA, "call-a-1");
+  await callSearch(execute, queryA, "call-a-2");
 
   // Issue a different query — should succeed (resets the duplicate counter)
   const resultB = await callSearch(execute, queryB, "call-b-1");
@@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async
     "a different query after guard should not be treated as a loop"
   );
 });
+
+test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => {
+  process.env.BRAVE_API_KEY = "test-key-budget";
+  delete process.env.TAVILY_API_KEY;
+  delete process.env.OLLAMA_API_KEY;
+  const restoreFetch = mockFetch(makeBraveResponse());
+
+  t.after(() => {
+    restoreFetch();
+    restoreSearchEnv();
+  });
+
+  // Reset guard state (including session budget) and register directly
+  resetSearchLoopGuardState();
+  const pi = createMockPI();
+  registerSearchTool(pi as any);
+
+  const tool = pi.getRegisteredTool();
+  assert.ok(tool, "search tool should be registered");
+  const execute = tool.execute.bind(tool);
+
+  // Issue 15 unique queries — all should succeed (budget = 15)
+  for (let i = 1; i <= 15; i++) {
+    const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`);
+    assert.notEqual(result.isError, true, `query ${i} should succeed within budget`);
+  }
+
+  // Query 16: budget exhausted — should be blocked
+  const blocked = await callSearch(execute, "one more query", "budget-16");
+  assert.equal(blocked.isError, true, "query 16 should be blocked by budget");
+  assert.equal(blocked.details?.errorKind, "budget_exhausted");
+  assert.ok(
+    blocked.content[0].text.includes("Search budget exhausted"),
+    "error message should mention budget"
+  );
+});
+
+test("session search budget resets via resetSearchLoopGuardState", async (t) => {
+  process.env.BRAVE_API_KEY = "test-key-budget-reset";
+  delete process.env.TAVILY_API_KEY;
+  delete process.env.OLLAMA_API_KEY;
+  const restoreFetch = mockFetch(makeBraveResponse());
+
+  t.after(() => {
+    restoreFetch();
+    restoreSearchEnv();
+  });
+
+  // Reset and register directly
+  resetSearchLoopGuardState();
+  const pi = createMockPI();
+  registerSearchTool(pi as any);
+
+  const tool = pi.getRegisteredTool();
+  const execute = tool.execute.bind(tool);
+
+  // Exhaust budget
+  for (let i = 1; i <= 15; i++) {
+    await callSearch(execute, `budget reset query ${i}`, `br-${i}`);
+  }
+  const exhausted = await callSearch(execute, "exhausted query", "br-exhausted");
+  assert.equal(exhausted.isError, true, "budget should be exhausted");
+
+  // Reset simulates new session
+  resetSearchLoopGuardState();
+  const fresh = await callSearch(execute, "fresh session query", "br-fresh");
+  assert.notEqual(fresh.isError, true, "first query after reset should succeed");
+});

From fde0be6979197e2561db49966e1b49ae7b4e6218 Mon Sep 17 00:00:00 2001
From: Lex Christopherson <lex@glittercowboy.com>
Date: Wed, 25 Mar 2026 22:18:05 -0600
Subject: [PATCH 2/5] fix(headless): disable overall timeout for auto-mode, fix
 lock-guard auto-select (#2586)

Auto-mode sessions are long-running (minutes to hours) with their own
internal per-unit timeout via auto-supervisor. The 300s overall timeout
was killing active sessions mid-execution, triggering wasteful restart
cycles.

Changes:
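- Disable overall timeout for auto-mode when the timeout still equals
  the default 300s. Any other explicit --timeout value is honored,
  including --timeout 0; an explicit --timeout 300000 cannot be told
  apart from the default and is disabled as well.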
- Skip creating the overall timeout timer when timeout is 0 (the timer
  is null) and null-guard every clearTimeout(timeoutTimer) call
- Cancel overall timeout when new-milestone --auto chains into auto-mode
- Fix headless auto-responder to pick "Force start" for lock-guard
  prompts instead of "View status" (which silently blocked auto-mode)
- Allow --timeout 0 to explicitly disable timeout for any command

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/headless-ui.ts | 13 +++++++++++--
 src/headless.ts    | 38 +++++++++++++++++++++++++-------------
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/headless-ui.ts b/src/headless-ui.ts
index 5b7453aac..387be26ca 100644
--- a/src/headless-ui.ts
+++ b/src/headless-ui.ts
@@ -40,9 +40,18 @@ export function handleExtensionUIRequest(
   let response: Record<string, unknown>
 
   switch (method) {
-    case 'select':
-      response = { type: 'extension_ui_response', id, value: event.options?.[0] ?? '' }
+    case 'select': {
+      // Lock-guard prompts list "View status" first, but headless needs "Force start"
+      // to proceed. Detect by title and pick the force option.
+      const title = String(event.title ?? '')
+      let selected = event.options?.[0] ?? ''
+      if (title.includes('Auto-mode is running') && event.options) {
+        const forceOption = event.options.find(o => o.toLowerCase().includes('force start'))
+        if (forceOption) selected = forceOption
+      }
+      response = { type: 'extension_ui_response', id, value: selected }
       break
+    }
     case 'confirm':
       response = { type: 'extension_ui_response', id, confirmed: true }
       break
diff --git a/src/headless.ts b/src/headless.ts
index b14922271..29e9614f2 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -90,8 +90,8 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
     if (!positionalStarted && arg.startsWith('--')) {
       if (arg === '--timeout' && i + 1 < args.length) {
         options.timeout = parseInt(args[++i], 10)
-        if (Number.isNaN(options.timeout) || options.timeout <= 0) {
-          process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n')
+        if (Number.isNaN(options.timeout) || options.timeout < 0) {
+          process.stderr.write('[headless] Error: --timeout must be a non-negative integer (milliseconds, 0 to disable)\n')
           process.exit(1)
         }
       } else if (arg === '--json') {
@@ -183,6 +183,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
     options.timeout = 600_000 // 10 minutes
   }
 
+  // auto-mode sessions are long-running (minutes to hours) with their own internal
+  // per-unit timeout via auto-supervisor. Disable the overall timeout when it still
+  // equals the 300s default (an explicit --timeout 300000 is treated the same way).
+  const isAutoMode = options.command === 'auto'
+  if (isAutoMode && options.timeout === 300_000) {
+    options.timeout = 0
+  }
+
   // Supervised mode cannot share stdin with --context -
   if (options.supervised && options.context === '-') {
     process.stderr.write('[headless] Error: --supervised cannot be used with --context - (both require stdin)\n')
@@ -337,12 +345,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
   // Precompute supervised response timeout
   const responseTimeout = options.responseTimeout ?? 30_000
 
-  // Overall timeout
-  const timeoutTimer = setTimeout(() => {
-    process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
-    exitCode = 1
-    resolveCompletion()
-  }, options.timeout)
+  // Overall timeout (disabled when options.timeout === 0, e.g. auto-mode)
+  const timeoutTimer = options.timeout > 0
+    ? setTimeout(() => {
+        process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
+        exitCode = 1
+        resolveCompletion()
+      }, options.timeout)
+    : null
 
   // Event handler
   client.onEvent((event) => {
@@ -434,7 +444,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
     interrupted = true
     exitCode = 1
     client.stop().finally(() => {
-      clearTimeout(timeoutTimer)
+      if (timeoutTimer) clearTimeout(timeoutTimer)
       if (idleTimer) clearTimeout(idleTimer)
       process.exit(exitCode)
     })
@@ -447,7 +457,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
     await client.start()
   } catch (err) {
     process.stderr.write(`[headless] Error: Failed to start RPC session: ${err instanceof Error ? err.message : String(err)}\n`)
-    clearTimeout(timeoutTimer)
+    if (timeoutTimer) clearTimeout(timeoutTimer)
     process.exit(1)
   }
 
@@ -456,7 +466,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
   if (!internalProcess?.stdin) {
     process.stderr.write('[headless] Error: Cannot access child process stdin\n')
     await client.stop()
-    clearTimeout(timeoutTimer)
+    if (timeoutTimer) clearTimeout(timeoutTimer)
     process.exit(1)
   }
 
@@ -511,7 +521,9 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
       process.stderr.write('[headless] Milestone ready — chaining into auto-mode...\n')
     }
 
-    // Reset completion state for the auto-mode phase
+    // Reset completion state for the auto-mode phase.
+    // Disable the overall timeout — auto-mode has its own internal supervisor.
+    if (timeoutTimer) clearTimeout(timeoutTimer)
     completed = false
     milestoneReady = false
     blocked = false
@@ -532,7 +544,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
   }
 
   // Cleanup
-  clearTimeout(timeoutTimer)
+  if (timeoutTimer) clearTimeout(timeoutTimer)
   if (idleTimer) clearTimeout(idleTimer)
   pendingResponseTimers.forEach((timer) => clearTimeout(timer))
   pendingResponseTimers.clear()

From ebb5afbd571c7e4daeb46a5666c8cccfccba040b Mon Sep 17 00:00:00 2001
From: Lex Christopherson <lex@glittercowboy.com>
Date: Wed, 25 Mar 2026 22:18:26 -0600
Subject: [PATCH 3/5] fix: use GitHub Issue Types via GraphQL instead of
 classification labels

The forensics prompt and gh skill used --label "bug" / --label "type:feature"
for issue classification, polluting the label taxonomy and leaving the Type
field unset. gh issue create has no --type flag, so issue types must be set
via GraphQL mutation after creation.

Closes #2579

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../extensions/gsd/prompts/forensics.md       | 13 ++++++++---
 .../github-workflows/references/gh/SKILL.md   | 23 ++++++++++++++++++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md
index f576d17c4..9112a773f 100644
--- a/src/resources/extensions/gsd/prompts/forensics.md
+++ b/src/resources/extensions/gsd/prompts/forensics.md
@@ -142,9 +142,10 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu
 If yes, create using the `bash` tool:
 
 ```bash
-gh issue create --repo gsd-build/gsd-2 \
+# Step 1: Create issue (use labels for metadata, NOT for classification — type is set via GraphQL)
+ISSUE_URL=$(gh issue create --repo gsd-build/gsd-2 \
   --title "..." \
-  --label "bug" --label "auto-generated" \
+  --label "auto-generated" \
   --body "$(cat <<'EOF'
 ## Problem
 [1-2 sentence summary]
@@ -169,7 +170,13 @@ gh issue create --repo gsd-build/gsd-2 \
 ---
 *Auto-generated by `/gsd forensics`*
 EOF
-)"
+)")
+
+# Step 2: Set issue type via GraphQL (gh issue create has no --type flag)
+ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
+ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
+TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
+gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
 ```
 
 ### Redaction Rules (CRITICAL)
diff --git a/src/resources/skills/github-workflows/references/gh/SKILL.md b/src/resources/skills/github-workflows/references/gh/SKILL.md
index 2d1f4a53d..05d40f337 100644
--- a/src/resources/skills/github-workflows/references/gh/SKILL.md
+++ b/src/resources/skills/github-workflows/references/gh/SKILL.md
@@ -103,9 +103,12 @@ gh issue list -R gsd-build/gsd-2
 gh issue list -R gsd-build/gsd-2 --label "priority:p1" --state open
 
 # Create issue with labels and milestone
+# NOTE: Do NOT use labels for issue classification (bug, feature, etc.)
+# Use labels for metadata (priority, status, auto-generated) only.
+# Issue classification uses GitHub Issue Types, set via GraphQL after creation.
 gh issue create -R gsd-build/gsd-2 \
   --title "feat: add feature X" \
-  --label "priority:p1" --label "type:feature" \
+  --label "priority:p1" \
   --milestone "v1.0"
 
 # View issue
@@ -120,6 +123,24 @@ gh issue edit <number> -R gsd-build/gsd-2 \
   --remove-label "status:needs-grooming"
 ```
 
+### Issue Types (Classification)
+
+`gh issue create` has no `--type` flag. Issue types (Bug, Feature Request, etc.) are set via GraphQL after creation:
+
+```bash
+# Step 1: Create the issue (returns URL)
+ISSUE_URL=$(gh issue create -R gsd-build/gsd-2 \
+  --title "..." --body "...")
+
+# Step 2: Set the issue type via GraphQL
+ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
+ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
+TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
+gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
+```
+
+Replace `"Bug"` with the appropriate type name (`"Feature Request"`, `"Task"`, etc.).
+
 ### Labels
 
 ```bash

From 36ff7ac4fedd9fbc05d6020f8a61916ee5144315 Mon Sep 17 00:00:00 2001
From: Lex Christopherson <lex@glittercowboy.com>
Date: Wed, 25 Mar 2026 22:19:53 -0600
Subject: [PATCH 4/5] Fix complete-milestone prompt with structured parameter
 definitions

Replace the free-form parameter listing in step 7 of complete-milestone.md
with structured, typed parameter definitions that match the tool schema in
db-tools.ts. Parameters are grouped into required and optional sections with
explicit types (marking arrays as arrays, booleans as booleans) to prevent
LLM validation failures when calling gsd_complete_milestone.

Fixes #2581

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../gsd/prompts/complete-milestone.md         | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/resources/extensions/gsd/prompts/complete-milestone.md b/src/resources/extensions/gsd/prompts/complete-milestone.md
index 0ce59eeb7..4e11e80a6 100644
--- a/src/resources/extensions/gsd/prompts/complete-milestone.md
+++ b/src/resources/extensions/gsd/prompts/complete-milestone.md
@@ -35,7 +35,24 @@ Then:
 
 **Success path** (all verifications passed — continue with steps 7–11):
 
-7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
+7. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
+
+   **Required parameters:**
+   - `milestoneId` (string) — Milestone ID (e.g. M001)
+   - `title` (string) — Milestone title
+   - `oneLiner` (string) — One-sentence summary of what the milestone achieved
+   - `narrative` (string) — Detailed narrative of what happened during the milestone
+   - `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met
+   - `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met
+   - `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence
+   - `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone
+   - `keyFiles` (array of strings) — Key files created or modified during the milestone
+   - `lessonsLearned` (array of strings) — Lessons learned during the milestone
+   - `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion
+
+   **Optional parameters:**
+   - `followUps` (string) — Follow-up items for future milestones
+   - `deviations` (string) — Deviations from the original plan
 8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically.
 9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
 10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.

From ac4e3ac392cf92171a47e20f54e595d36e6ddfa1 Mon Sep 17 00:00:00 2001
From: Lex Christopherson <lex@glittercowboy.com>
Date: Wed, 25 Mar 2026 22:26:59 -0600
Subject: [PATCH 5/5] fix(tests): replace undefined assertTrue/assertEq with
 assert.ok/assert.equal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The doctor-environment and doctor-git tests used assertTrue and assertEq
which are not defined — they should be assert.ok and assert.equal from
the imported node:assert/strict module.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../gsd/tests/doctor-environment.test.ts         | 16 ++++++++--------
 .../extensions/gsd/tests/doctor-git.test.ts      |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/resources/extensions/gsd/tests/doctor-environment.test.ts b/src/resources/extensions/gsd/tests/doctor-environment.test.ts
index 59263f2b7..af55c2f66 100644
--- a/src/resources/extensions/gsd/tests/doctor-environment.test.ts
+++ b/src/resources/extensions/gsd/tests/doctor-environment.test.ts
@@ -143,8 +143,8 @@ describe('doctor-environment', async () => {
       cleanups.push(dir);
       const results = runEnvironmentChecks(dir);
       const depsCheck = results.find(r => r.name === "dependencies");
-      assertTrue(depsCheck !== undefined, "dependencies check runs");
-      assertEq(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale");
+      assert.ok(depsCheck !== undefined, "dependencies check runs");
+      assert.equal(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale");
     }
 
     console.log("\n=== env: yarn marker file newer than lockfile → ok (#1974) ===");
@@ -167,8 +167,8 @@ describe('doctor-environment', async () => {
       cleanups.push(dir);
       const results = runEnvironmentChecks(dir);
       const depsCheck = results.find(r => r.name === "dependencies");
-      assertTrue(depsCheck !== undefined, "dependencies check runs");
-      assertEq(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale");
+      assert.ok(depsCheck !== undefined, "dependencies check runs");
+      assert.equal(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale");
     }
 
     console.log("\n=== env: pnpm marker file newer than lockfile → ok (#1974) ===");
@@ -191,8 +191,8 @@ describe('doctor-environment', async () => {
       cleanups.push(dir);
       const results = runEnvironmentChecks(dir);
       const depsCheck = results.find(r => r.name === "dependencies");
-      assertTrue(depsCheck !== undefined, "dependencies check runs");
-      assertEq(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale");
+      assert.ok(depsCheck !== undefined, "dependencies check runs");
+      assert.equal(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale");
     }
 
     console.log("\n=== env: no marker file falls back to dir mtime → stale warning (#1974) ===");
@@ -212,8 +212,8 @@ describe('doctor-environment', async () => {
       cleanups.push(dir);
       const results = runEnvironmentChecks(dir);
       const depsCheck = results.find(r => r.name === "dependencies");
-      assertTrue(depsCheck !== undefined, "dependencies check runs");
-      assertEq(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning");
+      assert.ok(depsCheck !== undefined, "dependencies check runs");
+      assert.equal(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning");
     }
 
     // ── Env File Check ─────────────────────────────────────────────────
diff --git a/src/resources/extensions/gsd/tests/doctor-git.test.ts b/src/resources/extensions/gsd/tests/doctor-git.test.ts
index eabb2daf5..cdffe17ae 100644
--- a/src/resources/extensions/gsd/tests/doctor-git.test.ts
+++ b/src/resources/extensions/gsd/tests/doctor-git.test.ts
@@ -167,22 +167,22 @@ describe('doctor-git', async () => {
         const fixed = await runGSDDoctor(dir, { fix: true, isolationMode: "worktree" });
 
         // The fix must NOT skip removal — it should chdir out and remove
-        assertTrue(
+        assert.ok(
           !fixed.fixesApplied.some(f => f.includes("skipped removing worktree")),
           "does NOT skip removal when cwd is inside worktree",
         );
-        assertTrue(
+        assert.ok(
           fixed.fixesApplied.some(f => f.includes("removed orphaned worktree")),
           "removes orphaned worktree even when cwd was inside it",
         );
 
         // Verify worktree is gone
         const wtList = run("git worktree list", dir);
-        assertTrue(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside");
+        assert.ok(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside");
 
         // Verify cwd was moved out (should be basePath, not still inside worktree)
         const newCwd = process.cwd();
-        assertTrue(
+        assert.ok(
           !newCwd.startsWith(wtPath),
           "cwd moved out of worktree after fix",
         );