From 80750b7d4037a1cdf447f3a75604461199c51d76 Mon Sep 17 00:00:00 2001 From: snowdamiz Date: Sat, 21 Mar 2026 18:07:10 -0400 Subject: [PATCH 01/18] fix(web): lazily compute default package root to avoid Windows standalone crash The standalone Next.js bundle bakes import.meta.url at build time with the Linux CI runner's absolute path. On Windows, fileURLToPath() rejects the Unix file:// URL at module load time, crashing all API routes with ERR_INVALID_FILE_URL_PATH before GSD_WEB_PACKAGE_ROOT can be checked. Replace the eager top-level const with a lazy getter that: 1. Defers evaluation until GSD_WEB_PACKAGE_ROOT is actually absent 2. Catches the cross-platform fileURLToPath failure gracefully 3. Falls back to process.cwd() when the baked-in URL is invalid 4. Caches the result so the computation only runs once Add regression tests verifying: - GSD_WEB_PACKAGE_ROOT is used when set - Lazy fallback returns a valid absolute path without throwing - Memoization is stable across calls - Module loads without crash (the original failure mode) Closes gsd-build/gsd-2#1881 --- src/tests/web-bridge-package-root.test.ts | 70 +++++++++++++++++++++++ src/web/bridge-service.ts | 27 ++++++++- 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 src/tests/web-bridge-package-root.test.ts diff --git a/src/tests/web-bridge-package-root.test.ts b/src/tests/web-bridge-package-root.test.ts new file mode 100644 index 000000000..f919ce873 --- /dev/null +++ b/src/tests/web-bridge-package-root.test.ts @@ -0,0 +1,70 @@ +/** + * Regression tests for the default package root fallback in bridge-service. + * + * Issue: gsd-build/gsd-2#1881 + * The standalone Next.js bundle bakes import.meta.url at build time with the + * CI runner's absolute path. On Windows, fileURLToPath() rejects the Unix + * file:// URL at module load time, 500-ing all API routes. + * + * The fix makes the fallback lazy and catch-guarded so the module loads safely + * on any OS regardless of what import.meta.url resolved to at build time. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { resolve } from "node:path"; + +const bridge = await import("../web/bridge-service.ts"); + +test("resolveBridgeRuntimeConfig uses GSD_WEB_PACKAGE_ROOT when set", () => { + const env = { + GSD_WEB_PACKAGE_ROOT: "/custom/package/root", + GSD_WEB_PROJECT_CWD: "/some/project", + } as unknown as NodeJS.ProcessEnv; + + const config = bridge.resolveBridgeRuntimeConfig(env); + assert.equal(config.packageRoot, "/custom/package/root"); +}); + +test("resolveBridgeRuntimeConfig falls back to lazy default when GSD_WEB_PACKAGE_ROOT is absent", () => { + // Reset the memoized value so we exercise the lazy computation path. + bridge.resetDefaultPackageRootForTests(); + + const env = { + GSD_WEB_PROJECT_CWD: "/some/project", + } as unknown as NodeJS.ProcessEnv; + + // Should not throw — the lazy getter catches cross-platform failures. + const config = bridge.resolveBridgeRuntimeConfig(env); + assert.equal(typeof config.packageRoot, "string"); + assert.ok(config.packageRoot.length > 0, "packageRoot must be a non-empty string"); +}); + +test("lazy default package root is an absolute path", () => { + bridge.resetDefaultPackageRootForTests(); + + const env = { + GSD_WEB_PROJECT_CWD: "/some/project", + } as unknown as NodeJS.ProcessEnv; + + const config = bridge.resolveBridgeRuntimeConfig(env); + // resolve() returns the same path if already absolute. 
+ assert.equal(config.packageRoot, resolve(config.packageRoot)); +}); + +test("lazy default package root is memoized across calls", () => { + bridge.resetDefaultPackageRootForTests(); + + const env = {} as unknown as NodeJS.ProcessEnv; + + const first = bridge.resolveBridgeRuntimeConfig(env).packageRoot; + const second = bridge.resolveBridgeRuntimeConfig(env).packageRoot; + assert.equal(first, second, "memoized value should be stable across calls"); +}); + +test("module loads without throwing (regression: eager fileURLToPath crash)", () => { + // The fact that we can import bridge-service at the top of this file without + // an unhandled exception is itself the primary regression gate. This test + // makes that contract explicit. + assert.ok(typeof bridge.resolveBridgeRuntimeConfig === "function"); +}); diff --git a/src/web/bridge-service.ts b/src/web/bridge-service.ts index 32ed1048b..fc942bf71 100644 --- a/src/web/bridge-service.ts +++ b/src/web/bridge-service.ts @@ -39,7 +39,30 @@ import { } from "./auto-dashboard-service.ts"; import { resolveGsdCliEntry } from "./cli-entry.ts"; -const DEFAULT_PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../.."); +// Lazily computed fallback — import.meta.url is baked in at build time by +// webpack, so when the standalone bundle built on Linux CI runs on Windows the +// literal file:// URL contains a Unix path that fileURLToPath() rejects. +// Deferring the computation means it only fires when GSD_WEB_PACKAGE_ROOT is +// absent, and if it does fire we handle the cross-platform failure gracefully. +let _defaultPackageRoot: string | undefined; +function getDefaultPackageRoot(): string { + if (_defaultPackageRoot !== undefined) return _defaultPackageRoot; + try { + _defaultPackageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../.."); + } catch { + // Standalone bundle running on a different OS than the builder — the + // baked-in import.meta.url is not a valid local file URL. Fall back to + // cwd which is the best available approximation; callers that need the + // real package root should set GSD_WEB_PACKAGE_ROOT. + _defaultPackageRoot = process.cwd(); + } + return _defaultPackageRoot; +} + +/** @internal — test-only: reset the memoized default package root */ +export function resetDefaultPackageRootForTests(): void { + _defaultPackageRoot = undefined; +} const RESPONSE_TIMEOUT_MS = 30_000; const START_TIMEOUT_MS = 150_000; const MAX_STDERR_BUFFER = 8_000; @@ -1047,7 +1070,7 @@ async function fallbackWorkspaceIndex(basePath: string): Promise Date: Sat, 21 Mar 2026 18:23:10 -0400 Subject: [PATCH 02/18] fix: prevent worktree sync from overwriting state and forward-sync completed-units.json syncProjectRootToWorktree used cpSync defaults which overwrote worktree-authoritative files (VALIDATION.md, SUMMARY.md). This caused validate-milestone to loop infinitely because its output got clobbered each iteration. Additionally, completed-units.json was never forward-synced from project root to worktree, so after crash recovery the worktree re-dispatched already-completed units. - Add `{ force: false }` to safeCopyRecursive in syncProjectRootToWorktree so existing worktree files are never overwritten (additive-only copy). - Add forward-sync of completed-units.json from project root to worktree with `{ force: true }` (project root is authoritative for completion state). - Add regression tests covering both bugs and edge cases. 
Fixes #1886 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extensions/gsd/auto-worktree-sync.ts | 17 +- .../worktree-sync-overwrite-loop.test.ts | 204 ++++++++++++++++++ 2 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/worktree-sync-overwrite-loop.test.ts diff --git a/src/resources/extensions/gsd/auto-worktree-sync.ts b/src/resources/extensions/gsd/auto-worktree-sync.ts index 643576098..cfe5113c6 100644 --- a/src/resources/extensions/gsd/auto-worktree-sync.ts +++ b/src/resources/extensions/gsd/auto-worktree-sync.ts @@ -44,11 +44,24 @@ export function syncProjectRootToWorktree( const prGsd = join(projectRoot, ".gsd"); const wtGsd = join(worktreePath, ".gsd"); - // Copy milestone directory from project root to worktree if the project root - // has newer artifacts (e.g. slices that don't exist in the worktree yet) + // Copy milestone directory from project root to worktree — additive only. + // force:false prevents cpSync from overwriting existing worktree files. + // Without this, worktree-authoritative files (e.g. VALIDATION.md written + // by validate-milestone) get clobbered by stale project root copies, + // causing an infinite re-validation loop (#1886). safeCopyRecursive( join(prGsd, "milestones", milestoneId), join(wtGsd, "milestones", milestoneId), + { force: false }, + ); + + // Forward-sync completed-units.json from project root to worktree. + // Project root is authoritative for completion state after crash recovery; + // without this, the worktree re-dispatches already-completed units (#1886). + safeCopy( + join(prGsd, "completed-units.json"), + join(wtGsd, "completed-units.json"), + { force: true }, ); // Delete worktree gsd.db so it rebuilds from the freshly synced files. diff --git a/src/resources/extensions/gsd/tests/worktree-sync-overwrite-loop.test.ts b/src/resources/extensions/gsd/tests/worktree-sync-overwrite-loop.test.ts new file mode 100644 index 000000000..211c87d8d --- /dev/null +++ b/src/resources/extensions/gsd/tests/worktree-sync-overwrite-loop.test.ts @@ -0,0 +1,204 @@ +/** + * worktree-sync-overwrite-loop.test.ts — Regression tests for #1886. + * + * Reproduces the infinite validate-milestone loop caused by two bugs + * in syncProjectRootToWorktree: + * + * 1. safeCopyRecursive overwrites worktree-authoritative files (e.g. + * VALIDATION.md written by validate-milestone gets clobbered by the + * stale project root copy that lacks the file). + * + * 2. completed-units.json is not forward-synced from project root to + * worktree, so the worktree never learns about already-completed units. 
+ * + * Covers: + * - syncProjectRootToWorktree does NOT overwrite existing worktree files + * - syncProjectRootToWorktree copies files missing from the worktree + * - completed-units.json is forward-synced from project root to worktree + * - completed-units.json sync uses force:true (project root is authoritative) + */ + +import { + mkdtempSync, + mkdirSync, + writeFileSync, + rmSync, + existsSync, + readFileSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { syncProjectRootToWorktree } from "../auto-worktree-sync.ts"; +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, assertEq, report } = createTestContext(); + +function createBase(name: string): string { + const base = mkdtempSync(join(tmpdir(), `gsd-wt-1886-${name}-`)); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +async function main(): Promise { + // ─── 1. Worktree VALIDATION.md must NOT be overwritten by project root ── + console.log( + "\n=== 1. #1886: worktree VALIDATION.md preserved (not overwritten) ===", + ); + { + const mainBase = createBase("main"); + const wtBase = createBase("wt"); + + try { + // Project root has an older CONTEXT but no VALIDATION + const prM004 = join(mainBase, ".gsd", "milestones", "M004"); + mkdirSync(prM004, { recursive: true }); + writeFileSync(join(prM004, "M004-CONTEXT.md"), "# old context"); + + // Worktree has CONTEXT + VALIDATION (written by validate-milestone) + const wtM004 = join(wtBase, ".gsd", "milestones", "M004"); + mkdirSync(wtM004, { recursive: true }); + writeFileSync(join(wtM004, "M004-CONTEXT.md"), "# worktree context"); + writeFileSync( + join(wtM004, "M004-VALIDATION.md"), + "verdict: pass\nremediation_round: 1", + ); + + syncProjectRootToWorktree(mainBase, wtBase, "M004"); + + // VALIDATION.md must still exist in worktree + assertTrue( + existsSync(join(wtM004, "M004-VALIDATION.md")), + "#1886: VALIDATION.md still exists after sync", + ); + assertEq( + readFileSync(join(wtM004, "M004-VALIDATION.md"), "utf-8"), + "verdict: pass\nremediation_round: 1", + "#1886: VALIDATION.md content preserved", + ); + + // CONTEXT.md should NOT be overwritten — worktree version is authoritative + assertEq( + readFileSync(join(wtM004, "M004-CONTEXT.md"), "utf-8"), + "# worktree context", + "#1886: existing worktree CONTEXT.md not overwritten", + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 2. Missing files ARE still copied from project root ──────────────── + console.log("\n=== 2. #1886: missing worktree files still copied ==="); + { + const mainBase = createBase("main"); + const wtBase = createBase("wt"); + + try { + const prM004 = join(mainBase, ".gsd", "milestones", "M004"); + mkdirSync(prM004, { recursive: true }); + writeFileSync(join(prM004, "M004-CONTEXT.md"), "# from project root"); + writeFileSync(join(prM004, "M004-ROADMAP.md"), "# roadmap"); + + // Worktree has no M004 directory at all + syncProjectRootToWorktree(mainBase, wtBase, "M004"); + + assertTrue( + existsSync(join(wtBase, ".gsd", "milestones", "M004", "M004-CONTEXT.md")), + "#1886: missing CONTEXT.md copied from project root", + ); + assertTrue( + existsSync(join(wtBase, ".gsd", "milestones", "M004", "M004-ROADMAP.md")), + "#1886: missing ROADMAP.md copied from project root", + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 3. 
completed-units.json forward-synced from project root ─────────── + console.log( + "\n=== 3. #1886: completed-units.json forward-synced to worktree ===", + ); + { + const mainBase = createBase("main"); + const wtBase = createBase("wt"); + + try { + // Project root has completed units (authoritative after crash recovery) + writeFileSync( + join(mainBase, ".gsd", "completed-units.json"), + JSON.stringify(["validate-milestone/M004"]), + ); + + // Worktree has empty completed-units + writeFileSync( + join(wtBase, ".gsd", "completed-units.json"), + JSON.stringify([]), + ); + + syncProjectRootToWorktree(mainBase, wtBase, "M004"); + + const wtCompleted = JSON.parse( + readFileSync(join(wtBase, ".gsd", "completed-units.json"), "utf-8"), + ); + assertEq( + wtCompleted, + ["validate-milestone/M004"], + "#1886: completed-units.json synced from project root (force:true)", + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 4. completed-units.json: no-op when project root has no file ─────── + console.log( + "\n=== 4. #1886: completed-units.json no-op when missing in project root ===", + ); + { + const mainBase = createBase("main"); + const wtBase = createBase("wt"); + + try { + // Project root milestone dir must exist for sync to run + const prM004 = join(mainBase, ".gsd", "milestones", "M004"); + mkdirSync(prM004, { recursive: true }); + + // No completed-units.json in project root + // Worktree has its own + writeFileSync( + join(wtBase, ".gsd", "completed-units.json"), + JSON.stringify(["some-unit/M001"]), + ); + + syncProjectRootToWorktree(mainBase, wtBase, "M004"); + + const wtCompleted = JSON.parse( + readFileSync(join(wtBase, ".gsd", "completed-units.json"), "utf-8"), + ); + assertEq( + wtCompleted, + ["some-unit/M001"], + "#1886: worktree completed-units.json untouched when project root has none", + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + report(); +} + +main().catch((error) => { + console.error(error); + process.exit(1); +}); From 2662c41bf6c3a93930f221a3deec2b529ec9c724 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Sat, 21 Mar 2026 23:30:15 -0400 Subject: [PATCH 03/18] fix(roadmap): recognize '## Slice Roadmap' header in extractSlicesSection The regex in extractSlicesSection matched Slices, Slice Overview, Slice Table, Slice Summary, and Slice Status but not Slice Roadmap. When a roadmap used the '## Slice Roadmap' heading, the section extractor returned empty, causing the parser to fall through to prose headers which lack checkbox state -- marking all slices as incomplete and trapping auto-mode in a dispatch loop. Add 'Roadmap' to the alternation and a regression test that verifies checkbox slices under the '## Slice Roadmap' heading are parsed with correct done state. Fixes #1940 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extensions/gsd/roadmap-slices.ts | 4 +-- .../gsd/tests/roadmap-slices.test.ts | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/resources/extensions/gsd/roadmap-slices.ts b/src/resources/extensions/gsd/roadmap-slices.ts index 4c4cb4ceb..c5487ed80 100644 --- a/src/resources/extensions/gsd/roadmap-slices.ts +++ b/src/resources/extensions/gsd/roadmap-slices.ts @@ -41,8 +41,8 @@ export function expandDependencies(deps: string[]): string[] { } function extractSlicesSection(content: string): string { - // Match "## Slices", "## Slice Overview", "## Slice Table", etc. 
- const headingMatch = /^## Slice(?:s| Overview| Table| Summary| Status)\b.*$/m.exec(content); + // Match "## Slices", "## Slice Overview", "## Slice Table", "## Slice Roadmap", etc. + const headingMatch = /^## Slice(?:s| Overview| Table| Summary| Status| Roadmap)\b.*$/m.exec(content); if (!headingMatch || headingMatch.index == null) return ""; const start = headingMatch.index + headingMatch[0].length; diff --git a/src/resources/extensions/gsd/tests/roadmap-slices.test.ts b/src/resources/extensions/gsd/tests/roadmap-slices.test.ts index 3a954d353..93d5aedca 100644 --- a/src/resources/extensions/gsd/tests/roadmap-slices.test.ts +++ b/src/resources/extensions/gsd/tests/roadmap-slices.test.ts @@ -236,6 +236,32 @@ test("parseRoadmapSlices: ## Slices with valid checkboxes does NOT invoke prose assert.equal(slices[0]?.done, true); }); +// ── Regression test for #1940 ─────────────────────────────────────────────── +// '## Slice Roadmap' header is not recognized by extractSlicesSection, causing +// checkbox-format slices to be missed and all slices reported as incomplete. + +test("parseRoadmapSlices: ## Slice Roadmap heading recognized (#1940)", () => { + const roadmapContent = [ + "# M002: Current Milestone", "", + "**Vision:** Ship it.", "", + "## Slice Roadmap", "", + "- [x] **S01: Foundation** `risk:low` `depends:[]`", + " > After this: base layer works.", + "- [x] **S02: Core Logic** `risk:medium` `depends:[S01]`", + "- [ ] **S03: Polish** `risk:low` `depends:[S01,S02]`", "", + "## Boundary Map", + ].join("\n"); + const slices = parseRoadmapSlices(roadmapContent); + assert.equal(slices.length, 3, "should parse 3 slices under '## Slice Roadmap'"); + assert.equal(slices[0]?.id, "S01"); + assert.equal(slices[0]?.done, true, "S01 should be marked done"); + assert.equal(slices[1]?.id, "S02"); + assert.equal(slices[1]?.done, true, "S02 should be marked done"); + assert.equal(slices[2]?.id, "S03"); + assert.equal(slices[2]?.done, false, "S03 should be pending"); + assert.deepEqual(slices[2]?.depends, ["S01", "S02"]); +}); + test("parseRoadmapSlices: ## Slices with only non-matching lines returns prose fallback results", () => { const weirdContent = `# M020: Odd From b672f44014dacc189c098f3e19cc1b5d22e1c4fe Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Sat, 21 Mar 2026 23:42:03 -0400 Subject: [PATCH 04/18] fix(doctor): chdir out of orphaned worktree before removal (#1946) The orphaned_auto_worktree fix skipped removal when process.cwd() was inside the worktree, creating a deadlock where the doctor repeatedly detected the orphan but never cleaned it up. Now chdir to basePath first, matching the existing pattern in removeWorktree(). Fixes #1946 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/doctor-checks.ts | 19 ++++--- .../extensions/gsd/tests/doctor-git.test.ts | 50 +++++++++++++++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/resources/extensions/gsd/doctor-checks.ts b/src/resources/extensions/gsd/doctor-checks.ts index 64eb0a921..c06e878bb 100644 --- a/src/resources/extensions/gsd/doctor-checks.ts +++ b/src/resources/extensions/gsd/doctor-checks.ts @@ -70,18 +70,25 @@ export async function checkGitHealth( }); if (shouldFix("orphaned_auto_worktree")) { - // Never remove a worktree matching current working directory + // If cwd is inside the worktree, chdir out first — matching the + // pattern in removeWorktree() (#1946). 
Without this, git cannot + // remove the worktree and the doctor enters a deadlock where it + // detects the orphan every run but never cleans it up. const cwd = process.cwd(); if (wt.path === cwd || cwd.startsWith(wt.path + sep)) { - fixesApplied.push(`skipped removing worktree at ${wt.path} (is cwd)`); - } else { try { - nativeWorktreeRemove(basePath, wt.path, true); - fixesApplied.push(`removed orphaned worktree ${wt.path}`); + process.chdir(basePath); } catch { - fixesApplied.push(`failed to remove worktree ${wt.path}`); + fixesApplied.push(`skipped removing worktree at ${wt.path} (cannot chdir to basePath)`); + continue; } } + try { + nativeWorktreeRemove(basePath, wt.path, true); + fixesApplied.push(`removed orphaned worktree ${wt.path}`); + } catch { + fixesApplied.push(`failed to remove worktree ${wt.path}`); + } } } } diff --git a/src/resources/extensions/gsd/tests/doctor-git.test.ts b/src/resources/extensions/gsd/tests/doctor-git.test.ts index 10e12e4d9..0fc8eae96 100644 --- a/src/resources/extensions/gsd/tests/doctor-git.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-git.test.ts @@ -149,6 +149,56 @@ async function main(): Promise { console.log("\n=== orphaned_auto_worktree (skipped on Windows) ==="); } + // ─── Test 1b: Orphaned worktree fix when cwd is inside worktree (#1946) ── + // Reproduces the deadlock: if process.cwd() is inside the orphaned worktree, + // the doctor must chdir out before removing it — not skip the removal. + if (process.platform !== "win32") { + console.log("\n=== orphaned_auto_worktree (cwd inside worktree) ==="); + { + const dir = createRepoWithCompletedMilestone(); + cleanups.push(dir); + + // Create worktree with milestone/M001 branch under .gsd/worktrees/ + mkdirSync(join(dir, ".gsd", "worktrees"), { recursive: true }); + run("git worktree add -b milestone/M001 .gsd/worktrees/M001", dir); + + const wtPath = realpathSync(join(dir, ".gsd", "worktrees", "M001")); + + // Simulate the deadlock: set cwd inside the orphaned worktree + const previousCwd = process.cwd(); + process.chdir(wtPath); + try { + const fixed = await runGSDDoctor(dir, { fix: true, isolationMode: "worktree" }); + + // The fix must NOT skip removal — it should chdir out and remove + assertTrue( + !fixed.fixesApplied.some(f => f.includes("skipped removing worktree")), + "does NOT skip removal when cwd is inside worktree", + ); + assertTrue( + fixed.fixesApplied.some(f => f.includes("removed orphaned worktree")), + "removes orphaned worktree even when cwd was inside it", + ); + + // Verify worktree is gone + const wtList = run("git worktree list", dir); + assertTrue(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside"); + + // Verify cwd was moved out (should be basePath, not still inside worktree) + const newCwd = process.cwd(); + assertTrue( + !newCwd.startsWith(wtPath), + "cwd moved out of worktree after fix", + ); + } finally { + // Restore cwd — the worktree dir may be gone, so chdir to previousCwd + try { process.chdir(previousCwd); } catch { process.chdir(dir); } + } + } + } else { + console.log("\n=== orphaned_auto_worktree (cwd inside worktree — skipped on Windows) ==="); + } + // ─── Test 2: Stale milestone branch detection & fix ──────────────── // Skip on Windows: git branch glob matching and path resolution // behave differently in Windows temp dirs. 
From 2f73814552deb4035ae2a966647e3bddc3d9f401 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Sun, 22 Mar 2026 00:32:25 -0400 Subject: [PATCH 05/18] fix(doctor): compare lockfile mtime against install marker, not directory mtime (#1974) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stale dependency check compared package-lock.json mtime against the node_modules directory mtime. Directory mtime only updates when entries are added or removed — not when files inside are updated. This caused a permanent false-positive warning after `npm install` when dependencies were already up to date, because npm rewrites the lockfile (advancing its mtime) without adding/removing directory entries. Compare against package manager marker files instead: - npm: node_modules/.package-lock.json - yarn: node_modules/.yarn-integrity - pnpm: node_modules/.modules.yaml Falls back to directory mtime when no marker file exists. Fixes #1974 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extensions/gsd/doctor-environment.ts | 37 ++++-- .../gsd/tests/doctor-environment.test.ts | 116 +++++++++++++++++- 2 files changed, 145 insertions(+), 8 deletions(-) diff --git a/src/resources/extensions/gsd/doctor-environment.ts b/src/resources/extensions/gsd/doctor-environment.ts index 61f61cd85..1f2666c49 100644 --- a/src/resources/extensions/gsd/doctor-environment.ts +++ b/src/resources/extensions/gsd/doctor-environment.ts @@ -118,21 +118,44 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult | }; } - // Check if lockfile is newer than node_modules - const lockfiles = ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"]; - for (const lockfile of lockfiles) { - const lockPath = join(basePath, lockfile); + // Check if lockfile is newer than the last install. + // + // Each package manager writes a metadata marker inside node_modules on + // every install. Comparing the lockfile mtime against the marker is + // reliable; comparing against the node_modules *directory* mtime is not, + // because directory mtime only changes when entries are added or removed + // — not when files inside it are updated. (#1974) + const lockfiles: Array<{ lock: string; markers: string[] }> = [ + { lock: "package-lock.json", markers: ["node_modules/.package-lock.json"] }, + { lock: "yarn.lock", markers: ["node_modules/.yarn-integrity"] }, + { lock: "pnpm-lock.yaml", markers: ["node_modules/.modules.yaml"] }, + ]; + + for (const { lock, markers } of lockfiles) { + const lockPath = join(basePath, lock); if (!existsSync(lockPath)) continue; try { const lockMtime = statSync(lockPath).mtimeMs; - const nmMtime = statSync(nodeModules).mtimeMs; - if (lockMtime > nmMtime) { + // Prefer the package manager's marker file; fall back to directory mtime + // only when no marker exists (e.g., manually created node_modules). 
+ let installMtime = 0; + for (const marker of markers) { + const markerPath = join(basePath, marker); + if (existsSync(markerPath)) { + installMtime = Math.max(installMtime, statSync(markerPath).mtimeMs); + } + } + if (installMtime === 0) { + installMtime = statSync(nodeModules).mtimeMs; + } + + if (lockMtime > installMtime) { return { name: "dependencies", status: "warning", - message: `${lockfile} is newer than node_modules — dependencies may be stale`, + message: `${lock} is newer than node_modules — dependencies may be stale`, detail: `Run npm install / yarn / pnpm install to update`, }; } diff --git a/src/resources/extensions/gsd/tests/doctor-environment.test.ts b/src/resources/extensions/gsd/tests/doctor-environment.test.ts index cc7f396a7..b89cf0f9d 100644 --- a/src/resources/extensions/gsd/tests/doctor-environment.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-environment.test.ts @@ -13,7 +13,7 @@ * - Report formatting */ -import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, utimesSync } from "node:fs"; import { join, dirname } from "node:path"; import { tmpdir } from "node:os"; @@ -109,6 +109,120 @@ async function main(): Promise { assertEq(depsCheck!.status, "ok", "existing node_modules is ok"); } + // ── Stale Dependencies: marker file check (#1974) ────────────────── + console.log("\n=== env: npm marker file newer than lockfile → ok (#1974) ==="); + { + // Simulate the exact bug scenario: + // 1. node_modules dir mtime is old (no entries added/removed recently) + // 2. package-lock.json mtime is recent (npm rewrote it) + // 3. node_modules/.package-lock.json mtime is between dir and lockfile + // (npm wrote it during the same install that rewrote the lockfile) + // + // The bug: code compares lockfile mtime vs dir mtime → false positive warning + // The fix: compare lockfile mtime vs marker file mtime → correctly ok + const dir = createProjectDir({ + "package.json": JSON.stringify({ name: "test" }), + }); + mkdirSync(join(dir, "node_modules"), { recursive: true }); + + // Simulate the exact bug: npm install with "up to date" rewrites the + // lockfile and the marker, but no packages are added/removed so the + // directory mtime should be old. We write the marker first (which + // bumps dir mtime), then force the dir mtime back to the past. 
+ // + // Timeline: dir(T-120s) < lockfile(T-5s) ≈ marker(T-5s) + // Bug: code compares lockfile vs dir → false positive stale warning + // Fix: code compares lockfile vs marker → correctly reports ok + const dirTime = new Date(Date.now() - 120_000); + const installTime = new Date(Date.now() - 5_000); + + // Write marker file (this bumps dir mtime as a side effect) + writeFileSync(join(dir, "node_modules", ".package-lock.json"), "{}"); + utimesSync(join(dir, "node_modules", ".package-lock.json"), installTime, installTime); + + // Force dir mtime back to the past — simulates no top-level entries changed + utimesSync(join(dir, "node_modules"), dirTime, dirTime); + + // Lockfile written at install time (same as marker, or slightly after) + writeFileSync(join(dir, "package-lock.json"), "{}"); + utimesSync(join(dir, "package-lock.json"), installTime, installTime); + + cleanups.push(dir); + const results = runEnvironmentChecks(dir); + const depsCheck = results.find(r => r.name === "dependencies"); + assertTrue(depsCheck !== undefined, "dependencies check runs"); + assertEq(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale"); + } + + console.log("\n=== env: yarn marker file newer than lockfile → ok (#1974) ==="); + { + const dir = createProjectDir({ + "package.json": JSON.stringify({ name: "test" }), + }); + mkdirSync(join(dir, "node_modules"), { recursive: true }); + + const dirTime = new Date(Date.now() - 120_000); + const installTime = new Date(Date.now() - 5_000); + + writeFileSync(join(dir, "node_modules", ".yarn-integrity"), "{}"); + utimesSync(join(dir, "node_modules", ".yarn-integrity"), installTime, installTime); + utimesSync(join(dir, "node_modules"), dirTime, dirTime); + + writeFileSync(join(dir, "yarn.lock"), ""); + utimesSync(join(dir, "yarn.lock"), installTime, installTime); + + cleanups.push(dir); + const results = runEnvironmentChecks(dir); + const depsCheck = results.find(r => r.name === "dependencies"); + assertTrue(depsCheck !== undefined, "dependencies check runs"); + assertEq(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale"); + } + + console.log("\n=== env: pnpm marker file newer than lockfile → ok (#1974) ==="); + { + const dir = createProjectDir({ + "package.json": JSON.stringify({ name: "test" }), + }); + mkdirSync(join(dir, "node_modules"), { recursive: true }); + + const dirTime = new Date(Date.now() - 120_000); + const installTime = new Date(Date.now() - 5_000); + + writeFileSync(join(dir, "node_modules", ".modules.yaml"), "{}"); + utimesSync(join(dir, "node_modules", ".modules.yaml"), installTime, installTime); + utimesSync(join(dir, "node_modules"), dirTime, dirTime); + + writeFileSync(join(dir, "pnpm-lock.yaml"), ""); + utimesSync(join(dir, "pnpm-lock.yaml"), installTime, installTime); + + cleanups.push(dir); + const results = runEnvironmentChecks(dir); + const depsCheck = results.find(r => r.name === "dependencies"); + assertTrue(depsCheck !== undefined, "dependencies check runs"); + assertEq(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale"); + } + + console.log("\n=== env: no marker file falls back to dir mtime → stale warning (#1974) ==="); + { + // No marker file exists, lockfile newer than dir → should still warn + const dir = createProjectDir({ + "package.json": JSON.stringify({ name: "test" }), + }); + mkdirSync(join(dir, "node_modules"), { recursive: true }); + + const past = new Date(Date.now() - 60_000); + utimesSync(join(dir, "node_modules"), past, past); + + writeFileSync(join(dir, 
"package-lock.json"), "{}"); + // No marker file written — fallback to dir mtime comparison + + cleanups.push(dir); + const results = runEnvironmentChecks(dir); + const depsCheck = results.find(r => r.name === "dependencies"); + assertTrue(depsCheck !== undefined, "dependencies check runs"); + assertEq(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning"); + } + // ── Env File Check ───────────────────────────────────────────────── console.log("\n=== env: .env.example without .env detected ==="); { From 2a3493c291ea99133e6b24c3db6ee24c6c0ea2ad Mon Sep 17 00:00:00 2001 From: Jay the Reaper <198331141+TheReaperJay@users.noreply.github.com> Date: Sun, 22 Mar 2026 22:23:04 +0700 Subject: [PATCH 06/18] fix(pi-coding-agent): prevent crash when login is cancelled --- .../src/modes/interactive/interactive-mode.ts | 39 ++++--------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts index 6795d2064..d4b7a0a84 100644 --- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts @@ -3372,14 +3372,6 @@ export class InteractiveMode { this.ui.setFocus(dialog); this.ui.requestRender(); - // Promise for manual code input (racing with callback server) - let manualCodeResolve: ((code: string) => void) | undefined; - let manualCodeReject: ((err: Error) => void) | undefined; - const manualCodePromise = new Promise((resolve, reject) => { - manualCodeResolve = resolve; - manualCodeReject = reject; - }); - // Restore editor helper — also disposes the dialog to reject any // dangling promises and prevent the UI from getting stuck. const restoreEditor = () => { @@ -3395,23 +3387,7 @@ export class InteractiveMode { onAuth: (info: { url: string; instructions?: string }) => { dialog.showAuth(info.url, info.instructions); - if (usesCallbackServer) { - // Show input for manual paste, racing with callback - dialog - .showManualInput("Paste redirect URL below, or complete login in browser:") - .then((value) => { - if (value && manualCodeResolve) { - manualCodeResolve(value); - manualCodeResolve = undefined; - } - }) - .catch(() => { - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - } - }); - } else if (providerId === "github-copilot") { + if (!usesCallbackServer && providerId === "github-copilot") { // GitHub Copilot polls after onAuth dialog.showWaiting("Waiting for browser authentication..."); } @@ -3426,7 +3402,12 @@ export class InteractiveMode { dialog.showProgress(message); }, - onManualCodeInput: () => manualCodePromise, + // Callback-server providers race browser callback with pasted redirect URL. + // Keep manual-input promise ownership inside provider flow to avoid + // orphaned rejections when the callback is not consumed. + onManualCodeInput: usesCallbackServer + ? () => dialog.showManualInput("Paste redirect URL below, or complete login in browser:") + : undefined, signal: dialog.signal, }); @@ -3458,12 +3439,6 @@ export class InteractiveMode { this.showStatus(`Logged in to ${providerName}. 
Credentials saved to ${getAuthPath()}`); } catch (error: unknown) { restoreEditor(); - // Also reject the manual code promise if it's still pending - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - manualCodeResolve = undefined; - } const errorMsg = error instanceof Error ? error.message : String(error); if (errorMsg !== "Login cancelled" && !errorMsg.includes("Superseded") && !errorMsg.includes("disposed")) { this.showError(`Failed to login to ${providerName}: ${errorMsg}`); From 00163685a95b2b4171c27338ac323e3380017190 Mon Sep 17 00:00:00 2001 From: frizynn Date: Sun, 22 Mar 2026 22:29:19 -0300 Subject: [PATCH 07/18] fix(rpc): resolve double-set race, missing error ID, and stream handler Fix three bugs in the RPC subsystem: 1. rpc-client.ts: Remove duplicate `pendingRequests.set(id, ...)` call that immediately gets overwritten. The first set stored bare resolve/reject without timeout cleanup, creating a race window where timeout could fire with the wrong handler. 2. rpc-mode.ts: Unknown command error response now preserves the request's id instead of returning `id: undefined`, fixing request-response correlation for unrecognized commands. 3. jsonl.ts: Add missing `error` event handler on the input stream to prevent unhandled exceptions, and include it in the cleanup function returned by `attachJsonlLineReader`. --- packages/pi-coding-agent/src/modes/rpc/jsonl.ts | 6 ++++++ packages/pi-coding-agent/src/modes/rpc/rpc-client.ts | 2 -- packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts index 8962c7340..5392defef 100644 --- a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts +++ b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts @@ -48,11 +48,17 @@ export function attachJsonlLineReader(stream: Readable, onLine: (line: string) = } }; + const onError = (_err: Error) => { + // Stream errors are non-fatal for JSONL reading + }; + stream.on("data", onData); stream.on("end", onEnd); + stream.on("error", onError); return () => { stream.off("data", onData); stream.off("end", onEnd); + stream.off("error", onError); }; } diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts index a3f91ecc4..319a7418c 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts @@ -482,8 +482,6 @@ export class RpcClient { const fullCommand = { ...command, id } as RpcCommand; return new Promise((resolve, reject) => { - this.pendingRequests.set(id, { resolve, reject }); - const timeout = setTimeout(() => { this.pendingRequests.delete(id); reject(new Error(`Timeout waiting for response to ${command.type}. 
Stderr: ${this.stderr}`)); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index dc02b4491..e41e5ac3b 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -586,8 +586,8 @@ export async function runRpcMode(session: AgentSession): Promise { } default: { - const unknownCommand = command as { type: string }; - return error(undefined, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); + const unknownCommand = command as { type: string; id?: string }; + return error(unknownCommand.id, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); } } }; From 498a4b5310ee35cf2389c45a412df4883cc3177c Mon Sep 17 00:00:00 2001 From: frizynn Date: Sun, 22 Mar 2026 22:30:11 -0300 Subject: [PATCH 08/18] fix(ai): resolve WebSocket listener leaks and bound session cache Fix two memory leaks in the OpenAI Codex Responses WebSocket code: 1. parseWebSocket() onMessage handler: The fire-and-forget async IIFE could error after the await on decodeWebSocketData(), swallowing the error and leaving all four event listeners attached to the socket indefinitely. Wrap the entire handler body in try/catch, signal the error to the generator loop via `failed`/`done`, and call cleanup() to remove listeners immediately. JSON SyntaxErrors are treated as non-fatal (malformed message skipped). 2. websocketSessionCache: The Map grows without bound when many distinct session IDs are used over the lifetime of a process. Add a MAX_WEBSOCKET_CACHE_SIZE (10) constant and evict the oldest entry (first key in insertion order) before inserting a new one, closing the evicted socket and clearing its idle timer. Also extract the duplicated removeEventListener calls in parseWebSocket into a shared cleanup() helper used by both the onMessage error path and the finally block. --- .../src/providers/openai-codex-responses.ts | 47 +++++++++++++++---- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/packages/pi-ai/src/providers/openai-codex-responses.ts b/packages/pi-ai/src/providers/openai-codex-responses.ts index 3a93e9fa0..294290188 100644 --- a/packages/pi-ai/src/providers/openai-codex-responses.ts +++ b/packages/pi-ai/src/providers/openai-codex-responses.ts @@ -451,6 +451,7 @@ async function* parseSSE(response: Response): AsyncGenerator void; @@ -635,6 +636,20 @@ async function acquireWebSocket( const socket = await connectWebSocket(url, headers, signal); const entry: CachedWebSocketConnection = { socket, busy: true }; + + // Evict the oldest entry if the cache is at capacity (LRU eviction). 
+ if (websocketSessionCache.size >= MAX_WEBSOCKET_CACHE_SIZE) { + const oldestKey = websocketSessionCache.keys().next().value; + if (oldestKey) { + const oldEntry = websocketSessionCache.get(oldestKey); + websocketSessionCache.delete(oldestKey); + if (oldEntry) { + if (oldEntry.idleTimer) clearTimeout(oldEntry.idleTimer); + closeWebSocketSilently(oldEntry.socket); + } + } + } + websocketSessionCache.set(sessionId, entry); return { socket, @@ -705,12 +720,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy resolve(); }; + const cleanup = () => { + socket.removeEventListener("message", onMessage); + socket.removeEventListener("error", onError); + socket.removeEventListener("close", onClose); + signal?.removeEventListener("abort", onAbort); + }; + const onMessage: WebSocketListener = (event) => { void (async () => { - if (!event || typeof event !== "object" || !("data" in event)) return; - const text = await decodeWebSocketData((event as { data?: unknown }).data); - if (!text) return; try { + if (!event || typeof event !== "object" || !("data" in event)) return; + const text = await decodeWebSocketData((event as { data?: unknown }).data); + if (!text) return; const parsed = JSON.parse(text) as Record; const type = typeof parsed.type === "string" ? parsed.type : ""; if (type === "response.completed" || type === "response.done") { @@ -719,7 +741,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy } queue.push(parsed); wake(); - } catch {} + } catch (err) { + // Ensure listeners are cleaned up if the async handler errors. + // Without this, the fire-and-forget promise would swallow the + // error while leaving listeners attached to the socket. + if (err instanceof SyntaxError) { + // JSON parse failure — skip the malformed message. + return; + } + failed = err instanceof Error ? 
err : new Error(String(err)); + done = true; + cleanup(); + wake(); + } })(); }; @@ -775,10 +809,7 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy throw new Error("WebSocket stream closed before response.completed"); } } finally { - socket.removeEventListener("message", onMessage); - socket.removeEventListener("error", onError); - socket.removeEventListener("close", onClose); - signal?.removeEventListener("abort", onAbort); + cleanup(); } } From 806cb76e72eac2448673ac3ddeb11937e3bd63d0 Mon Sep 17 00:00:00 2001 From: frizynn Date: Sun, 22 Mar 2026 22:30:44 -0300 Subject: [PATCH 09/18] fix: resolve race conditions in blob-store, discovery-cache, and agent-loop - blob-store: Replace non-atomic check-then-act (existsSync + writeFileSync) with writeFileSync using 'wx' flag for atomic exclusive creation - discovery-cache: Re-read from disk before mutations to avoid stale overwrites, and use temp file + rename for atomic saves - agent-loop: Deep copy messages array in agentLoopContinue to prevent shared reference mutations from affecting the original context --- packages/pi-agent-core/src/agent-loop.ts | 5 ++++- packages/pi-coding-agent/src/core/blob-store.ts | 9 ++++++--- packages/pi-coding-agent/src/core/discovery-cache.ts | 11 +++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts index fa05a0eff..8379d5853 100644 --- a/packages/pi-agent-core/src/agent-loop.ts +++ b/packages/pi-agent-core/src/agent-loop.ts @@ -118,7 +118,10 @@ export function agentLoopContinue( (async () => { const newMessages: AgentMessage[] = []; - const currentContext: AgentContext = { ...context }; + const currentContext: AgentContext = { + ...context, + messages: [...context.messages], + }; stream.push({ type: "agent_start" }); stream.push({ type: "turn_start" }); diff --git a/packages/pi-coding-agent/src/core/blob-store.ts b/packages/pi-coding-agent/src/core/blob-store.ts index 16262c892..9ad9e4f49 100644 --- a/packages/pi-coding-agent/src/core/blob-store.ts +++ b/packages/pi-coding-agent/src/core/blob-store.ts @@ -6,7 +6,7 @@ * provides automatic deduplication across sessions. */ import { createHash } from "node:crypto"; -import { mkdirSync, readdirSync, readFileSync, writeFileSync, existsSync, accessSync, unlinkSync, statSync } from "node:fs"; +import { mkdirSync, readdirSync, readFileSync, writeFileSync, accessSync, unlinkSync, statSync } from "node:fs"; import { join } from "node:path"; const BLOB_PREFIX = "blob:sha256:"; @@ -37,8 +37,11 @@ export class BlobStore { }, }; - if (!existsSync(blobPath)) { - writeFileSync(blobPath, data); + try { + writeFileSync(blobPath, data, { flag: "wx" }); // Atomic: fails if file exists + } catch (err: any) { + if (err.code !== "EEXIST") throw err; + // File already exists — expected for content-addressed storage } return result; } diff --git a/packages/pi-coding-agent/src/core/discovery-cache.ts b/packages/pi-coding-agent/src/core/discovery-cache.ts index a75633c2f..d9d9bded8 100644 --- a/packages/pi-coding-agent/src/core/discovery-cache.ts +++ b/packages/pi-coding-agent/src/core/discovery-cache.ts @@ -3,7 +3,7 @@ * Stores results at {agentDir}/discovery-cache.json with per-provider TTLs. 
*/ -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; +import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "fs"; import { dirname, join } from "path"; import { getAgentDir } from "../config.js"; import { type DiscoveredModel, getDefaultTTL } from "./model-discovery.js"; @@ -35,6 +35,8 @@ export class ModelDiscoveryCache { } set(provider: string, models: DiscoveredModel[], ttlMs?: number): void { + // Re-read from disk to get the latest state before modifying + this.load(); this.data.entries[provider] = { models, fetchedAt: Date.now(), @@ -50,6 +52,8 @@ export class ModelDiscoveryCache { } clear(provider?: string): void { + // Re-read from disk to get the latest state before modifying + this.load(); if (provider) { delete this.data.entries[provider]; } else { @@ -89,7 +93,10 @@ export class ModelDiscoveryCache { if (!existsSync(dir)) { mkdirSync(dir, { recursive: true }); } - writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8"); + // Atomic write: write to temp file then rename to avoid partial reads + const tmpPath = this.cachePath + ".tmp"; + writeFileSync(tmpPath, JSON.stringify(this.data, null, 2), "utf-8"); + renameSync(tmpPath, this.cachePath); } catch { // Silently ignore write failures (read-only FS, permissions, etc.) } From ca0be14f32bfa50c33ccffd14feafe7f02161891 Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:54:28 +0100 Subject: [PATCH 10/18] fix: preserve doctor missing-dir checks for active legacy slices Doctor's DB-backed slice normalization already marks pending slices, but the legacy roadmap fallback only returned done/not-done. That made future unstarted slices look active during milestone-scoped doctor runs, producing false missing_slice_dir errors. Infer a doctor-local pending state for legacy slices by treating every undone slice except the current active slice as unstarted. This keeps active-slice missing directory checks intact while skipping false positives for future slices, and adds a regression test for the legacy fallback path. Closes #2518 --- src/resources/extensions/gsd/doctor.ts | 10 ++- .../gsd/tests/doctor-fixlevel.test.ts | 66 ++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index 445278977..83fc8a754 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -487,7 +487,15 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; demo: s.demo, })); } else { - slices = parseLegacyRoadmap(roadmapContent).slices; + const activeMilestoneId = state.activeMilestone?.id; + const activeSliceId = state.activeSlice?.id; + slices = parseLegacyRoadmap(roadmapContent).slices.map(s => ({ + ...s, + // Legacy roadmaps only encode done vs not-done. For doctor's + // missing-directory checks, treat every undone slice except the + // current active slice as effectively pending/unstarted. 
+ pending: !s.done && (milestoneId !== activeMilestoneId || s.id !== activeSliceId), + })); } // Wrap in Roadmap-compatible shape for detectCircularDependencies const roadmap = { slices }; diff --git a/src/resources/extensions/gsd/tests/doctor-fixlevel.test.ts b/src/resources/extensions/gsd/tests/doctor-fixlevel.test.ts index 21f15cdbc..a1d5a4aba 100644 --- a/src/resources/extensions/gsd/tests/doctor-fixlevel.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-fixlevel.test.ts @@ -15,6 +15,7 @@ import { tmpdir } from "node:os"; import test from "node:test"; import assert from "node:assert/strict"; import { runGSDDoctor } from "../doctor.ts"; +import { closeDatabase } from "../gsd-db.ts"; function makeTmp(name: string): string { const dir = join(tmpdir(), `doctor-fixlevel-${name}-${Date.now()}-${Math.random().toString(36).slice(2)}`); @@ -112,6 +113,70 @@ test("fixLevel:all — no reconciliation issue codes are reported", async (t) => assert.ok(roadmapContent.includes("- [ ] **S01"), "roadmap should remain unchecked"); }); +test("legacy roadmap fallback: future slices are treated as pending, active slice is not", async (t) => { + const tmp = makeTmp("legacy-pending-fallback"); + t.after(() => { + try { closeDatabase(); } catch { /* noop */ } + rmSync(tmp, { recursive: true, force: true }); + }); + + // Force the legacy parser branch. + try { closeDatabase(); } catch { /* noop */ } + + const gsd = join(tmp, ".gsd"); + const m = join(gsd, "milestones", "M001"); + const s01 = join(m, "slices", "S01", "tasks"); + mkdirSync(s01, { recursive: true }); + + writeFileSync(join(m, "M001-ROADMAP.md"), `# M001: Test + +## Slices + +- [x] **S01: Done Slice** \`risk:low\` \`depends:[]\` + > Done +- [ ] **S02: Active Slice** \`risk:medium\` \`depends:[S01]\` + > In progress +- [ ] **S03: Future Slice** \`risk:low\` \`depends:[S02]\` + > Later +- [ ] **S04: Future Slice Two** \`risk:low\` \`depends:[S03]\` + > Later +`); + + writeFileSync(join(m, "slices", "S01", "S01-PLAN.md"), `# S01: Done Slice + +**Goal:** done + +## Tasks + +- [x] **T01: Done task** \`est:5m\` +`); + + // Active slice exists in state/registry but has no directory yet — this should + // still be reported as a real error, while future untouched slices should be skipped. + const report = await runGSDDoctor(tmp, { scope: "M001" }); + const missingSliceDirUnits = report.issues + .filter(i => i.code === "missing_slice_dir") + .map(i => i.unitId) + .sort(); + + assert.deepStrictEqual( + missingSliceDirUnits, + ["M001/S02"], + "legacy fallback should only report the active slice, not future unstarted slices", + ); + + const missingTasksDirUnits = report.issues + .filter(i => i.code === "missing_tasks_dir") + .map(i => i.unitId) + .sort(); + + assert.deepStrictEqual( + missingTasksDirUnits, + [], + "future slices without directories should be skipped before missing_tasks_dir checks", + ); +}); + test("fixLevel:all — delimiter_in_title still fixable", async (t) => { const tmp = makeTmp("delimiter-fix"); t.after(() => rmSync(tmp, { recursive: true, force: true })); @@ -141,7 +206,6 @@ test("fixLevel:all — delimiter_in_title still fixable", async (t) => { const report = await runGSDDoctor(tmp, { fix: true }); - const delimiterIssues = report.issues.filter(i => i.code === "delimiter_in_title"); // The milestone-level delimiter is auto-fixed, but the report may or may not include it // depending on whether it was fixed successfully. Just verify it ran without crashing. 
assert.ok(report.issues !== undefined, "doctor produces a report"); From 2e4d1489ae0549dbe5e96a5f8bef7c5f304223cf Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Thu, 26 Mar 2026 00:34:09 +0100 Subject: [PATCH 11/18] fix: guard writeIntegrationBranch against workflow-template branches writeIntegrationBranch already rejects slice branches (SLICE_BRANCH_RE) and quick-task branches (QUICK_BRANCH_RE), but has no guard for the 8 workflow- template branches (gsd/hotfix/*, gsd/bugfix/*, gsd/spike/*, etc.). When a user runs `/gsd start hotfix` during an active milestone, the ephemeral hotfix branch gets recorded as the integration target and the milestone later merges to the wrong branch. Add WORKFLOW_BRANCH_RE (/^gsd\/(?!M\d)[\w-]+\//) that matches all gsd// branches while excluding milestone slice branches (gsd/M001/S01). The negative lookahead ensures milestone branches starting with 'M' followed by a digit are not affected. Same root cause as gsd/quick/* (#1293, PR #1342). Closes #2498 --- src/resources/extensions/gsd/git-service.ts | 13 +++++ .../extensions/gsd/tests/git-service.test.ts | 49 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/resources/extensions/gsd/git-service.ts b/src/resources/extensions/gsd/git-service.ts index 9f17574e5..69851c418 100644 --- a/src/resources/extensions/gsd/git-service.ts +++ b/src/resources/extensions/gsd/git-service.ts @@ -246,6 +246,15 @@ export function readIntegrationBranch(basePath: string, milestoneId: string): st /** Regex matching GSD quick-task branches: gsd/quick/- */ export const QUICK_BRANCH_RE = /^gsd\/quick\//; +/** + * Matches all GSD workflow-template branches: gsd//. + * + * Template IDs are lowercase alphanumeric with hyphens (e.g. hotfix, bugfix, + * small-feature, dep-upgrade). The negative lookahead excludes milestone + * branches (gsd/M001/... or gsd/M001-abc123/...) which use SLICE_BRANCH_RE. + */ +export const WORKFLOW_BRANCH_RE = /^gsd\/(?!M\d)[\w-]+\//; + export function writeIntegrationBranch( basePath: string, milestoneId: string, @@ -257,6 +266,10 @@ export function writeIntegrationBranch( // to their origin branch on completion. Recording one as the integration // target causes milestone merges to land on the wrong branch (#1293). if (QUICK_BRANCH_RE.test(branch)) return; + // Don't record workflow-template branches (hotfix, bugfix, spike, etc.) — + // same root cause as quick-task branches (#2498). All templates create + // gsd// branches that are ephemeral. + if (WORKFLOW_BRANCH_RE.test(branch)) return; // Validate if (!VALID_BRANCH_NAME.test(branch)) return; // Skip if already recorded with the same branch (idempotent across restarts). 
diff --git a/src/resources/extensions/gsd/tests/git-service.test.ts b/src/resources/extensions/gsd/tests/git-service.test.ts index cd7168ada..3e4b3ffda 100644 --- a/src/resources/extensions/gsd/tests/git-service.test.ts +++ b/src/resources/extensions/gsd/tests/git-service.test.ts @@ -868,6 +868,55 @@ describe('git-service', async () => { rmSync(repo, { recursive: true, force: true }); }); + // ─── writeIntegrationBranch: rejects workflow-template branches (#2498) ─ + + test('Integration branch: rejects workflow-template branches', () => { + const repo = initBranchTestRepo(); + + // All 8 registered workflow templates should be rejected + writeIntegrationBranch(repo, "M001", "gsd/hotfix/fix-login"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "hotfix branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/bugfix/null-pointer"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "bugfix branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/small-feature/add-button"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "small-feature branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/refactor/rename-module"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "refactor branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/spike/evaluate-lib"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "spike branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/security-audit/owasp-scan"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "security-audit branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/dep-upgrade/bump-react"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "dep-upgrade branch is not recorded"); + + writeIntegrationBranch(repo, "M001", "gsd/full-project/new-app"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, "full-project branch is not recorded"); + + rmSync(repo, { recursive: true, force: true }); + }); + + // ─── writeIntegrationBranch: still records legitimate branches ──────── + + test('Integration branch: records non-ephemeral gsd branches', () => { + const repo = initBranchTestRepo(); + + // A normal feature branch should still be recorded + writeIntegrationBranch(repo, "M001", "feature/new-thing"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), "feature/new-thing", "normal branches are recorded"); + + // The main branch should be recorded + writeIntegrationBranch(repo, "M002", "main"); + assert.deepStrictEqual(readIntegrationBranch(repo, "M002"), "main", "main branch is recorded"); + + rmSync(repo, { recursive: true, force: true }); + }); + // ─── writeIntegrationBranch: rejects invalid branch names ───────────── test('Integration branch: rejects invalid names', () => { From 006184456a4039342dc7facdfb30acfd1911f22f Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Mon, 23 Mar 2026 18:46:44 +0100 Subject: [PATCH 12/18] fix(gsd): use explicit parameter syntax in skill activation prompts The skill activation block used positional-looking syntax `Call Skill('name')` which caused LLMs (especially non-Anthropic models) to pass `{name: "..."}` instead of the required `{skill: "..."}` parameter. This triggered tool validation failures and stuck dispatch loops in auto-mode. 
Change the prompt template to `Call Skill({ skill: 'name' })` which makes the parameter name explicit and matches the Skill tool schema. Update all 4 affected test assertions to match the new format. Closes #2224 --- src/resources/extensions/gsd/auto-prompts.ts | 4 +++- .../extensions/gsd/tests/skill-activation.test.ts | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index d683102dc..102aebb63 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -421,7 +421,9 @@ function resolvePreferredSkillNames( function formatSkillActivationBlock(skillNames: string[]): string { if (skillNames.length === 0) return ""; - const calls = skillNames.map(name => `Call Skill('${name}')`).join('. '); + // Use explicit parameter syntax so LLMs pass { skill: "..." } instead of { name: "..." }. + // Positional-looking `Skill('name')` caused validation failures — see #2224. + const calls = skillNames.map(name => `Call Skill({ skill: '${name}' })`).join('. '); return `${calls}.`; } diff --git a/src/resources/extensions/gsd/tests/skill-activation.test.ts b/src/resources/extensions/gsd/tests/skill-activation.test.ts index 673e8911c..312c078bf 100644 --- a/src/resources/extensions/gsd/tests/skill-activation.test.ts +++ b/src/resources/extensions/gsd/tests/skill-activation.test.ts @@ -75,7 +75,7 @@ test("buildSkillActivationBlock activates skills via prefer_skills when context prefer_skills: ["react"], }); - assert.match(result, /Call Skill\('react'\)/); + assert.match(result, /Call Skill\(\{ skill: 'react' \}\)/); assert.doesNotMatch(result, /swiftui/); } finally { cleanup(base); @@ -92,7 +92,7 @@ test("buildSkillActivationBlock includes always_use_skills from preferences usin always_use_skills: ["swift-testing"], }); - assert.equal(result, "Call Skill('swift-testing')."); + assert.equal(result, "Call Skill({ skill: 'swift-testing' })."); } finally { cleanup(base); } @@ -120,8 +120,8 @@ test("buildSkillActivationBlock includes skill_rules matches and task-plan skill skill_rules: [{ when: "prisma database schema", use: ["prisma"] }], }); - assert.match(result, /Call Skill\('accessibility'\)/); - assert.match(result, /Call Skill\('prisma'\)/); + assert.match(result, /Call Skill\(\{ skill: 'accessibility' \}\)/); + assert.match(result, /Call Skill\(\{ skill: 'prisma' \}\)/); } finally { cleanup(base); } From 5a64da32d336b015713082f139511af50d059987 Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Mon, 23 Mar 2026 20:17:01 +0100 Subject: [PATCH 13/18] review: clarify comment wording, add special-character test Address review feedback: - Update comment to clarify that the function-call-like syntax led LLMs to infer a positional parameter name (not 'positional-looking') - Add test documenting current behavior when skill names contain special characters (quotes, apostrophes) --- src/resources/extensions/gsd/auto-prompts.ts | 3 ++- .../gsd/tests/skill-activation.test.ts | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 102aebb63..f06bca4da 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -422,7 +422,8 @@ function resolvePreferredSkillNames( function formatSkillActivationBlock(skillNames: string[]): string { if 
(skillNames.length === 0) return ""; // Use explicit parameter syntax so LLMs pass { skill: "..." } instead of { name: "..." }. - // Positional-looking `Skill('name')` caused validation failures — see #2224. + // The function-call-like syntax `Skill('name')` led LLMs to infer a positional + // parameter name, causing tool validation failures — see #2224. const calls = skillNames.map(name => `Call Skill({ skill: '${name}' })`).join('. '); return `${calls}.`; } diff --git a/src/resources/extensions/gsd/tests/skill-activation.test.ts b/src/resources/extensions/gsd/tests/skill-activation.test.ts index 312c078bf..064b68f5c 100644 --- a/src/resources/extensions/gsd/tests/skill-activation.test.ts +++ b/src/resources/extensions/gsd/tests/skill-activation.test.ts @@ -191,3 +191,23 @@ test("buildSkillActivationBlock does not activate skills from extraContext or ta cleanup(base); } }); + +test("buildSkillActivationBlock handles skill names with special characters safely", () => { + const base = makeTempBase(); + try { + // Skill names come from directory names — test that quotes/braces don't break the template + writeSkill(base, "my-skill's", "Skill with apostrophe in name."); + loadOnlyTestSkills(base); + + const result = buildBlock(base, {}, { + always_use_skills: ["my-skill's"], + }); + + // The skill name is interpolated as-is — this documents current behavior. + // A future guard (e.g. /^[a-z0-9-]+$/) could reject such names. + assert.match(result, /skill_activation/); + assert.match(result, /my-skill's/); + } finally { + cleanup(base); + } +}); From e9c89941743de29fc912b2c64d125fa29f03ab75 Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Tue, 24 Mar 2026 18:09:55 +0100 Subject: [PATCH 14/18] fix: add SAFE_SKILL_NAME guard to reject prompt-injection via crafted skill names Adds /^[a-z0-9][a-z0-9-]*$/ validation in formatSkillActivationBlock() so that skill names containing quotes, braces, or other special characters are silently filtered out before interpolation into the prompt string. Addresses the prompt injection surface noted by @trek-e in PR review. Updates the special-character test to verify rejection instead of passthrough. --- src/resources/extensions/gsd/auto-prompts.ts | 9 ++++-- .../gsd/tests/skill-activation.test.ts | 32 +++++++++++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index f06bca4da..e8136371d 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -419,12 +419,17 @@ function resolvePreferredSkillNames( .map(skill => normalizeSkillReference(skill.name)); } +/** Skill names must be lowercase alphanumeric with hyphens — reject anything else + * to prevent prompt injection via crafted directory names. */ +const SAFE_SKILL_NAME = /^[a-z0-9][a-z0-9-]*$/; + function formatSkillActivationBlock(skillNames: string[]): string { - if (skillNames.length === 0) return ""; + const safe = skillNames.filter(name => SAFE_SKILL_NAME.test(name)); + if (safe.length === 0) return ""; // Use explicit parameter syntax so LLMs pass { skill: "..." } instead of { name: "..." }. // The function-call-like syntax `Skill('name')` led LLMs to infer a positional // parameter name, causing tool validation failures — see #2224. - const calls = skillNames.map(name => `Call Skill({ skill: '${name}' })`).join('. '); + const calls = safe.map(name => `Call Skill({ skill: '${name}' })`).join('. 
'); return `${calls}.`; } diff --git a/src/resources/extensions/gsd/tests/skill-activation.test.ts b/src/resources/extensions/gsd/tests/skill-activation.test.ts index 064b68f5c..f02310935 100644 --- a/src/resources/extensions/gsd/tests/skill-activation.test.ts +++ b/src/resources/extensions/gsd/tests/skill-activation.test.ts @@ -192,10 +192,11 @@ test("buildSkillActivationBlock does not activate skills from extraContext or ta } }); -test("buildSkillActivationBlock handles skill names with special characters safely", () => { +test("buildSkillActivationBlock rejects skill names with special characters", () => { const base = makeTempBase(); try { - // Skill names come from directory names — test that quotes/braces don't break the template + // Skill names with quotes, braces, or other non-alphanumeric characters are + // rejected by the SAFE_SKILL_NAME guard to prevent prompt injection. writeSkill(base, "my-skill's", "Skill with apostrophe in name."); loadOnlyTestSkills(base); @@ -203,10 +204,29 @@ test("buildSkillActivationBlock handles skill names with special characters safe always_use_skills: ["my-skill's"], }); - // The skill name is interpolated as-is — this documents current behavior. - // A future guard (e.g. /^[a-z0-9-]+$/) could reject such names. - assert.match(result, /skill_activation/); - assert.match(result, /my-skill's/); + // Unsafe skill name is filtered out — empty result + assert.equal(result, ""); + } finally { + cleanup(base); + } +}); + +test("buildSkillActivationBlock allows valid skill names and rejects invalid ones", () => { + const base = makeTempBase(); + try { + writeSkill(base, "react", "React skill."); + writeSkill(base, "bad'name", "Injection attempt."); + writeSkill(base, "good-skill-2", "Another valid skill."); + loadOnlyTestSkills(base); + + const result = buildBlock(base, {}, { + always_use_skills: ["react", "bad'name", "good-skill-2"], + }); + + assert.match(result, /skill_activation/); + assert.match(result, /Call Skill\(\{ skill: 'react' \}\)/); + assert.match(result, /Call Skill\(\{ skill: 'good-skill-2' \}\)/); + assert.doesNotMatch(result, /bad'name/); } finally { cleanup(base); } From c06e42eec41a1eeb9804c2168866b45e15e5e3b4 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Wed, 25 Mar 2026 19:39:09 -0500 Subject: [PATCH 15/18] fix(remote-questions): use static ESM import for AuthStorage hydration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hydrateRemoteTokensFromAuth() function used require() to load AuthStorage from @gsd/pi-coding-agent, but the package is ESM-only ("type": "module" with only an "import" export condition). Node's require() always throws for ESM packages, and the outer try/catch silently swallowed the error — making hydration a no-op. Replace require() with a static ESM import (consistent with every other extension) and use AuthStorage.create() which resolves the auth.json path internally via getAgentDir(). 
Closes #2565 --- src/resources/extensions/remote-questions/config.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/resources/extensions/remote-questions/config.ts b/src/resources/extensions/remote-questions/config.ts index 7aa95fa3e..b0f4e3138 100644 --- a/src/resources/extensions/remote-questions/config.ts +++ b/src/resources/extensions/remote-questions/config.ts @@ -2,7 +2,7 @@ * Remote Questions — configuration resolution and validation */ -import { join } from "node:path"; +import { AuthStorage } from "@gsd/pi-coding-agent"; import { loadEffectiveGSDPreferences, type RemoteQuestionsConfig } from "../gsd/preferences.js"; import type { RemoteChannel } from "./types.js"; @@ -54,9 +54,7 @@ function hydrateRemoteTokensFromAuth(): void { if (needed.length === 0) return; try { - const { AuthStorage } = require("@gsd/pi-coding-agent") as typeof import("@gsd/pi-coding-agent"); - const authPath = join(process.env.HOME ?? "~", ".gsd", "agent", "auth.json"); - const auth = AuthStorage.create(authPath); + const auth = AuthStorage.create(); for (const [providerId, envVar] of needed) { try { @@ -72,7 +70,7 @@ function hydrateRemoteTokensFromAuth(): void { } } } catch { - // AuthStorage unavailable (unit tests, stripped build) — skip silently. + // AuthStorage unavailable or auth.json missing/unreadable — skip silently. } } From 8e7ec7885ac080d6f89cf0b6df0d0ab04f6a3aa4 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Wed, 25 Mar 2026 21:35:09 -0500 Subject: [PATCH 16/18] fix(search): enforce hard search budget and survive context compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Native search: use monotonic high-water mark (Math.max) instead of overwriting sessionSearchCount from history. Prevents budget reset when context compaction removes web_search_tool_result blocks. - Custom search tool: add MAX_SEARCHES_PER_SESSION=15 hard cap across all queries (not just consecutive duplicates). Returns budget_exhausted error when limit reached. - Tighten MAX_CONSECUTIVE_DUPES from 3 to 1 — block on the 2nd identical search since cached results make repeats pointless. - Add tests for compaction-safe high-water mark, session budget enforcement, and budget reset on session_start. Closes #2583 --- .../search-the-web/native-search.ts | 11 +- .../extensions/search-the-web/tool-search.ts | 24 +++- src/tests/native-search.test.ts | 45 ++++++ src/tests/search-loop-guard.test.ts | 131 +++++++++++++----- 4 files changed, 173 insertions(+), 38 deletions(-) diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index a153f8cc3..0f7805528 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ); payload.tools = tools; - // ── Session-level search budget (#1309) ────────────────────────────── + // ── Session-level search budget (#1309, #compaction-safe) ───────────── // Count web_search_tool_result blocks in the conversation history to // determine how many native searches have already been used this session. // The Anthropic API's max_uses resets per request, so without this guard, // pause_turn → resubmit cycles allow unlimited total searches. + // + // Use the monotonic high-water mark: take the max of the history count + // and the running counter. 
This prevents budget resets when context + // compaction removes web_search_tool_result blocks from history. if (Array.isArray(messages)) { let historySearchCount = 0; for (const msg of messages) { @@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: } } } - // Sync counter from history (handles session restore / context replay) - sessionSearchCount = historySearchCount; + // High-water mark: never decrease the counter, even if compaction + // removes web_search_tool_result blocks from the visible history. + sessionSearchCount = Math.max(sessionSearchCount, historySearchCount); } const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount); diff --git a/src/resources/extensions/search-the-web/tool-search.ts b/src/resources/extensions/search-the-web/tool-search.ts index 399a399df..e645a502f 100644 --- a/src/resources/extensions/search-the-web/tool-search.ts +++ b/src/resources/extensions/search-the-web/tool-search.ts @@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000); // Consecutive duplicate search guard (#949) // Tracks recent query keys to detect and break search loops. -const MAX_CONSECUTIVE_DUPES = 3; +const MAX_CONSECUTIVE_DUPES = 1; let lastSearchKey = ""; let consecutiveDupeCount = 0; -/** Reset session-scoped duplicate-search guard state. */ +// Session-level total search budget (all queries, not just duplicates). +// Prevents unbounded search accumulation across varied queries. +const MAX_SEARCHES_PER_SESSION = 15; +let sessionTotalSearches = 0; + +/** Reset session-scoped search guard state (both duplicate and budget). */ export function resetSearchLoopGuardState(): void { lastSearchKey = ""; consecutiveDupeCount = 0; + sessionTotalSearches = 0; } // Summarizer responses: max 50 entries, 15-minute TTL @@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) { }; } + // ------------------------------------------------------------------ + // Session-level search budget + // ------------------------------------------------------------------ + if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) { + return { + content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }], + isError: true, + details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial, + }; + } + const count = params.count ?? 5; const wantSummary = params.summary ?? false; @@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) { consecutiveDupeCount = 1; } + // Count every search that passes the guards toward the session budget. 
+ sessionTotalSearches++; + const cached = searchCache.get(cacheKey); if (cached) { diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts index 55c964f79..c6ff41310 100644 --- a/src/tests/native-search.test.ts +++ b/src/tests/native-search.test.ts @@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => { assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)"); }); +test("session search budget: survives context compaction (high-water mark)", async () => { + const pi = createMockPI(); + registerNativeSearchHooks(pi); + + await pi.fire("model_select", { + type: "model_select", + model: { provider: "anthropic", name: "claude-sonnet-4-6" }, + previousModel: undefined, + source: "set", + }); + + // First request: history has 12 web_search_tool_result blocks + const searchBlocks = Array.from({ length: 12 }, (_, i) => ({ + type: "web_search_tool_result", + tool_use_id: `ws${i}`, + content: [], + })); + + let payload: Record = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + let tools = payload.tools as any[]; + let nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used"); + assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)"); + + // Second request: context was compacted — search blocks gone from history. + // Without high-water mark, the budget would reset to 15. + payload = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: "compacted context — no search blocks" }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + tools = payload.tools as any[]; + nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)"); + assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining"); +}); + // ─── stripThinkingFromHistory tests ───────────────────────────────────────── test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => { diff --git a/src/tests/search-loop-guard.test.ts b/src/tests/search-loop-guard.test.ts index be4c7023a..c80ff4796 100644 --- a/src/tests/search-loop-guard.test.ts +++ b/src/tests/search-loop-guard.test.ts @@ -11,7 +11,7 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts"; +import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts"; import searchExtension from "../resources/extensions/search-the-web/index.ts"; const ORIGINAL_ENV = { @@ -72,6 +72,8 @@ function createMockPI() { const toolsByName = new Map(); let registeredTool: any = null; + let activeTools: string[] = []; + const pi = { on(event: string, handler: (...args: any[]) => unknown) { handlers.push({ event, handler }); @@ -91,6 +93,8 @@ function createMockPI() { getRegisteredTool(name = "search-the-web") { return toolsByName.get(name) ?? 
registeredTool; }, + getActiveTools() { return activeTools; }, + setActiveTools(tools: string[]) { activeTools = tools; }, writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt", }; @@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t) const execute = tool.execute.bind(tool); - // Calls 1–3: below threshold, should return search results (not an error) - for (let i = 1; i <= 3; i++) { - const result = await callSearch(execute, "loop test query", `call-${i}`); - assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`); - } + // Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1) + const result1 = await callSearch(execute, "loop test query", "call-1"); + assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard"); - // Call 4: hits the threshold — guard fires - const result4 = await callSearch(execute, "loop test query", "call-4"); - assert.equal(result4.isError, true, "call 4 should trigger the loop guard"); - assert.equal(result4.details?.errorKind, "search_loop"); + // Call 2: identical query — guard fires immediately (threshold = 1) + const result2 = await callSearch(execute, "loop test query", "call-2"); + assert.equal(result2.isError, true, "call 2 should trigger the loop guard"); + assert.equal(result2.details?.errorKind, "search_loop"); assert.ok( - result4.content[0].text.includes("Search loop detected"), + result2.content[0].text.includes("Search loop detected"), "error message should mention search loop" ); }); @@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => { assert.ok(tool, "search tool should be registered"); const execute = tool.execute.bind(tool); - // Trigger guard in session 1 - for (let i = 1; i <= 4; i++) { - await callSearch(execute, query, `s1-call-${i}`); - } - const guardResult = await callSearch(execute, query, "s1-call-5"); + // Trigger guard in session 1 (call 1 succeeds, call 2 fires guard) + await callSearch(execute, query, "s1-call-1"); + const guardResult = await callSearch(execute, query, "s1-call-2"); assert.equal(guardResult.isError, true, "session 1 should be guarded"); assert.equal(guardResult.details?.errorKind, "search_loop"); @@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Exhaust the initial window (calls 1–3 succeed, call 4 fires guard) - for (let i = 1; i <= 3; i++) { - await callSearch(execute, query, `call-${i}`); - } - const guardFirst = await callSearch(execute, query, "call-4"); - assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard"); + // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1) + await callSearch(execute, query, "call-1"); + const guardFirst = await callSearch(execute, query, "call-2"); + assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard"); - // Key regression test: call 5 (and beyond) must ALSO trigger the guard. - // The original bug reset state on trigger, so call 5 was treated as a fresh + // Key regression test: call 3 (and beyond) must ALSO trigger the guard. + // The original bug reset state on trigger, so call 3 was treated as a fresh // first search and the loop restarted. 
- const guardSecond = await callSearch(execute, query, "call-5"); + const guardSecond = await callSearch(execute, query, "call-3"); assert.equal( guardSecond.isError, true, - "call 5 should STILL trigger the loop guard (guard must stay armed after firing)" + "call 3 should STILL trigger the loop guard (guard must stay armed after firing)" ); assert.equal(guardSecond.details?.errorKind, "search_loop"); - // Call 6 as well — guard should keep firing - const guardThird = await callSearch(execute, query, "call-6"); + // Call 4 as well — guard should keep firing + const guardThird = await callSearch(execute, query, "call-4"); assert.equal( guardThird.isError, true, - "call 6 should STILL trigger the loop guard" + "call 4 should STILL trigger the loop guard" ); }); @@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Trigger guard for queryA - for (let i = 1; i <= 4; i++) { - await callSearch(execute, queryA, `call-a-${i}`); - } + // Trigger guard for queryA (call 1 succeeds, call 2 fires guard) + await callSearch(execute, queryA, "call-a-1"); + await callSearch(execute, queryA, "call-a-2"); // Issue a different query — should succeed (resets the duplicate counter) const resultB = await callSearch(execute, queryB, "call-b-1"); @@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async "a different query after guard should not be treated as a loop" ); }); + +test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset guard state (including session budget) and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + assert.ok(tool, "search tool should be registered"); + const execute = tool.execute.bind(tool); + + // Issue 15 unique queries — all should succeed (budget = 15) + for (let i = 1; i <= 15; i++) { + const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`); + assert.notEqual(result.isError, true, `query ${i} should succeed within budget`); + } + + // Query 16: budget exhausted — should be blocked + const blocked = await callSearch(execute, "one more query", "budget-16"); + assert.equal(blocked.isError, true, "query 16 should be blocked by budget"); + assert.equal(blocked.details?.errorKind, "budget_exhausted"); + assert.ok( + blocked.content[0].text.includes("Search budget exhausted"), + "error message should mention budget" + ); +}); + +test("session search budget resets via resetSearchLoopGuardState", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget-reset"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + const execute = tool.execute.bind(tool); + + // Exhaust budget + for (let i = 1; i <= 15; i++) { + await callSearch(execute, `budget reset query ${i}`, `br-${i}`); + } + const 
exhausted = await callSearch(execute, "exhausted query", "br-exhausted"); + assert.equal(exhausted.isError, true, "budget should be exhausted"); + + // Reset simulates new session + resetSearchLoopGuardState(); + const fresh = await callSearch(execute, "fresh session query", "br-fresh"); + assert.notEqual(fresh.isError, true, "first query after reset should succeed"); +}); From ebb5afbd571c7e4daeb46a5666c8cccfccba040b Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:18:26 -0600 Subject: [PATCH 17/18] fix: use GitHub Issue Types via GraphQL instead of classification labels The forensics prompt and gh skill used --label "bug" / --label "type:feature" for issue classification, polluting the label taxonomy and leaving the Type field unset. gh issue create has no --type flag, so issue types must be set via GraphQL mutation after creation. Closes #2579 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extensions/gsd/prompts/forensics.md | 13 ++++++++--- .../github-workflows/references/gh/SKILL.md | 23 ++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md index f576d17c4..9112a773f 100644 --- a/src/resources/extensions/gsd/prompts/forensics.md +++ b/src/resources/extensions/gsd/prompts/forensics.md @@ -142,9 +142,10 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu If yes, create using the `bash` tool: ```bash -gh issue create --repo gsd-build/gsd-2 \ +# Step 1: Create issue (use labels for metadata, NOT for classification — type is set via GraphQL) +ISSUE_URL=$(gh issue create --repo gsd-build/gsd-2 \ --title "..." \ - --label "bug" --label "auto-generated" \ + --label "auto-generated" \ --body "$(cat <<'EOF' ## Problem [1-2 sentence summary] @@ -169,7 +170,13 @@ gh issue create --repo gsd-build/gsd-2 \ --- *Auto-generated by `/gsd forensics`* EOF -)" +)") + +# Step 2: Set issue type via GraphQL (gh issue create has no --type flag) +ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$') +ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id') +TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id') +gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }' ``` ### Redaction Rules (CRITICAL) diff --git a/src/resources/skills/github-workflows/references/gh/SKILL.md b/src/resources/skills/github-workflows/references/gh/SKILL.md index 2d1f4a53d..05d40f337 100644 --- a/src/resources/skills/github-workflows/references/gh/SKILL.md +++ b/src/resources/skills/github-workflows/references/gh/SKILL.md @@ -103,9 +103,12 @@ gh issue list -R gsd-build/gsd-2 gh issue list -R gsd-build/gsd-2 --label "priority:p1" --state open # Create issue with labels and milestone +# NOTE: Do NOT use labels for issue classification (bug, feature, etc.) +# Use labels for metadata (priority, status, auto-generated) only. +# Issue classification uses GitHub Issue Types, set via GraphQL after creation. 
gh issue create -R gsd-build/gsd-2 \ --title "feat: add feature X" \ - --label "priority:p1" --label "type:feature" \ + --label "priority:p1" \ --milestone "v1.0" # View issue @@ -120,6 +123,24 @@ gh issue edit -R gsd-build/gsd-2 \ --remove-label "status:needs-grooming" ``` +### Issue Types (Classification) + +`gh issue create` has no `--type` flag. Issue types (Bug, Feature Request, etc.) are set via GraphQL after creation: + +```bash +# Step 1: Create the issue (returns URL) +ISSUE_URL=$(gh issue create -R gsd-build/gsd-2 \ + --title "..." --body "...") + +# Step 2: Set the issue type via GraphQL +ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$') +ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id') +TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id') +gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }' +``` + +Replace `"Bug"` with the appropriate type name (`"Feature Request"`, `"Task"`, etc.). + ### Labels ```bash From 36ff7ac4fedd9fbc05d6020f8a61916ee5144315 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Wed, 25 Mar 2026 22:19:53 -0600 Subject: [PATCH 18/18] Fix complete-milestone prompt with structured parameter definitions Replace the free-form parameter listing in step 7 of complete-milestone.md with structured, typed parameter definitions that match the tool schema in db-tools.ts. Parameters are grouped into required and optional sections with explicit types (marking arrays as arrays, booleans as booleans) to prevent LLM validation failures when calling gsd_complete_milestone. Fixes #2581 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../gsd/prompts/complete-milestone.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/prompts/complete-milestone.md b/src/resources/extensions/gsd/prompts/complete-milestone.md index 0ce59eeb7..4e11e80a6 100644 --- a/src/resources/extensions/gsd/prompts/complete-milestone.md +++ b/src/resources/extensions/gsd/prompts/complete-milestone.md @@ -35,7 +35,24 @@ Then: **Success path** (all verifications passed — continue with steps 7–11): -7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. +7. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. + + **Required parameters:** + - `milestoneId` (string) — Milestone ID (e.g. 
M001) + - `title` (string) — Milestone title + - `oneLiner` (string) — One-sentence summary of what the milestone achieved + - `narrative` (string) — Detailed narrative of what happened during the milestone + - `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met + - `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met + - `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence + - `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone + - `keyFiles` (array of strings) — Key files created or modified during the milestone + - `lessonsLearned` (array of strings) — Lessons learned during the milestone + - `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion + + **Optional parameters:** + - `followUps` (string) — Follow-up items for future milestones + - `deviations` (string) — Deviations from the original plan 8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically. 9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state. 10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.
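To make the typing concrete, here is a hypothetical `gsd_complete_milestone` call shaped to the parameter list above; the field names come from the prompt text, while every value is illustrative and the authoritative schema remains the tool definition in db-tools.ts:

```ts
// Illustrative only: field names follow the parameter list above, values are made up.
const completeMilestoneArgs = {
  // Required
  milestoneId: "M001",
  title: "User authentication",
  oneLiner: "Adds email/password auth with session management.",
  narrative: "Implemented signup, login, and session middleware across three slices.",
  successCriteriaResults: "- Users can sign up and log in: **met** (e2e suite green)",
  definitionOfDoneResults: "- All slices merged to the integration branch: **met**",
  requirementOutcomes: "- AUTH-01 moved from `planned` to `implemented` with test evidence",
  keyDecisions: ["Chose cookie sessions over JWT for revocability"], // array of strings, not one string
  keyFiles: ["src/auth/session.ts", "src/routes/login.ts"],          // array of strings
  lessonsLearned: ["Seed test users in a fixture, not per-test"],    // array of strings
  verificationPassed: true,                                          // boolean true, not the string "true"
  // Optional
  followUps: "Rate-limit login attempts in a future milestone.",
  deviations: "OAuth was dropped from scope and tracked as a follow-up.",
};
```

The explicit `(array of strings)` and `(boolean)` annotations exist precisely to produce this shape; passing, say, `keyDecisions` as a single string or `verificationPassed` as `"true"` is the kind of mismatch the structured definitions are meant to prevent.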