From ccde39c2c86826559169acb1980071ce2fab7b09 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Thu, 19 Mar 2026 11:25:20 -0500 Subject: [PATCH] =?UTF-8?q?fix:=20close=205=20doctor=20coverage=20gaps=20?= =?UTF-8?q?=E2=80=94=20providers,=20lock=20dir,=20integration=20branch,=20?= =?UTF-8?q?orphaned=20worktrees?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the highest-impact gaps identified in the /gsd doctor deep-dive analysis. **1. Wire provider checks into runGSDDoctor()** doctor-providers.ts existed and worked but was never called from the main doctor run. Units could dispatch into guaranteed API failures with no warning. Now runProviderChecks() is called in runGSDDoctor() and converts required-provider errors/warnings into DoctorIssue entries with codes: - provider_key_missing (error) - provider_key_backedoff (warning) **2. Stranded lock directory detection (doctor-checks.ts)** proper-lockfile creates a .gsd.lock/ directory as the OS-level lock mechanism. After SIGKILL or hard crash, this directory can remain stranded, blocking all future auto-mode sessions from acquiring the lock (#1245 pattern). Doctor now: - Detects .gsd.lock/ existing without a live process holding it - Reports as stranded_lock_directory (error, fixable) - Auto-fix removes the stranded directory **3. Integration branch existence check (doctor-checks.ts + doctor-proactive.ts)** When a milestone records an integration branch and that branch is later deleted or renamed, merge-back will fail silently at the end of the milestone. Doctor now: - Checks each active milestone's stored integration branch exists in git - Reports as integration_branch_missing (error, not auto-fixable) - preDispatchHealthGate blocks dispatch if the active milestone's integration branch is missing, preventing work from being dispatched into a dead end **4. Orphaned worktree directory detection (doctor-checks.ts)** Worktree removal can fail after a branch delete, leaving a .gsd/worktrees// directory that is no longer registered with git. Re-creating the same name fails with "already exists". Doctor now: - Compares .gsd/worktrees/ entries against git worktree list - Reports unregistered directories as worktree_directory_orphaned (warning, fixable) - Auto-fix removes the orphaned directory Tests: all new codes covered with detection + fix assertions, including false-positive safety cases (live lock holder, registered worktrees, existing integration branch). All 1843 existing tests still pass. --- src/resources/extensions/gsd/doctor-checks.ts | 112 +++++++++++++++++- .../extensions/gsd/doctor-proactive.ts | 24 ++++ src/resources/extensions/gsd/doctor-types.ts | 10 +- src/resources/extensions/gsd/doctor.ts | 30 +++++ .../extensions/gsd/tests/doctor-git.test.ts | 100 +++++++++++++++- .../gsd/tests/doctor-runtime.test.ts | 55 ++++++++- 6 files changed, 320 insertions(+), 11 deletions(-) diff --git a/src/resources/extensions/gsd/doctor-checks.ts b/src/resources/extensions/gsd/doctor-checks.ts index 85d3efaab..9b3e4d5ac 100644 --- a/src/resources/extensions/gsd/doctor-checks.ts +++ b/src/resources/extensions/gsd/doctor-checks.ts @@ -1,15 +1,15 @@ -import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, statSync } from "node:fs"; -import { join, sep } from "node:path"; +import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs"; +import { dirname, join, sep } from "node:path"; import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; import { loadFile, parseRoadmap } from "./files.js"; import { resolveMilestoneFile, milestonesDir, gsdRoot, resolveGsdRootFile, relGsdRootFile } from "./paths.js"; import { deriveState, isMilestoneComplete } from "./state.js"; import { saveFile } from "./files.js"; -import { listWorktrees, resolveGitDir } from "./worktree-manager.js"; +import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js"; import { abortAndReset } from "./git-self-heal.js"; -import { RUNTIME_EXCLUSION_PATHS } from "./git-service.js"; -import { nativeIsRepo, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js"; +import { RUNTIME_EXCLUSION_PATHS, readIntegrationBranch } from "./git-service.js"; +import { nativeIsRepo, nativeBranchExists, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js"; import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; import { ensureGitignore } from "./gitignore.js"; import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js"; @@ -215,6 +215,70 @@ export async function checkGitHealth( } catch { // git branch list failed — skip } + + // ── Integration branch existence ────────────────────────────────────── + // For each active (non-complete) milestone, verify the stored integration + // branch still exists in git. A missing integration branch blocks merge-back + // and causes the next merge operation to fail silently. + try { + const state = await deriveState(basePath); + for (const milestone of state.registry) { + if (milestone.status === "complete") continue; + const integrationBranch = readIntegrationBranch(basePath, milestone.id); + if (!integrationBranch) continue; // No stored branch — skip (not yet set) + if (!nativeBranchExists(basePath, integrationBranch)) { + issues.push({ + severity: "error", + code: "integration_branch_missing", + scope: "milestone", + unitId: milestone.id, + message: `Milestone ${milestone.id} recorded integration branch "${integrationBranch}" but that branch no longer exists in git. Merge-back will fail.`, + fixable: false, + }); + } + } + } catch { + // Non-fatal — integration branch check failed + } + + // ── Orphaned worktree directories ──────────────────────────────────── + // Worktree removal can fail after a branch delete, leaving a directory + // that is no longer registered with git. These orphaned dirs cause + // "already exists" errors when re-creating the same worktree name. + try { + const wtDir = worktreesDir(basePath); + if (existsSync(wtDir)) { + const registeredPaths = new Set( + nativeWorktreeList(basePath).map(entry => entry.path), + ); + for (const entry of readdirSync(wtDir)) { + const fullPath = join(wtDir, entry); + try { + if (!statSync(fullPath).isDirectory()) continue; + } catch { continue; } + if (!registeredPaths.has(fullPath)) { + issues.push({ + severity: "warning", + code: "worktree_directory_orphaned", + scope: "project", + unitId: entry, + message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`, + fixable: true, + }); + if (shouldFix("worktree_directory_orphaned")) { + try { + rmSync(fullPath, { recursive: true, force: true }); + fixesApplied.push(`removed orphaned worktree directory ${fullPath}`); + } catch { + fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`); + } + } + } + } + } + } catch { + // Non-fatal — orphaned worktree directory check failed + } } // ── Runtime Health Checks ────────────────────────────────────────────────── @@ -255,6 +319,44 @@ export async function checkRuntimeHealth( // Non-fatal — crash lock check failed } + // ── Stranded lock directory ──────────────────────────────────────────── + // proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock + // mechanism. If the process was SIGKILLed or crashed hard, this directory + // can remain on disk without any live process holding it. The next session + // fails to acquire the lock until the directory is removed (#1245). + try { + const lockDir = join(dirname(root), `${root.split("/").pop() ?? ".gsd"}.lock`); + if (existsSync(lockDir)) { + const statRes = statSync(lockDir); + if (statRes.isDirectory()) { + // Check if any live process actually holds this lock + const lock = readCrashLock(basePath); + const lockHolderAlive = lock ? isLockProcessAlive(lock) : false; + if (!lockHolderAlive) { + issues.push({ + severity: "error", + code: "stranded_lock_directory", + scope: "project", + unitId: "project", + message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. This blocks new auto-mode sessions from starting.`, + file: lockDir, + fixable: true, + }); + if (shouldFix("stranded_lock_directory")) { + try { + rmSync(lockDir, { recursive: true, force: true }); + fixesApplied.push(`removed stranded lock directory ${lockDir}`); + } catch { + fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`); + } + } + } + } + } + } catch { + // Non-fatal — stranded lock directory check failed + } + // ── Stale parallel sessions ──────────────────────────────────────────── try { const parallelStatuses = readAllSessionStatuses(basePath); diff --git a/src/resources/extensions/gsd/doctor-proactive.ts b/src/resources/extensions/gsd/doctor-proactive.ts index 29fabd288..f4202afc6 100644 --- a/src/resources/extensions/gsd/doctor-proactive.ts +++ b/src/resources/extensions/gsd/doctor-proactive.ts @@ -20,6 +20,9 @@ import { gsdRoot, resolveGsdRootFile } from "./paths.js"; import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; import { abortAndReset } from "./git-self-heal.js"; import { rebuildState } from "./doctor.js"; +import { deriveState } from "./state.js"; +import { readIntegrationBranch } from "./git-service.js"; +import { nativeBranchExists, nativeIsRepo } from "./native-git-bridge.js"; // ── Health Score Tracking ────────────────────────────────────────────────── @@ -191,6 +194,27 @@ export async function preDispatchHealthGate(basePath: string): Promise 0) { return { diff --git a/src/resources/extensions/gsd/doctor-types.ts b/src/resources/extensions/gsd/doctor-types.ts index dd4cafd7a..68740c30f 100644 --- a/src/resources/extensions/gsd/doctor-types.ts +++ b/src/resources/extensions/gsd/doctor-types.ts @@ -45,7 +45,15 @@ export type DoctorIssueCode = | "env_python" | "env_cargo" | "env_go" - | "env_git_remote"; + | "env_git_remote" + // Provider / auth checks + | "provider_key_missing" + | "provider_key_backedoff" + // Lock infrastructure checks + | "stranded_lock_directory" + // Git / worktree integrity checks + | "integration_branch_missing" + | "worktree_directory_orphaned"; /** * Issue codes that represent expected completion-transition states. diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index 2138a1586..cb4c117a9 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -11,6 +11,7 @@ import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; import { COMPLETION_TRANSITION_CODES } from "./doctor-types.js"; import { checkGitHealth, checkRuntimeHealth } from "./doctor-checks.js"; import { checkEnvironmentHealth } from "./doctor-environment.js"; +import { runProviderChecks } from "./doctor-providers.js"; // ── Re-exports ───────────────────────────────────────────────────────────── // All public types and functions from extracted modules are re-exported here @@ -396,6 +397,35 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; // Environment health checks (#1221: missing tools, port conflicts, stale deps, disk space) await checkEnvironmentHealth(basePath, issues, { includeRemote: !options?.scope }); + // Provider / auth health checks — detect missing or backed-off API keys before dispatching + try { + const providerResults = runProviderChecks(); + for (const result of providerResults) { + if (!result.required) continue; + if (result.status === "error") { + issues.push({ + severity: "error", + code: "provider_key_missing", + scope: "project", + unitId: "project", + message: result.message + (result.detail ? ` — ${result.detail}` : ""), + fixable: false, + }); + } else if (result.status === "warning") { + issues.push({ + severity: "warning", + code: "provider_key_backedoff", + scope: "project", + unitId: "project", + message: result.message + (result.detail ? ` — ${result.detail}` : ""), + fixable: false, + }); + } + } + } catch { + // Non-fatal — provider check failure should not block other checks + } + const milestonesPath = milestonesDir(basePath); if (!existsSync(milestonesPath)) { return { ok: issues.every(issue => issue.severity !== "error"), basePath, issues, fixesApplied }; diff --git a/src/resources/extensions/gsd/tests/doctor-git.test.ts b/src/resources/extensions/gsd/tests/doctor-git.test.ts index e466d0e36..016a88553 100644 --- a/src/resources/extensions/gsd/tests/doctor-git.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-git.test.ts @@ -2,9 +2,10 @@ * doctor-git.test.ts — Integration tests for doctor git health checks. * * Creates real temp git repos with deliberate broken state, runs runGSDDoctor, - * and asserts correct detection and fixing of all 4 git issue codes: + * and asserts correct detection and fixing of git issue codes: * orphaned_auto_worktree, stale_milestone_branch, - * corrupt_merge_state, tracked_runtime_files + * corrupt_merge_state, tracked_runtime_files, + * integration_branch_missing, worktree_directory_orphaned */ import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync } from "node:fs"; @@ -299,6 +300,101 @@ async function main(): Promise { console.log("\n=== none-mode skips stale branch (skipped on Windows) ==="); } + // ─── Test: Integration branch missing ────────────────────────────── + if (process.platform !== "win32") { + console.log("\n=== integration_branch_missing ==="); + { + const dir = createRepoWithActiveMilestone(); + cleanups.push(dir); + + // Write integration branch metadata for M001 pointing to a non-existent branch + const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json"); + writeFileSync(metaPath, JSON.stringify({ integrationBranch: "feat/does-not-exist" }, null, 2)); + + const detect = await runGSDDoctor(dir); + const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing"); + assertTrue(missingBranchIssues.length > 0, "detects missing integration branch"); + assertTrue( + missingBranchIssues[0]?.message.includes("feat/does-not-exist"), + "message includes the missing branch name", + ); + assertEq(missingBranchIssues[0]?.fixable, false, "integration_branch_missing is not auto-fixable"); + assertEq(missingBranchIssues[0]?.severity, "error", "severity is error"); + } + } else { + console.log("\n=== integration_branch_missing (skipped on Windows) ==="); + } + + // ─── Test: Integration branch present — no false positive ────────── + if (process.platform !== "win32") { + console.log("\n=== integration_branch_missing (no false positive) ==="); + { + const dir = createRepoWithActiveMilestone(); + cleanups.push(dir); + + // Write integration branch metadata for M001 pointing to "main" (which exists) + const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json"); + writeFileSync(metaPath, JSON.stringify({ integrationBranch: "main" }, null, 2)); + + const detect = await runGSDDoctor(dir); + const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing"); + assertEq(missingBranchIssues.length, 0, "existing integration branch NOT flagged"); + } + } else { + console.log("\n=== integration_branch_missing (no false positive — skipped on Windows) ==="); + } + + // ─── Test: Orphaned worktree directory ───────────────────────────── + if (process.platform !== "win32") { + console.log("\n=== worktree_directory_orphaned ==="); + { + const dir = createRepoWithActiveMilestone(); + cleanups.push(dir); + + // Create a worktrees/ dir with an entry that is NOT in git worktree list + const orphanDir = join(dir, ".gsd", "worktrees", "orphan-feature"); + mkdirSync(orphanDir, { recursive: true }); + writeFileSync(join(orphanDir, "some-file.txt"), "leftover content\n"); + + const detect = await runGSDDoctor(dir); + const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned"); + assertTrue(orphanDirIssues.length > 0, "detects orphaned worktree directory"); + assertTrue( + orphanDirIssues[0]?.message.includes("orphan-feature"), + "message includes the orphaned directory name", + ); + assertTrue(orphanDirIssues[0]?.fixable === true, "worktree_directory_orphaned is fixable"); + + const fixed = await runGSDDoctor(dir, { fix: true }); + assertTrue( + fixed.fixesApplied.some(f => f.includes("removed orphaned worktree directory")), + "fix removes orphaned worktree directory", + ); + assertTrue(!existsSync(orphanDir), "orphaned directory removed after fix"); + } + } else { + console.log("\n=== worktree_directory_orphaned (skipped on Windows) ==="); + } + + // ─── Test: Registered worktree NOT flagged as orphaned ───────────── + if (process.platform !== "win32") { + console.log("\n=== worktree_directory_orphaned (registered worktree not flagged) ==="); + { + const dir = createRepoWithActiveMilestone(); + cleanups.push(dir); + + // Create a real registered worktree under .gsd/worktrees/ + mkdirSync(join(dir, ".gsd", "worktrees"), { recursive: true }); + run("git worktree add -b worktree/feature-1 .gsd/worktrees/feature-1", dir); + + const detect = await runGSDDoctor(dir); + const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned"); + assertEq(orphanDirIssues.length, 0, "registered worktree NOT flagged as orphaned"); + } + } else { + console.log("\n=== worktree_directory_orphaned (registered worktree not flagged — skipped on Windows) ==="); + } + // ─── Test 9: none-mode still detects corrupt merge state ─────────── console.log("\n=== none-mode keeps corrupt merge state ==="); { diff --git a/src/resources/extensions/gsd/tests/doctor-runtime.test.ts b/src/resources/extensions/gsd/tests/doctor-runtime.test.ts index dda4ea9ab..db25dd57c 100644 --- a/src/resources/extensions/gsd/tests/doctor-runtime.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-runtime.test.ts @@ -2,9 +2,9 @@ * doctor-runtime.test.ts — Tests for doctor runtime health checks. * * Tests detection and auto-fix of: - * stale_crash_lock, orphaned_completed_units, stale_hook_state, - * activity_log_bloat, state_file_missing, state_file_stale, - * gitignore_missing_patterns + * stale_crash_lock, stranded_lock_directory, orphaned_completed_units, + * stale_hook_state, activity_log_bloat, state_file_missing, + * state_file_stale, gitignore_missing_patterns */ import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, realpathSync } from "node:fs"; @@ -290,6 +290,55 @@ node_modules/ assertEq(content.length, 0, "all orphaned keys removed"); } + // ─── Test: Stranded lock directory detection & fix ──────────────── + console.log("\n=== stranded_lock_directory ==="); + { + const dir = createMinimalProject(); + cleanups.push(dir); + + // Create the proper-lockfile lock directory without a live lock holder. + // The lock dir sits at /.gsd.lock (i.e., /.gsd.lock). + const lockDir = join(dir, ".gsd.lock"); + mkdirSync(lockDir, { recursive: true }); + + const detect = await runGSDDoctor(dir); + const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory"); + assertTrue(strandedIssues.length > 0, "detects stranded lock directory"); + assertTrue(strandedIssues[0]?.message.includes("lock directory"), "message describes stranded lock directory"); + assertTrue(strandedIssues[0]?.fixable === true, "stranded lock dir is fixable"); + + const fixed = await runGSDDoctor(dir, { fix: true }); + assertTrue( + fixed.fixesApplied.some(f => f.includes("removed stranded lock directory")), + "fix removes stranded lock directory", + ); + assertTrue(!existsSync(lockDir), "lock directory removed after fix"); + } + + // ─── Test: Stranded lock dir with live lock holder — NOT flagged ─── + console.log("\n=== stranded_lock_directory (live holder not flagged) ==="); + { + const dir = createMinimalProject(); + cleanups.push(dir); + + // Create lock dir + auto.lock with PID 1 (init/launchd — always alive, never our own PID) + const lockDir = join(dir, ".gsd.lock"); + mkdirSync(lockDir, { recursive: true }); + const liveLockData = { + pid: 1, + startedAt: new Date().toISOString(), + unitType: "execute-task", + unitId: "M001/S01/T01", + unitStartedAt: new Date().toISOString(), + completedUnits: 1, + }; + writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(liveLockData, null, 2)); + + const detect = await runGSDDoctor(dir); + const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory"); + assertEq(strandedIssues.length, 0, "live lock holder: stranded_lock_directory NOT detected"); + } + } finally { for (const dir of cleanups) { try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }