fix: close 5 doctor coverage gaps — providers, lock dir, integration branch, orphaned worktrees

Closes the highest-impact gaps identified in the /gsd doctor deep-dive analysis.

**1. Wire provider checks into runGSDDoctor()**
doctor-providers.ts existed and worked but was never called from the main
doctor run. Units could dispatch into guaranteed API failures with no warning.
Now runProviderChecks() is called in runGSDDoctor() and converts required-provider
errors/warnings into DoctorIssue entries with codes:
  - provider_key_missing (error)
  - provider_key_backedoff (warning)

**2. Stranded lock directory detection (doctor-checks.ts)**
proper-lockfile creates a .gsd.lock/ directory as the OS-level lock mechanism.
After SIGKILL or hard crash, this directory can remain stranded, blocking all
future auto-mode sessions from acquiring the lock (#1245 pattern). Doctor now:
  - Detects .gsd.lock/ existing without a live process holding it
  - Reports as stranded_lock_directory (error, fixable)
  - Auto-fix removes the stranded directory

**3. Integration branch existence check (doctor-checks.ts + doctor-proactive.ts)**
When a milestone records an integration branch and that branch is later deleted
or renamed, merge-back will fail silently at the end of the milestone. Doctor now:
  - Checks each active milestone's stored integration branch exists in git
  - Reports as integration_branch_missing (error, not auto-fixable)
  - preDispatchHealthGate blocks dispatch if the active milestone's integration
    branch is missing, preventing work from being dispatched into a dead end

**4. Orphaned worktree directory detection (doctor-checks.ts)**
Worktree removal can fail after a branch delete, leaving a .gsd/worktrees/<name>/
directory that is no longer registered with git. Re-creating the same name fails
with "already exists". Doctor now:
  - Compares .gsd/worktrees/ entries against git worktree list
  - Reports unregistered directories as worktree_directory_orphaned (warning, fixable)
  - Auto-fix removes the orphaned directory

Tests: all new codes covered with detection + fix assertions, including
false-positive safety cases (live lock holder, registered worktrees,
existing integration branch). All 1843 existing tests still pass.
This commit is contained in:
Jeremy McSpadden 2026-03-19 11:25:20 -05:00
parent e6bbd035ba
commit ccde39c2c8
6 changed files with 320 additions and 11 deletions

View file

@ -1,15 +1,15 @@
import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, statSync } from "node:fs";
import { join, sep } from "node:path";
import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs";
import { dirname, join, sep } from "node:path";
import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js";
import { loadFile, parseRoadmap } from "./files.js";
import { resolveMilestoneFile, milestonesDir, gsdRoot, resolveGsdRootFile, relGsdRootFile } from "./paths.js";
import { deriveState, isMilestoneComplete } from "./state.js";
import { saveFile } from "./files.js";
import { listWorktrees, resolveGitDir } from "./worktree-manager.js";
import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js";
import { abortAndReset } from "./git-self-heal.js";
import { RUNTIME_EXCLUSION_PATHS } from "./git-service.js";
import { nativeIsRepo, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js";
import { RUNTIME_EXCLUSION_PATHS, readIntegrationBranch } from "./git-service.js";
import { nativeIsRepo, nativeBranchExists, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js";
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
import { ensureGitignore } from "./gitignore.js";
import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js";
@ -215,6 +215,70 @@ export async function checkGitHealth(
} catch {
// git branch list failed — skip
}
// ── Integration branch existence ──────────────────────────────────────
// For each active (non-complete) milestone, verify the stored integration
// branch still exists in git. A missing integration branch blocks merge-back
// and causes the next merge operation to fail silently.
try {
const state = await deriveState(basePath);
for (const milestone of state.registry) {
if (milestone.status === "complete") continue;
const integrationBranch = readIntegrationBranch(basePath, milestone.id);
if (!integrationBranch) continue; // No stored branch — skip (not yet set)
if (!nativeBranchExists(basePath, integrationBranch)) {
issues.push({
severity: "error",
code: "integration_branch_missing",
scope: "milestone",
unitId: milestone.id,
message: `Milestone ${milestone.id} recorded integration branch "${integrationBranch}" but that branch no longer exists in git. Merge-back will fail.`,
fixable: false,
});
}
}
} catch {
// Non-fatal — integration branch check failed
}
// ── Orphaned worktree directories ────────────────────────────────────
// Worktree removal can fail after a branch delete, leaving a directory
// that is no longer registered with git. These orphaned dirs cause
// "already exists" errors when re-creating the same worktree name.
try {
const wtDir = worktreesDir(basePath);
if (existsSync(wtDir)) {
const registeredPaths = new Set(
nativeWorktreeList(basePath).map(entry => entry.path),
);
for (const entry of readdirSync(wtDir)) {
const fullPath = join(wtDir, entry);
try {
if (!statSync(fullPath).isDirectory()) continue;
} catch { continue; }
if (!registeredPaths.has(fullPath)) {
issues.push({
severity: "warning",
code: "worktree_directory_orphaned",
scope: "project",
unitId: entry,
message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`,
fixable: true,
});
if (shouldFix("worktree_directory_orphaned")) {
try {
rmSync(fullPath, { recursive: true, force: true });
fixesApplied.push(`removed orphaned worktree directory ${fullPath}`);
} catch {
fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`);
}
}
}
}
}
} catch {
// Non-fatal — orphaned worktree directory check failed
}
}
// ── Runtime Health Checks ──────────────────────────────────────────────────
@ -255,6 +319,44 @@ export async function checkRuntimeHealth(
// Non-fatal — crash lock check failed
}
// ── Stranded lock directory ────────────────────────────────────────────
// proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock
// mechanism. If the process was SIGKILLed or crashed hard, this directory
// can remain on disk without any live process holding it. The next session
// fails to acquire the lock until the directory is removed (#1245).
try {
const lockDir = join(dirname(root), `${root.split("/").pop() ?? ".gsd"}.lock`);
if (existsSync(lockDir)) {
const statRes = statSync(lockDir);
if (statRes.isDirectory()) {
// Check if any live process actually holds this lock
const lock = readCrashLock(basePath);
const lockHolderAlive = lock ? isLockProcessAlive(lock) : false;
if (!lockHolderAlive) {
issues.push({
severity: "error",
code: "stranded_lock_directory",
scope: "project",
unitId: "project",
message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. This blocks new auto-mode sessions from starting.`,
file: lockDir,
fixable: true,
});
if (shouldFix("stranded_lock_directory")) {
try {
rmSync(lockDir, { recursive: true, force: true });
fixesApplied.push(`removed stranded lock directory ${lockDir}`);
} catch {
fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`);
}
}
}
}
}
} catch {
// Non-fatal — stranded lock directory check failed
}
// ── Stale parallel sessions ────────────────────────────────────────────
try {
const parallelStatuses = readAllSessionStatuses(basePath);

View file

@ -20,6 +20,9 @@ import { gsdRoot, resolveGsdRootFile } from "./paths.js";
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
import { abortAndReset } from "./git-self-heal.js";
import { rebuildState } from "./doctor.js";
import { deriveState } from "./state.js";
import { readIntegrationBranch } from "./git-service.js";
import { nativeBranchExists, nativeIsRepo } from "./native-git-bridge.js";
// ── Health Score Tracking ──────────────────────────────────────────────────
@ -191,6 +194,27 @@ export async function preDispatchHealthGate(basePath: string): Promise<PreDispat
// Non-fatal — dispatch continues without STATE.md if rebuild fails
}
// ── Integration branch existence check ──
// If the active milestone's recorded integration branch no longer exists in
// git, the merge-back at the end of the milestone will fail. Block dispatch
// now to surface this before work is lost.
try {
if (nativeIsRepo(basePath)) {
const state = await deriveState(basePath);
if (state.activeMilestone) {
const integrationBranch = readIntegrationBranch(basePath, state.activeMilestone.id);
if (integrationBranch && !nativeBranchExists(basePath, integrationBranch)) {
issues.push(
`Integration branch "${integrationBranch}" for milestone ${state.activeMilestone.id} no longer exists in git. ` +
`Restore the branch or update the integration branch before dispatching. Run /gsd doctor for details.`,
);
}
}
}
} catch {
// Non-fatal — dispatch continues if state/branch check fails
}
// If we had critical issues that couldn't be auto-healed, block dispatch
if (issues.length > 0) {
return {

View file

@ -45,7 +45,15 @@ export type DoctorIssueCode =
| "env_python"
| "env_cargo"
| "env_go"
| "env_git_remote";
| "env_git_remote"
// Provider / auth checks
| "provider_key_missing"
| "provider_key_backedoff"
// Lock infrastructure checks
| "stranded_lock_directory"
// Git / worktree integrity checks
| "integration_branch_missing"
| "worktree_directory_orphaned";
/**
* Issue codes that represent expected completion-transition states.

View file

@ -11,6 +11,7 @@ import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js";
import { COMPLETION_TRANSITION_CODES } from "./doctor-types.js";
import { checkGitHealth, checkRuntimeHealth } from "./doctor-checks.js";
import { checkEnvironmentHealth } from "./doctor-environment.js";
import { runProviderChecks } from "./doctor-providers.js";
// ── Re-exports ─────────────────────────────────────────────────────────────
// All public types and functions from extracted modules are re-exported here
@ -396,6 +397,35 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
// Environment health checks (#1221: missing tools, port conflicts, stale deps, disk space)
await checkEnvironmentHealth(basePath, issues, { includeRemote: !options?.scope });
// Provider / auth health checks — detect missing or backed-off API keys before dispatching
try {
const providerResults = runProviderChecks();
for (const result of providerResults) {
if (!result.required) continue;
if (result.status === "error") {
issues.push({
severity: "error",
code: "provider_key_missing",
scope: "project",
unitId: "project",
message: result.message + (result.detail ? `${result.detail}` : ""),
fixable: false,
});
} else if (result.status === "warning") {
issues.push({
severity: "warning",
code: "provider_key_backedoff",
scope: "project",
unitId: "project",
message: result.message + (result.detail ? `${result.detail}` : ""),
fixable: false,
});
}
}
} catch {
// Non-fatal — provider check failure should not block other checks
}
const milestonesPath = milestonesDir(basePath);
if (!existsSync(milestonesPath)) {
return { ok: issues.every(issue => issue.severity !== "error"), basePath, issues, fixesApplied };

View file

@ -2,9 +2,10 @@
* doctor-git.test.ts Integration tests for doctor git health checks.
*
* Creates real temp git repos with deliberate broken state, runs runGSDDoctor,
* and asserts correct detection and fixing of all 4 git issue codes:
* and asserts correct detection and fixing of git issue codes:
* orphaned_auto_worktree, stale_milestone_branch,
* corrupt_merge_state, tracked_runtime_files
* corrupt_merge_state, tracked_runtime_files,
* integration_branch_missing, worktree_directory_orphaned
*/
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync } from "node:fs";
@ -299,6 +300,101 @@ async function main(): Promise<void> {
console.log("\n=== none-mode skips stale branch (skipped on Windows) ===");
}
// ─── Test: Integration branch missing ──────────────────────────────
if (process.platform !== "win32") {
console.log("\n=== integration_branch_missing ===");
{
const dir = createRepoWithActiveMilestone();
cleanups.push(dir);
// Write integration branch metadata for M001 pointing to a non-existent branch
const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json");
writeFileSync(metaPath, JSON.stringify({ integrationBranch: "feat/does-not-exist" }, null, 2));
const detect = await runGSDDoctor(dir);
const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing");
assertTrue(missingBranchIssues.length > 0, "detects missing integration branch");
assertTrue(
missingBranchIssues[0]?.message.includes("feat/does-not-exist"),
"message includes the missing branch name",
);
assertEq(missingBranchIssues[0]?.fixable, false, "integration_branch_missing is not auto-fixable");
assertEq(missingBranchIssues[0]?.severity, "error", "severity is error");
}
} else {
console.log("\n=== integration_branch_missing (skipped on Windows) ===");
}
// ─── Test: Integration branch present — no false positive ──────────
if (process.platform !== "win32") {
console.log("\n=== integration_branch_missing (no false positive) ===");
{
const dir = createRepoWithActiveMilestone();
cleanups.push(dir);
// Write integration branch metadata for M001 pointing to "main" (which exists)
const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json");
writeFileSync(metaPath, JSON.stringify({ integrationBranch: "main" }, null, 2));
const detect = await runGSDDoctor(dir);
const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing");
assertEq(missingBranchIssues.length, 0, "existing integration branch NOT flagged");
}
} else {
console.log("\n=== integration_branch_missing (no false positive — skipped on Windows) ===");
}
// ─── Test: Orphaned worktree directory ─────────────────────────────
if (process.platform !== "win32") {
console.log("\n=== worktree_directory_orphaned ===");
{
const dir = createRepoWithActiveMilestone();
cleanups.push(dir);
// Create a worktrees/ dir with an entry that is NOT in git worktree list
const orphanDir = join(dir, ".gsd", "worktrees", "orphan-feature");
mkdirSync(orphanDir, { recursive: true });
writeFileSync(join(orphanDir, "some-file.txt"), "leftover content\n");
const detect = await runGSDDoctor(dir);
const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned");
assertTrue(orphanDirIssues.length > 0, "detects orphaned worktree directory");
assertTrue(
orphanDirIssues[0]?.message.includes("orphan-feature"),
"message includes the orphaned directory name",
);
assertTrue(orphanDirIssues[0]?.fixable === true, "worktree_directory_orphaned is fixable");
const fixed = await runGSDDoctor(dir, { fix: true });
assertTrue(
fixed.fixesApplied.some(f => f.includes("removed orphaned worktree directory")),
"fix removes orphaned worktree directory",
);
assertTrue(!existsSync(orphanDir), "orphaned directory removed after fix");
}
} else {
console.log("\n=== worktree_directory_orphaned (skipped on Windows) ===");
}
// ─── Test: Registered worktree NOT flagged as orphaned ─────────────
if (process.platform !== "win32") {
console.log("\n=== worktree_directory_orphaned (registered worktree not flagged) ===");
{
const dir = createRepoWithActiveMilestone();
cleanups.push(dir);
// Create a real registered worktree under .gsd/worktrees/
mkdirSync(join(dir, ".gsd", "worktrees"), { recursive: true });
run("git worktree add -b worktree/feature-1 .gsd/worktrees/feature-1", dir);
const detect = await runGSDDoctor(dir);
const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned");
assertEq(orphanDirIssues.length, 0, "registered worktree NOT flagged as orphaned");
}
} else {
console.log("\n=== worktree_directory_orphaned (registered worktree not flagged — skipped on Windows) ===");
}
// ─── Test 9: none-mode still detects corrupt merge state ───────────
console.log("\n=== none-mode keeps corrupt merge state ===");
{

View file

@ -2,9 +2,9 @@
* doctor-runtime.test.ts Tests for doctor runtime health checks.
*
* Tests detection and auto-fix of:
* stale_crash_lock, orphaned_completed_units, stale_hook_state,
* activity_log_bloat, state_file_missing, state_file_stale,
* gitignore_missing_patterns
* stale_crash_lock, stranded_lock_directory, orphaned_completed_units,
* stale_hook_state, activity_log_bloat, state_file_missing,
* state_file_stale, gitignore_missing_patterns
*/
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, realpathSync } from "node:fs";
@ -290,6 +290,55 @@ node_modules/
assertEq(content.length, 0, "all orphaned keys removed");
}
// ─── Test: Stranded lock directory detection & fix ────────────────
console.log("\n=== stranded_lock_directory ===");
{
const dir = createMinimalProject();
cleanups.push(dir);
// Create the proper-lockfile lock directory without a live lock holder.
// The lock dir sits at <parent of .gsd>/.gsd.lock (i.e., <basePath>/.gsd.lock).
const lockDir = join(dir, ".gsd.lock");
mkdirSync(lockDir, { recursive: true });
const detect = await runGSDDoctor(dir);
const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory");
assertTrue(strandedIssues.length > 0, "detects stranded lock directory");
assertTrue(strandedIssues[0]?.message.includes("lock directory"), "message describes stranded lock directory");
assertTrue(strandedIssues[0]?.fixable === true, "stranded lock dir is fixable");
const fixed = await runGSDDoctor(dir, { fix: true });
assertTrue(
fixed.fixesApplied.some(f => f.includes("removed stranded lock directory")),
"fix removes stranded lock directory",
);
assertTrue(!existsSync(lockDir), "lock directory removed after fix");
}
// ─── Test: Stranded lock dir with live lock holder — NOT flagged ───
console.log("\n=== stranded_lock_directory (live holder not flagged) ===");
{
const dir = createMinimalProject();
cleanups.push(dir);
// Create lock dir + auto.lock with PID 1 (init/launchd — always alive, never our own PID)
const lockDir = join(dir, ".gsd.lock");
mkdirSync(lockDir, { recursive: true });
const liveLockData = {
pid: 1,
startedAt: new Date().toISOString(),
unitType: "execute-task",
unitId: "M001/S01/T01",
unitStartedAt: new Date().toISOString(),
completedUnits: 1,
};
writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(liveLockData, null, 2));
const detect = await runGSDDoctor(dir);
const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory");
assertEq(strandedIssues.length, 0, "live lock holder: stranded_lock_directory NOT detected");
}
} finally {
for (const dir of cleanups) {
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }