fix: close 5 doctor coverage gaps — providers, lock dir, integration branch, orphaned worktrees
Closes the highest-impact gaps identified in the /gsd doctor deep-dive analysis. **1. Wire provider checks into runGSDDoctor()** doctor-providers.ts existed and worked but was never called from the main doctor run. Units could dispatch into guaranteed API failures with no warning. Now runProviderChecks() is called in runGSDDoctor() and converts required-provider errors/warnings into DoctorIssue entries with codes: - provider_key_missing (error) - provider_key_backedoff (warning) **2. Stranded lock directory detection (doctor-checks.ts)** proper-lockfile creates a .gsd.lock/ directory as the OS-level lock mechanism. After SIGKILL or hard crash, this directory can remain stranded, blocking all future auto-mode sessions from acquiring the lock (#1245 pattern). Doctor now: - Detects .gsd.lock/ existing without a live process holding it - Reports as stranded_lock_directory (error, fixable) - Auto-fix removes the stranded directory **3. Integration branch existence check (doctor-checks.ts + doctor-proactive.ts)** When a milestone records an integration branch and that branch is later deleted or renamed, merge-back will fail silently at the end of the milestone. Doctor now: - Checks each active milestone's stored integration branch exists in git - Reports as integration_branch_missing (error, not auto-fixable) - preDispatchHealthGate blocks dispatch if the active milestone's integration branch is missing, preventing work from being dispatched into a dead end **4. Orphaned worktree directory detection (doctor-checks.ts)** Worktree removal can fail after a branch delete, leaving a .gsd/worktrees/<name>/ directory that is no longer registered with git. Re-creating the same name fails with "already exists". Doctor now: - Compares .gsd/worktrees/ entries against git worktree list - Reports unregistered directories as worktree_directory_orphaned (warning, fixable) - Auto-fix removes the orphaned directory Tests: all new codes covered with detection + fix assertions, including false-positive safety cases (live lock holder, registered worktrees, existing integration branch). All 1843 existing tests still pass.
This commit is contained in:
parent
e6bbd035ba
commit
ccde39c2c8
6 changed files with 320 additions and 11 deletions
|
|
@ -1,15 +1,15 @@
|
|||
import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, statSync } from "node:fs";
|
||||
import { join, sep } from "node:path";
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs";
|
||||
import { dirname, join, sep } from "node:path";
|
||||
|
||||
import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js";
|
||||
import { loadFile, parseRoadmap } from "./files.js";
|
||||
import { resolveMilestoneFile, milestonesDir, gsdRoot, resolveGsdRootFile, relGsdRootFile } from "./paths.js";
|
||||
import { deriveState, isMilestoneComplete } from "./state.js";
|
||||
import { saveFile } from "./files.js";
|
||||
import { listWorktrees, resolveGitDir } from "./worktree-manager.js";
|
||||
import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js";
|
||||
import { abortAndReset } from "./git-self-heal.js";
|
||||
import { RUNTIME_EXCLUSION_PATHS } from "./git-service.js";
|
||||
import { nativeIsRepo, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js";
|
||||
import { RUNTIME_EXCLUSION_PATHS, readIntegrationBranch } from "./git-service.js";
|
||||
import { nativeIsRepo, nativeBranchExists, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js";
|
||||
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
|
||||
import { ensureGitignore } from "./gitignore.js";
|
||||
import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js";
|
||||
|
|
@ -215,6 +215,70 @@ export async function checkGitHealth(
|
|||
} catch {
|
||||
// git branch list failed — skip
|
||||
}
|
||||
|
||||
// ── Integration branch existence ──────────────────────────────────────
|
||||
// For each active (non-complete) milestone, verify the stored integration
|
||||
// branch still exists in git. A missing integration branch blocks merge-back
|
||||
// and causes the next merge operation to fail silently.
|
||||
try {
|
||||
const state = await deriveState(basePath);
|
||||
for (const milestone of state.registry) {
|
||||
if (milestone.status === "complete") continue;
|
||||
const integrationBranch = readIntegrationBranch(basePath, milestone.id);
|
||||
if (!integrationBranch) continue; // No stored branch — skip (not yet set)
|
||||
if (!nativeBranchExists(basePath, integrationBranch)) {
|
||||
issues.push({
|
||||
severity: "error",
|
||||
code: "integration_branch_missing",
|
||||
scope: "milestone",
|
||||
unitId: milestone.id,
|
||||
message: `Milestone ${milestone.id} recorded integration branch "${integrationBranch}" but that branch no longer exists in git. Merge-back will fail.`,
|
||||
fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — integration branch check failed
|
||||
}
|
||||
|
||||
// ── Orphaned worktree directories ────────────────────────────────────
|
||||
// Worktree removal can fail after a branch delete, leaving a directory
|
||||
// that is no longer registered with git. These orphaned dirs cause
|
||||
// "already exists" errors when re-creating the same worktree name.
|
||||
try {
|
||||
const wtDir = worktreesDir(basePath);
|
||||
if (existsSync(wtDir)) {
|
||||
const registeredPaths = new Set(
|
||||
nativeWorktreeList(basePath).map(entry => entry.path),
|
||||
);
|
||||
for (const entry of readdirSync(wtDir)) {
|
||||
const fullPath = join(wtDir, entry);
|
||||
try {
|
||||
if (!statSync(fullPath).isDirectory()) continue;
|
||||
} catch { continue; }
|
||||
if (!registeredPaths.has(fullPath)) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "worktree_directory_orphaned",
|
||||
scope: "project",
|
||||
unitId: entry,
|
||||
message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`,
|
||||
fixable: true,
|
||||
});
|
||||
if (shouldFix("worktree_directory_orphaned")) {
|
||||
try {
|
||||
rmSync(fullPath, { recursive: true, force: true });
|
||||
fixesApplied.push(`removed orphaned worktree directory ${fullPath}`);
|
||||
} catch {
|
||||
fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — orphaned worktree directory check failed
|
||||
}
|
||||
}
|
||||
|
||||
// ── Runtime Health Checks ──────────────────────────────────────────────────
|
||||
|
|
@ -255,6 +319,44 @@ export async function checkRuntimeHealth(
|
|||
// Non-fatal — crash lock check failed
|
||||
}
|
||||
|
||||
// ── Stranded lock directory ────────────────────────────────────────────
|
||||
// proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock
|
||||
// mechanism. If the process was SIGKILLed or crashed hard, this directory
|
||||
// can remain on disk without any live process holding it. The next session
|
||||
// fails to acquire the lock until the directory is removed (#1245).
|
||||
try {
|
||||
const lockDir = join(dirname(root), `${root.split("/").pop() ?? ".gsd"}.lock`);
|
||||
if (existsSync(lockDir)) {
|
||||
const statRes = statSync(lockDir);
|
||||
if (statRes.isDirectory()) {
|
||||
// Check if any live process actually holds this lock
|
||||
const lock = readCrashLock(basePath);
|
||||
const lockHolderAlive = lock ? isLockProcessAlive(lock) : false;
|
||||
if (!lockHolderAlive) {
|
||||
issues.push({
|
||||
severity: "error",
|
||||
code: "stranded_lock_directory",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. This blocks new auto-mode sessions from starting.`,
|
||||
file: lockDir,
|
||||
fixable: true,
|
||||
});
|
||||
if (shouldFix("stranded_lock_directory")) {
|
||||
try {
|
||||
rmSync(lockDir, { recursive: true, force: true });
|
||||
fixesApplied.push(`removed stranded lock directory ${lockDir}`);
|
||||
} catch {
|
||||
fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — stranded lock directory check failed
|
||||
}
|
||||
|
||||
// ── Stale parallel sessions ────────────────────────────────────────────
|
||||
try {
|
||||
const parallelStatuses = readAllSessionStatuses(basePath);
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ import { gsdRoot, resolveGsdRootFile } from "./paths.js";
|
|||
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
|
||||
import { abortAndReset } from "./git-self-heal.js";
|
||||
import { rebuildState } from "./doctor.js";
|
||||
import { deriveState } from "./state.js";
|
||||
import { readIntegrationBranch } from "./git-service.js";
|
||||
import { nativeBranchExists, nativeIsRepo } from "./native-git-bridge.js";
|
||||
|
||||
// ── Health Score Tracking ──────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -191,6 +194,27 @@ export async function preDispatchHealthGate(basePath: string): Promise<PreDispat
|
|||
// Non-fatal — dispatch continues without STATE.md if rebuild fails
|
||||
}
|
||||
|
||||
// ── Integration branch existence check ──
|
||||
// If the active milestone's recorded integration branch no longer exists in
|
||||
// git, the merge-back at the end of the milestone will fail. Block dispatch
|
||||
// now to surface this before work is lost.
|
||||
try {
|
||||
if (nativeIsRepo(basePath)) {
|
||||
const state = await deriveState(basePath);
|
||||
if (state.activeMilestone) {
|
||||
const integrationBranch = readIntegrationBranch(basePath, state.activeMilestone.id);
|
||||
if (integrationBranch && !nativeBranchExists(basePath, integrationBranch)) {
|
||||
issues.push(
|
||||
`Integration branch "${integrationBranch}" for milestone ${state.activeMilestone.id} no longer exists in git. ` +
|
||||
`Restore the branch or update the integration branch before dispatching. Run /gsd doctor for details.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — dispatch continues if state/branch check fails
|
||||
}
|
||||
|
||||
// If we had critical issues that couldn't be auto-healed, block dispatch
|
||||
if (issues.length > 0) {
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,15 @@ export type DoctorIssueCode =
|
|||
| "env_python"
|
||||
| "env_cargo"
|
||||
| "env_go"
|
||||
| "env_git_remote";
|
||||
| "env_git_remote"
|
||||
// Provider / auth checks
|
||||
| "provider_key_missing"
|
||||
| "provider_key_backedoff"
|
||||
// Lock infrastructure checks
|
||||
| "stranded_lock_directory"
|
||||
// Git / worktree integrity checks
|
||||
| "integration_branch_missing"
|
||||
| "worktree_directory_orphaned";
|
||||
|
||||
/**
|
||||
* Issue codes that represent expected completion-transition states.
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js";
|
|||
import { COMPLETION_TRANSITION_CODES } from "./doctor-types.js";
|
||||
import { checkGitHealth, checkRuntimeHealth } from "./doctor-checks.js";
|
||||
import { checkEnvironmentHealth } from "./doctor-environment.js";
|
||||
import { runProviderChecks } from "./doctor-providers.js";
|
||||
|
||||
// ── Re-exports ─────────────────────────────────────────────────────────────
|
||||
// All public types and functions from extracted modules are re-exported here
|
||||
|
|
@ -396,6 +397,35 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
|
|||
// Environment health checks (#1221: missing tools, port conflicts, stale deps, disk space)
|
||||
await checkEnvironmentHealth(basePath, issues, { includeRemote: !options?.scope });
|
||||
|
||||
// Provider / auth health checks — detect missing or backed-off API keys before dispatching
|
||||
try {
|
||||
const providerResults = runProviderChecks();
|
||||
for (const result of providerResults) {
|
||||
if (!result.required) continue;
|
||||
if (result.status === "error") {
|
||||
issues.push({
|
||||
severity: "error",
|
||||
code: "provider_key_missing",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: result.message + (result.detail ? ` — ${result.detail}` : ""),
|
||||
fixable: false,
|
||||
});
|
||||
} else if (result.status === "warning") {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "provider_key_backedoff",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: result.message + (result.detail ? ` — ${result.detail}` : ""),
|
||||
fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — provider check failure should not block other checks
|
||||
}
|
||||
|
||||
const milestonesPath = milestonesDir(basePath);
|
||||
if (!existsSync(milestonesPath)) {
|
||||
return { ok: issues.every(issue => issue.severity !== "error"), basePath, issues, fixesApplied };
|
||||
|
|
|
|||
|
|
@ -2,9 +2,10 @@
|
|||
* doctor-git.test.ts — Integration tests for doctor git health checks.
|
||||
*
|
||||
* Creates real temp git repos with deliberate broken state, runs runGSDDoctor,
|
||||
* and asserts correct detection and fixing of all 4 git issue codes:
|
||||
* and asserts correct detection and fixing of git issue codes:
|
||||
* orphaned_auto_worktree, stale_milestone_branch,
|
||||
* corrupt_merge_state, tracked_runtime_files
|
||||
* corrupt_merge_state, tracked_runtime_files,
|
||||
* integration_branch_missing, worktree_directory_orphaned
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync } from "node:fs";
|
||||
|
|
@ -299,6 +300,101 @@ async function main(): Promise<void> {
|
|||
console.log("\n=== none-mode skips stale branch (skipped on Windows) ===");
|
||||
}
|
||||
|
||||
// ─── Test: Integration branch missing ──────────────────────────────
|
||||
if (process.platform !== "win32") {
|
||||
console.log("\n=== integration_branch_missing ===");
|
||||
{
|
||||
const dir = createRepoWithActiveMilestone();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Write integration branch metadata for M001 pointing to a non-existent branch
|
||||
const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json");
|
||||
writeFileSync(metaPath, JSON.stringify({ integrationBranch: "feat/does-not-exist" }, null, 2));
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing");
|
||||
assertTrue(missingBranchIssues.length > 0, "detects missing integration branch");
|
||||
assertTrue(
|
||||
missingBranchIssues[0]?.message.includes("feat/does-not-exist"),
|
||||
"message includes the missing branch name",
|
||||
);
|
||||
assertEq(missingBranchIssues[0]?.fixable, false, "integration_branch_missing is not auto-fixable");
|
||||
assertEq(missingBranchIssues[0]?.severity, "error", "severity is error");
|
||||
}
|
||||
} else {
|
||||
console.log("\n=== integration_branch_missing (skipped on Windows) ===");
|
||||
}
|
||||
|
||||
// ─── Test: Integration branch present — no false positive ──────────
|
||||
if (process.platform !== "win32") {
|
||||
console.log("\n=== integration_branch_missing (no false positive) ===");
|
||||
{
|
||||
const dir = createRepoWithActiveMilestone();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Write integration branch metadata for M001 pointing to "main" (which exists)
|
||||
const metaPath = join(dir, ".gsd", "milestones", "M001", "M001-META.json");
|
||||
writeFileSync(metaPath, JSON.stringify({ integrationBranch: "main" }, null, 2));
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const missingBranchIssues = detect.issues.filter(i => i.code === "integration_branch_missing");
|
||||
assertEq(missingBranchIssues.length, 0, "existing integration branch NOT flagged");
|
||||
}
|
||||
} else {
|
||||
console.log("\n=== integration_branch_missing (no false positive — skipped on Windows) ===");
|
||||
}
|
||||
|
||||
// ─── Test: Orphaned worktree directory ─────────────────────────────
|
||||
if (process.platform !== "win32") {
|
||||
console.log("\n=== worktree_directory_orphaned ===");
|
||||
{
|
||||
const dir = createRepoWithActiveMilestone();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Create a worktrees/ dir with an entry that is NOT in git worktree list
|
||||
const orphanDir = join(dir, ".gsd", "worktrees", "orphan-feature");
|
||||
mkdirSync(orphanDir, { recursive: true });
|
||||
writeFileSync(join(orphanDir, "some-file.txt"), "leftover content\n");
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned");
|
||||
assertTrue(orphanDirIssues.length > 0, "detects orphaned worktree directory");
|
||||
assertTrue(
|
||||
orphanDirIssues[0]?.message.includes("orphan-feature"),
|
||||
"message includes the orphaned directory name",
|
||||
);
|
||||
assertTrue(orphanDirIssues[0]?.fixable === true, "worktree_directory_orphaned is fixable");
|
||||
|
||||
const fixed = await runGSDDoctor(dir, { fix: true });
|
||||
assertTrue(
|
||||
fixed.fixesApplied.some(f => f.includes("removed orphaned worktree directory")),
|
||||
"fix removes orphaned worktree directory",
|
||||
);
|
||||
assertTrue(!existsSync(orphanDir), "orphaned directory removed after fix");
|
||||
}
|
||||
} else {
|
||||
console.log("\n=== worktree_directory_orphaned (skipped on Windows) ===");
|
||||
}
|
||||
|
||||
// ─── Test: Registered worktree NOT flagged as orphaned ─────────────
|
||||
if (process.platform !== "win32") {
|
||||
console.log("\n=== worktree_directory_orphaned (registered worktree not flagged) ===");
|
||||
{
|
||||
const dir = createRepoWithActiveMilestone();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Create a real registered worktree under .gsd/worktrees/
|
||||
mkdirSync(join(dir, ".gsd", "worktrees"), { recursive: true });
|
||||
run("git worktree add -b worktree/feature-1 .gsd/worktrees/feature-1", dir);
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const orphanDirIssues = detect.issues.filter(i => i.code === "worktree_directory_orphaned");
|
||||
assertEq(orphanDirIssues.length, 0, "registered worktree NOT flagged as orphaned");
|
||||
}
|
||||
} else {
|
||||
console.log("\n=== worktree_directory_orphaned (registered worktree not flagged — skipped on Windows) ===");
|
||||
}
|
||||
|
||||
// ─── Test 9: none-mode still detects corrupt merge state ───────────
|
||||
console.log("\n=== none-mode keeps corrupt merge state ===");
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
* doctor-runtime.test.ts — Tests for doctor runtime health checks.
|
||||
*
|
||||
* Tests detection and auto-fix of:
|
||||
* stale_crash_lock, orphaned_completed_units, stale_hook_state,
|
||||
* activity_log_bloat, state_file_missing, state_file_stale,
|
||||
* gitignore_missing_patterns
|
||||
* stale_crash_lock, stranded_lock_directory, orphaned_completed_units,
|
||||
* stale_hook_state, activity_log_bloat, state_file_missing,
|
||||
* state_file_stale, gitignore_missing_patterns
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, realpathSync } from "node:fs";
|
||||
|
|
@ -290,6 +290,55 @@ node_modules/
|
|||
assertEq(content.length, 0, "all orphaned keys removed");
|
||||
}
|
||||
|
||||
// ─── Test: Stranded lock directory detection & fix ────────────────
|
||||
console.log("\n=== stranded_lock_directory ===");
|
||||
{
|
||||
const dir = createMinimalProject();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Create the proper-lockfile lock directory without a live lock holder.
|
||||
// The lock dir sits at <parent of .gsd>/.gsd.lock (i.e., <basePath>/.gsd.lock).
|
||||
const lockDir = join(dir, ".gsd.lock");
|
||||
mkdirSync(lockDir, { recursive: true });
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory");
|
||||
assertTrue(strandedIssues.length > 0, "detects stranded lock directory");
|
||||
assertTrue(strandedIssues[0]?.message.includes("lock directory"), "message describes stranded lock directory");
|
||||
assertTrue(strandedIssues[0]?.fixable === true, "stranded lock dir is fixable");
|
||||
|
||||
const fixed = await runGSDDoctor(dir, { fix: true });
|
||||
assertTrue(
|
||||
fixed.fixesApplied.some(f => f.includes("removed stranded lock directory")),
|
||||
"fix removes stranded lock directory",
|
||||
);
|
||||
assertTrue(!existsSync(lockDir), "lock directory removed after fix");
|
||||
}
|
||||
|
||||
// ─── Test: Stranded lock dir with live lock holder — NOT flagged ───
|
||||
console.log("\n=== stranded_lock_directory (live holder not flagged) ===");
|
||||
{
|
||||
const dir = createMinimalProject();
|
||||
cleanups.push(dir);
|
||||
|
||||
// Create lock dir + auto.lock with PID 1 (init/launchd — always alive, never our own PID)
|
||||
const lockDir = join(dir, ".gsd.lock");
|
||||
mkdirSync(lockDir, { recursive: true });
|
||||
const liveLockData = {
|
||||
pid: 1,
|
||||
startedAt: new Date().toISOString(),
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
unitStartedAt: new Date().toISOString(),
|
||||
completedUnits: 1,
|
||||
};
|
||||
writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(liveLockData, null, 2));
|
||||
|
||||
const detect = await runGSDDoctor(dir);
|
||||
const strandedIssues = detect.issues.filter(i => i.code === "stranded_lock_directory");
|
||||
assertEq(strandedIssues.length, 0, "live lock holder: stranded_lock_directory NOT detected");
|
||||
}
|
||||
|
||||
} finally {
|
||||
for (const dir of cleanups) {
|
||||
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue