feat(doctor): add 7 runtime health checks with auto-fix (#646)
* feat(doctor): add 7 runtime health checks with auto-fix Add comprehensive runtime health monitoring to /gsd doctor: - stale_crash_lock: detect dead auto.lock from crashed sessions, auto-clear - orphaned_completed_units: find completed-unit keys referencing missing artifacts, auto-remove - stale_hook_state: detect residual hook cycle counts with no running session, auto-clear - activity_log_bloat: flag activity/ dir exceeding 500 files or 100MB, auto-prune (7-day retention) - state_file_missing: detect missing STATE.md when milestones exist, auto-generate - state_file_stale: detect STATE.md drift (wrong phase/milestone/slice), auto-rebuild - gitignore_missing_patterns: detect missing critical GSD runtime patterns in .gitignore, auto-fix All checks are non-fatal (gracefully degrade on read errors) and respect the existing fix/fixLevel system. Includes 34 new test assertions across 9 test scenarios in doctor-runtime.test.ts. * feat(doctor): add proactive healing layer for auto-mode Three new mechanisms for automatic health monitoring: 1. Pre-dispatch health gate: runs before each unit dispatch in auto-mode. Checks for stale crash locks (auto-clears) and corrupt merge state (auto-heals via abortAndReset). Pauses auto-mode if critical issues can't be resolved. 2. Health score tracking: records error/warning/fix counts after each post-unit doctor run. Tracks trends (improving/stable/degrading) across a sliding window of 50 snapshots. Monitors consecutive error unit streaks. 3. Auto-heal escalation: when deterministic fixes can't resolve errors after 5 consecutive units AND health trend is not improving, automatically dispatches LLM-assisted heal (dispatchDoctorHeal). Single-fire per session to prevent spam. Defers escalation when trend is improving (fixes are working, just slowly). 
Integration points in auto.ts: - resetProactiveHealing() on start/stop - preDispatchHealthGate() before deriveState in dispatchNextUnit - recordHealthSnapshot() + checkHealEscalation() in post-unit hook - formatHealthSummary() available for dashboard display Includes 30 test assertions across 15 scenarios.
This commit is contained in:
parent
cb9191fa4f
commit
061d826a4e
6 changed files with 1174 additions and 4 deletions
|
|
@ -64,7 +64,15 @@ import {
|
|||
formatValidationIssues,
|
||||
} from "./observability-validator.js";
|
||||
import { ensureGitignore, untrackRuntimeFiles } from "./gitignore.js";
|
||||
import { runGSDDoctor, rebuildState } from "./doctor.js";
|
||||
import { runGSDDoctor, rebuildState, summarizeDoctorIssues } from "./doctor.js";
|
||||
import {
|
||||
preDispatchHealthGate,
|
||||
recordHealthSnapshot,
|
||||
checkHealEscalation,
|
||||
resetProactiveHealing,
|
||||
formatHealthSummary,
|
||||
getConsecutiveErrorUnits,
|
||||
} from "./doctor-proactive.js";
|
||||
import { snapshotSkills, clearSkillSnapshot } from "./skill-discovery.js";
|
||||
import { captureAvailableSkills, getAndClearSkills, resetSkillTelemetry } from "./skill-telemetry.js";
|
||||
import {
|
||||
|
|
@ -559,6 +567,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
|
|||
completedUnits = [];
|
||||
clearSliceProgressCache();
|
||||
clearActivityLogState();
|
||||
resetProactiveHealing();
|
||||
pendingCrashRecovery = null;
|
||||
_handlingAgentEnd = false;
|
||||
ctx?.ui.setStatus("gsd-auto", undefined);
|
||||
|
|
@ -858,6 +867,7 @@ export async function startAuto(
|
|||
loadPersistedKeys(base, completedKeySet);
|
||||
resetHookState();
|
||||
restoreHookState(base);
|
||||
resetProactiveHealing();
|
||||
autoStartTime = Date.now();
|
||||
resourceSyncedAtOnStart = readResourceSyncedAt();
|
||||
completedUnits = [];
|
||||
|
|
@ -1089,6 +1099,35 @@ export async function handleAgentEnd(
|
|||
if (report.fixesApplied.length > 0) {
|
||||
ctx.ui.notify(`Post-hook: applied ${report.fixesApplied.length} fix(es).`, "info");
|
||||
}
|
||||
|
||||
// ── Proactive health tracking ──────────────────────────────────────
|
||||
// Record health snapshot for trend analysis and escalation logic.
|
||||
const summary = summarizeDoctorIssues(report.issues);
|
||||
recordHealthSnapshot(summary.errors, summary.warnings, report.fixesApplied.length);
|
||||
|
||||
// Check if we should escalate to LLM-assisted heal
|
||||
if (summary.errors > 0) {
|
||||
const unresolvedErrors = report.issues
|
||||
.filter(i => i.severity === "error" && !i.fixable)
|
||||
.map(i => ({ code: i.code, message: i.message, unitId: i.unitId }));
|
||||
const escalation = checkHealEscalation(summary.errors, unresolvedErrors);
|
||||
if (escalation.shouldEscalate) {
|
||||
ctx.ui.notify(
|
||||
`Doctor heal escalation: ${escalation.reason}. Dispatching LLM-assisted heal.`,
|
||||
"warning",
|
||||
);
|
||||
try {
|
||||
const { formatDoctorIssuesForPrompt, formatDoctorReport } = await import("./doctor.js");
|
||||
const { dispatchDoctorHeal } = await import("./commands.js");
|
||||
const actionable = report.issues.filter(i => i.severity === "error");
|
||||
const reportText = formatDoctorReport(report, { scope: doctorScope, includeWarnings: true });
|
||||
const structuredIssues = formatDoctorIssuesForPrompt(actionable);
|
||||
dispatchDoctorHeal(pi, doctorScope, reportText, structuredIssues);
|
||||
} catch {
|
||||
// Non-fatal — escalation dispatch failure
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — doctor failure should never block dispatch
|
||||
}
|
||||
|
|
@ -1558,6 +1597,23 @@ async function dispatchNextUnit(
|
|||
lastPromptCharCount = undefined;
|
||||
lastBaselineCharCount = undefined;
|
||||
|
||||
// ── Pre-dispatch health gate ──────────────────────────────────────────
|
||||
// Lightweight check for critical issues that would cause the next unit
|
||||
// to fail or corrupt state. Auto-heals what it can, blocks on the rest.
|
||||
try {
|
||||
const healthGate = preDispatchHealthGate(basePath);
|
||||
if (healthGate.fixesApplied.length > 0) {
|
||||
ctx.ui.notify(`Pre-dispatch: ${healthGate.fixesApplied.join(", ")}`, "info");
|
||||
}
|
||||
if (!healthGate.proceed) {
|
||||
ctx.ui.notify(healthGate.reason ?? "Pre-dispatch health check failed.", "error");
|
||||
await pauseAuto(ctx, pi);
|
||||
return;
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — health gate failure should never block dispatch
|
||||
}
|
||||
|
||||
let state = await deriveState(basePath);
|
||||
let mid = state.activeMilestone?.id;
|
||||
let midTitle = state.activeMilestone?.title;
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ import { handleUndo } from "./undo.js";
|
|||
import { handleExport } from "./export.js";
|
||||
import { nativeBranchList, nativeDetectMainBranch, nativeBranchListMerged, nativeBranchDelete, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js";
|
||||
|
||||
function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, reportText: string, structuredIssues: string): void {
|
||||
export function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, reportText: string, structuredIssues: string): void {
|
||||
const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? "~", ".pi", "GSD-WORKFLOW.md");
|
||||
const workflow = readFileSync(workflowPath, "utf-8");
|
||||
const prompt = loadPrompt("doctor-heal", {
|
||||
|
|
|
|||
286
src/resources/extensions/gsd/doctor-proactive.ts
Normal file
286
src/resources/extensions/gsd/doctor-proactive.ts
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
/**
|
||||
* GSD Doctor — Proactive Healing Layer
|
||||
*
|
||||
* Three mechanisms for automatic health monitoring during auto-mode:
|
||||
*
|
||||
* 1. Pre-dispatch health gate: lightweight check before each unit dispatch.
|
||||
* Returns blocking issues that should pause auto-mode rather than
|
||||
* dispatching into a broken state.
|
||||
*
|
||||
* 2. Health score tracking: tracks issue counts over time to detect
|
||||
* degradation trends. If health is declining, surfaces a warning.
|
||||
*
|
||||
* 3. Auto-heal escalation: if deterministic fix can't resolve issues
|
||||
* after N units, escalates to LLM-assisted heal dispatch.
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { gsdRoot, resolveGsdRootFile } from "./paths.js";
|
||||
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
|
||||
import { abortAndReset } from "./git-self-heal.js";
|
||||
|
||||
// ── Health Score Tracking ──────────────────────────────────────────────────
|
||||
|
||||
export interface HealthSnapshot {
|
||||
timestamp: number;
|
||||
errors: number;
|
||||
warnings: number;
|
||||
fixesApplied: number;
|
||||
unitIndex: number; // which unit dispatch triggered this snapshot
|
||||
}
|
||||
|
||||
/** In-memory health history for the current auto-mode session. */
|
||||
let healthHistory: HealthSnapshot[] = [];
|
||||
|
||||
/** Count of consecutive units with unresolved errors. */
|
||||
let consecutiveErrorUnits = 0;
|
||||
|
||||
/** Unit index counter for health tracking. */
|
||||
let healthUnitIndex = 0;
|
||||
|
||||
/**
|
||||
* Record a health snapshot after a doctor run.
|
||||
* Called from the post-unit hook in auto.ts.
|
||||
*/
|
||||
export function recordHealthSnapshot(errors: number, warnings: number, fixesApplied: number): void {
|
||||
healthUnitIndex++;
|
||||
healthHistory.push({
|
||||
timestamp: Date.now(),
|
||||
errors,
|
||||
warnings,
|
||||
fixesApplied,
|
||||
unitIndex: healthUnitIndex,
|
||||
});
|
||||
|
||||
// Keep only the last 50 snapshots to bound memory
|
||||
if (healthHistory.length > 50) {
|
||||
healthHistory = healthHistory.slice(-50);
|
||||
}
|
||||
|
||||
if (errors > 0) {
|
||||
consecutiveErrorUnits++;
|
||||
} else {
|
||||
consecutiveErrorUnits = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current health trend.
|
||||
* Returns "improving", "stable", "degrading", or "unknown" (not enough data).
|
||||
*/
|
||||
export function getHealthTrend(): "improving" | "stable" | "degrading" | "unknown" {
|
||||
if (healthHistory.length < 3) return "unknown";
|
||||
|
||||
const recent = healthHistory.slice(-5);
|
||||
const older = healthHistory.slice(-10, -5);
|
||||
|
||||
if (older.length === 0) return "unknown";
|
||||
|
||||
const recentAvg = recent.reduce((sum, s) => sum + s.errors + s.warnings, 0) / recent.length;
|
||||
const olderAvg = older.reduce((sum, s) => sum + s.errors + s.warnings, 0) / older.length;
|
||||
|
||||
const delta = recentAvg - olderAvg;
|
||||
if (delta > 1) return "degrading";
|
||||
if (delta < -1) return "improving";
|
||||
return "stable";
|
||||
}
|
||||
|
||||
/**
 * Get the number of consecutive units with unresolved errors.
 *
 * Read-only accessor of module state: the counter is advanced/reset by
 * recordHealthSnapshot and cleared by resetHealthTracking.
 */
export function getConsecutiveErrorUnits(): number {
  return consecutiveErrorUnits;
}
|
||||
|
||||
/**
 * Get health history for display (e.g., dashboard overlay).
 *
 * Returns the module's live snapshot array typed as readonly — callers must
 * not mutate it. The array is capped at 50 entries by recordHealthSnapshot.
 */
export function getHealthHistory(): readonly HealthSnapshot[] {
  return healthHistory;
}
|
||||
|
||||
/**
|
||||
* Reset health tracking state. Called on auto-mode start/stop.
|
||||
*/
|
||||
export function resetHealthTracking(): void {
|
||||
healthHistory = [];
|
||||
consecutiveErrorUnits = 0;
|
||||
healthUnitIndex = 0;
|
||||
}
|
||||
|
||||
// ── Pre-Dispatch Health Gate ───────────────────────────────────────────────
|
||||
|
||||
export interface PreDispatchHealthResult {
|
||||
/** Whether the dispatch should proceed. */
|
||||
proceed: boolean;
|
||||
/** If blocked, the reason to show the user. */
|
||||
reason?: string;
|
||||
/** Issues found (for logging). */
|
||||
issues: string[];
|
||||
/** Whether fix was applied. */
|
||||
fixesApplied: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Lightweight pre-dispatch health check. Runs fast checks that should
|
||||
* block dispatch if they fail — avoids dispatching into a broken state.
|
||||
*
|
||||
* This is NOT a full doctor run — it only checks critical, fast-to-evaluate
|
||||
* conditions that would cause the next unit to fail or corrupt state.
|
||||
*
|
||||
* Returns { proceed: true } if dispatch should continue.
|
||||
*/
|
||||
export function preDispatchHealthGate(basePath: string): PreDispatchHealthResult {
|
||||
const issues: string[] = [];
|
||||
const fixesApplied: string[] = [];
|
||||
|
||||
// ── Stale crash lock blocks dispatch ──
|
||||
// If a stale lock exists, the crash recovery path should handle it,
|
||||
// not a new dispatch. This prevents double-dispatch after crashes.
|
||||
try {
|
||||
const lock = readCrashLock(basePath);
|
||||
if (lock && !isLockProcessAlive(lock)) {
|
||||
// Auto-clear it since we're about to dispatch anyway
|
||||
clearLock(basePath);
|
||||
fixesApplied.push("cleared stale auto.lock before dispatch");
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
|
||||
// ── Corrupt merge/rebase state blocks dispatch ──
|
||||
// Dispatching a unit with MERGE_HEAD present will cause git operations to fail.
|
||||
try {
|
||||
const gitDir = join(basePath, ".git");
|
||||
if (existsSync(gitDir)) {
|
||||
const blockers = ["MERGE_HEAD", "rebase-apply", "rebase-merge"].filter(
|
||||
f => existsSync(join(gitDir, f)),
|
||||
);
|
||||
if (blockers.length > 0) {
|
||||
// Try to auto-heal
|
||||
try {
|
||||
const result = abortAndReset(basePath);
|
||||
fixesApplied.push(`pre-dispatch: cleaned merge state (${result.cleaned.join(", ")})`);
|
||||
} catch {
|
||||
issues.push(`Corrupt git state: ${blockers.join(", ")}. Run /gsd doctor fix.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
|
||||
// ── STATE.md existence check ──
|
||||
// If STATE.md is missing, deriveState will still work but the LLM
|
||||
// may get confused. Rebuild it silently.
|
||||
try {
|
||||
const stateFile = resolveGsdRootFile(basePath, "STATE");
|
||||
const milestonesDir = join(gsdRoot(basePath), "milestones");
|
||||
if (existsSync(milestonesDir) && !existsSync(stateFile)) {
|
||||
issues.push("STATE.md missing — will rebuild after this unit");
|
||||
// Don't block dispatch — rebuilding happens in post-hook
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
|
||||
// If we had critical issues that couldn't be auto-healed, block dispatch
|
||||
if (issues.length > 0) {
|
||||
return {
|
||||
proceed: false,
|
||||
reason: `Pre-dispatch health check failed:\n${issues.map(i => ` - ${i}`).join("\n")}\nRun /gsd doctor fix to resolve.`,
|
||||
issues,
|
||||
fixesApplied,
|
||||
};
|
||||
}
|
||||
|
||||
return { proceed: true, issues, fixesApplied };
|
||||
}
|
||||
|
||||
// ── Auto-Heal Escalation ──────────────────────────────────────────────────
|
||||
|
||||
/** Threshold: escalate to LLM heal after this many consecutive error units. */
|
||||
const ESCALATION_THRESHOLD = 5;
|
||||
|
||||
/** Whether an escalation has already been triggered this session (prevent spam). */
|
||||
let escalationTriggered = false;
|
||||
|
||||
/**
|
||||
* Check whether auto-heal should escalate from deterministic fix to
|
||||
* LLM-assisted heal. Called after each post-unit doctor run.
|
||||
*
|
||||
* Returns the structured issue text for LLM dispatch, or null if
|
||||
* escalation is not needed.
|
||||
*/
|
||||
export function checkHealEscalation(
|
||||
errors: number,
|
||||
unresolvedIssues: Array<{ code: string; message: string; unitId: string }>,
|
||||
): { shouldEscalate: boolean; reason: string; issues: typeof unresolvedIssues } {
|
||||
if (escalationTriggered) {
|
||||
return { shouldEscalate: false, reason: "already escalated this session", issues: [] };
|
||||
}
|
||||
|
||||
if (consecutiveErrorUnits < ESCALATION_THRESHOLD) {
|
||||
return {
|
||||
shouldEscalate: false,
|
||||
reason: `${consecutiveErrorUnits}/${ESCALATION_THRESHOLD} consecutive error units`,
|
||||
issues: [],
|
||||
};
|
||||
}
|
||||
|
||||
if (errors === 0) {
|
||||
return { shouldEscalate: false, reason: "no errors to escalate", issues: [] };
|
||||
}
|
||||
|
||||
const trend = getHealthTrend();
|
||||
if (trend === "improving") {
|
||||
return { shouldEscalate: false, reason: "health is improving — deferring escalation", issues: [] };
|
||||
}
|
||||
|
||||
escalationTriggered = true;
|
||||
return {
|
||||
shouldEscalate: true,
|
||||
reason: `${consecutiveErrorUnits} consecutive units with unresolved errors (trend: ${trend})`,
|
||||
issues: unresolvedIssues,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reset escalation state. Called on auto-mode start/stop.
 *
 * Clears the single-fire latch so the next session may again trigger at
 * most one LLM-assisted heal dispatch.
 */
export function resetEscalation(): void {
  escalationTriggered = false;
}
|
||||
|
||||
/**
|
||||
* Format a health summary for display in the auto-mode dashboard.
|
||||
*/
|
||||
export function formatHealthSummary(): string {
|
||||
if (healthHistory.length === 0) return "No health data yet.";
|
||||
|
||||
const latest = healthHistory[healthHistory.length - 1]!;
|
||||
const trend = getHealthTrend();
|
||||
const trendIcon = trend === "improving" ? "+" : trend === "degrading" ? "-" : "=";
|
||||
const totalFixes = healthHistory.reduce((sum, s) => sum + s.fixesApplied, 0);
|
||||
|
||||
const parts = [
|
||||
`Health: ${latest.errors}E/${latest.warnings}W`,
|
||||
`trend:${trendIcon}`,
|
||||
`fixes:${totalFixes}`,
|
||||
];
|
||||
|
||||
if (consecutiveErrorUnits > 0) {
|
||||
parts.push(`streak:${consecutiveErrorUnits}/${ESCALATION_THRESHOLD}`);
|
||||
}
|
||||
|
||||
return parts.join(" | ");
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset all proactive healing state. Called on auto-mode start/stop.
|
||||
*/
|
||||
export function resetProactiveHealing(): void {
|
||||
resetHealthTracking();
|
||||
resetEscalation();
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
import { existsSync, mkdirSync } from "node:fs";
|
||||
import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, unlinkSync } from "node:fs";
|
||||
import { join, sep } from "node:path";
|
||||
|
||||
import { loadFile, parsePlan, parseRoadmap, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js";
|
||||
|
|
@ -9,6 +9,8 @@ import { listWorktrees } from "./worktree-manager.js";
|
|||
import { abortAndReset } from "./git-self-heal.js";
|
||||
import { RUNTIME_EXCLUSION_PATHS } from "./git-service.js";
|
||||
import { nativeIsRepo, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached } from "./native-git-bridge.js";
|
||||
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
|
||||
import { ensureGitignore } from "./gitignore.js";
|
||||
|
||||
export type DoctorSeverity = "info" | "warning" | "error";
|
||||
export type DoctorIssueCode =
|
||||
|
|
@ -32,7 +34,14 @@ export type DoctorIssueCode =
|
|||
| "stale_milestone_branch"
|
||||
| "corrupt_merge_state"
|
||||
| "tracked_runtime_files"
|
||||
| "legacy_slice_branches";
|
||||
| "legacy_slice_branches"
|
||||
| "stale_crash_lock"
|
||||
| "orphaned_completed_units"
|
||||
| "stale_hook_state"
|
||||
| "activity_log_bloat"
|
||||
| "state_file_stale"
|
||||
| "state_file_missing"
|
||||
| "gitignore_missing_patterns";
|
||||
|
||||
export interface DoctorIssue {
|
||||
severity: DoctorSeverity;
|
||||
|
|
@ -657,6 +666,275 @@ async function checkGitHealth(
|
|||
}
|
||||
}
|
||||
|
||||
// ── Runtime Health Checks ──────────────────────────────────────────────────
|
||||
// Checks for stale crash locks, orphaned completed-units, stale hook state,
|
||||
// activity log bloat, STATE.md drift, and gitignore drift.
|
||||
|
||||
async function checkRuntimeHealth(
|
||||
basePath: string,
|
||||
issues: DoctorIssue[],
|
||||
fixesApplied: string[],
|
||||
shouldFix: (code: DoctorIssueCode) => boolean,
|
||||
): Promise<void> {
|
||||
const root = gsdRoot(basePath);
|
||||
|
||||
// ── Stale crash lock ──────────────────────────────────────────────────
|
||||
try {
|
||||
const lock = readCrashLock(basePath);
|
||||
if (lock) {
|
||||
const alive = isLockProcessAlive(lock);
|
||||
if (!alive) {
|
||||
issues.push({
|
||||
severity: "error",
|
||||
code: "stale_crash_lock",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
|
||||
file: ".gsd/auto.lock",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("stale_crash_lock")) {
|
||||
clearLock(basePath);
|
||||
fixesApplied.push("cleared stale auto.lock");
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — crash lock check failed
|
||||
}
|
||||
|
||||
// ── Orphaned completed-units keys ─────────────────────────────────────
|
||||
try {
|
||||
const completedKeysFile = join(root, "completed-units.json");
|
||||
if (existsSync(completedKeysFile)) {
|
||||
const raw = readFileSync(completedKeysFile, "utf-8");
|
||||
const keys: string[] = JSON.parse(raw);
|
||||
const orphaned: string[] = [];
|
||||
|
||||
for (const key of keys) {
|
||||
// Key format: "unitType/unitId" e.g. "execute-task/M001/S01/T01"
|
||||
const slashIdx = key.indexOf("/");
|
||||
if (slashIdx === -1) continue;
|
||||
const unitType = key.slice(0, slashIdx);
|
||||
const unitId = key.slice(slashIdx + 1);
|
||||
|
||||
// Only validate artifact-producing unit types
|
||||
const { verifyExpectedArtifact } = await import("./auto-recovery.js");
|
||||
if (!verifyExpectedArtifact(unitType, unitId, basePath)) {
|
||||
orphaned.push(key);
|
||||
}
|
||||
}
|
||||
|
||||
if (orphaned.length > 0) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "orphaned_completed_units",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `${orphaned.length} completed-unit key(s) reference missing artifacts: ${orphaned.slice(0, 3).join(", ")}${orphaned.length > 3 ? "..." : ""}`,
|
||||
file: ".gsd/completed-units.json",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("orphaned_completed_units")) {
|
||||
const { removePersistedKey } = await import("./auto-recovery.js");
|
||||
for (const key of orphaned) {
|
||||
removePersistedKey(basePath, key);
|
||||
}
|
||||
fixesApplied.push(`removed ${orphaned.length} orphaned completed-unit key(s)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — completed-units check failed
|
||||
}
|
||||
|
||||
// ── Stale hook state ──────────────────────────────────────────────────
|
||||
try {
|
||||
const hookStateFile = join(root, "hook-state.json");
|
||||
if (existsSync(hookStateFile)) {
|
||||
const raw = readFileSync(hookStateFile, "utf-8");
|
||||
const state = JSON.parse(raw);
|
||||
const hasCycleCounts = state.cycleCounts && typeof state.cycleCounts === "object"
|
||||
&& Object.keys(state.cycleCounts).length > 0;
|
||||
|
||||
// Only flag if there are actual cycle counts AND no auto-mode is running
|
||||
if (hasCycleCounts) {
|
||||
const lock = readCrashLock(basePath);
|
||||
const autoRunning = lock ? isLockProcessAlive(lock) : false;
|
||||
|
||||
if (!autoRunning) {
|
||||
issues.push({
|
||||
severity: "info",
|
||||
code: "stale_hook_state",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `hook-state.json has ${Object.keys(state.cycleCounts).length} residual cycle count(s) from a previous session`,
|
||||
file: ".gsd/hook-state.json",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("stale_hook_state")) {
|
||||
const { clearPersistedHookState } = await import("./post-unit-hooks.js");
|
||||
clearPersistedHookState(basePath);
|
||||
fixesApplied.push("cleared stale hook-state.json");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — hook state check failed
|
||||
}
|
||||
|
||||
// ── Activity log bloat ────────────────────────────────────────────────
|
||||
try {
|
||||
const activityDir = join(root, "activity");
|
||||
if (existsSync(activityDir)) {
|
||||
const files = readdirSync(activityDir);
|
||||
let totalSize = 0;
|
||||
for (const f of files) {
|
||||
try {
|
||||
totalSize += statSync(join(activityDir, f)).size;
|
||||
} catch {
|
||||
// stat failed — skip
|
||||
}
|
||||
}
|
||||
|
||||
const totalMB = totalSize / (1024 * 1024);
|
||||
const BLOAT_FILE_THRESHOLD = 500;
|
||||
const BLOAT_SIZE_MB = 100;
|
||||
|
||||
if (files.length > BLOAT_FILE_THRESHOLD || totalMB > BLOAT_SIZE_MB) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "activity_log_bloat",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `Activity logs: ${files.length} files, ${totalMB.toFixed(1)}MB (thresholds: ${BLOAT_FILE_THRESHOLD} files / ${BLOAT_SIZE_MB}MB)`,
|
||||
file: ".gsd/activity/",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("activity_log_bloat")) {
|
||||
const { pruneActivityLogs } = await import("./activity-log.js");
|
||||
pruneActivityLogs(activityDir, 7); // 7-day retention
|
||||
fixesApplied.push("pruned activity logs (7-day retention)");
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — activity log check failed
|
||||
}
|
||||
|
||||
// ── STATE.md health ───────────────────────────────────────────────────
|
||||
try {
|
||||
const stateFilePath = resolveGsdRootFile(basePath, "STATE");
|
||||
const milestonesPath = milestonesDir(basePath);
|
||||
|
||||
if (existsSync(milestonesPath)) {
|
||||
if (!existsSync(stateFilePath)) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "state_file_missing",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: "STATE.md is missing — state display will not work",
|
||||
file: ".gsd/STATE.md",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("state_file_missing")) {
|
||||
const state = await deriveState(basePath);
|
||||
await saveFile(stateFilePath, buildStateMarkdown(state));
|
||||
fixesApplied.push("created STATE.md from derived state");
|
||||
}
|
||||
} else {
|
||||
// Check if STATE.md is stale by comparing active milestone/slice/phase
|
||||
const currentContent = readFileSync(stateFilePath, "utf-8");
|
||||
const state = await deriveState(basePath);
|
||||
const freshContent = buildStateMarkdown(state);
|
||||
|
||||
// Extract key fields for comparison — don't compare full content
|
||||
// since timestamp/formatting differences are normal
|
||||
const extractFields = (content: string) => {
|
||||
const milestone = content.match(/\*\*Active Milestone:\*\*\s*(.+)/)?.[1]?.trim() ?? "";
|
||||
const slice = content.match(/\*\*Active Slice:\*\*\s*(.+)/)?.[1]?.trim() ?? "";
|
||||
const phase = content.match(/\*\*Phase:\*\*\s*(.+)/)?.[1]?.trim() ?? "";
|
||||
return { milestone, slice, phase };
|
||||
};
|
||||
|
||||
const current = extractFields(currentContent);
|
||||
const fresh = extractFields(freshContent);
|
||||
|
||||
if (current.milestone !== fresh.milestone || current.slice !== fresh.slice || current.phase !== fresh.phase) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "state_file_stale",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `STATE.md is stale — shows "${current.phase}" but derived state is "${fresh.phase}"`,
|
||||
file: ".gsd/STATE.md",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("state_file_stale")) {
|
||||
await saveFile(stateFilePath, freshContent);
|
||||
fixesApplied.push("rebuilt STATE.md from derived state");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — STATE.md check failed
|
||||
}
|
||||
|
||||
// ── Gitignore drift ───────────────────────────────────────────────────
|
||||
try {
|
||||
const gitignorePath = join(basePath, ".gitignore");
|
||||
if (existsSync(gitignorePath) && nativeIsRepo(basePath)) {
|
||||
const content = readFileSync(gitignorePath, "utf-8");
|
||||
const existingLines = new Set(
|
||||
content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")),
|
||||
);
|
||||
|
||||
// Check for critical runtime patterns that must be present
|
||||
const criticalPatterns = [
|
||||
".gsd/activity/",
|
||||
".gsd/runtime/",
|
||||
".gsd/auto.lock",
|
||||
".gsd/gsd.db",
|
||||
".gsd/completed-units.json",
|
||||
];
|
||||
|
||||
// If blanket .gsd/ or .gsd is present, all patterns are covered
|
||||
const hasBlanketIgnore = existingLines.has(".gsd/") || existingLines.has(".gsd");
|
||||
|
||||
if (!hasBlanketIgnore) {
|
||||
const missing = criticalPatterns.filter(p => !existingLines.has(p));
|
||||
if (missing.length > 0) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "gitignore_missing_patterns",
|
||||
scope: "project",
|
||||
unitId: "project",
|
||||
message: `${missing.length} critical GSD runtime pattern(s) missing from .gitignore: ${missing.join(", ")}`,
|
||||
file: ".gitignore",
|
||||
fixable: true,
|
||||
});
|
||||
|
||||
if (shouldFix("gitignore_missing_patterns")) {
|
||||
ensureGitignore(basePath);
|
||||
fixesApplied.push("added missing GSD runtime patterns to .gitignore");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — gitignore check failed
|
||||
}
|
||||
}
|
||||
|
||||
export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; scope?: string; fixLevel?: "task" | "all" }): Promise<DoctorReport> {
|
||||
const issues: DoctorIssue[] = [];
|
||||
const fixesApplied: string[] = [];
|
||||
|
|
@ -700,6 +978,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
|
|||
// Git health checks (orphaned worktrees, stale branches, corrupt merge state, tracked runtime files)
|
||||
await checkGitHealth(basePath, issues, fixesApplied, shouldFix);
|
||||
|
||||
// Runtime health checks (crash locks, completed-units, hook state, activity logs, STATE.md, gitignore)
|
||||
await checkRuntimeHealth(basePath, issues, fixesApplied, shouldFix);
|
||||
|
||||
const milestonesPath = milestonesDir(basePath);
|
||||
if (!existsSync(milestonesPath)) {
|
||||
return { ok: issues.every(issue => issue.severity !== "error"), basePath, issues, fixesApplied };
|
||||
|
|
|
|||
244
src/resources/extensions/gsd/tests/doctor-proactive.test.ts
Normal file
244
src/resources/extensions/gsd/tests/doctor-proactive.test.ts
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
/**
|
||||
* doctor-proactive.test.ts — Tests for proactive healing layer.
|
||||
*
|
||||
* Tests:
|
||||
* - Pre-dispatch health gate (stale lock, merge state)
|
||||
* - Health score tracking (snapshots, trends)
|
||||
* - Auto-heal escalation (consecutive errors, threshold)
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { execSync } from "node:child_process";
|
||||
|
||||
import {
|
||||
preDispatchHealthGate,
|
||||
recordHealthSnapshot,
|
||||
getHealthTrend,
|
||||
getConsecutiveErrorUnits,
|
||||
getHealthHistory,
|
||||
checkHealEscalation,
|
||||
resetProactiveHealing,
|
||||
formatHealthSummary,
|
||||
} from "../doctor-proactive.ts";
|
||||
import { createTestContext } from "./test-helpers.ts";
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
function run(cmd: string, cwd: string): string {
|
||||
return execSync(cmd, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim();
|
||||
}
|
||||
|
||||
function createGitRepo(): string {
|
||||
const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-")));
|
||||
run("git init", dir);
|
||||
run("git config user.email test@test.com", dir);
|
||||
run("git config user.name Test", dir);
|
||||
writeFileSync(join(dir, "README.md"), "# test\n");
|
||||
run("git add .", dir);
|
||||
run("git commit -m init", dir);
|
||||
run("git branch -M main", dir);
|
||||
mkdirSync(join(dir, ".gsd"), { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
/**
 * Entry point: exercises the proactive-healing API end to end.
 *
 * Scenario order matters — the "no double escalation" block deliberately
 * skips resetProactiveHealing() so it observes the escalated state left by
 * the "at threshold" block. The finally clause resets module state and
 * removes all temp dirs; report() emits the run summary at the end.
 */
async function main(): Promise<void> {
  const cleanups: string[] = [];

  try {
    // ─── Health Score Tracking ─────────────────────────────────────────
    console.log("\n=== health tracking: initial state ===");
    {
      resetProactiveHealing();
      assertEq(getHealthTrend(), "unknown", "trend is unknown with no data");
      assertEq(getConsecutiveErrorUnits(), 0, "no consecutive errors initially");
      assertEq(getHealthHistory().length, 0, "no history initially");
    }

    console.log("\n=== health tracking: recording snapshots ===");
    {
      resetProactiveHealing();
      // recordHealthSnapshot args appear to be (errors, warnings, fixes) —
      // inferred from the "2E/3W" / "fixes:1" formatHealthSummary assertions
      // below; confirm against doctor-proactive.ts.
      recordHealthSnapshot(0, 2, 1);
      recordHealthSnapshot(0, 1, 0);
      recordHealthSnapshot(0, 0, 0);

      assertEq(getHealthHistory().length, 3, "3 snapshots recorded");
      assertEq(getConsecutiveErrorUnits(), 0, "no consecutive errors after clean units");
    }

    console.log("\n=== health tracking: consecutive error counting ===");
    {
      resetProactiveHealing();
      recordHealthSnapshot(2, 1, 0); // errors
      recordHealthSnapshot(1, 0, 0); // errors
      recordHealthSnapshot(1, 0, 0); // errors
      assertEq(getConsecutiveErrorUnits(), 3, "3 consecutive error units");

      recordHealthSnapshot(0, 0, 0); // clean
      assertEq(getConsecutiveErrorUnits(), 0, "streak reset on clean unit");
    }

    console.log("\n=== health tracking: trend detection ===");
    {
      resetProactiveHealing();
      // Record 5 older snapshots with low issues
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(0, 1, 0);
      }
      // Record 5 recent snapshots with high issues
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(3, 5, 0);
      }
      // Trend compares older vs. recent halves of the sliding window.
      assertEq(getHealthTrend(), "degrading", "detects degrading trend");
    }

    console.log("\n=== health tracking: improving trend ===");
    {
      resetProactiveHealing();
      // Record 5 older snapshots with high issues
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(3, 5, 0);
      }
      // Record 5 recent snapshots with low issues
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(0, 0, 0);
      }
      assertEq(getHealthTrend(), "improving", "detects improving trend");
    }

    console.log("\n=== health tracking: stable trend ===");
    {
      resetProactiveHealing();
      for (let i = 0; i < 10; i++) {
        recordHealthSnapshot(1, 1, 0);
      }
      assertEq(getHealthTrend(), "stable", "detects stable trend");
    }

    // ─── Auto-Heal Escalation ─────────────────────────────────────────
    console.log("\n=== escalation: below threshold ===");
    {
      resetProactiveHealing();
      recordHealthSnapshot(1, 0, 0);
      recordHealthSnapshot(1, 0, 0);
      recordHealthSnapshot(1, 0, 0);
      const result = checkHealEscalation(1, [{ code: "test", message: "test error", unitId: "M001/S01" }]);
      assertEq(result.shouldEscalate, false, "no escalation below threshold");
      assertTrue(result.reason.includes("3/5"), "reason shows progress toward threshold");
    }

    console.log("\n=== escalation: at threshold ===");
    {
      resetProactiveHealing();
      // Need 5+ consecutive error units AND degrading/stable trend
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(0, 0, 0); // older clean snapshots
      }
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(2, 1, 0); // recent error snapshots
      }
      const result = checkHealEscalation(2, [{ code: "test", message: "test error", unitId: "M001/S01" }]);
      assertEq(result.shouldEscalate, true, "escalates at threshold with degrading trend");
      assertTrue(result.reason.includes("5 consecutive"), "reason mentions consecutive count");
    }

    console.log("\n=== escalation: no double escalation ===");
    {
      // Don't reset — should already be escalated from previous test
      recordHealthSnapshot(2, 0, 0);
      const result = checkHealEscalation(2, [{ code: "test", message: "test error", unitId: "M001/S01" }]);
      assertEq(result.shouldEscalate, false, "no double escalation in same session");
      assertTrue(result.reason.includes("already escalated"), "reason explains why no escalation");
    }

    console.log("\n=== escalation: deferred when improving ===");
    {
      resetProactiveHealing();
      // 5 older snapshots with high errors
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(5, 5, 0);
      }
      // 5 recent snapshots with fewer errors (still > 0)
      for (let i = 0; i < 5; i++) {
        recordHealthSnapshot(1, 0, 0);
      }
      // Errors persist but the trend is improving, so escalation is deferred.
      const result = checkHealEscalation(1, [{ code: "test", message: "test error", unitId: "M001/S01" }]);
      assertEq(result.shouldEscalate, false, "no escalation when trend is improving");
      assertTrue(result.reason.includes("improving"), "reason mentions improving trend");
    }

    // ─── Health Summary Formatting ────────────────────────────────────
    console.log("\n=== formatHealthSummary ===");
    {
      resetProactiveHealing();
      assertEq(formatHealthSummary(), "No health data yet.", "empty summary when no data");

      recordHealthSnapshot(2, 3, 1);
      const summary = formatHealthSummary();
      assertTrue(summary.includes("2E/3W"), "summary includes error/warning counts");
      assertTrue(summary.includes("fixes:1"), "summary includes fix count");
      assertTrue(summary.includes("streak:1/5"), "summary includes error streak");
    }

    // ─── Pre-Dispatch Health Gate ─────────────────────────────────────
    console.log("\n=== health gate: clean state ===");
    {
      // realpathSync avoids symlinked-tmpdir mismatches (e.g. /var vs /private/var).
      const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-")));
      cleanups.push(dir);
      mkdirSync(join(dir, ".gsd"), { recursive: true });

      const result = preDispatchHealthGate(dir);
      assertTrue(result.proceed, "gate passes on clean state");
      assertEq(result.issues.length, 0, "no issues on clean state");
    }

    console.log("\n=== health gate: stale crash lock auto-cleared ===");
    {
      const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-")));
      cleanups.push(dir);
      mkdirSync(join(dir, ".gsd"), { recursive: true });

      // Write a stale lock
      // PID 9999999 is far above typical OS pid ranges, so the owning process
      // is guaranteed dead and the gate should treat the lock as stale.
      writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify({
        pid: 9999999, startedAt: "2026-03-10T00:00:00Z",
        unitType: "execute-task", unitId: "M001/S01/T01",
        unitStartedAt: "2026-03-10T00:01:00Z", completedUnits: 3,
      }));

      const result = preDispatchHealthGate(dir);
      assertTrue(result.proceed, "gate passes after auto-clearing stale lock");
      assertTrue(result.fixesApplied.some(f => f.includes("cleared stale auto.lock")), "reports lock cleared");
      assertTrue(!existsSync(join(dir, ".gsd", "auto.lock")), "lock file removed");
    }

    console.log("\n=== health gate: corrupt merge state auto-healed ===");
    if (process.platform !== "win32") {
      {
        const dir = createGitRepo();
        cleanups.push(dir);

        // Inject MERGE_HEAD
        // Presence of .git/MERGE_HEAD makes the repo look mid-merge; the gate
        // is expected to clean it up rather than block dispatch.
        const headHash = run("git rev-parse HEAD", dir);
        writeFileSync(join(dir, ".git", "MERGE_HEAD"), headHash + "\n");

        const result = preDispatchHealthGate(dir);
        assertTrue(result.proceed, "gate passes after auto-healing merge state");
        assertTrue(result.fixesApplied.some(f => f.includes("cleaned merge state")), "reports merge state cleaned");
        assertTrue(!existsSync(join(dir, ".git", "MERGE_HEAD")), "MERGE_HEAD removed");
      }
    } else {
      console.log(" (skipped on Windows)");
    }

  } finally {
    // Reset module-level healing state so later test files start clean.
    resetProactiveHealing();
    for (const dir of cleanups) {
      try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
    }
  }

  report();
}
|
||||
|
||||
// Fail loudly with a non-zero exit if the async runner itself rejects;
// a bare main() would rely on Node's default unhandled-rejection handling.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
303
src/resources/extensions/gsd/tests/doctor-runtime.test.ts
Normal file
303
src/resources/extensions/gsd/tests/doctor-runtime.test.ts
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
/**
|
||||
* doctor-runtime.test.ts — Tests for doctor runtime health checks.
|
||||
*
|
||||
* Tests detection and auto-fix of:
|
||||
* stale_crash_lock, orphaned_completed_units, stale_hook_state,
|
||||
* activity_log_bloat, state_file_missing, state_file_stale,
|
||||
* gitignore_missing_patterns
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, realpathSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { execSync } from "node:child_process";
|
||||
|
||||
import { runGSDDoctor } from "../doctor.ts";
|
||||
import { createTestContext } from "./test-helpers.ts";
|
||||
|
||||
// Shared test helpers: assertEq/assertTrue record individual assertion results;
// report() is called at the end of main() (presumably to print the pass/fail
// summary — confirm against test-helpers.ts).
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
function run(cmd: string, cwd: string): string {
|
||||
return execSync(cmd, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim();
|
||||
}
|
||||
|
||||
/** Create a minimal .gsd project with a milestone for STATE.md tests. */
|
||||
function createMinimalProject(): string {
|
||||
const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-runtime-test-")));
|
||||
const msDir = join(dir, ".gsd", "milestones", "M001");
|
||||
mkdirSync(msDir, { recursive: true });
|
||||
writeFileSync(join(msDir, "M001-ROADMAP.md"), `# M001: Test
|
||||
|
||||
## Slices
|
||||
- [ ] **S01: Demo** \`risk:low\` \`depends:[]\`
|
||||
> After this: done
|
||||
`);
|
||||
const sDir = join(msDir, "slices", "S01", "tasks");
|
||||
mkdirSync(sDir, { recursive: true });
|
||||
writeFileSync(join(msDir, "slices", "S01", "S01-PLAN.md"), `# S01: Demo
|
||||
|
||||
**Goal:** Demo
|
||||
|
||||
## Tasks
|
||||
- [ ] **T01: Do thing** \`est:10m\`
|
||||
`);
|
||||
return dir;
|
||||
}
|
||||
|
||||
/** Create a minimal git repo with .gsd for gitignore tests. */
|
||||
function createGitProject(): string {
|
||||
const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-runtime-git-")));
|
||||
run("git init", dir);
|
||||
run("git config user.email test@test.com", dir);
|
||||
run("git config user.name Test", dir);
|
||||
writeFileSync(join(dir, "README.md"), "# test\n");
|
||||
run("git add .", dir);
|
||||
run("git commit -m init", dir);
|
||||
run("git branch -M main", dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
/**
 * Entry point: runs all runtime health-check scenarios against temp fixture
 * projects. Each scenario builds an isolated project, runs runGSDDoctor in
 * detect mode, asserts on the issue codes it reports, then (where a fix
 * exists) re-runs with { fix: true } and verifies the on-disk repair.
 * Temp dirs are removed in the finally block; report() emits the summary.
 */
async function main(): Promise<void> {
  const cleanups: string[] = [];

  try {
    // ─── Test 1: Stale crash lock detection & fix ─────────────────────
    console.log("\n=== stale_crash_lock ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // Write a lock file with a PID that is definitely dead (use PID 1 million+)
      const lockData = {
        pid: 9999999,
        startedAt: "2026-03-10T00:00:00Z",
        unitType: "execute-task",
        unitId: "M001/S01/T01",
        unitStartedAt: "2026-03-10T00:01:00Z",
        completedUnits: 3,
      };
      writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2));

      const detect = await runGSDDoctor(dir);
      const lockIssues = detect.issues.filter(i => i.code === "stale_crash_lock");
      assertTrue(lockIssues.length > 0, "detects stale crash lock");
      assertTrue(lockIssues[0]?.message.includes("9999999"), "message includes PID");
      assertTrue(lockIssues[0]?.fixable === true, "stale lock is fixable");

      const fixed = await runGSDDoctor(dir, { fix: true });
      assertTrue(fixed.fixesApplied.some(f => f.includes("cleared stale auto.lock")), "fix clears stale lock");
      assertTrue(!existsSync(join(dir, ".gsd", "auto.lock")), "auto.lock removed after fix");
    }

    // ─── Test 2: No false positive for missing lock ───────────────────
    console.log("\n=== stale_crash_lock — no false positive ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      const detect = await runGSDDoctor(dir);
      const lockIssues = detect.issues.filter(i => i.code === "stale_crash_lock");
      assertEq(lockIssues.length, 0, "no stale lock issue when no lock file exists");
    }

    // ─── Test 3: Stale hook state detection & fix ─────────────────────
    console.log("\n=== stale_hook_state ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // Write hook state with active cycle counts and no auto.lock (no running session)
      const hookState = {
        cycleCounts: {
          "code-review/execute-task/M001/S01/T01": 2,
          "lint-check/execute-task/M001/S01/T02": 1,
        },
        savedAt: "2026-03-10T00:00:00Z",
      };
      writeFileSync(join(dir, ".gsd", "hook-state.json"), JSON.stringify(hookState, null, 2));

      const detect = await runGSDDoctor(dir);
      const hookIssues = detect.issues.filter(i => i.code === "stale_hook_state");
      assertTrue(hookIssues.length > 0, "detects stale hook state");
      assertTrue(hookIssues[0]?.message.includes("2 residual cycle count"), "message includes count");

      const fixed = await runGSDDoctor(dir, { fix: true });
      assertTrue(fixed.fixesApplied.some(f => f.includes("cleared stale hook-state.json")), "fix clears hook state");

      // Verify the file was cleaned
      // The fix empties cycleCounts in place rather than deleting the file.
      const content = JSON.parse(readFileSync(join(dir, ".gsd", "hook-state.json"), "utf-8"));
      assertEq(Object.keys(content.cycleCounts).length, 0, "hook state cycle counts cleared");
    }

    // ─── Test 4: Activity log bloat detection ─────────────────────────
    console.log("\n=== activity_log_bloat ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // Create an activity dir with > 500 files
      const activityDir = join(dir, ".gsd", "activity");
      mkdirSync(activityDir, { recursive: true });
      for (let i = 0; i < 510; i++) {
        writeFileSync(join(activityDir, `${String(i).padStart(3, "0")}-execute-task-M001-S01-T01.jsonl`), `{"test":${i}}\n`);
      }

      // Detection only — the auto-prune fix is not exercised here.
      const detect = await runGSDDoctor(dir);
      const bloatIssues = detect.issues.filter(i => i.code === "activity_log_bloat");
      assertTrue(bloatIssues.length > 0, "detects activity log bloat");
      assertTrue(bloatIssues[0]?.message.includes("510 files"), "message includes file count");
    }

    // ─── Test 5: STATE.md missing detection & fix ─────────────────────
    console.log("\n=== state_file_missing ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // No STATE.md exists by default in our minimal setup
      const stateFilePath = join(dir, ".gsd", "STATE.md");
      assertTrue(!existsSync(stateFilePath), "STATE.md does not exist initially");

      const detect = await runGSDDoctor(dir);
      const stateIssues = detect.issues.filter(i => i.code === "state_file_missing");
      assertTrue(stateIssues.length > 0, "detects missing STATE.md");
      assertTrue(stateIssues[0]?.fixable === true, "missing STATE.md is fixable");
      assertEq(stateIssues[0]?.severity, "warning", "missing STATE.md is a warning (derived file)");

      const fixed = await runGSDDoctor(dir, { fix: true });
      assertTrue(fixed.fixesApplied.some(f => f.includes("created STATE.md")), "fix creates STATE.md");
      assertTrue(existsSync(stateFilePath), "STATE.md exists after fix");

      // Verify content has expected structure
      const content = readFileSync(stateFilePath, "utf-8");
      assertTrue(content.includes("# GSD State"), "STATE.md has header");
      assertTrue(content.includes("M001"), "STATE.md references milestone");
    }

    // ─── Test 6: STATE.md stale detection & fix ───────────────────────
    console.log("\n=== state_file_stale ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // Write a STATE.md with wrong phase/milestone info
      // An "idle"/no-milestone STATE.md contradicts the M001 fixture on disk,
      // which is what the stale check should flag.
      const stateFilePath = join(dir, ".gsd", "STATE.md");
      writeFileSync(stateFilePath, `# GSD State

**Active Milestone:** None
**Active Slice:** None
**Phase:** idle

## Milestone Registry

## Recent Decisions
- None recorded

## Blockers
- None

## Next Action
None
`);

      const detect = await runGSDDoctor(dir);
      const staleIssues = detect.issues.filter(i => i.code === "state_file_stale");
      assertTrue(staleIssues.length > 0, "detects stale STATE.md");
      assertTrue(staleIssues[0]?.message.includes("idle"), "message references old phase");

      const fixed = await runGSDDoctor(dir, { fix: true });
      assertTrue(fixed.fixesApplied.some(f => f.includes("rebuilt STATE.md")), "fix rebuilds STATE.md");

      // Verify updated content matches derived state
      const content = readFileSync(stateFilePath, "utf-8");
      assertTrue(content.includes("M001"), "rebuilt STATE.md references milestone");
    }

    // ─── Test 7: Gitignore missing patterns detection & fix ───────────
    // Skipped on Windows — these scenarios shell out to git via createGitProject.
    if (process.platform !== "win32") {
      console.log("\n=== gitignore_missing_patterns ===");
      {
        const dir = createGitProject();
        cleanups.push(dir);

        // Create .gsd dir so checks can run
        mkdirSync(join(dir, ".gsd"), { recursive: true });

        // Write a .gitignore missing GSD runtime patterns
        writeFileSync(join(dir, ".gitignore"), `node_modules/
.env
`);

        const detect = await runGSDDoctor(dir);
        const gitignoreIssues = detect.issues.filter(i => i.code === "gitignore_missing_patterns");
        assertTrue(gitignoreIssues.length > 0, "detects missing gitignore patterns");
        assertTrue(gitignoreIssues[0]?.message.includes(".gsd/activity/"), "message lists missing patterns");

        const fixed = await runGSDDoctor(dir, { fix: true });
        assertTrue(fixed.fixesApplied.some(f => f.includes("added missing GSD runtime patterns")), "fix adds patterns");

        // Verify patterns were added
        const content = readFileSync(join(dir, ".gitignore"), "utf-8");
        assertTrue(content.includes(".gsd/activity/"), "gitignore now has activity pattern");
        assertTrue(content.includes(".gsd/auto.lock"), "gitignore now has auto.lock pattern");
      }
    } else {
      console.log("\n=== gitignore_missing_patterns (skipped on Windows) ===");
    }

    // ─── Test 8: No false positive when gitignore has blanket .gsd/ ───
    if (process.platform !== "win32") {
      console.log("\n=== gitignore — blanket .gsd/ ===");
      {
        const dir = createGitProject();
        cleanups.push(dir);

        // A blanket .gsd/ entry already covers every runtime pattern.
        mkdirSync(join(dir, ".gsd"), { recursive: true });
        writeFileSync(join(dir, ".gitignore"), `.gsd/
node_modules/
`);

        const detect = await runGSDDoctor(dir);
        const gitignoreIssues = detect.issues.filter(i => i.code === "gitignore_missing_patterns");
        assertEq(gitignoreIssues.length, 0, "no missing patterns when blanket .gsd/ present");
      }
    } else {
      console.log("\n=== gitignore — blanket .gsd/ (skipped on Windows) ===");
    }

    // ─── Test 9: Orphaned completed-units detection & fix ─────────────
    console.log("\n=== orphaned_completed_units ===");
    {
      const dir = createMinimalProject();
      cleanups.push(dir);

      // Write completed-units.json with keys that reference non-existent artifacts
      const completedKeys = [
        "execute-task/M001/S01/T99", // T99 doesn't exist
        "complete-slice/M001/S99", // S99 doesn't exist
      ];
      writeFileSync(join(dir, ".gsd", "completed-units.json"), JSON.stringify(completedKeys));

      const detect = await runGSDDoctor(dir);
      const orphanIssues = detect.issues.filter(i => i.code === "orphaned_completed_units");
      assertTrue(orphanIssues.length > 0, "detects orphaned completed-unit keys");
      assertTrue(orphanIssues[0]?.message.includes("2 completed-unit key"), "message includes count");

      const fixed = await runGSDDoctor(dir, { fix: true });
      assertTrue(fixed.fixesApplied.some(f => f.includes("removed") && f.includes("orphaned")), "fix removes orphaned keys");

      // Verify keys were cleaned
      const content = JSON.parse(readFileSync(join(dir, ".gsd", "completed-units.json"), "utf-8"));
      assertEq(content.length, 0, "all orphaned keys removed");
    }

  } finally {
    for (const dir of cleanups) {
      try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
    }
  }

  report();
}
|
||||
|
||||
// Fail loudly with a non-zero exit if the async runner itself rejects;
// a bare main() would rely on Node's default unhandled-rejection handling.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Loading…
Add table
Reference in a new issue