fix(gsd): session and recovery robustness (wave 3/5)

Five fixes for session lifecycle and recovery reliability:

1. hasImplementationArtifacts now returns tri-state ("present"|"absent"|"unknown")
   instead of boolean. "unknown" on git errors lets callers warn+proceed instead
   of either silently blocking or silently allowing. Both callers updated.

2. DB-ahead-of-disk split-brain: rollback DELETE in db-writer.ts saveDecisionToDb
   and saveRequirementToDb now wrapped in try/catch with logError. A failed
   rollback is explicitly logged as SPLIT BRAIN so the orphaned row is auditable.

3. _consecutiveCompleteBootstraps moved from module-level in auto-start.ts into
   AutoSession class. Now properly reset by s.reset(), preventing cross-session
   counter bleed in long-running processes (VS Code extension).

4. s.paused sticky on lock failure: when acquireSessionLock fails during resume,
   s.paused is now set back to false so isAutoPaused() doesn't return true
   permanently.

5. nativeCommit empty message replaced with "chore(gsd): reconcile merge state"
   to avoid rejection by strict git configurations.
This commit is contained in:
Jeremy 2026-04-07 12:35:43 -05:00
parent 0dd7c31213
commit dc9899c9d6
6 changed files with 41 additions and 27 deletions

View file

@ -767,13 +767,17 @@ export const DISPATCH_RULES: DispatchRule[] = [
// Safety guard (#1703): verify the milestone produced implementation
// artifacts (non-.gsd/ files). A milestone with only plan files and
// zero implementation code should not be marked complete.
if (!hasImplementationArtifacts(basePath)) {
const artifactCheck = hasImplementationArtifacts(basePath);
if (artifactCheck === "absent") {
return {
action: "stop",
reason: `Cannot complete milestone ${mid}: no implementation files found outside .gsd/. The milestone has only plan files — actual code changes are required.`,
level: "error",
};
}
if (artifactCheck === "unknown") {
logWarning("dispatch", `Implementation artifact check inconclusive for ${mid} — proceeding with caution`);
}
// Verification class compliance: if operational verification was planned,
// ensure the validation output documents it before allowing completion.

View file

@ -60,13 +60,12 @@ export { resolveExpectedArtifactPath, diagnoseExpectedArtifact };
* in the git history. Uses `git log --name-only` to inspect all commits on the
* current branch that touch files outside `.gsd/`.
*
* Returns true if at least one non-`.gsd/` file was committed, false otherwise.
* Non-fatal: returns true on git errors to avoid blocking the pipeline when
* running outside a git repo (e.g., tests).
* Returns "present" if implementation files found, "absent" if only .gsd/ files,
* "unknown" if git is unavailable or check failed (callers decide how to handle).
*/
export function hasImplementationArtifacts(basePath: string): boolean {
export function hasImplementationArtifacts(basePath: string): "present" | "absent" | "unknown" {
try {
// Verify we're in a git repo — fail open if not
// Verify we're in a git repo
try {
execFileSync("git", ["rev-parse", "--is-inside-work-tree"], {
cwd: basePath,
@ -75,7 +74,7 @@ export function hasImplementationArtifacts(basePath: string): boolean {
});
} catch (e) {
logWarning("recovery", `git rev-parse check failed: ${(e as Error).message}`);
return true;
return "unknown";
}
// Strategy: check `git diff --name-only` against the merge-base with the
@ -85,19 +84,19 @@ export function hasImplementationArtifacts(basePath: string): boolean {
const mainBranch = detectMainBranch(basePath);
const changedFiles = getChangedFilesSinceBranch(basePath, mainBranch);
// No files changed at all — fail open (could be detached HEAD, single-
// No files changed at all — unknown (could be detached HEAD, single-
// commit repo, or other edge case where git diff returns nothing).
if (changedFiles.length === 0) return true;
if (changedFiles.length === 0) return "unknown";
// Filter out .gsd/ files — only implementation files count.
// If every changed file is under .gsd/, the milestone produced no
// implementation code (#1703).
const implFiles = changedFiles.filter(f => !f.startsWith(".gsd/") && !f.startsWith(".gsd\\"));
return implFiles.length > 0;
return implFiles.length > 0 ? "present" : "absent";
} catch (e) {
// Non-fatal — if git operations fail, don't block the pipeline
// Non-fatal — if git operations fail, return unknown so callers can decide
logWarning("recovery", `implementation artifact check failed: ${(e as Error).message}`);
return true;
return "unknown";
}
}
@ -394,7 +393,7 @@ export function verifyExpectedArtifact(
// A milestone with only .gsd/ plan files and zero implementation code is
// not genuinely complete — the LLM wrote plan files but skipped actual work.
if (unitType === "complete-milestone") {
if (!hasImplementationArtifacts(base)) return false;
if (hasImplementationArtifacts(base) === "absent") return false;
}
return true;
@ -497,7 +496,7 @@ export function reconcileMergeState(
if (conflictedFiles.length === 0) {
// All conflicts resolved — finalize the merge/squash commit
try {
const commitSha = nativeCommit(basePath, ""); // --no-edit equivalent: use empty message placeholder
const commitSha = nativeCommit(basePath, "chore(gsd): reconcile merge state");
if (commitSha) {
const mode = hasMergeHead ? "merge" : "squash commit";
ctx.ui.notify(`Finalized leftover ${mode} from prior session.`, "info");

View file

@ -102,11 +102,8 @@ export interface BootstrapDeps {
* concurrent session detected). Returns true when ready to dispatch.
*/
/** Guard: tracks consecutive bootstrap attempts that found phase === "complete".
* Prevents the recursive dialog loop described in #1348 where
* bootstrapAutoSession showSmartEntry checkAutoStartAfterDiscuss startAuto
* cycles indefinitely when the discuss workflow doesn't produce a milestone. */
let _consecutiveCompleteBootstraps = 0;
// Guard constant for consecutive bootstrap attempts that found phase === "complete".
// Counter moved to AutoSession.consecutiveCompleteBootstraps so s.reset() clears it.
const MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS = 2;
export async function openProjectDbIfPresent(basePath: string): Promise<void> {
@ -392,9 +389,9 @@ export async function bootstrapAutoSession(
// Guard against recursive dialog loop (#1348):
// If we've entered this branch multiple times in quick succession,
// the discuss workflow isn't producing a milestone. Break the cycle.
_consecutiveCompleteBootstraps++;
if (_consecutiveCompleteBootstraps > MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS) {
_consecutiveCompleteBootstraps = 0;
s.consecutiveCompleteBootstraps++;
if (s.consecutiveCompleteBootstraps > MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS) {
s.consecutiveCompleteBootstraps = 0;
ctx.ui.notify(
"All milestones are complete and the discussion didn't produce a new one. " +
"Run /gsd to start a new milestone manually.",
@ -413,7 +410,7 @@ export async function bootstrapAutoSession(
postState.phase !== "complete" &&
postState.phase !== "pre-planning"
) {
_consecutiveCompleteBootstraps = 0; // Successfully advanced past "complete"
s.consecutiveCompleteBootstraps = 0; // Successfully advanced past "complete"
state = postState;
} else if (
postState.activeMilestone &&
@ -492,7 +489,7 @@ export async function bootstrapAutoSession(
}
// Successfully resolved an active milestone — reset the re-entry guard
_consecutiveCompleteBootstraps = 0;
s.consecutiveCompleteBootstraps = 0;
// ── Initialize session state ──
s.active = true;

View file

@ -1187,6 +1187,8 @@ export async function startAuto(
if (s.paused) {
const resumeLock = acquireSessionLock(base);
if (!resumeLock.acquired) {
// Reset paused state so isAutoPaused() doesn't stick true after lock failure
s.paused = false;
ctx.ui.notify(`Cannot resume: ${resumeLock.reason}`, "error");
return;
}

View file

@ -138,6 +138,9 @@ export class AutoSession {
// ── Dispatch circuit breakers ──────────────────────────────────────
rewriteAttemptCount = 0;
/** Tracks consecutive bootstrap attempts that found phase === "complete".
* Moved from module-level to per-session so s.reset() clears it (#1348). */
consecutiveCompleteBootstraps = 0;
// ── Metrics ──────────────────────────────────────────────────────────────
autoStartTime = 0;
@ -224,6 +227,7 @@ export class AutoSession {
this.pendingQuickTasks = [];
this.sidecarQueue = [];
this.rewriteAttemptCount = 0;
this.consecutiveCompleteBootstraps = 0;
this.lastToolInvocationError = null;
this.isolationDegraded = false;
this.milestoneMergedInPhases = false;

View file

@ -345,8 +345,12 @@ export async function saveRequirementToDb(
await saveFile(filePath, md);
} catch (diskErr) {
logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveRequirementToDb', error: String((diskErr as Error).message) });
const rollbackAdapter = db._getAdapter();
rollbackAdapter?.prepare('DELETE FROM requirements WHERE id = :id').run({ ':id': id });
try {
const rollbackAdapter = db._getAdapter();
rollbackAdapter?.prepare('DELETE FROM requirements WHERE id = :id').run({ ':id': id });
} catch (rollbackErr) {
logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveRequirementToDb', id, error: String((rollbackErr as Error).message) });
}
throw diskErr;
}
invalidateStateCache();
@ -466,7 +470,11 @@ export async function saveDecisionToDb(
await saveFile(filePath, md);
} catch (diskErr) {
logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveDecisionToDb', error: String((diskErr as Error).message) });
adapter?.prepare('DELETE FROM decisions WHERE id = :id').run({ ':id': id });
try {
adapter?.prepare('DELETE FROM decisions WHERE id = :id').run({ ':id': id });
} catch (rollbackErr) {
logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveDecisionToDb', id, error: String((rollbackErr as Error).message) });
}
throw diskErr;
}
// #2661: When a decision defers a slice, update the slice status in the DB