Merge branch 'main' into fix/gsd-extension-ctx-log

2026-03-15 12:51:42 -05:00 · 2026-03-15 12:51:42 -05:00 · 314c134962
commit 314c134962
parent 996dc3d7dc 2d4a14b7ca
8 changed files with 259 additions and 18 deletions
--- a/src/resources/extensions/gsd/auto.ts
+++ b/src/resources/extensions/gsd/auto.ts
@ -939,17 +939,37 @@ export async function handleAgentEnd(
    // produced its expected artifact. If so, persist the completion key now so the
    // idempotency check at the top of dispatchNextUnit() skips it — even if
    // deriveState() still returns this unit as active (e.g. branch mismatch).
-    try {
-      if (verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath)) {
-        const completionKey = `${currentUnit.type}/${currentUnit.id}`;
-        if (!completedKeySet.has(completionKey)) {
-          persistCompletedKey(basePath, completionKey);
-          completedKeySet.add(completionKey);
+    //
+    // IMPORTANT: Only non-hook units get a completion key here, and it is persisted
+    // right away. If a post-unit hook later requests a retry, the key must be removed
+    // again so dispatchNextUnit re-dispatches the trigger unit.
+    let triggerArtifactVerified = false;
+    if (!currentUnit.type.startsWith("hook/")) {
+      try {
+        triggerArtifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
+        if (triggerArtifactVerified) {
+          const completionKey = `${currentUnit.type}/${currentUnit.id}`;
+          if (!completedKeySet.has(completionKey)) {
+            persistCompletedKey(basePath, completionKey);
+            completedKeySet.add(completionKey);
+          }
+          invalidateStateCache();
        }
-        invalidateStateCache();
+      } catch {
+        // Non-fatal — worst case we fall through to normal dispatch which has its own checks
+      }
+    } else {
+      // Hook unit completed — finalize its runtime record and clear it
+      try {
+        writeUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, {
+          phase: "finalized",
+          progressCount: 1,
+          lastProgressKind: "hook-completed",
+        });
+        clearUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id);
+      } catch {
+        // Non-fatal
      }
-    } catch {
-      // Non-fatal — worst case we fall through to normal dispatch which has its own checks
    }
  }

@ -1005,6 +1025,31 @@ export async function handleAgentEnd(
      writeLock(basePath, hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
      // Persist hook state so cycle counts survive crashes
      persistHookState(basePath);
+
+      // Start supervision timers for hook units — hooks can get stuck just
+      // like normal units, and without a watchdog auto-mode would hang forever.
+      clearUnitTimeout();
+      const supervisor = resolveAutoSupervisorConfig();
+      const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
+      unitTimeoutHandle = setTimeout(async () => {
+        unitTimeoutHandle = null;
+        if (!active) return;
+        if (currentUnit) {
+          writeUnitRuntimeRecord(basePath, hookUnit.unitType, hookUnit.unitId, currentUnit.startedAt, {
+            phase: "timeout",
+            timeoutAt: Date.now(),
+          });
+        }
+        ctx.ui.notify(
+          `Hook ${hookUnit.hookName} exceeded ${supervisor.hard_timeout_minutes ?? 30}min timeout. Pausing auto-mode.`,
+          "warning",
+        );
+        resetHookState();
+        await pauseAuto(ctx, pi);
+      }, hookHardTimeoutMs);
+
+      // Guard against race with timeout/pause before sending
+      if (!active) return;
      pi.sendMessage(
        { customType: "gsd-auto", content: hookUnit.prompt, display: verbose },
        { triggerTurn: true },
@ -1016,6 +1061,11 @@ export async function handleAgentEnd(
    if (isRetryPending()) {
      const trigger = consumeRetryTrigger();
      if (trigger) {
+        // Remove the trigger unit's completion key so dispatchNextUnit
+        // will re-dispatch it instead of skipping it as already-complete.
+        const triggerKey = `${trigger.unitType}/${trigger.unitId}`;
+        completedKeySet.delete(triggerKey);
+        removePersistedKey(basePath, triggerKey);
        ctx.ui.notify(
          `Hook requested retry of ${trigger.unitType} ${trigger.unitId}.`,
          "info",
@ -2207,12 +2257,19 @@ async function dispatchNextUnit(
    // Only mark the previous unit as completed if:
    // 1. We're not about to re-dispatch the same unit (retry scenario)
    // 2. The expected artifact actually exists on disk
+    // For hook units, skip artifact verification — hooks don't produce standard
+    // artifacts and their runtime records were already finalized in handleAgentEnd.
    const closeoutKey = `${currentUnit.type}/${currentUnit.id}`;
    const incomingKey = `${unitType}/${unitId}`;
-    const artifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
+    const isHookUnit = currentUnit.type.startsWith("hook/");
+    const artifactVerified = isHookUnit || verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
    if (closeoutKey !== incomingKey && artifactVerified) {
-      persistCompletedKey(basePath, closeoutKey);
-      completedKeySet.add(closeoutKey);
+      if (!isHookUnit) {
+        // Only persist completion keys for real units — hook keys are
+        // ephemeral and should not pollute the idempotency set.
+        persistCompletedKey(basePath, closeoutKey);
+        completedKeySet.add(closeoutKey);
+      }

      completedUnits.push({
        type: currentUnit.type,
@ -3772,6 +3829,10 @@ export function verifyExpectedArtifact(unitType: string, unitId: string, base: s
  // Clear stale directory listing cache so artifact checks see fresh disk state (#431)
  clearPathCache();

+  // Hook units have no standard artifact — always pass. Their lifecycle
+  // is managed by the hook engine, not the artifact verification system.
+  if (unitType.startsWith("hook/")) return true;
+
  // fix-merge has no file artifact — verify by checking git state
  if (unitType === "fix-merge") {
    const unmerged = runGit(base, ["diff", "--name-only", "--diff-filter=U"], { allowFailure: true });
--- a/src/resources/extensions/gsd/gitignore.ts
+++ b/src/resources/extensions/gsd/gitignore.ts
@ -23,6 +23,7 @@ const BASELINE_PATTERNS = [
  ".gsd/metrics.json",
  ".gsd/completed-units.json",
  ".gsd/STATE.md",
+  ".gsd/DISCUSSION-MANIFEST.json",

  // ── OS junk ──
  ".DS_Store",
--- a/src/resources/extensions/gsd/guided-flow.ts
+++ b/src/resources/extensions/gsd/guided-flow.ts
@ -50,13 +50,76 @@ export function checkAutoStartAfterDiscuss(): boolean {

  const { ctx, pi, basePath, milestoneId, step } = pendingAutoStart;

-  // Don't fire until the discuss phase has actually produced a context file
-  // for the milestone being discussed. agent_end fires after every LLM turn,
-  // including the initial "What do you want to build?" response — we need to
-  // wait for the full conversation to complete and the LLM to write CONTEXT.md.
+  // Gate 1: Primary milestone must have CONTEXT.md
  const contextFile = resolveMilestoneFile(basePath, milestoneId, "CONTEXT");
  if (!contextFile) return false; // no context yet — keep waiting

+  // Gate 2: STATE.md must exist — written as the last step in the discuss
+  // output phase. This prevents auto-start from firing during Phase 3
+  // (sequential readiness gates for remaining milestones) in multi-milestone
+  // discussions, where M001-CONTEXT.md exists but M002/M003 haven't been
+  // processed yet.
+  const stateFile = resolveGsdRootFile(basePath, "STATE");
+  if (!stateFile) return false; // discussion not finalized yet
+
+  // Gate 3: Multi-milestone completeness warning
+  // Parse PROJECT.md for milestone sequence, warn if any are missing context.
+  // Don't block — milestones can be intentionally queued without context.
+  const projectFile = resolveGsdRootFile(basePath, "PROJECT");
+  if (projectFile) {
+    try {
+      const projectContent = readFileSync(projectFile, "utf-8");
+      const milestoneIds = parseMilestoneSequenceFromProject(projectContent);
+      if (milestoneIds.length > 1) {
+        const missing = milestoneIds.filter(id => {
+          const hasContext = !!resolveMilestoneFile(basePath, id, "CONTEXT");
+          const hasDraft = !!resolveMilestoneFile(basePath, id, "CONTEXT-DRAFT");
+          const hasDir = existsSync(join(basePath, ".gsd", "milestones", id));
+          return !hasContext && !hasDraft && !hasDir;
+        });
+        if (missing.length > 0) {
+          ctx.ui.notify(
+            `Multi-milestone validation: ${missing.join(", ")} not found in filesystem. ` +
+            `Discussion may not have completed all readiness gates.`,
+            "warning",
+          );
+        }
+      }
+    } catch { /* non-fatal — PROJECT.md parsing failure shouldn't block auto-start */ }
+  }
+
+  // Gate 4: Discussion manifest process verification (multi-milestone only)
+  // The LLM writes DISCUSSION-MANIFEST.json after each Phase 3 gate decision.
+  // If the manifest reports total > 1 but gates_completed < total, the LLM hasn't
+  // finished presenting all readiness gates to the user — block auto-start.
+  const manifestPath = join(basePath, ".gsd", "DISCUSSION-MANIFEST.json");
+  if (existsSync(manifestPath)) {
+    try {
+      const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
+      const total = typeof manifest.total === "number" ? manifest.total : 0;
+      const completed = typeof manifest.gates_completed === "number" ? manifest.gates_completed : 0;
+
+      if (total > 1 && completed < total) {
+        // Discussion not complete — block auto-start until all gates are done
+        return false;
+      }
+
+      // Cross-check manifest milestones against PROJECT.md if available
+      if (projectFile) {
+        const projectContent = readFileSync(projectFile, "utf-8");
+        const projectIds = parseMilestoneSequenceFromProject(projectContent);
+        const manifestIds = Object.keys(manifest.milestones ?? {});
+        const untracked = projectIds.filter(id => !manifestIds.includes(id));
+        if (untracked.length > 0) {
+          ctx.ui.notify(
+            `Discussion manifest missing gates for: ${untracked.join(", ")}`,
+            "warning",
+          );
+        }
+      }
+    } catch { /* unreadable or malformed manifest — skip silently, don't block auto-start */ }
+  }
+
  // Draft promotion cleanup: if a CONTEXT-DRAFT.md exists alongside the new
  // CONTEXT.md, delete the draft — it's been consumed by the discussion.
  try {
@ -64,11 +127,28 @@ export function checkAutoStartAfterDiscuss(): boolean {
    if (draftFile) unlinkSync(draftFile);
  } catch { /* non-fatal — stale draft doesn't break anything, CONTEXT.md wins */ }

+  // Cleanup: remove discussion manifest right before auto-start (only needed during discussion)
+  try { unlinkSync(manifestPath); } catch { /* may not exist for single-milestone */ }
+
  pendingAutoStart = null;
  startAuto(ctx, pi, basePath, false, { step }).catch(() => {});
  return true;
 }

+/**
+ * Extract milestone IDs from PROJECT.md milestone sequence table, in document order.
+ * A line starting with "| M001 |" yields "M001"; returns [] when no row matches.
+ */
+function parseMilestoneSequenceFromProject(content: string): string[] {
+  const ids: string[] = [];
+  const lines = content.split(/\r?\n/); // accept both LF and CRLF line endings
+  for (const line of lines) {
+    const match = line.match(/^\|\s*(M\d{3}[A-Z0-9-]*)\s*\|/); // first cell only: "M" + 3 digits + optional [A-Z0-9-] suffix
+    if (match) ids.push(match[1]); // duplicates are kept as-is
+  }
+  return ids;
+}
+
 // ─── Types ────────────────────────────────────────────────────────────────────

 type UIContext = ExtensionContext;
--- a/src/resources/extensions/gsd/prompts/discuss.md
+++ b/src/resources/extensions/gsd/prompts/discuss.md
@ -227,6 +227,27 @@ For each remaining milestone **one at a time, in sequence**, use `ask_user_quest

 Each context file (full or draft) should be rich enough that a future agent encountering it fresh — with no memory of this conversation — can understand the intent, constraints, dependencies, what this milestone unlocks, and what "done" looks like.

+#### Milestone Gate Tracking (MANDATORY for multi-milestone)
+
+After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start.
+
+```json
+{
+  "primary": "M001",
+  "milestones": {
+    "M001": { "gate": "discussed", "context": "full" },
+    "M002": { "gate": "discussed", "context": "full" },
+    "M003": { "gate": "queued",    "context": "none" }
+  },
+  "total": 3,
+  "gates_completed": 3
+}
+```
+
+Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`.
+
+For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions.
+
 #### Phase 4: Finalize

 7. Update `.gsd/STATE.md`
--- a/src/resources/extensions/gsd/state.ts
+++ b/src/resources/extensions/gsd/state.ts
@ -470,7 +470,7 @@ async function _deriveStateImpl(basePath: string): Promise<GSDState> {
  };
  const activeTaskEntry = slicePlan.tasks.find(t => !t.done);

-  if (!activeTaskEntry) {
+  if (!activeTaskEntry && slicePlan.tasks.length > 0) {
    // All tasks done but slice not marked complete
    return {
      activeMilestone,
@ -491,6 +491,27 @@ async function _deriveStateImpl(basePath: string): Promise<GSDState> {
    };
  }

+  // Empty plan — no tasks defined yet, stay in planning phase
+  if (!activeTaskEntry) {
+    return {
+      activeMilestone,
+      activeSlice,
+      activeTask: null,
+      phase: 'planning',
+      recentDecisions: [],
+      blockers: [],
+      nextAction: `Slice ${activeSlice.id} has a plan file but no tasks. Add tasks to the plan.`,
+      activeBranch: activeBranch ?? undefined,
+      registry,
+      requirements,
+      progress: {
+        milestones: milestoneProgress,
+        slices: sliceProgress,
+        tasks: taskProgress,
+      },
+    };
+  }
+
  const activeTask: ActiveRef = {
    id: activeTaskEntry.id,
    title: activeTaskEntry.title,
--- a/src/resources/extensions/gsd/tests/derive-state.test.ts
+++ b/src/resources/extensions/gsd/tests/derive-state.test.ts
@ -651,6 +651,41 @@ Continue from step 2.
    }
  }

+  // ─── Empty plan (zero tasks) stays in planning, not summarizing (#454) ──
+  console.log('\n=== empty plan → planning (not summarizing) ===');
+  {
+    const base = createFixtureBase();
+    try {
+      writeRoadmap(base, 'M001', `---
+id: M001
+title: "Test"
+---
+# M001: Test
+## Vision
+Test
+## Success Criteria
+- Done
+## Slices
+- [ ] **S01: Empty slice** \`risk:low\` \`depends:[]\`
+  > Test
+## Boundary Map
+_None_
+`);
+      writePlan(base, 'M001', 'S01', `---
+slice: S01
+---
+# S01 Plan
+## Tasks
+`);
+      const state = await deriveState(base);
+      assertEq(state.phase, 'planning', 'empty plan stays in planning');
+      assertEq(state.activeSlice?.id, 'S01', 'active slice is S01');
+      assertEq(state.activeTask, null, 'no active task');
+    } finally {
+      cleanup(base);
+    }
+  }
+
  report();
 }

--- a/src/resources/extensions/gsd/tests/draft-promotion.test.ts
+++ b/src/resources/extensions/gsd/tests/draft-promotion.test.ts
@ -145,7 +145,8 @@ const guidedFlowSource = readFileSync(
 );

 const checkFnIdx = guidedFlowSource.indexOf("checkAutoStartAfterDiscuss");
-const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnIdx + 1200);
+const checkFnEnd = guidedFlowSource.indexOf("\nexport ", checkFnIdx + 1);
+const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnEnd > checkFnIdx ? checkFnEnd : checkFnIdx + 5000);

 assert(
  checkFnChunk.includes("CONTEXT-DRAFT"),
--- a/src/resources/extensions/gsd/tests/idle-recovery.test.ts
+++ b/src/resources/extensions/gsd/tests/idle-recovery.test.ts
@ -574,4 +574,25 @@ const ROADMAP_COMPLETE = `# M001: Test Milestone
  }
 }

+// ═══ verifyExpectedArtifact: hook unit types ═════════════════════════════════
+
+console.log("\n=== verifyExpectedArtifact: hook types always return true ===");
+
+{
+  const base = createFixtureBase();
+  try {
+    // Hook units don't have standard artifacts — they should always pass
+    const result1 = verifyExpectedArtifact("hook/code-review", "M001/S01/T01", base);
+    assertTrue(result1, "hook/code-review should always return true");
+
+    const result2 = verifyExpectedArtifact("hook/simplify", "M001/S01/T02", base);
+    assertTrue(result2, "hook/simplify should always return true");
+
+    const result3 = verifyExpectedArtifact("hook/custom-hook", "M001/S01", base);
+    assertTrue(result3, "hook/custom-hook at slice level should return true");
+  } finally {
+    rmSync(base, { recursive: true, force: true });
+  }
+}
+
 report();