From ccb2a08d6776f95509d286924c29cf04eeb3c9ff Mon Sep 17 00:00:00 2001 From: deseltrus Date: Sun, 15 Mar 2026 09:07:55 +0100 Subject: [PATCH 1/5] feat(discuss): harden multi-milestone gates with two-layer enforcement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layer 1 (Prompt): discuss.md now enforces: - Document ingestion rule: read ALL user-provided files before reflection - Mandatory milestone confirmation gate via ask_user_questions - 1M context awareness: prefer discussing all milestones in-session - Phase 3 gates marked MANDATORY with progress tracking - Default-recommend "Discuss now" over "Draft for later" Layer 2 (Code): checkAutoStartAfterDiscuss() now validates: - Gate 1: Primary CONTEXT.md exists - Gate 2: STATE.md exists (written last in Phase 4, prevents premature auto-start during Phase 3 readiness gates) - Gate 3: Multi-milestone completeness check against PROJECT.md milestone sequence — warns if milestones are missing from filesystem Also fixes conflict markers in discuss.md from gsd/M005/S05 merge. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/guided-flow.ts | 53 +++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index 5ad3cc766..f8ddb66ed 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ b/src/resources/extensions/gsd/guided-flow.ts @@ -50,13 +50,44 @@ export function checkAutoStartAfterDiscuss(): boolean { const { ctx, pi, basePath, milestoneId, step } = pendingAutoStart; - // Don't fire until the discuss phase has actually produced a context file - // for the milestone being discussed. agent_end fires after every LLM turn, - // including the initial "What do you want to build?" response — we need to - // wait for the full conversation to complete and the LLM to write CONTEXT.md. + // Gate 1: Primary milestone must have CONTEXT.md const contextFile = resolveMilestoneFile(basePath, milestoneId, "CONTEXT"); if (!contextFile) return false; // no context yet — keep waiting + // Gate 2: STATE.md must exist — written as the last step in the discuss + // output phase. This prevents auto-start from firing during Phase 3 + // (sequential readiness gates for remaining milestones) in multi-milestone + // discussions, where M001-CONTEXT.md exists but M002/M003 haven't been + // processed yet. + const stateFile = resolveGsdRootFile(basePath, "STATE"); + if (!stateFile) return false; // discussion not finalized yet + + // Gate 3: Multi-milestone completeness warning + // Parse PROJECT.md for milestone sequence, warn if any are missing context. + // Don't block — milestones can be intentionally queued without context. + const projectFile = resolveGsdRootFile(basePath, "PROJECT"); + if (projectFile) { + try { + const projectContent = readFileSync(projectFile, "utf-8"); + const milestoneIds = parseMilestoneSequenceFromProject(projectContent); + if (milestoneIds.length > 1) { + const missing = milestoneIds.filter(id => { + const hasContext = !!resolveMilestoneFile(basePath, id, "CONTEXT"); + const hasDraft = !!resolveMilestoneFile(basePath, id, "CONTEXT-DRAFT"); + const hasDir = existsSync(join(basePath, ".gsd", "milestones", id)); + return !hasContext && !hasDraft && !hasDir; + }); + if (missing.length > 0) { + ctx.ui.notify( + `Multi-milestone validation: ${missing.join(", ")} not found in filesystem. ` + + `Discussion may not have completed all readiness gates.`, + "warning", + ); + } + } + } catch { /* non-fatal — PROJECT.md parsing failure shouldn't block auto-start */ } + } + // Draft promotion cleanup: if a CONTEXT-DRAFT.md exists alongside the new // CONTEXT.md, delete the draft — it's been consumed by the discussion. try { @@ -69,6 +100,20 @@ export function checkAutoStartAfterDiscuss(): boolean { return true; } +/** + * Extract milestone IDs from PROJECT.md milestone sequence table. + * Looks for rows like "| M001 | Name | Status |" and extracts the ID column. + */ +function parseMilestoneSequenceFromProject(content: string): string[] { + const ids: string[] = []; + const lines = content.split(/\r?\n/); + for (const line of lines) { + const match = line.match(/^\|\s*(M\d{3}[A-Z0-9-]*)\s*\|/); + if (match) ids.push(match[1]); + } + return ids; +} + // ─── Types ──────────────────────────────────────────────────────────────────── type UIContext = ExtensionContext; From f27ed34fc095dec359fbaf8e55dd8870a734d04b Mon Sep 17 00:00:00 2001 From: deseltrus Date: Sun, 15 Mar 2026 09:14:32 +0100 Subject: [PATCH 2/5] feat(discuss): add discussion manifest for mechanical process verification Closes the remaining gap in multi-milestone enforcement: the code previously validated only the END STATE (files exist) but not the PROCESS (each gate was presented to the user). New mechanism: - discuss.md instructs the LLM to write .gsd/DISCUSSION-MANIFEST.json after EACH Phase 3 gate decision, tracking gates_completed vs total - checkAutoStartAfterDiscuss() Gate 4: BLOCKS auto-start if gates_completed < total (not just a warning) - Manifest is deleted after auto-start (only needed during discussion) - Single-milestone discussions don't use manifest (backward-compatible) - DISCUSSION-MANIFEST.json added to baseline gitignore patterns This creates a three-layer enforcement: Layer 1 (Prompt): ask_user_questions calls at each gate Layer 2 (Files): CONTEXT.md/DRAFT/directory existence check Layer 3 (Manifest): gates_completed == total process verification Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/gitignore.ts | 1 + src/resources/extensions/gsd/guided-flow.ts | 35 +++++++++++++++++++ .../extensions/gsd/prompts/discuss.md | 21 +++++++++++ 3 files changed, 57 insertions(+) diff --git a/src/resources/extensions/gsd/gitignore.ts b/src/resources/extensions/gsd/gitignore.ts index 3d6a52c74..8626bc6af 100644 --- a/src/resources/extensions/gsd/gitignore.ts +++ b/src/resources/extensions/gsd/gitignore.ts @@ -23,6 +23,7 @@ const BASELINE_PATTERNS = [ ".gsd/metrics.json", ".gsd/completed-units.json", ".gsd/STATE.md", + ".gsd/DISCUSSION-MANIFEST.json", // ── OS junk ── ".DS_Store", diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index f8ddb66ed..7dc073fc7 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ b/src/resources/extensions/gsd/guided-flow.ts @@ -88,6 +88,38 @@ export function checkAutoStartAfterDiscuss(): boolean { } catch { /* non-fatal — PROJECT.md parsing failure shouldn't block auto-start */ } } + // Gate 4: Discussion manifest process verification (multi-milestone only) + // The LLM writes DISCUSSION-MANIFEST.json after each Phase 3 gate decision. + // If the manifest exists but gates_completed < total, the LLM hasn't finished + // presenting all readiness gates to the user — block auto-start. + const manifestPath = join(basePath, ".gsd", "DISCUSSION-MANIFEST.json"); + if (existsSync(manifestPath)) { + try { + const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")); + const total = typeof manifest.total === "number" ? manifest.total : 0; + const completed = typeof manifest.gates_completed === "number" ? manifest.gates_completed : 0; + + if (total > 1 && completed < total) { + // Discussion not complete — block auto-start until all gates are done + return false; + } + + // Cross-check manifest milestones against PROJECT.md if available + if (projectFile) { + const projectContent = readFileSync(projectFile, "utf-8"); + const projectIds = parseMilestoneSequenceFromProject(projectContent); + const manifestIds = Object.keys(manifest.milestones ?? {}); + const untracked = projectIds.filter(id => !manifestIds.includes(id)); + if (untracked.length > 0) { + ctx.ui.notify( + `Discussion manifest missing gates for: ${untracked.join(", ")}`, + "warning", + ); + } + } + } catch { /* malformed manifest — warn but don't block */ } + } + // Draft promotion cleanup: if a CONTEXT-DRAFT.md exists alongside the new // CONTEXT.md, delete the draft — it's been consumed by the discussion. try { @@ -95,6 +127,9 @@ export function checkAutoStartAfterDiscuss(): boolean { if (draftFile) unlinkSync(draftFile); } catch { /* non-fatal — stale draft doesn't break anything, CONTEXT.md wins */ } + // Cleanup: remove discussion manifest after auto-start (only needed during discussion) + try { unlinkSync(manifestPath); } catch { /* may not exist for single-milestone */ } + pendingAutoStart = null; startAuto(ctx, pi, basePath, false, { step }).catch(() => {}); return true; diff --git a/src/resources/extensions/gsd/prompts/discuss.md b/src/resources/extensions/gsd/prompts/discuss.md index accdbc8ce..fef9176b8 100644 --- a/src/resources/extensions/gsd/prompts/discuss.md +++ b/src/resources/extensions/gsd/prompts/discuss.md @@ -227,6 +227,27 @@ For each remaining milestone **one at a time, in sequence**, use `ask_user_quest Each context file (full or draft) should be rich enough that a future agent encountering it fresh — with no memory of this conversation — can understand the intent, constraints, dependencies, what this milestone unlocks, and what "done" looks like. +#### Milestone Gate Tracking (MANDATORY for multi-milestone) + +After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start. + +```json +{ + "primary": "M001", + "milestones": { + "M001": { "gate": "discussed", "context": "full" }, + "M002": { "gate": "discussed", "context": "full" }, + "M003": { "gate": "queued", "context": "none" } + }, + "total": 3, + "gates_completed": 3 +} +``` + +Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`. + +For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions. + #### Phase 4: Finalize 7. Update `.gsd/STATE.md` From 3b914033f496cb71fd0dbe2b9f4235a96c4b08cc Mon Sep 17 00:00:00 2001 From: deseltrus Date: Sun, 15 Mar 2026 09:23:13 +0100 Subject: [PATCH 3/5] fix(test): update draft-promotion test for expanded checkAutoStartAfterDiscuss The static assertion searched the first 1200 chars of checkAutoStartAfterDiscuss for CONTEXT-DRAFT and unlinkSync references, but the function grew to 4164 chars after adding Gates 2-4 (STATE.md, PROJECT.md, manifest validation). The search window now extends to the next export statement. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/tests/draft-promotion.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/tests/draft-promotion.test.ts b/src/resources/extensions/gsd/tests/draft-promotion.test.ts index 4ea6f976c..0ce24ed50 100644 --- a/src/resources/extensions/gsd/tests/draft-promotion.test.ts +++ b/src/resources/extensions/gsd/tests/draft-promotion.test.ts @@ -145,7 +145,8 @@ const guidedFlowSource = readFileSync( ); const checkFnIdx = guidedFlowSource.indexOf("checkAutoStartAfterDiscuss"); -const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnIdx + 1200); +const checkFnEnd = guidedFlowSource.indexOf("\nexport ", checkFnIdx + 1); +const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnEnd > checkFnIdx ? checkFnEnd : checkFnIdx + 5000); assert( checkFnChunk.includes("CONTEXT-DRAFT"), From e147b2dfdff019f9cc189000fcf9b01c30e1deff Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Sun, 15 Mar 2026 10:04:57 -0600 Subject: [PATCH 4/5] fix(state): empty slice plan stays in planning, not summarizing (#454) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A plan file with zero tasks caused `find(t => !t.done)` to return undefined, which was treated as "all tasks done" → summarizing phase. Now requires `tasks.length > 0` before entering summarizing. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/state.ts | 23 +++++++++++- .../extensions/gsd/tests/derive-state.test.ts | 35 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 9aa14e85c..6d15b1c5b 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -470,7 +470,7 @@ async function _deriveStateImpl(basePath: string): Promise { }; const activeTaskEntry = slicePlan.tasks.find(t => !t.done); - if (!activeTaskEntry) { + if (!activeTaskEntry && slicePlan.tasks.length > 0) { // All tasks done but slice not marked complete return { activeMilestone, @@ -491,6 +491,27 @@ async function _deriveStateImpl(basePath: string): Promise { }; } + // Empty plan — no tasks defined yet, stay in planning phase + if (!activeTaskEntry) { + return { + activeMilestone, + activeSlice, + activeTask: null, + phase: 'planning', + recentDecisions: [], + blockers: [], + nextAction: `Slice ${activeSlice.id} has a plan file but no tasks. Add tasks to the plan.`, + activeBranch: activeBranch ?? undefined, + registry, + requirements, + progress: { + milestones: milestoneProgress, + slices: sliceProgress, + tasks: taskProgress, + }, + }; + } + const activeTask: ActiveRef = { id: activeTaskEntry.id, title: activeTaskEntry.title, diff --git a/src/resources/extensions/gsd/tests/derive-state.test.ts b/src/resources/extensions/gsd/tests/derive-state.test.ts index b750bd058..6c97d31c0 100644 --- a/src/resources/extensions/gsd/tests/derive-state.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state.test.ts @@ -651,6 +651,41 @@ Continue from step 2. } } + // ─── Empty plan (zero tasks) stays in planning, not summarizing (#454) ── + console.log('\n=== empty plan → planning (not summarizing) ==='); + { + const base = createFixtureBase(); + try { + writeRoadmap(base, 'M001', `--- +id: M001 +title: "Test" +--- +# M001: Test +## Vision +Test +## Success Criteria +- Done +## Slices +- [ ] **S01: Empty slice** \`risk:low\` \`depends:[]\` + > Test +## Boundary Map +_None_ +`); + writePlan(base, 'M001', 'S01', `--- +slice: S01 +--- +# S01 Plan +## Tasks +`); + const state = await deriveState(base); + assertEq(state.phase, 'planning', 'empty plan stays in planning'); + assertEq(state.activeSlice?.id, 'S01', 'active slice is S01'); + assertEq(state.activeTask, null, 'no active task'); + } finally { + cleanup(base); + } + } + report(); } From 94336dd445e3fcb46f6c9a3562a882f8a5d5f5b7 Mon Sep 17 00:00:00 2001 From: Flux Labs Date: Sun, 15 Mar 2026 12:14:30 -0500 Subject: [PATCH 5/5] =?UTF-8?q?fix:=20hook=20orchestration=20=E2=80=94=20f?= =?UTF-8?q?inalize=20runtime=20records,=20add=20supervision,=20fix=20retry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hooks were dispatched (runtime record created with phase="dispatched") but never properly tracked through completion. Four issues fixed: 1. Hook runtime records now finalized: handleAgentEnd writes phase="finalized" and clears the record when a hook completes. Previously records stayed at "dispatched" forever because verifyExpectedArtifact returned false for hook types. 2. Supervision timer for hooks: hook dispatch now sets a hard timeout so stuck hooks don't hang auto-mode indefinitely. 3. Hook retry removes completion key: when a hook requests retry via retry_on, the trigger unit's completion key is removed from the idempotency set so dispatchNextUnit will re-dispatch it. 4. Hook closeout in dispatchNextUnit: hook units are properly closed out (pushed to completedUnits, runtime cleared) without polluting the idempotency set. verifyExpectedArtifact returns true for hook/ types. Fixes #140 (comment 4063396798) --- src/resources/extensions/gsd/auto.ts | 85 ++++++++++++++++--- .../gsd/tests/idle-recovery.test.ts | 21 +++++ 2 files changed, 94 insertions(+), 12 deletions(-) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index ba866014d..3dfe517a0 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -939,17 +939,37 @@ export async function handleAgentEnd( // produced its expected artifact. If so, persist the completion key now so the // idempotency check at the top of dispatchNextUnit() skips it — even if // deriveState() still returns this unit as active (e.g. branch mismatch). - try { - if (verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath)) { - const completionKey = `${currentUnit.type}/${currentUnit.id}`; - if (!completedKeySet.has(completionKey)) { - persistCompletedKey(basePath, completionKey); - completedKeySet.add(completionKey); + // + // IMPORTANT: For non-hook units, defer persistence until after the hook check. + // If a post-unit hook requests a retry, we need to remove the completion key + // so dispatchNextUnit re-dispatches the trigger unit. + let triggerArtifactVerified = false; + if (!currentUnit.type.startsWith("hook/")) { + try { + triggerArtifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath); + if (triggerArtifactVerified) { + const completionKey = `${currentUnit.type}/${currentUnit.id}`; + if (!completedKeySet.has(completionKey)) { + persistCompletedKey(basePath, completionKey); + completedKeySet.add(completionKey); + } + invalidateStateCache(); } - invalidateStateCache(); + } catch { + // Non-fatal — worst case we fall through to normal dispatch which has its own checks + } + } else { + // Hook unit completed — finalize its runtime record and clear it + try { + writeUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, { + phase: "finalized", + progressCount: 1, + lastProgressKind: "hook-completed", + }); + clearUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id); + } catch { + // Non-fatal } - } catch { - // Non-fatal — worst case we fall through to normal dispatch which has its own checks } } @@ -1005,6 +1025,31 @@ export async function handleAgentEnd( writeLock(basePath, hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile); // Persist hook state so cycle counts survive crashes persistHookState(basePath); + + // Start supervision timers for hook units — hooks can get stuck just + // like normal units, and without a watchdog auto-mode would hang forever. + clearUnitTimeout(); + const supervisor = resolveAutoSupervisorConfig(); + const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000; + unitTimeoutHandle = setTimeout(async () => { + unitTimeoutHandle = null; + if (!active) return; + if (currentUnit) { + writeUnitRuntimeRecord(basePath, hookUnit.unitType, hookUnit.unitId, currentUnit.startedAt, { + phase: "timeout", + timeoutAt: Date.now(), + }); + } + ctx.ui.notify( + `Hook ${hookUnit.hookName} exceeded ${supervisor.hard_timeout_minutes ?? 30}min timeout. Pausing auto-mode.`, + "warning", + ); + resetHookState(); + await pauseAuto(ctx, pi); + }, hookHardTimeoutMs); + + // Guard against race with timeout/pause before sending + if (!active) return; pi.sendMessage( { customType: "gsd-auto", content: hookUnit.prompt, display: verbose }, { triggerTurn: true }, @@ -1016,6 +1061,11 @@ export async function handleAgentEnd( if (isRetryPending()) { const trigger = consumeRetryTrigger(); if (trigger) { + // Remove the trigger unit's completion key so dispatchNextUnit + // will re-dispatch it instead of skipping it as already-complete. + const triggerKey = `${trigger.unitType}/${trigger.unitId}`; + completedKeySet.delete(triggerKey); + removePersistedKey(basePath, triggerKey); ctx.ui.notify( `Hook requested retry of ${trigger.unitType} ${trigger.unitId}.`, "info", @@ -2207,12 +2257,19 @@ async function dispatchNextUnit( // Only mark the previous unit as completed if: // 1. We're not about to re-dispatch the same unit (retry scenario) // 2. The expected artifact actually exists on disk + // For hook units, skip artifact verification — hooks don't produce standard + // artifacts and their runtime records were already finalized in handleAgentEnd. const closeoutKey = `${currentUnit.type}/${currentUnit.id}`; const incomingKey = `${unitType}/${unitId}`; - const artifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath); + const isHookUnit = currentUnit.type.startsWith("hook/"); + const artifactVerified = isHookUnit || verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath); if (closeoutKey !== incomingKey && artifactVerified) { - persistCompletedKey(basePath, closeoutKey); - completedKeySet.add(closeoutKey); + if (!isHookUnit) { + // Only persist completion keys for real units — hook keys are + // ephemeral and should not pollute the idempotency set. + persistCompletedKey(basePath, closeoutKey); + completedKeySet.add(closeoutKey); + } completedUnits.push({ type: currentUnit.type, @@ -3772,6 +3829,10 @@ export function verifyExpectedArtifact(unitType: string, unitId: string, base: s // Clear stale directory listing cache so artifact checks see fresh disk state (#431) clearPathCache(); + // Hook units have no standard artifact — always pass. Their lifecycle + // is managed by the hook engine, not the artifact verification system. + if (unitType.startsWith("hook/")) return true; + // fix-merge has no file artifact — verify by checking git state if (unitType === "fix-merge") { const unmerged = runGit(base, ["diff", "--name-only", "--diff-filter=U"], { allowFailure: true }); diff --git a/src/resources/extensions/gsd/tests/idle-recovery.test.ts b/src/resources/extensions/gsd/tests/idle-recovery.test.ts index 60d952c27..4f63dcb99 100644 --- a/src/resources/extensions/gsd/tests/idle-recovery.test.ts +++ b/src/resources/extensions/gsd/tests/idle-recovery.test.ts @@ -574,4 +574,25 @@ const ROADMAP_COMPLETE = `# M001: Test Milestone } } +// ═══ verifyExpectedArtifact: hook unit types ═════════════════════════════════ + +console.log("\n=== verifyExpectedArtifact: hook types always return true ==="); + +{ + const base = createFixtureBase(); + try { + // Hook units don't have standard artifacts — they should always pass + const result1 = verifyExpectedArtifact("hook/code-review", "M001/S01/T01", base); + assertTrue(result1, "hook/code-review should always return true"); + + const result2 = verifyExpectedArtifact("hook/simplify", "M001/S01/T02", base); + assertTrue(result2, "hook/simplify should always return true"); + + const result3 = verifyExpectedArtifact("hook/custom-hook", "M001/S01", base); + assertTrue(result3, "hook/custom-hook at slice level should return true"); + } finally { + rmSync(base, { recursive: true, force: true }); + } +} + report();