Merge branch 'main' into fix/gsd-extension-ctx-log

This commit is contained in:
Flux Labs 2026-03-15 12:51:42 -05:00 committed by GitHub
commit 314c134962
8 changed files with 259 additions and 18 deletions

View file

@ -939,17 +939,37 @@ export async function handleAgentEnd(
// produced its expected artifact. If so, persist the completion key now so the
// idempotency check at the top of dispatchNextUnit() skips it — even if
// deriveState() still returns this unit as active (e.g. branch mismatch).
try {
if (verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath)) {
const completionKey = `${currentUnit.type}/${currentUnit.id}`;
if (!completedKeySet.has(completionKey)) {
persistCompletedKey(basePath, completionKey);
completedKeySet.add(completionKey);
//
// IMPORTANT: For non-hook units, defer persistence until after the hook check.
// If a post-unit hook requests a retry, we need to remove the completion key
// so dispatchNextUnit re-dispatches the trigger unit.
let triggerArtifactVerified = false;
if (!currentUnit.type.startsWith("hook/")) {
try {
triggerArtifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
if (triggerArtifactVerified) {
const completionKey = `${currentUnit.type}/${currentUnit.id}`;
if (!completedKeySet.has(completionKey)) {
persistCompletedKey(basePath, completionKey);
completedKeySet.add(completionKey);
}
invalidateStateCache();
}
invalidateStateCache();
} catch {
// Non-fatal — worst case we fall through to normal dispatch which has its own checks
}
} else {
// Hook unit completed — finalize its runtime record and clear it
try {
writeUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, {
phase: "finalized",
progressCount: 1,
lastProgressKind: "hook-completed",
});
clearUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id);
} catch {
// Non-fatal
}
} catch {
// Non-fatal — worst case we fall through to normal dispatch which has its own checks
}
}
@ -1005,6 +1025,31 @@ export async function handleAgentEnd(
writeLock(basePath, hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
// Persist hook state so cycle counts survive crashes
persistHookState(basePath);
// Start supervision timers for hook units — hooks can get stuck just
// like normal units, and without a watchdog auto-mode would hang forever.
clearUnitTimeout();
const supervisor = resolveAutoSupervisorConfig();
const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
unitTimeoutHandle = setTimeout(async () => {
unitTimeoutHandle = null;
if (!active) return;
if (currentUnit) {
writeUnitRuntimeRecord(basePath, hookUnit.unitType, hookUnit.unitId, currentUnit.startedAt, {
phase: "timeout",
timeoutAt: Date.now(),
});
}
ctx.ui.notify(
`Hook ${hookUnit.hookName} exceeded ${supervisor.hard_timeout_minutes ?? 30}min timeout. Pausing auto-mode.`,
"warning",
);
resetHookState();
await pauseAuto(ctx, pi);
}, hookHardTimeoutMs);
// Guard against race with timeout/pause before sending
if (!active) return;
pi.sendMessage(
{ customType: "gsd-auto", content: hookUnit.prompt, display: verbose },
{ triggerTurn: true },
@ -1016,6 +1061,11 @@ export async function handleAgentEnd(
if (isRetryPending()) {
const trigger = consumeRetryTrigger();
if (trigger) {
// Remove the trigger unit's completion key so dispatchNextUnit
// will re-dispatch it instead of skipping it as already-complete.
const triggerKey = `${trigger.unitType}/${trigger.unitId}`;
completedKeySet.delete(triggerKey);
removePersistedKey(basePath, triggerKey);
ctx.ui.notify(
`Hook requested retry of ${trigger.unitType} ${trigger.unitId}.`,
"info",
@ -2207,12 +2257,19 @@ async function dispatchNextUnit(
// Only mark the previous unit as completed if:
// 1. We're not about to re-dispatch the same unit (retry scenario)
// 2. The expected artifact actually exists on disk
// For hook units, skip artifact verification — hooks don't produce standard
// artifacts and their runtime records were already finalized in handleAgentEnd.
const closeoutKey = `${currentUnit.type}/${currentUnit.id}`;
const incomingKey = `${unitType}/${unitId}`;
const artifactVerified = verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
const isHookUnit = currentUnit.type.startsWith("hook/");
const artifactVerified = isHookUnit || verifyExpectedArtifact(currentUnit.type, currentUnit.id, basePath);
if (closeoutKey !== incomingKey && artifactVerified) {
persistCompletedKey(basePath, closeoutKey);
completedKeySet.add(closeoutKey);
if (!isHookUnit) {
// Only persist completion keys for real units — hook keys are
// ephemeral and should not pollute the idempotency set.
persistCompletedKey(basePath, closeoutKey);
completedKeySet.add(closeoutKey);
}
completedUnits.push({
type: currentUnit.type,
@ -3772,6 +3829,10 @@ export function verifyExpectedArtifact(unitType: string, unitId: string, base: s
// Clear stale directory listing cache so artifact checks see fresh disk state (#431)
clearPathCache();
// Hook units have no standard artifact — always pass. Their lifecycle
// is managed by the hook engine, not the artifact verification system.
if (unitType.startsWith("hook/")) return true;
// fix-merge has no file artifact — verify by checking git state
if (unitType === "fix-merge") {
const unmerged = runGit(base, ["diff", "--name-only", "--diff-filter=U"], { allowFailure: true });

View file

@ -23,6 +23,7 @@ const BASELINE_PATTERNS = [
".gsd/metrics.json",
".gsd/completed-units.json",
".gsd/STATE.md",
".gsd/DISCUSSION-MANIFEST.json",
// ── OS junk ──
".DS_Store",

View file

@ -50,13 +50,76 @@ export function checkAutoStartAfterDiscuss(): boolean {
const { ctx, pi, basePath, milestoneId, step } = pendingAutoStart;
// Don't fire until the discuss phase has actually produced a context file
// for the milestone being discussed. agent_end fires after every LLM turn,
// including the initial "What do you want to build?" response — we need to
// wait for the full conversation to complete and the LLM to write CONTEXT.md.
// Gate 1: Primary milestone must have CONTEXT.md
const contextFile = resolveMilestoneFile(basePath, milestoneId, "CONTEXT");
if (!contextFile) return false; // no context yet — keep waiting
// Gate 2: STATE.md must exist — written as the last step in the discuss
// output phase. This prevents auto-start from firing during Phase 3
// (sequential readiness gates for remaining milestones) in multi-milestone
// discussions, where M001-CONTEXT.md exists but M002/M003 haven't been
// processed yet.
const stateFile = resolveGsdRootFile(basePath, "STATE");
if (!stateFile) return false; // discussion not finalized yet
// Gate 3: Multi-milestone completeness warning
// Parse PROJECT.md for milestone sequence, warn if any are missing context.
// Don't block — milestones can be intentionally queued without context.
const projectFile = resolveGsdRootFile(basePath, "PROJECT");
if (projectFile) {
try {
const projectContent = readFileSync(projectFile, "utf-8");
const milestoneIds = parseMilestoneSequenceFromProject(projectContent);
if (milestoneIds.length > 1) {
const missing = milestoneIds.filter(id => {
const hasContext = !!resolveMilestoneFile(basePath, id, "CONTEXT");
const hasDraft = !!resolveMilestoneFile(basePath, id, "CONTEXT-DRAFT");
const hasDir = existsSync(join(basePath, ".gsd", "milestones", id));
return !hasContext && !hasDraft && !hasDir;
});
if (missing.length > 0) {
ctx.ui.notify(
`Multi-milestone validation: ${missing.join(", ")} not found in filesystem. ` +
`Discussion may not have completed all readiness gates.`,
"warning",
);
}
}
} catch { /* non-fatal — PROJECT.md parsing failure shouldn't block auto-start */ }
}
// Gate 4: Discussion manifest process verification (multi-milestone only)
// The LLM writes DISCUSSION-MANIFEST.json after each Phase 3 gate decision.
// If the manifest exists but gates_completed < total, the LLM hasn't finished
// presenting all readiness gates to the user — block auto-start.
const manifestPath = join(basePath, ".gsd", "DISCUSSION-MANIFEST.json");
if (existsSync(manifestPath)) {
try {
const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
const total = typeof manifest.total === "number" ? manifest.total : 0;
const completed = typeof manifest.gates_completed === "number" ? manifest.gates_completed : 0;
if (total > 1 && completed < total) {
// Discussion not complete — block auto-start until all gates are done
return false;
}
// Cross-check manifest milestones against PROJECT.md if available
if (projectFile) {
const projectContent = readFileSync(projectFile, "utf-8");
const projectIds = parseMilestoneSequenceFromProject(projectContent);
const manifestIds = Object.keys(manifest.milestones ?? {});
const untracked = projectIds.filter(id => !manifestIds.includes(id));
if (untracked.length > 0) {
ctx.ui.notify(
`Discussion manifest missing gates for: ${untracked.join(", ")}`,
"warning",
);
}
}
} catch { /* malformed manifest — warn but don't block */ }
}
// Draft promotion cleanup: if a CONTEXT-DRAFT.md exists alongside the new
// CONTEXT.md, delete the draft — it's been consumed by the discussion.
try {
@ -64,11 +127,28 @@ export function checkAutoStartAfterDiscuss(): boolean {
if (draftFile) unlinkSync(draftFile);
} catch { /* non-fatal — stale draft doesn't break anything, CONTEXT.md wins */ }
// Cleanup: remove discussion manifest after auto-start (only needed during discussion)
try { unlinkSync(manifestPath); } catch { /* may not exist for single-milestone */ }
pendingAutoStart = null;
startAuto(ctx, pi, basePath, false, { step }).catch(() => {});
return true;
}
/**
 * Extract milestone IDs from the PROJECT.md milestone sequence table.
 *
 * Scans `content` line by line for table rows whose first cell is a milestone
 * ID, e.g. "| M001 | Name | Status |", and collects that first cell.
 *
 * An ID that appears in more than one row (for example when the same
 * milestone is listed in a second table) is reported once, at the position of
 * its first occurrence, so callers that count the result or print it never see
 * duplicates.
 *
 * @param content - Raw text of PROJECT.md; LF and CRLF line endings are both accepted.
 * @returns Unique milestone IDs in order of first appearance; empty when no row matches.
 */
function parseMilestoneSequenceFromProject(content: string): string[] {
  // A Set preserves insertion order, giving first-seen ordering plus de-duplication.
  const seen = new Set<string>();
  for (const line of content.split(/\r?\n/)) {
    // The row must open with "|" at column 0, followed by an ID such as M001 or M001-A.
    const id = /^\|\s*(M\d{3}[A-Z0-9-]*)\s*\|/.exec(line)?.[1];
    if (id !== undefined) seen.add(id);
  }
  return [...seen];
}
// ─── Types ────────────────────────────────────────────────────────────────────
type UIContext = ExtensionContext;

View file

@ -227,6 +227,27 @@ For each remaining milestone **one at a time, in sequence**, use `ask_user_quest
Each context file (full or draft) should be rich enough that a future agent encountering it fresh — with no memory of this conversation — can understand the intent, constraints, dependencies, what this milestone unlocks, and what "done" looks like.
#### Milestone Gate Tracking (MANDATORY for multi-milestone)
After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start.
```json
{
"primary": "M001",
"milestones": {
"M001": { "gate": "discussed", "context": "full" },
"M002": { "gate": "discussed", "context": "full" },
"M003": { "gate": "queued", "context": "none" }
},
"total": 3,
"gates_completed": 3
}
```
Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`.
For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions.
#### Phase 4: Finalize
7. Update `.gsd/STATE.md`

View file

@ -470,7 +470,7 @@ async function _deriveStateImpl(basePath: string): Promise<GSDState> {
};
const activeTaskEntry = slicePlan.tasks.find(t => !t.done);
if (!activeTaskEntry) {
if (!activeTaskEntry && slicePlan.tasks.length > 0) {
// All tasks done but slice not marked complete
return {
activeMilestone,
@ -491,6 +491,27 @@ async function _deriveStateImpl(basePath: string): Promise<GSDState> {
};
}
// Empty plan — no tasks defined yet, stay in planning phase
if (!activeTaskEntry) {
return {
activeMilestone,
activeSlice,
activeTask: null,
phase: 'planning',
recentDecisions: [],
blockers: [],
nextAction: `Slice ${activeSlice.id} has a plan file but no tasks. Add tasks to the plan.`,
activeBranch: activeBranch ?? undefined,
registry,
requirements,
progress: {
milestones: milestoneProgress,
slices: sliceProgress,
tasks: taskProgress,
},
};
}
const activeTask: ActiveRef = {
id: activeTaskEntry.id,
title: activeTaskEntry.title,

View file

@ -651,6 +651,41 @@ Continue from step 2.
}
}
// ─── Empty plan (zero tasks) stays in planning, not summarizing (#454) ──
// Scenario: a slice has a plan file whose "## Tasks" section lists nothing.
// The assertions below expect deriveState() to report the 'planning' phase
// for that slice rather than treating it as finished work.
console.log('\n=== empty plan → planning (not summarizing) ===');
{
const base = createFixtureBase();
try {
// Roadmap fixture: milestone M001 with a single unchecked slice, S01.
writeRoadmap(base, 'M001', `---
id: M001
title: "Test"
---
# M001: Test
## Vision
Test
## Success Criteria
- Done
## Slices
- [ ] **S01: Empty slice** \`risk:low\` \`depends:[]\`
> Test
## Boundary Map
_None_
`);
// Plan fixture for S01: the Tasks heading is present but has no entries.
writePlan(base, 'M001', 'S01', `---
slice: S01
---
# S01 Plan
## Tasks
`);
const state = await deriveState(base);
// Expected: still planning, S01 is the active slice, and no task is active.
assertEq(state.phase, 'planning', 'empty plan stays in planning');
assertEq(state.activeSlice?.id, 'S01', 'active slice is S01');
assertEq(state.activeTask, null, 'no active task');
} finally {
// Always remove the fixture, even when an assertion above throws.
cleanup(base);
}
}
report();
}

View file

@ -145,7 +145,8 @@ const guidedFlowSource = readFileSync(
);
const checkFnIdx = guidedFlowSource.indexOf("checkAutoStartAfterDiscuss");
const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnIdx + 1200);
const checkFnEnd = guidedFlowSource.indexOf("\nexport ", checkFnIdx + 1);
const checkFnChunk = guidedFlowSource.slice(checkFnIdx, checkFnEnd > checkFnIdx ? checkFnEnd : checkFnIdx + 5000);
assert(
checkFnChunk.includes("CONTEXT-DRAFT"),

View file

@ -574,4 +574,25 @@ const ROADMAP_COMPLETE = `# M001: Test Milestone
}
}
// ═══ verifyExpectedArtifact: hook unit types ═════════════════════════════════
console.log("\n=== verifyExpectedArtifact: hook types always return true ===");
{
  const fixtureRoot = createFixtureBase();
  // Hook units have no standard artifact, so every case is expected to pass.
  // Each entry is [unit type, unit id, assertion message].
  const hookCases: ReadonlyArray<readonly [string, string, string]> = [
    ["hook/code-review", "M001/S01/T01", "hook/code-review should always return true"],
    ["hook/simplify", "M001/S01/T02", "hook/simplify should always return true"],
    ["hook/custom-hook", "M001/S01", "hook/custom-hook at slice level should return true"],
  ];
  try {
    for (const [unitType, unitId, message] of hookCases) {
      assertTrue(verifyExpectedArtifact(unitType, unitId, fixtureRoot), message);
    }
  } finally {
    // Remove the fixture directory whether or not an assertion threw.
    rmSync(fixtureRoot, { recursive: true, force: true });
  }
}
report();