fix(gsd): reactive batch verification + dependency-based carry-forward (#1549)

* fix(gsd): batch-specific artifact verification for reactive-execute

The reactive-execute artifact verifier previously checked only that
'at least one task summary exists' in the slice. This meant the unit
could report success even when none of the dispatched tasks actually
completed — a pre-existing T01 summary would satisfy the check.

Fix:
- Encode dispatched task IDs in the unitId: M001/S01/reactive+T02,T03
- Persist dispatched batch in ReactiveExecutionState before dispatch
- Verify each dispatched task's summary file exists individually
- Legacy unitId format (no +batch suffix) falls back to old behavior

The verifier now answers 'did the tasks we dispatched actually finish?'
instead of 'does any summary exist?'

Added ReactiveExecutionState.dispatched field to track the batch.
5 new tests covering: all-pass, partial-fail, pre-existing-irrelevant,
legacy fallback, and unitId round-trip encoding.

* fix(gsd): dependency-based carry-forward for reactive task execution

In reactive mode, each subagent task was getting order-based carry-forward
(all prior task summaries by number), not dependency-based. T05 depending
only on T02 would still receive T01, T03, T04 summaries — noise context
that wastes tokens and could confuse execution.

Fix:
- Add getDependencyTaskSummaryPaths() — returns only summaries for tasks
  in the derived dependsOn set, falling back to order-based for root tasks
  with no dependencies (preserves continuity)
- Add ExecuteTaskPromptOptions with carryForwardPaths override
- buildExecuteTaskPrompt accepts optional override, sequential callers
  unchanged (no options = order-based, backward compatible)
- buildReactiveExecutePrompt now passes dependency-scoped paths per task

Sequential execute-task dispatch is completely unchanged — the new code
path only activates when carryForwardPaths is explicitly provided.

3 new tests: dependency-only filtering, root task fallback, missing
dependency summary handling.
This commit is contained in:
TÂCHES 2026-03-19 23:59:03 -06:00 committed by GitHub
parent 596b941475
commit 1bd53a4c87
6 changed files with 253 additions and 14 deletions

View file

@ -367,10 +367,25 @@ const DISPATCH_RULES: DispatchRule[] = [
`ready:${metrics.readySetSize} dispatching:${selected.length} ambiguous:${metrics.ambiguous}\n`,
);
// Persist dispatched batch so verification and recovery can check
// exactly which tasks were sent.
const { saveReactiveState } = await import("./reactive-graph.js");
saveReactiveState(basePath, mid, sid, {
sliceId: sid,
completed: [...completed],
dispatched: selected,
graphSnapshot: metrics,
updatedAt: new Date().toISOString(),
});
// Encode selected task IDs in unitId for artifact verification.
// Format: M001/S01/reactive+T02,T03
const batchSuffix = selected.join(",");
return {
action: "dispatch",
unitType: "reactive-execute",
unitId: `${mid}/${sid}/reactive`,
unitId: `${mid}/${sid}/reactive+${batchSuffix}`,
prompt: await buildReactiveExecutePrompt(
mid,
midTitle,

View file

@ -485,6 +485,41 @@ export async function getPriorTaskSummaryPaths(
.map(f => `${sRel}/tasks/${f}`);
}
/**
* Get carry-forward summary paths scoped to a task's derived dependencies.
*
* Instead of all prior tasks (order-based), returns only summaries for task
* IDs in `dependsOn`. Used by reactive-execute to give each subagent only
* the context it actually needs not sibling tasks from a parallel batch.
*
* Falls back to order-based when dependsOn is empty (root tasks still get
* any available prior summaries for continuity).
*/
export async function getDependencyTaskSummaryPaths(
mid: string, sid: string, currentTid: string,
dependsOn: string[], base: string,
): Promise<string[]> {
// If no dependencies, fall back to order-based for root tasks
if (dependsOn.length === 0) {
return getPriorTaskSummaryPaths(mid, sid, currentTid, base);
}
const tDir = resolveTasksDir(base, mid, sid);
if (!tDir) return [];
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
const sRel = relSlicePath(base, mid, sid);
const depSet = new Set(dependsOn.map((d) => d.toUpperCase()));
return summaryFiles
.filter((f) => {
// Extract task ID from filename: "T02-SUMMARY.md" → "T02"
const tid = f.replace(/-SUMMARY\.md$/i, "").toUpperCase();
return depSet.has(tid);
})
.map((f) => `${sRel}/tasks/${f}`);
}
// ─── Adaptive Replanning Checks ────────────────────────────────────────────
/**
@ -772,13 +807,24 @@ export async function buildPlanSlicePrompt(
});
}
/** Options for customizing execute-task prompt construction. */
export interface ExecuteTaskPromptOptions {
level?: InlineLevel;
/** Override carry-forward paths (dependency-based instead of order-based). */
carryForwardPaths?: string[];
}
export async function buildExecuteTaskPrompt(
mid: string, sid: string, sTitle: string,
tid: string, tTitle: string, base: string, level?: InlineLevel,
tid: string, tTitle: string, base: string,
level?: InlineLevel | ExecuteTaskPromptOptions,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const opts: ExecuteTaskPromptOptions = typeof level === "object" && level !== null && !Array.isArray(level)
? level
: { level: level as InlineLevel | undefined };
const inlineLevel = opts.level ?? resolveInlineLevel();
const priorSummaries = await getPriorTaskSummaryPaths(mid, sid, tid, base);
const priorSummaries = opts.carryForwardPaths ?? await getPriorTaskSummaryPaths(mid, sid, tid, base);
const priorLines = priorSummaries.length > 0
? priorSummaries.map(p => `- \`${p}\``).join("\n")
: "- (no prior tasks)";
@ -1272,8 +1318,16 @@ export async function buildReactiveExecutePrompt(
const tTitle = node?.title ?? tid;
readyTaskListLines.push(`- **${tid}: ${tTitle}**`);
// Build a full execute-task prompt for this task (reuse existing builder)
const taskPrompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base);
// Build dependency-scoped carry-forward paths for this task
const depPaths = await getDependencyTaskSummaryPaths(
mid, sid, tid, node?.dependsOn ?? [], base,
);
// Build a full execute-task prompt with dependency-based carry-forward
const taskPrompt = await buildExecuteTaskPrompt(
mid, sid, sTitle, tid, tTitle, base,
{ carryForwardPaths: depPaths },
);
subagentSections.push([
`### ${tid}: ${tTitle}`,

View file

@ -152,18 +152,42 @@ export function verifyExpectedArtifact(
return !content.includes("**Scope:** active");
}
// Reactive-execute: verify that at least one new task summary was written.
// The unitId is "{mid}/{sid}/reactive" — extract mid and sid to check.
// Reactive-execute: verify that each dispatched task's summary exists.
// The unitId encodes the batch: "{mid}/{sid}/reactive+T02,T03"
if (unitType === "reactive-execute") {
const parts = unitId.split("/");
const mid = parts[0];
const sid = parts[1];
if (!mid || !sid) return false;
const sidAndBatch = parts[1];
const batchPart = parts[2]; // "reactive+T02,T03"
if (!mid || !sidAndBatch || !batchPart) return false;
const sid = sidAndBatch;
const plusIdx = batchPart.indexOf("+");
if (plusIdx === -1) {
// Legacy format "reactive" without batch IDs — fall back to "any summary"
const tDir = resolveTasksDir(base, mid, sid);
if (!tDir) return false;
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
return summaryFiles.length > 0;
}
const batchIds = batchPart.slice(plusIdx + 1).split(",").filter(Boolean);
if (batchIds.length === 0) return false;
const tDir = resolveTasksDir(base, mid, sid);
if (!tDir) return false;
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
// At least one summary file should exist
return summaryFiles.length > 0;
const existingSummaries = new Set(
resolveTaskFiles(tDir, "SUMMARY").map((f) =>
f.replace(/-SUMMARY\.md$/i, "").toUpperCase(),
),
);
// Every dispatched task must have a summary file
for (const tid of batchIds) {
if (!existingSummaries.has(tid.toUpperCase())) return false;
}
return true;
}
const absPath = resolveExpectedArtifactPath(unitType, unitId, base);

View file

@ -245,7 +245,7 @@ function reactiveStatePath(basePath: string, mid: string, sid: string): string {
function isReactiveState(data: unknown): data is ReactiveExecutionState {
if (!data || typeof data !== "object") return false;
const d = data as Record<string, unknown>;
return typeof d.sliceId === "string" && Array.isArray(d.completed);
return typeof d.sliceId === "string" && Array.isArray(d.completed) && Array.isArray(d.dispatched);
}
/**

View file

@ -266,6 +266,7 @@ test("saveReactiveState and loadReactiveState round-trip", () => {
const state: ReactiveExecutionState = {
sliceId: "S01",
completed: ["T01", "T02"],
dispatched: ["T03"],
graphSnapshot: { taskCount: 4, edgeCount: 2, readySetSize: 1, ambiguous: false },
updatedAt: "2025-01-01T00:00:00Z",
};
@ -285,6 +286,7 @@ test("clearReactiveState removes the file", () => {
const state: ReactiveExecutionState = {
sliceId: "S01",
completed: [],
dispatched: ["T01", "T02"],
graphSnapshot: { taskCount: 2, edgeCount: 0, readySetSize: 2, ambiguous: false },
updatedAt: "2025-01-01T00:00:00Z",
};
@ -365,3 +367,145 @@ test("completed tasks are not re-dispatched on next iteration", async () => {
rmSync(repo, { recursive: true, force: true });
}
});
// ─── Batch Verification ───────────────────────────────────────────────────
test("verifyExpectedArtifact: reactive-execute passes when all dispatched summaries exist", async () => {
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-pass-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02: Done\n");
writeFileSync(join(tasksDir, "T03-SUMMARY.md"), "---\nid: T03\n---\n# T03: Done\n");
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
assert.equal(result, true, "Should pass when all dispatched task summaries exist");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("verifyExpectedArtifact: reactive-execute fails when a dispatched summary is missing", async () => {
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-fail-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
// Only T02 has a summary, T03 does not
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02: Done\n");
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
assert.equal(result, false, "Should fail when dispatched task T03 summary is missing");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("verifyExpectedArtifact: reactive-execute fails even with pre-existing summaries from other tasks", async () => {
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-preexisting-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
// T01 summary exists from before, but T02 and T03 were dispatched
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01: Prior\n");
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
assert.equal(result, false, "Pre-existing T01 summary should not satisfy T02,T03 batch");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("verifyExpectedArtifact: reactive-execute legacy format (no batch IDs) falls back", async () => {
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-legacy-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
// Legacy format without +batch suffix
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive", repo);
assert.equal(result, true, "Legacy format should fall back to any-summary check");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("unitId batch encoding round-trips correctly", () => {
const mid = "M001";
const sid = "S01";
const selected = ["T02", "T03", "T05"];
const unitId = `${mid}/${sid}/reactive+${selected.join(",")}`;
// Parse it back
const parts = unitId.split("/");
assert.equal(parts[0], "M001");
assert.equal(parts[1], "S01");
const batchPart = parts[2];
const plusIdx = batchPart.indexOf("+");
assert.ok(plusIdx > 0, "Should have + separator");
const batchIds = batchPart.slice(plusIdx + 1).split(",");
assert.deepEqual(batchIds, ["T02", "T03", "T05"]);
});
// ─── Dependency-Based Carry-Forward ───────────────────────────────────────
test("getDependencyTaskSummaryPaths returns only dependency summaries", async () => {
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
// T01, T02, T03 all have summaries
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02\n");
writeFileSync(join(tasksDir, "T03-SUMMARY.md"), "---\nid: T03\n---\n# T03\n");
// T04 depends only on T01 and T03 — should NOT get T02
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T04", ["T01", "T03"], repo);
assert.equal(paths.length, 2, "Should get exactly 2 dependency summaries");
assert.ok(paths.some((p) => p.includes("T01-SUMMARY")), "Should include T01");
assert.ok(paths.some((p) => p.includes("T03-SUMMARY")), "Should include T03");
assert.ok(!paths.some((p) => p.includes("T02-SUMMARY")), "Should NOT include T02");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("getDependencyTaskSummaryPaths falls back to order-based for root tasks", async () => {
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-root-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
// T02 has no dependencies (root task) — should fall back to order-based
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T02", [], repo);
assert.equal(paths.length, 1, "Root task should get order-based prior summaries");
assert.ok(paths[0].includes("T01-SUMMARY"), "Should include T01 via order fallback");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("getDependencyTaskSummaryPaths handles missing dependency summaries gracefully", async () => {
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-missing-"));
try {
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
mkdirSync(tasksDir, { recursive: true });
// Only T01 has a summary, T02 does not
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
// T03 depends on T01 and T02, but T02 summary doesn't exist
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T03", ["T01", "T02"], repo);
assert.equal(paths.length, 1, "Should only return existing dependency summaries");
assert.ok(paths[0].includes("T01-SUMMARY"), "Should include T01 (exists)");
} finally {
rmSync(repo, { recursive: true, force: true });
}
});

View file

@ -468,6 +468,8 @@ export interface ReactiveExecutionState {
sliceId: string;
/** Task IDs that have been verified as completed. */
completed: string[];
/** Task IDs dispatched in the current/most recent reactive batch. */
dispatched: string[];
/** Snapshot of the graph at last dispatch. */
graphSnapshot: {
taskCount: number;