fix(gsd): reactive batch verification + dependency-based carry-forward (#1549)
* fix(gsd): batch-specific artifact verification for reactive-execute The reactive-execute artifact verifier previously checked only that 'at least one task summary exists' in the slice. This meant the unit could report success even when none of the dispatched tasks actually completed — a pre-existing T01 summary would satisfy the check. Fix: - Encode dispatched task IDs in the unitId: M001/S01/reactive+T02,T03 - Persist dispatched batch in ReactiveExecutionState before dispatch - Verify each dispatched task's summary file exists individually - Legacy unitId format (no +batch suffix) falls back to old behavior The verifier now answers 'did the tasks we dispatched actually finish?' instead of 'does any summary exist?' Added ReactiveExecutionState.dispatched field to track the batch. 5 new tests covering: all-pass, partial-fail, pre-existing-irrelevant, legacy fallback, and unitId round-trip encoding. * fix(gsd): dependency-based carry-forward for reactive task execution In reactive mode, each subagent task was getting order-based carry-forward (all prior task summaries by number), not dependency-based. T05 depending only on T02 would still receive T01, T03, T04 summaries — noise context that wastes tokens and could confuse execution. Fix: - Add getDependencyTaskSummaryPaths() — returns only summaries for tasks in the derived dependsOn set, falling back to order-based for root tasks with no dependencies (preserves continuity) - Add ExecuteTaskPromptOptions with carryForwardPaths override - buildExecuteTaskPrompt accepts optional override, sequential callers unchanged (no options = order-based, backward compatible) - buildReactiveExecutePrompt now passes dependency-scoped paths per task Sequential execute-task dispatch is completely unchanged — the new code path only activates when carryForwardPaths is explicitly provided. 3 new tests: dependency-only filtering, root task fallback, missing dependency summary handling.
This commit is contained in:
parent
596b941475
commit
1bd53a4c87
6 changed files with 253 additions and 14 deletions
|
|
@ -367,10 +367,25 @@ const DISPATCH_RULES: DispatchRule[] = [
|
|||
`ready:${metrics.readySetSize} dispatching:${selected.length} ambiguous:${metrics.ambiguous}\n`,
|
||||
);
|
||||
|
||||
// Persist dispatched batch so verification and recovery can check
|
||||
// exactly which tasks were sent.
|
||||
const { saveReactiveState } = await import("./reactive-graph.js");
|
||||
saveReactiveState(basePath, mid, sid, {
|
||||
sliceId: sid,
|
||||
completed: [...completed],
|
||||
dispatched: selected,
|
||||
graphSnapshot: metrics,
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
// Encode selected task IDs in unitId for artifact verification.
|
||||
// Format: M001/S01/reactive+T02,T03
|
||||
const batchSuffix = selected.join(",");
|
||||
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "reactive-execute",
|
||||
unitId: `${mid}/${sid}/reactive`,
|
||||
unitId: `${mid}/${sid}/reactive+${batchSuffix}`,
|
||||
prompt: await buildReactiveExecutePrompt(
|
||||
mid,
|
||||
midTitle,
|
||||
|
|
|
|||
|
|
@ -485,6 +485,41 @@ export async function getPriorTaskSummaryPaths(
|
|||
.map(f => `${sRel}/tasks/${f}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get carry-forward summary paths scoped to a task's derived dependencies.
|
||||
*
|
||||
* Instead of all prior tasks (order-based), returns only summaries for task
|
||||
* IDs in `dependsOn`. Used by reactive-execute to give each subagent only
|
||||
* the context it actually needs — not sibling tasks from a parallel batch.
|
||||
*
|
||||
* Falls back to order-based when dependsOn is empty (root tasks still get
|
||||
* any available prior summaries for continuity).
|
||||
*/
|
||||
export async function getDependencyTaskSummaryPaths(
|
||||
mid: string, sid: string, currentTid: string,
|
||||
dependsOn: string[], base: string,
|
||||
): Promise<string[]> {
|
||||
// If no dependencies, fall back to order-based for root tasks
|
||||
if (dependsOn.length === 0) {
|
||||
return getPriorTaskSummaryPaths(mid, sid, currentTid, base);
|
||||
}
|
||||
|
||||
const tDir = resolveTasksDir(base, mid, sid);
|
||||
if (!tDir) return [];
|
||||
|
||||
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
|
||||
const sRel = relSlicePath(base, mid, sid);
|
||||
const depSet = new Set(dependsOn.map((d) => d.toUpperCase()));
|
||||
|
||||
return summaryFiles
|
||||
.filter((f) => {
|
||||
// Extract task ID from filename: "T02-SUMMARY.md" → "T02"
|
||||
const tid = f.replace(/-SUMMARY\.md$/i, "").toUpperCase();
|
||||
return depSet.has(tid);
|
||||
})
|
||||
.map((f) => `${sRel}/tasks/${f}`);
|
||||
}
|
||||
|
||||
// ─── Adaptive Replanning Checks ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
@ -772,13 +807,24 @@ export async function buildPlanSlicePrompt(
|
|||
});
|
||||
}
|
||||
|
||||
/** Options for customizing execute-task prompt construction. */
|
||||
export interface ExecuteTaskPromptOptions {
|
||||
level?: InlineLevel;
|
||||
/** Override carry-forward paths (dependency-based instead of order-based). */
|
||||
carryForwardPaths?: string[];
|
||||
}
|
||||
|
||||
export async function buildExecuteTaskPrompt(
|
||||
mid: string, sid: string, sTitle: string,
|
||||
tid: string, tTitle: string, base: string, level?: InlineLevel,
|
||||
tid: string, tTitle: string, base: string,
|
||||
level?: InlineLevel | ExecuteTaskPromptOptions,
|
||||
): Promise<string> {
|
||||
const inlineLevel = level ?? resolveInlineLevel();
|
||||
const opts: ExecuteTaskPromptOptions = typeof level === "object" && level !== null && !Array.isArray(level)
|
||||
? level
|
||||
: { level: level as InlineLevel | undefined };
|
||||
const inlineLevel = opts.level ?? resolveInlineLevel();
|
||||
|
||||
const priorSummaries = await getPriorTaskSummaryPaths(mid, sid, tid, base);
|
||||
const priorSummaries = opts.carryForwardPaths ?? await getPriorTaskSummaryPaths(mid, sid, tid, base);
|
||||
const priorLines = priorSummaries.length > 0
|
||||
? priorSummaries.map(p => `- \`${p}\``).join("\n")
|
||||
: "- (no prior tasks)";
|
||||
|
|
@ -1272,8 +1318,16 @@ export async function buildReactiveExecutePrompt(
|
|||
const tTitle = node?.title ?? tid;
|
||||
readyTaskListLines.push(`- **${tid}: ${tTitle}**`);
|
||||
|
||||
// Build a full execute-task prompt for this task (reuse existing builder)
|
||||
const taskPrompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base);
|
||||
// Build dependency-scoped carry-forward paths for this task
|
||||
const depPaths = await getDependencyTaskSummaryPaths(
|
||||
mid, sid, tid, node?.dependsOn ?? [], base,
|
||||
);
|
||||
|
||||
// Build a full execute-task prompt with dependency-based carry-forward
|
||||
const taskPrompt = await buildExecuteTaskPrompt(
|
||||
mid, sid, sTitle, tid, tTitle, base,
|
||||
{ carryForwardPaths: depPaths },
|
||||
);
|
||||
|
||||
subagentSections.push([
|
||||
`### ${tid}: ${tTitle}`,
|
||||
|
|
|
|||
|
|
@ -152,18 +152,42 @@ export function verifyExpectedArtifact(
|
|||
return !content.includes("**Scope:** active");
|
||||
}
|
||||
|
||||
// Reactive-execute: verify that at least one new task summary was written.
|
||||
// The unitId is "{mid}/{sid}/reactive" — extract mid and sid to check.
|
||||
// Reactive-execute: verify that each dispatched task's summary exists.
|
||||
// The unitId encodes the batch: "{mid}/{sid}/reactive+T02,T03"
|
||||
if (unitType === "reactive-execute") {
|
||||
const parts = unitId.split("/");
|
||||
const mid = parts[0];
|
||||
const sid = parts[1];
|
||||
if (!mid || !sid) return false;
|
||||
const sidAndBatch = parts[1];
|
||||
const batchPart = parts[2]; // "reactive+T02,T03"
|
||||
if (!mid || !sidAndBatch || !batchPart) return false;
|
||||
|
||||
const sid = sidAndBatch;
|
||||
const plusIdx = batchPart.indexOf("+");
|
||||
if (plusIdx === -1) {
|
||||
// Legacy format "reactive" without batch IDs — fall back to "any summary"
|
||||
const tDir = resolveTasksDir(base, mid, sid);
|
||||
if (!tDir) return false;
|
||||
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
|
||||
return summaryFiles.length > 0;
|
||||
}
|
||||
|
||||
const batchIds = batchPart.slice(plusIdx + 1).split(",").filter(Boolean);
|
||||
if (batchIds.length === 0) return false;
|
||||
|
||||
const tDir = resolveTasksDir(base, mid, sid);
|
||||
if (!tDir) return false;
|
||||
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY");
|
||||
// At least one summary file should exist
|
||||
return summaryFiles.length > 0;
|
||||
|
||||
const existingSummaries = new Set(
|
||||
resolveTaskFiles(tDir, "SUMMARY").map((f) =>
|
||||
f.replace(/-SUMMARY\.md$/i, "").toUpperCase(),
|
||||
),
|
||||
);
|
||||
|
||||
// Every dispatched task must have a summary file
|
||||
for (const tid of batchIds) {
|
||||
if (!existingSummaries.has(tid.toUpperCase())) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const absPath = resolveExpectedArtifactPath(unitType, unitId, base);
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ function reactiveStatePath(basePath: string, mid: string, sid: string): string {
|
|||
function isReactiveState(data: unknown): data is ReactiveExecutionState {
|
||||
if (!data || typeof data !== "object") return false;
|
||||
const d = data as Record<string, unknown>;
|
||||
return typeof d.sliceId === "string" && Array.isArray(d.completed);
|
||||
return typeof d.sliceId === "string" && Array.isArray(d.completed) && Array.isArray(d.dispatched);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -266,6 +266,7 @@ test("saveReactiveState and loadReactiveState round-trip", () => {
|
|||
const state: ReactiveExecutionState = {
|
||||
sliceId: "S01",
|
||||
completed: ["T01", "T02"],
|
||||
dispatched: ["T03"],
|
||||
graphSnapshot: { taskCount: 4, edgeCount: 2, readySetSize: 1, ambiguous: false },
|
||||
updatedAt: "2025-01-01T00:00:00Z",
|
||||
};
|
||||
|
|
@ -285,6 +286,7 @@ test("clearReactiveState removes the file", () => {
|
|||
const state: ReactiveExecutionState = {
|
||||
sliceId: "S01",
|
||||
completed: [],
|
||||
dispatched: ["T01", "T02"],
|
||||
graphSnapshot: { taskCount: 2, edgeCount: 0, readySetSize: 2, ambiguous: false },
|
||||
updatedAt: "2025-01-01T00:00:00Z",
|
||||
};
|
||||
|
|
@ -365,3 +367,145 @@ test("completed tasks are not re-dispatched on next iteration", async () => {
|
|||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Batch Verification ───────────────────────────────────────────────────
|
||||
|
||||
test("verifyExpectedArtifact: reactive-execute passes when all dispatched summaries exist", async () => {
|
||||
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-pass-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02: Done\n");
|
||||
writeFileSync(join(tasksDir, "T03-SUMMARY.md"), "---\nid: T03\n---\n# T03: Done\n");
|
||||
|
||||
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
|
||||
assert.equal(result, true, "Should pass when all dispatched task summaries exist");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verifyExpectedArtifact: reactive-execute fails when a dispatched summary is missing", async () => {
|
||||
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-fail-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
// Only T02 has a summary, T03 does not
|
||||
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02: Done\n");
|
||||
|
||||
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
|
||||
assert.equal(result, false, "Should fail when dispatched task T03 summary is missing");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verifyExpectedArtifact: reactive-execute fails even with pre-existing summaries from other tasks", async () => {
|
||||
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-preexisting-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
// T01 summary exists from before, but T02 and T03 were dispatched
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01: Prior\n");
|
||||
|
||||
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive+T02,T03", repo);
|
||||
assert.equal(result, false, "Pre-existing T01 summary should not satisfy T02,T03 batch");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verifyExpectedArtifact: reactive-execute legacy format (no batch IDs) falls back", async () => {
|
||||
const { verifyExpectedArtifact } = await import("../auto-recovery.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-verify-legacy-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
|
||||
|
||||
// Legacy format without +batch suffix
|
||||
const result = verifyExpectedArtifact("reactive-execute", "M001/S01/reactive", repo);
|
||||
assert.equal(result, true, "Legacy format should fall back to any-summary check");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("unitId batch encoding round-trips correctly", () => {
|
||||
const mid = "M001";
|
||||
const sid = "S01";
|
||||
const selected = ["T02", "T03", "T05"];
|
||||
const unitId = `${mid}/${sid}/reactive+${selected.join(",")}`;
|
||||
|
||||
// Parse it back
|
||||
const parts = unitId.split("/");
|
||||
assert.equal(parts[0], "M001");
|
||||
assert.equal(parts[1], "S01");
|
||||
const batchPart = parts[2];
|
||||
const plusIdx = batchPart.indexOf("+");
|
||||
assert.ok(plusIdx > 0, "Should have + separator");
|
||||
const batchIds = batchPart.slice(plusIdx + 1).split(",");
|
||||
assert.deepEqual(batchIds, ["T02", "T03", "T05"]);
|
||||
});
|
||||
|
||||
// ─── Dependency-Based Carry-Forward ───────────────────────────────────────
|
||||
|
||||
test("getDependencyTaskSummaryPaths returns only dependency summaries", async () => {
|
||||
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
// T01, T02, T03 all have summaries
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
|
||||
writeFileSync(join(tasksDir, "T02-SUMMARY.md"), "---\nid: T02\n---\n# T02\n");
|
||||
writeFileSync(join(tasksDir, "T03-SUMMARY.md"), "---\nid: T03\n---\n# T03\n");
|
||||
|
||||
// T04 depends only on T01 and T03 — should NOT get T02
|
||||
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T04", ["T01", "T03"], repo);
|
||||
assert.equal(paths.length, 2, "Should get exactly 2 dependency summaries");
|
||||
assert.ok(paths.some((p) => p.includes("T01-SUMMARY")), "Should include T01");
|
||||
assert.ok(paths.some((p) => p.includes("T03-SUMMARY")), "Should include T03");
|
||||
assert.ok(!paths.some((p) => p.includes("T02-SUMMARY")), "Should NOT include T02");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("getDependencyTaskSummaryPaths falls back to order-based for root tasks", async () => {
|
||||
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-root-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
|
||||
|
||||
// T02 has no dependencies (root task) — should fall back to order-based
|
||||
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T02", [], repo);
|
||||
assert.equal(paths.length, 1, "Root task should get order-based prior summaries");
|
||||
assert.ok(paths[0].includes("T01-SUMMARY"), "Should include T01 via order fallback");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("getDependencyTaskSummaryPaths handles missing dependency summaries gracefully", async () => {
|
||||
const { getDependencyTaskSummaryPaths } = await import("../auto-prompts.ts");
|
||||
const repo = mkdtempSync(join(tmpdir(), "gsd-reactive-depcarry-missing-"));
|
||||
try {
|
||||
const tasksDir = join(repo, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
// Only T01 has a summary, T02 does not
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# T01\n");
|
||||
|
||||
// T03 depends on T01 and T02, but T02 summary doesn't exist
|
||||
const paths = await getDependencyTaskSummaryPaths("M001", "S01", "T03", ["T01", "T02"], repo);
|
||||
assert.equal(paths.length, 1, "Should only return existing dependency summaries");
|
||||
assert.ok(paths[0].includes("T01-SUMMARY"), "Should include T01 (exists)");
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -468,6 +468,8 @@ export interface ReactiveExecutionState {
|
|||
sliceId: string;
|
||||
/** Task IDs that have been verified as completed. */
|
||||
completed: string[];
|
||||
/** Task IDs dispatched in the current/most recent reactive batch. */
|
||||
dispatched: string[];
|
||||
/** Snapshot of the graph at last dispatch. */
|
||||
graphSnapshot: {
|
||||
taskCount: number;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue