fix(doctor): enrich flow-audit repeated-failure rollup with full diagnostic context

The flow-audit repeated-milestone-failure rollup now includes:

- Active milestone/unit and session pointer (AC1)
- Stale dispatched units (AC2)
- Runaway history (AC3)
- Over-budget child processes (AC3)

This satisfies the acceptance criteria of self-feedback entry sf-mp3ati7u-qqxcyi, so operators can use the rollup evidence to repair stale dispatch, missing summary, runaway, or child-process handling without re-running the flow audit manually.

Refs: sf-mp3ati7u-qqxcyi
2026-05-13 02:25:29 +02:00 · 2026-05-13 02:25:29 +02:00 · 2cad6d54f4
commit 2cad6d54f4
parent 65e195a9fd
2 changed files with 164 additions and 1 deletions
--- a/src/resources/extensions/sf/doctor.js
+++ b/src/resources/extensions/sf/doctor.js
@ -553,6 +553,7 @@ function maybeRecordRepeatedFailureRollup(
 	milestoneId,
 	feedback,
 	options,
+	flowContext = {},
 ) {
 	if (!milestoneId || options.recordSelfFeedback === false) return undefined;
 	const failures = feedback.filter(
@ -576,7 +577,7 @@ function maybeRecordRepeatedFailureRollup(
 			entryId: openRollup.id,
 		};
 	}
-	const evidence = failures
+	const failureLines = failures
 		.slice(-8)
 		.map(
 			(e) =>
@ -589,6 +590,62 @@ function maybeRecordRepeatedFailureRollup(
 					.join("/")}: ${e.summary}`,
 		)
 		.join("\n");
+
+	// Build enriched flow-audit evidence so the rollup preserves the full
+	// diagnostic context (AC1-AC3 of sf-mp3ati7u-qqxcyi).
+	const ctx = flowContext;
+	const lines = [];
+	if (ctx.activeMilestone || ctx.activeUnit) {
+		lines.push("--- Flow Audit Context ---");
+		if (ctx.activeMilestone) {
+			lines.push(
+				`Active milestone: ${ctx.activeMilestone.id}${ctx.activeMilestone.title ? ` — ${ctx.activeMilestone.title}` : ""}`,
+			);
+		}
+		if (ctx.activeUnit) {
+			const ageMin = Math.round((ctx.activeUnit.ageMs ?? 0) / 60000);
+			lines.push(
+				`Active unit: ${ctx.activeUnit.unitType} ${ctx.activeUnit.unitId} (phase: ${ctx.activeUnit.phase}, age: ${ageMin} min)`,
+			);
+		}
+		if (ctx.sessionPointer) {
+			lines.push(
+				`Session pointer: ${ctx.sessionPointer.sessionFile ?? ctx.sessionPointer.sessionId ?? "none recorded"} (source: ${ctx.sessionPointer.source ?? "unknown"})`,
+			);
+		}
+	}
+	if (ctx.staleDispatchedUnits && ctx.staleDispatchedUnits.length > 0) {
+		lines.push("Stale dispatched units:");
+		for (const u of ctx.staleDispatchedUnits.slice(0, 5)) {
+			lines.push(
+				`  - ${u.unitType} ${u.unitId}: ${Math.round((u.progressAgeMs ?? 0) / 60000)} min without progress`,
+			);
+		}
+	}
+	if (ctx.runawayHistory && ctx.runawayHistory.length > 0) {
+		lines.push("Runaway history:");
+		for (const h of ctx.runawayHistory.slice(-5)) {
+			lines.push(`  - ${h}`);
+		}
+	}
+	if (ctx.childProcesses && ctx.childProcesses.length > 0) {
+		const overBudget = ctx.childProcesses.filter((p) => p.overBudget);
+		if (overBudget.length > 0) {
+			lines.push("Over-budget child processes:");
+			for (const p of overBudget.slice(0, 5)) {
+				lines.push(
+					`  - pid=${p.pid} [${p.classification}] age=${Math.round((p.ageMs ?? 0) / 60000)}m action=${p.action}`,
+				);
+			}
+		}
+	}
+	if (lines.length > 0) {
+		lines.push("");
+	}
+	lines.push("--- Underlying Failures ---");
+	lines.push(failureLines);
+	const evidence = lines.join("\n");
+
 	const recorded = recordSelfFeedback(
 		{
 			kind: FLOW_AUDIT_ROLLUP_KIND,
@ -879,6 +936,14 @@ export async function runFlowAudit(basePath, options = {}) {
 		milestoneId,
 		feedback,
 		options,
+		{
+			activeMilestone,
+			activeUnit,
+			sessionPointer,
+			staleDispatchedUnits,
+			childProcesses,
+			runawayHistory,
+		},
 	);
 	if (repeatedFailureRollup?.filed) {
 		recommendations.push(
--- a/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs
+++ b/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs
@ -10,6 +10,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, test } from "vitest";
 import { runFlowAudit } from "../doctor.js";
+import { readAllSelfFeedback, recordSelfFeedback, markResolved } from "../self-feedback.js";

 const tmpDirs = [];

@ -24,6 +25,12 @@ function makeProject() {
 	const dir = mkdtempSync(join(tmpdir(), "sf-flow-audit-cleanup-"));
 	tmpDirs.push(dir);
 	mkdirSync(join(dir, ".sf"), { recursive: true });
+	// Pretend this is the forge repo so self-feedback stays project-local
+	writeFileSync(
+		join(dir, "package.json"),
+		JSON.stringify({ name: "singularity-forge" }),
+		"utf-8",
+	);
 	return dir;
 }

@ -174,4 +181,95 @@ describe("flow audit optional child cleanup", () => {
 			"Review recent errors before dispatching another unit.",
 		);
 	});
+
+	test("runFlowAudit_when_repeated_failures_on_same_milestone_files_rollup_with_enriched_evidence", async () => {
+		const project = makeProject();
+		const nowMs = Date.parse("2026-05-06T12:00:00.000Z");
+		const uniqueMid = `MTEST${Date.now()}`;
+
+		// Seed 3 unresolved self-feedback entries via the real channel (DB or JSONL)
+		const createdIds = [];
+		for (let i = 1; i <= 3; i++) {
+			const result = recordSelfFeedback(
+				{
+					kind: "runaway-guard-hard-pause",
+					severity: "medium",
+					summary: `Runaway guard paused execute-task ${uniqueMid}/S01/T0${i}`,
+					occurredIn: { milestone: uniqueMid, slice: "S01", task: `T0${i}` },
+					source: "detector",
+				},
+				project,
+			);
+			assert.ok(result, `recordSelfFeedback call ${i} should succeed`);
+			createdIds.push(result.entry.id);
+		}
+
+		// Set active unit in auto.lock so flow audit has context to enrich
+		writeFileSync(
+			join(project, ".sf", "auto.lock"),
+			JSON.stringify({
+				unitType: "execute-task",
+				unitId: `${uniqueMid}/S01/T01`,
+				phase: "running",
+				startedAt: nowMs - 10 * 60 * 1000,
+				sessionFile: ".sf/sessions/session-abc.json",
+			}),
+			"utf-8",
+		);
+
+		const report = await runFlowAudit(project, { nowMs, psOutput: "" });
+
+		assert.equal(report.repeatedFailureRollup?.filed, true);
+		assert.equal(report.repeatedFailureRollup?.milestoneId, uniqueMid);
+		assert.equal(report.repeatedFailureRollup?.count, 3);
+
+		// Read back the self-feedback and verify enriched evidence
+		const allFeedback = readAllSelfFeedback(project);
+		const rollup = allFeedback.find(
+			(e) =>
+				e.kind === "flow-audit:repeated-milestone-failure" &&
+				e.occurredIn?.milestone === uniqueMid,
+		);
+		assert.ok(rollup, "rollup entry should exist");
+		assert.ok(
+			rollup.evidence.includes("Flow Audit Context"),
+			"evidence should include Flow Audit Context header",
+		);
+		assert.ok(
+			rollup.evidence.includes(`Active milestone: ${uniqueMid}`),
+			"evidence should include active milestone",
+		);
+		assert.ok(
+			rollup.evidence.includes(`execute-task ${uniqueMid}/S01/T01`),
+			"evidence should include active unit",
+		);
+		assert.ok(
+			rollup.evidence.includes("Session pointer:"),
+			"evidence should include session pointer",
+		);
+		assert.ok(
+			rollup.evidence.includes("Runaway history:"),
+			"evidence should include runaway history",
+		);
+		assert.ok(
+			rollup.evidence.includes("Underlying Failures"),
+			"evidence should include underlying failures",
+		);
+
+		// Clean up: resolve all entries we created
+		for (const id of createdIds) {
+			markResolved(
+				id,
+				{ reason: "test cleanup", evidence: { kind: "auto-version-bump" } },
+				project,
+			);
+		}
+		if (rollup) {
+			markResolved(
+				rollup.id,
+				{ reason: "test cleanup", evidence: { kind: "auto-version-bump" } },
+				project,
+			);
+		}
+	});
 });