fix(doctor): enrich flow-audit repeated-failure rollup with full diagnostic context

The flow-audit repeated-milestone-failure rollup now includes:
- Active milestone/unit and session pointer (AC1)
- Stale dispatched units (AC2)
- Runaway history (AC3)
- Over-budget child processes (AC3)

This satisfies the acceptance criteria of self-feedback entry
sf-mp3ati7u-qqxcyi. Operators can now use the rollup evidence to
repair stale-dispatch, missing-summary, runaway, or child-process
handling without re-running the flow audit manually.

Refs: sf-mp3ati7u-qqxcyi
This commit is contained in:
Mikael Hugo 2026-05-13 02:25:29 +02:00
parent 65e195a9fd
commit 2cad6d54f4
2 changed files with 164 additions and 1 deletions

View file

@ -553,6 +553,7 @@ function maybeRecordRepeatedFailureRollup(
milestoneId,
feedback,
options,
flowContext = {},
) {
if (!milestoneId || options.recordSelfFeedback === false) return undefined;
const failures = feedback.filter(
@ -576,7 +577,7 @@ function maybeRecordRepeatedFailureRollup(
entryId: openRollup.id,
};
}
const evidence = failures
const failureLines = failures
.slice(-8)
.map(
(e) =>
@ -589,6 +590,62 @@ function maybeRecordRepeatedFailureRollup(
.join("/")}: ${e.summary}`,
)
.join("\n");
// Build enriched flow-audit evidence so the rollup preserves the full
// diagnostic context (AC1-AC3 of sf-mp3ati7u-qqxcyi).
// The parameter default only covers an omitted argument; `?? {}` also
// tolerates an explicit null so property reads below cannot throw.
const ctx = flowContext ?? {};
// Ages are given in milliseconds; a missing age is reported as 0 min.
const msToMinutes = (ms) => Math.round((ms ?? 0) / 60000);
const lines = [];
if (ctx.activeMilestone || ctx.activeUnit) {
  lines.push("--- Flow Audit Context ---");
  if (ctx.activeMilestone) {
    const { id, title } = ctx.activeMilestone;
    // Separate id and title so they do not run together in the evidence.
    lines.push(`Active milestone: ${id}${title ? ` - ${title}` : ""}`);
  }
  if (ctx.activeUnit) {
    const { unitType, unitId, phase, ageMs } = ctx.activeUnit;
    lines.push(
      `Active unit: ${unitType} ${unitId} (phase: ${phase}, age: ${msToMinutes(ageMs)} min)`,
    );
  }
  // The session pointer is only reported alongside an active milestone/unit.
  if (ctx.sessionPointer) {
    const { sessionFile, sessionId, source } = ctx.sessionPointer;
    lines.push(
      `Session pointer: ${sessionFile ?? sessionId ?? "none recorded"} (source: ${source ?? "unknown"})`,
    );
  }
}
if (ctx.staleDispatchedUnits?.length > 0) {
  lines.push("Stale dispatched units:");
  // Cap at the first five entries to keep the evidence compact.
  for (const unit of ctx.staleDispatchedUnits.slice(0, 5)) {
    lines.push(
      `  - ${unit.unitType} ${unit.unitId}: ${msToMinutes(unit.progressAgeMs)} min without progress`,
    );
  }
}
if (ctx.runawayHistory?.length > 0) {
  lines.push("Runaway history:");
  // Keep the last five entries of the list.
  for (const entry of ctx.runawayHistory.slice(-5)) {
    lines.push(`  - ${entry}`);
  }
}
if (ctx.childProcesses?.length > 0) {
  const overBudget = ctx.childProcesses.filter((proc) => proc.overBudget);
  if (overBudget.length > 0) {
    lines.push("Over-budget child processes:");
    for (const proc of overBudget.slice(0, 5)) {
      lines.push(
        `  - pid=${proc.pid} [${proc.classification}] age=${msToMinutes(proc.ageMs)}m action=${proc.action}`,
      );
    }
  }
}
// Blank separator between the context section and the failure list,
// only when some context was actually emitted.
if (lines.length > 0) {
  lines.push("");
}
lines.push("--- Underlying Failures ---");
lines.push(failureLines);
const evidence = lines.join("\n");
const recorded = recordSelfFeedback(
{
kind: FLOW_AUDIT_ROLLUP_KIND,
@ -879,6 +936,14 @@ export async function runFlowAudit(basePath, options = {}) {
milestoneId,
feedback,
options,
{
activeMilestone,
activeUnit,
sessionPointer,
staleDispatchedUnits,
childProcesses,
runawayHistory,
},
);
if (repeatedFailureRollup?.filed) {
recommendations.push(

View file

@ -10,6 +10,7 @@ import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { runFlowAudit } from "../doctor.js";
import { readAllSelfFeedback, recordSelfFeedback, markResolved } from "../self-feedback.js";
const tmpDirs = [];
@ -24,6 +25,12 @@ function makeProject() {
const dir = mkdtempSync(join(tmpdir(), "sf-flow-audit-cleanup-"));
tmpDirs.push(dir);
mkdirSync(join(dir, ".sf"), { recursive: true });
// Pretend this is the forge repo so self-feedback stays project-local
writeFileSync(
join(dir, "package.json"),
JSON.stringify({ name: "singularity-forge" }),
"utf-8",
);
return dir;
}
@ -174,4 +181,95 @@ describe("flow audit optional child cleanup", () => {
"Review recent errors before dispatching another unit.",
);
});
// Verifies that three unresolved failures on the same milestone make
// runFlowAudit file a rollup whose evidence carries the flow-audit context
// (active milestone/unit, session pointer, runaway history) in addition to
// the underlying failure lines.
test("runFlowAudit_when_repeated_failures_on_same_milestone_files_rollup_with_enriched_evidence", async () => {
  const project = makeProject();
  const nowMs = Date.parse("2026-05-06T12:00:00.000Z");
  const uniqueMid = `MTEST${Date.now()}`;
  // Fresh resolution payload per call so no object is shared between entries.
  const cleanupResolution = () => ({
    reason: "test cleanup",
    evidence: { kind: "auto-version-bump" },
  });
  const createdIds = [];
  let rollup;
  try {
    // Seed 3 unresolved self-feedback entries via the real channel (DB or JSONL)
    for (let i = 1; i <= 3; i++) {
      const result = recordSelfFeedback(
        {
          kind: "runaway-guard-hard-pause",
          severity: "medium",
          summary: `Runaway guard paused execute-task ${uniqueMid}/S01/T0${i}`,
          occurredIn: { milestone: uniqueMid, slice: "S01", task: `T0${i}` },
          source: "detector",
        },
        project,
      );
      assert.ok(result, `recordSelfFeedback call ${i} should succeed`);
      createdIds.push(result.entry.id);
    }
    // Set active unit in auto.lock so flow audit has context to enrich
    writeFileSync(
      join(project, ".sf", "auto.lock"),
      JSON.stringify({
        unitType: "execute-task",
        unitId: `${uniqueMid}/S01/T01`,
        phase: "running",
        startedAt: nowMs - 10 * 60 * 1000,
        sessionFile: ".sf/sessions/session-abc.json",
      }),
      "utf-8",
    );
    const report = await runFlowAudit(project, { nowMs, psOutput: "" });
    assert.equal(report.repeatedFailureRollup?.filed, true);
    assert.equal(report.repeatedFailureRollup?.milestoneId, uniqueMid);
    assert.equal(report.repeatedFailureRollup?.count, 3);
    // Read back the self-feedback and verify enriched evidence
    const allFeedback = readAllSelfFeedback(project);
    rollup = allFeedback.find(
      (e) =>
        e.kind === "flow-audit:repeated-milestone-failure" &&
        e.occurredIn?.milestone === uniqueMid,
    );
    assert.ok(rollup, "rollup entry should exist");
    assert.ok(
      rollup.evidence.includes("Flow Audit Context"),
      "evidence should include Flow Audit Context header",
    );
    assert.ok(
      rollup.evidence.includes(`Active milestone: ${uniqueMid}`),
      "evidence should include active milestone",
    );
    assert.ok(
      rollup.evidence.includes(`execute-task ${uniqueMid}/S01/T01`),
      "evidence should include active unit",
    );
    assert.ok(
      rollup.evidence.includes("Session pointer:"),
      "evidence should include session pointer",
    );
    assert.ok(
      rollup.evidence.includes("Runaway history:"),
      "evidence should include runaway history",
    );
    assert.ok(
      rollup.evidence.includes("Underlying Failures"),
      "evidence should include underlying failures",
    );
  } finally {
    // Clean up in `finally` so a failing assertion above does not leave the
    // seeded entries (or the rollup, once located) unresolved.
    for (const id of createdIds) {
      markResolved(id, cleanupResolution(), project);
    }
    if (rollup) {
      markResolved(rollup.id, cleanupResolution(), project);
    }
  }
});
});