From 2cad6d54f484ca0576bb86fe7d655d443f044679 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Wed, 13 May 2026 02:25:29 +0200 Subject: [PATCH] fix(doctor): enrich flow-audit repeated-failure rollup with full diagnostic context The flow-audit repeated-milestone-failure rollup now includes: - Active milestone/unit and session pointer (AC1) - Stale dispatched units (AC2) - Runaway history (AC3) - Over-budget child processes (AC3) This satisfies the acceptance criteria of self-feedback entry sf-mp3ati7u-qqxcyi so operators can use the rollup evidence to repair stale dispatch, missing summary, runaway, or child-process handling without needing to re-run the flow audit manually. Refs: sf-mp3ati7u-qqxcyi --- src/resources/extensions/sf/doctor.js | 67 ++++++++++++- .../doctor-flow-audit-auto-cleanup.test.mjs | 98 +++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/src/resources/extensions/sf/doctor.js b/src/resources/extensions/sf/doctor.js index 41654b899..d639f0fb0 100644 --- a/src/resources/extensions/sf/doctor.js +++ b/src/resources/extensions/sf/doctor.js @@ -553,6 +553,7 @@ function maybeRecordRepeatedFailureRollup( milestoneId, feedback, options, + flowContext = {}, ) { if (!milestoneId || options.recordSelfFeedback === false) return undefined; const failures = feedback.filter( @@ -576,7 +577,7 @@ function maybeRecordRepeatedFailureRollup( entryId: openRollup.id, }; } - const evidence = failures + const failureLines = failures .slice(-8) .map( (e) => @@ -589,6 +590,62 @@ function maybeRecordRepeatedFailureRollup( .join("/")}: ${e.summary}`, ) .join("\n"); + + // Build enriched flow-audit evidence so the rollup preserves the full + // diagnostic context (AC1-AC3 of sf-mp3ati7u-qqxcyi). + const ctx = flowContext; + const lines = []; + if (ctx.activeMilestone || ctx.activeUnit) { + lines.push("--- Flow Audit Context ---"); + if (ctx.activeMilestone) { + lines.push( + `Active milestone: ${ctx.activeMilestone.id}${ctx.activeMilestone.title ? ` — ${ctx.activeMilestone.title}` : ""}`, + ); + } + if (ctx.activeUnit) { + const ageMin = Math.round((ctx.activeUnit.ageMs ?? 0) / 60000); + lines.push( + `Active unit: ${ctx.activeUnit.unitType} ${ctx.activeUnit.unitId} (phase: ${ctx.activeUnit.phase}, age: ${ageMin} min)`, + ); + } + if (ctx.sessionPointer) { + lines.push( + `Session pointer: ${ctx.sessionPointer.sessionFile ?? ctx.sessionPointer.sessionId ?? "none recorded"} (source: ${ctx.sessionPointer.source ?? "unknown"})`, + ); + } + } + if (ctx.staleDispatchedUnits && ctx.staleDispatchedUnits.length > 0) { + lines.push("Stale dispatched units:"); + for (const u of ctx.staleDispatchedUnits.slice(0, 5)) { + lines.push( + ` - ${u.unitType} ${u.unitId}: ${Math.round((u.progressAgeMs ?? 0) / 60000)} min without progress`, + ); + } + } + if (ctx.runawayHistory && ctx.runawayHistory.length > 0) { + lines.push("Runaway history:"); + for (const h of ctx.runawayHistory.slice(-5)) { + lines.push(` - ${h}`); + } + } + if (ctx.childProcesses && ctx.childProcesses.length > 0) { + const overBudget = ctx.childProcesses.filter((p) => p.overBudget); + if (overBudget.length > 0) { + lines.push("Over-budget child processes:"); + for (const p of overBudget.slice(0, 5)) { + lines.push( + ` - pid=${p.pid} [${p.classification}] age=${Math.round((p.ageMs ?? 0) / 60000)}m action=${p.action}`, + ); + } + } + } + if (lines.length > 0) { + lines.push(""); + } + lines.push("--- Underlying Failures ---"); + lines.push(failureLines); + const evidence = lines.join("\n"); + const recorded = recordSelfFeedback( { kind: FLOW_AUDIT_ROLLUP_KIND, @@ -879,6 +936,14 @@ export async function runFlowAudit(basePath, options = {}) { milestoneId, feedback, options, + { + activeMilestone, + activeUnit, + sessionPointer, + staleDispatchedUnits, + childProcesses, + runawayHistory, + }, ); if (repeatedFailureRollup?.filed) { recommendations.push( diff --git a/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs b/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs index fce5993aa..e3c1f38dc 100644 --- a/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs +++ b/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs @@ -10,6 +10,7 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, test } from "vitest"; import { runFlowAudit } from "../doctor.js"; +import { readAllSelfFeedback, recordSelfFeedback, markResolved } from "../self-feedback.js"; const tmpDirs = []; @@ -24,6 +25,12 @@ function makeProject() { const dir = mkdtempSync(join(tmpdir(), "sf-flow-audit-cleanup-")); tmpDirs.push(dir); mkdirSync(join(dir, ".sf"), { recursive: true }); + // Pretend this is the forge repo so self-feedback stays project-local + writeFileSync( + join(dir, "package.json"), + JSON.stringify({ name: "singularity-forge" }), + "utf-8", + ); return dir; } @@ -174,4 +181,95 @@ describe("flow audit optional child cleanup", () => { "Review recent errors before dispatching another unit.", ); }); + + test("runFlowAudit_when_repeated_failures_on_same_milestone_files_rollup_with_enriched_evidence", async () => { + const project = makeProject(); + const nowMs = Date.parse("2026-05-06T12:00:00.000Z"); + const uniqueMid = `MTEST${Date.now()}`; + + // Seed 3 unresolved self-feedback entries via the real channel (DB or JSONL) + const createdIds = []; + for (let i = 1; i <= 3; i++) { + const result = recordSelfFeedback( + { + kind: "runaway-guard-hard-pause", + severity: "medium", + summary: `Runaway guard paused execute-task ${uniqueMid}/S01/T0${i}`, + occurredIn: { milestone: uniqueMid, slice: "S01", task: `T0${i}` }, + source: "detector", + }, + project, + ); + assert.ok(result, `recordSelfFeedback call ${i} should succeed`); + createdIds.push(result.entry.id); + } + + // Set active unit in auto.lock so flow audit has context to enrich + writeFileSync( + join(project, ".sf", "auto.lock"), + JSON.stringify({ + unitType: "execute-task", + unitId: `${uniqueMid}/S01/T01`, + phase: "running", + startedAt: nowMs - 10 * 60 * 1000, + sessionFile: ".sf/sessions/session-abc.json", + }), + "utf-8", + ); + + const report = await runFlowAudit(project, { nowMs, psOutput: "" }); + + assert.equal(report.repeatedFailureRollup?.filed, true); + assert.equal(report.repeatedFailureRollup?.milestoneId, uniqueMid); + assert.equal(report.repeatedFailureRollup?.count, 3); + + // Read back the self-feedback and verify enriched evidence + const allFeedback = readAllSelfFeedback(project); + const rollup = allFeedback.find( + (e) => + e.kind === "flow-audit:repeated-milestone-failure" && + e.occurredIn?.milestone === uniqueMid, + ); + assert.ok(rollup, "rollup entry should exist"); + assert.ok( + rollup.evidence.includes("Flow Audit Context"), + "evidence should include Flow Audit Context header", + ); + assert.ok( + rollup.evidence.includes(`Active milestone: ${uniqueMid}`), + "evidence should include active milestone", + ); + assert.ok( + rollup.evidence.includes(`execute-task ${uniqueMid}/S01/T01`), + "evidence should include active unit", + ); + assert.ok( + rollup.evidence.includes("Session pointer:"), + "evidence should include session pointer", + ); + assert.ok( + rollup.evidence.includes("Runaway history:"), + "evidence should include runaway history", + ); + assert.ok( + rollup.evidence.includes("Underlying Failures"), + "evidence should include underlying failures", + ); + + // Clean up: resolve all entries we created + for (const id of createdIds) { + markResolved( + id, + { reason: "test cleanup", evidence: { kind: "auto-version-bump" } }, + project, + ); + } + if (rollup) { + markResolved( + rollup.id, + { reason: "test cleanup", evidence: { kind: "auto-version-bump" } }, + project, + ); + } + }); });