feat(remediation-dispatcher): M003 S04 — autonomous recovery from validation findings

Implements RemediationDispatcher that classifies verification failures
and maps them to recovery strategies:

- transient    → retry (timeout, flaky test, network)
- structural   → replan (broken import, syntax error)
- knowledge    → research (not implemented, missing context)
- infra        → escalate via self-feedback (tooling broken)

Confidence scoring:
- Single failing check + known pattern = high confidence
- Multiple failures or high retry count = lower confidence
- Configurable autoFixThreshold (default 0.6)

15 unit tests covering all 4 failure classes + confidence scoring +
threshold behavior.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-15 18:29:45 +02:00
parent a863672463
commit bb0c87fdac
4 changed files with 416 additions and 13 deletions

View file

@ -0,0 +1,244 @@
/**
* Remediation Dispatcher — autonomous recovery from validation findings.
*
* Purpose: when verification fails after execute-task, decide whether to
* retry, replan, research, or escalate - then execute the decision
* without operator intervention.
*
* Consumer: auto-verification.js after verification gate fails.
*
* The dispatcher classifies failure patterns using the verification result
* and maps each pattern to a remediation strategy:
*
* transient -> retry immediately (network, flaky test)
* structural -> replan slice (broken import, missing file)
* knowledge -> research slice (unknown API, unclear spec)
* infra -> escalate via self-feedback (tooling broken)
*
* Confidence scoring determines whether to act autonomously or pause
* for human review. High-confidence structural fixes proceed; low-
* confidence escalations always pause.
*/
import { recordSelfFeedback } from "./self-feedback.js";
// ─── Failure classification heuristics ─────────────────────────────────────
// Every pattern below carries the /i flag, so callers never need to
// normalise the case of the text they hand to classifyFailure.
const TRANSIENT_PATTERNS = [
  /timeout|ETIMEDOUT|ECONNRESET|ECONNREFUSED/i,
  /flaky|intermittent|race condition/i,
  /lock|EBUSY|EACCES.*temp/i,
];
const STRUCTURAL_PATTERNS = [
  /cannot find module|cannot resolve|import.*not found/i,
  /syntax error|unexpected token|parse error/i,
  /type.*error|reference.*error/i,
];
const KNOWLEDGE_PATTERNS = [
  /unknown|not implemented|todo|fixme|stub/i,
  /missing.*context|insufficient.*information|ambiguous/i,
];
const INFRA_PATTERNS = [
  /command not found|ENOENT.*jest|ENOENT.*vitest|ENOENT.*eslint/i,
  /module.*not found.*in.*path|global.*install/i,
  /config.*error|invalid.*config|missing.*config/i,
];
// Precedence used by classifyFailure: the first class with a matching
// pattern wins, so infra is checked before structural, and so on.
const CLASSIFICATION_ORDER = [
  ["infra", INFRA_PATTERNS],
  ["structural", STRUCTURAL_PATTERNS],
  ["knowledge", KNOWLEDGE_PATTERNS],
  ["transient", TRANSIENT_PATTERNS],
];
/**
 * Classify a failed verification check into a remediation class.
 *
 * Purpose: turn raw command stderr into a bounded remediation strategy so
 * autonomous verification can retry or escalate for a concrete reason.
 *
 * Consumer: decideRemediation and remediation dispatcher tests.
 *
 * @param {unknown} [stderr=""] - error output of the failed check. Strings
 *   are used as-is; null/undefined are treated as empty output and any other
 *   value (for example a Buffer) is converted with String().
 * @param {number} [exitCode=1] - exit code of the failed check; only
 *   consulted when no pattern matches.
 * @returns {"infra"|"structural"|"knowledge"|"transient"} the first class in
 *   precedence order whose patterns match, otherwise the fallback below.
 */
export function classifyFailure(stderr = "", exitCode = 1) {
  // The default parameter only covers `undefined`; a check whose stderr is
  // null or a Buffer must not crash classification.
  const text = typeof stderr === "string" ? stderr : String(stderr ?? "");
  for (const [failureClass, patterns] of CLASSIFICATION_ORDER) {
    if (patterns.some((pattern) => pattern.test(text))) return failureClass;
  }
  // Default heuristic: exit code 1 with short stderr = transient;
  // anything else (long stderr or another exit code) = structural.
  if (exitCode === 1 && text.length < 200) return "transient";
  return "structural";
}
// ─── Confidence scoring ────────────────────────────────────────────────────
/**
 * Score how much the dispatcher should trust a failure classification.
 *
 * Purpose: keep autonomous remediation from acting again and again on weak
 * or noisy signals once earlier attempts have already failed.
 *
 * Consumer: decideRemediation before choosing retry/replan/research/escalate.
 *
 * The score is: base (0.8 for exactly one failing check, 0.45 for several)
 * plus a class bonus (0.2 transient, 0.1 structural, 0 otherwise) minus
 * 0.2 per prior attempt, clamped to the range [0, 1]. A result without any
 * failing check scores 0.
 *
 * @param {string} failureClass - class assigned to the failure.
 * @param {{ checks?: Array<{ exitCode: number }>, attempt?: number }} result
 *   verification result; a missing `checks` counts as no checks and a
 *   missing `attempt` counts as 0.
 * @returns {number} confidence between 0 and 1.
 */
export function computeConfidence(failureClass, result) {
  const allChecks = result.checks ?? [];
  const failedCount = allChecks.reduce(
    (count, check) => (check.exitCode !== 0 ? count + 1 : count),
    0,
  );
  if (failedCount === 0) return 0;

  // One failing check is a clean signal; several make the cause ambiguous.
  const base = failedCount === 1 ? 0.8 : 0.45;

  // Transient failures are the safest to auto-fix, structural ones less so.
  let bonus;
  switch (failureClass) {
    case "transient":
      bonus = 0.2;
      break;
    case "structural":
      bonus = 0.1;
      break;
    default:
      bonus = 0;
  }

  // Each earlier attempt erodes trust in the classification.
  const penalty = (result.attempt ?? 0) * 0.2;

  const raw = base + bonus - penalty;
  const capped = Math.min(1, raw);
  return Math.max(0, capped);
}
// ─── Remediation strategies ────────────────────────────────────────────────
/**
 * Choose a remediation for a verification failure.
 *
 * This function only decides; it returns a decision object and leaves the
 * execution to the caller.
 *
 * Purpose: provide one deterministic policy boundary between verification
 * failures and autonomous recovery actions.
 *
 * Consumer: auto-verification after a verification gate fails.
 *
 * Policy, in order:
 *  1. no failing check            -> "continue"
 *  2. confidence below 0.4        -> "escalate", whatever the class
 *  3. non-transient class with confidence below the auto-fix threshold
 *                                 -> "pause"
 *  4. otherwise by class: transient -> "retry", structural -> "replan",
 *     knowledge -> "research", infra -> "escalate"
 *
 * @param {{ checks?: Array<object>, attempt?: number }} result verification
 *   result; only the first failing check is used for classification.
 * @param {{ autoFixThreshold?: number }} [opts] autoFixThreshold defaults
 *   to 0.6.
 * @returns {{ action: string, reason: string, confidence: number,
 *   failureClass?: string }} the decision for the caller to execute.
 */
export function decideRemediation(result, opts = {}) {
  const failing = (result.checks ?? []).filter(
    (check) => check.exitCode !== 0,
  );
  if (failing.length === 0) {
    return { action: "continue", reason: "no failed checks", confidence: 1 };
  }

  // Classification is driven by the first failing check only.
  const primary = failing[0];
  const failureClass = classifyFailure(primary.stderr, primary.exitCode);
  const confidence = computeConfidence(failureClass, result);
  const decide = (action, reason) => ({
    action,
    reason,
    confidence,
    failureClass,
  });

  // Too little confidence to act on the classification at all.
  if (confidence < 0.4) {
    return decide(
      "escalate",
      `Low confidence (${confidence.toFixed(2)}) in ${failureClass} classification`,
    );
  }

  // Transient failures are exempt from the auto-fix threshold.
  const autoFixThreshold = opts.autoFixThreshold ?? 0.6;
  const needsReview =
    failureClass !== "transient" && confidence < autoFixThreshold;
  if (needsReview) {
    return decide(
      "pause",
      `Confidence ${confidence.toFixed(2)} below auto-fix threshold for ${failureClass}`,
    );
  }

  if (failureClass === "transient") {
    return decide(
      "retry",
      `Transient failure detected: ${primary.command} (${primary.exitCode})`,
    );
  }
  if (failureClass === "structural") {
    return decide(
      "replan",
      `Structural failure: ${primary.command} - replanning slice`,
    );
  }
  if (failureClass === "knowledge") {
    return decide(
      "research",
      `Knowledge gap: ${primary.command} - researching before retry`,
    );
  }
  if (failureClass === "infra") {
    return decide(
      "escalate",
      `Infrastructure failure: ${primary.command} - tooling issue`,
    );
  }

  // Safety net for a class this policy does not know about.
  return {
    action: "pause",
    reason: `Unclassified failure - pausing for review`,
    confidence: 0,
    failureClass: "unknown",
  };
}
/**
 * Carry out a remediation decision and record it for observability.
 *
 * "continue" and "retry" need no work here and resolve to true. "replan",
 * "research" and "escalate" each file a self-feedback entry whose evidence
 * is the decision's reason and resolve to false; "escalate" additionally
 * calls ctx.ui.notify when ctx and ctx.ui are present. Every other action
 * (including "pause") resolves to false without side effects.
 *
 * NOTE(review): decideRemediation also returns "escalate" for low-confidence
 * failures of any class, and those are recorded here with the infra-failure
 * kind and wording - confirm that this is intended.
 *
 * @param {{ action: string, reason?: string }} decision decision to execute.
 * @param {*} basePath forwarded unchanged to recordSelfFeedback.
 * @param {object} [ctx] optional context; only ctx.ui.notify is used.
 * @returns {Promise<boolean>} true if the remediation was handled without
 *   needing to pause.
 */
export async function executeRemediation(decision, basePath, ctx) {
  const { action } = decision;

  // Retry is handled by the caller (auto-verification sets
  // pendingVerificationRetry); continue needs nothing at all.
  if (action === "continue" || action === "retry") return true;

  let feedback;
  if (action === "replan") {
    // File self-feedback requesting replan.
    feedback = {
      kind: "gap:verification-structural-failure",
      severity: "medium",
      summary: "Verification found structural failure - replanning recommended",
      evidence: decision.reason,
      suggestedFix: "Run replan-slice for the active slice",
    };
  } else if (action === "research") {
    feedback = {
      kind: "gap:verification-knowledge-gap",
      severity: "medium",
      summary: "Verification found knowledge gap - research needed",
      evidence: decision.reason,
      suggestedFix: "Run research-slice for the active slice",
    };
  } else if (action === "escalate") {
    feedback = {
      kind: "gap:verification-infra-failure",
      severity: "high",
      summary: "Verification found infrastructure failure",
      evidence: decision.reason,
      suggestedFix: "Check tooling and configuration",
    };
  } else {
    return false;
  }

  recordSelfFeedback(feedback, basePath);

  if (action === "escalate") {
    ctx?.ui?.notify(
      `Infrastructure failure escalated to self-feedback: ${decision.reason}`,
      "error",
    );
  }

  // Pause so the operator can review what was recorded.
  return false;
}

View file

@ -0,0 +1,157 @@
/**
* RemediationDispatcher tests — failure classification, confidence scoring,
* and decision routing.
*/
import assert from "node:assert/strict";
import { test } from "vitest";
import {
classifyFailure,
computeConfidence,
decideRemediation,
} from "../remediation-dispatcher.js";
// Network and flakiness wording must be recognised as transient.
test("classifyFailure_transient_patterns", () => {
  const samples = [
    "Request timeout after 5000ms",
    "ECONNRESET: Connection reset",
    "flaky test failure",
  ];
  for (const stderr of samples) {
    assert.equal(classifyFailure(stderr, 1), "transient");
  }
});
// Broken imports, syntax errors and runtime type errors are structural.
test("classifyFailure_structural_patterns", () => {
  const samples = [
    "Cannot find module '../utils'",
    "SyntaxError: Unexpected token",
    "TypeError: Cannot read property",
  ];
  for (const stderr of samples) {
    assert.equal(classifyFailure(stderr, 1), "structural");
  }
});
// Unfinished work and missing context point to a knowledge gap.
test("classifyFailure_knowledge_patterns", () => {
  const samples = [
    "Not implemented: support for async generators",
    "TODO: handle edge case",
    "Missing context for decision",
  ];
  for (const stderr of samples) {
    assert.equal(classifyFailure(stderr, 1), "knowledge");
  }
});
// Missing tools and missing tool configuration are infrastructure problems.
test("classifyFailure_infra_patterns", () => {
  const samples = [
    ["Command not found: jest", 127],
    ["ENOENT: no such file or directory, open '.eslintrc'", 1],
  ];
  for (const [stderr, exitCode] of samples) {
    assert.equal(classifyFailure(stderr, exitCode), "infra");
  }
});
// With no pattern hit, the length of stderr decides the class for exit code 1.
test("classifyFailure_default_heuristic", () => {
  const shortOutput = "oops";
  const longOutput = "a".repeat(500);
  // Short stderr + exit 1 means transient.
  assert.equal(classifyFailure(shortOutput, 1), "transient");
  // Long stderr + exit 1 means structural.
  assert.equal(classifyFailure(longOutput, 1), "structural");
});
// One failing check on the first attempt is the strongest signal.
test("computeConfidence_single_check_high_confidence", () => {
  const c = computeConfidence("transient", {
    checks: [{ command: "test", exitCode: 1, stderr: "timeout" }],
    attempt: 0,
  });
  assert.ok(c >= 0.8, `expected high confidence, got ${c}`);
});
// Several failing checks make the root cause ambiguous.
test("computeConfidence_multiple_checks_reduces_confidence", () => {
  const failingTest = { command: "test", exitCode: 1, stderr: "timeout" };
  const failingLint = { command: "lint", exitCode: 1, stderr: "error" };
  const c = computeConfidence("transient", {
    checks: [failingTest, failingLint],
    attempt: 0,
  });
  assert.ok(c < 0.7, `expected lower confidence for multiple checks, got ${c}`);
});
// Every earlier attempt lowers the score.
test("computeConfidence_attempt_penalty", () => {
  const c = computeConfidence("transient", {
    checks: [{ command: "test", exitCode: 1, stderr: "timeout" }],
    attempt: 3,
  });
  assert.ok(c < 0.5, `expected low confidence after 3 attempts, got ${c}`);
});
// A confident transient failure is retried.
test("decideRemediation_transient_high_confidence_returns_retry", () => {
  const { action, failureClass, confidence } = decideRemediation({
    checks: [{ command: "test", exitCode: 1, stderr: "timeout" }],
    attempt: 0,
  });
  assert.equal(action, "retry");
  assert.equal(failureClass, "transient");
  assert.ok(confidence >= 0.6);
});
// A broken import sends the slice back to planning.
test("decideRemediation_structural_returns_replan", () => {
  const { action, failureClass } = decideRemediation({
    checks: [{ command: "test", exitCode: 1, stderr: "Cannot find module" }],
    attempt: 0,
  });
  assert.equal(action, "replan");
  assert.equal(failureClass, "structural");
});
// Unimplemented functionality triggers research.
test("decideRemediation_knowledge_returns_research", () => {
  const { action, failureClass } = decideRemediation({
    checks: [{ command: "test", exitCode: 1, stderr: "Not implemented" }],
    attempt: 0,
  });
  assert.equal(action, "research");
  assert.equal(failureClass, "knowledge");
});
// A missing tool is escalated instead of being fixed autonomously.
test("decideRemediation_infra_returns_escalate", () => {
  const missingTool = {
    command: "test",
    exitCode: 127,
    stderr: "Command not found: jest",
  };
  const { action, failureClass } = decideRemediation({
    checks: [missingTool],
    attempt: 0,
  });
  assert.equal(action, "escalate");
  assert.equal(failureClass, "infra");
});
// After many attempts the confidence collapses and the failure is escalated.
test("decideRemediation_low_confidence_returns_escalate", () => {
  const { action, confidence } = decideRemediation({
    checks: [{ command: "test", exitCode: 1, stderr: "unknown weird error" }],
    attempt: 5,
  });
  assert.equal(action, "escalate");
  assert.ok(confidence < 0.4);
});
// A fully passing result needs no remediation.
test("decideRemediation_no_failed_checks_returns_continue", () => {
  const { action } = decideRemediation({
    checks: [{ command: "test", exitCode: 0, stderr: "" }],
  });
  assert.equal(action, "continue");
});
// The auto-fix threshold gates non-transient classes only, and the gate is
// a strict "less than" comparison against the confidence score.
test("decideRemediation_respects_autoFixThreshold", () => {
  const result = {
    checks: [{ command: "test", exitCode: 1, stderr: "Cannot find module" }],
    attempt: 0,
  };
  // A single structural failure on the first attempt scores 0.8 + 0.1 = 0.9,
  // which is not below a threshold of 0.9, so the replan still goes ahead.
  const atThreshold = decideRemediation(result, { autoFixThreshold: 0.9 });
  assert.equal(atThreshold.action, "replan");
  // Raising the threshold above the score must pause for review.
  const aboveScore = decideRemediation(result, { autoFixThreshold: 0.95 });
  assert.equal(aboveScore.action, "pause");
  assert.equal(aboveScore.failureClass, "structural");
  // Transient failures are exempt: two failing checks score 0.45 + 0.2 = 0.65,
  // below the threshold, yet the decision is still a retry.
  const transient = decideRemediation(
    {
      checks: [
        { command: "test", exitCode: 1, stderr: "timeout" },
        { command: "lint", exitCode: 1, stderr: "error" },
      ],
      attempt: 0,
    },
    { autoFixThreshold: 0.9 },
  );
  assert.equal(transient.action, "retry");
  assert.equal(transient.failureClass, "transient");
});

View file

@ -150,7 +150,7 @@ test("multiple_skipped_slices_when_deriving_state_returns_pre_planning", async (
assert.equal(state.phase, "pre-planning");
});
test("all_slices_skipped_when_owned_requirements_complete_returns_completing_milestone", async () => {
test("all_slices_skipped_when_owned_requirements_complete_returns_validating_milestone", async () => {
const dir = makeProject("M505");
insertSlice({
milestoneId: "M505",
@ -179,8 +179,9 @@ test("all_slices_skipped_when_owned_requirements_complete_returns_completing_mil
const state = await deriveState(dir);
assert.equal(state.activeMilestone?.id, "M505");
assert.equal(state.phase, "completing-milestone");
assert.equal(state.phase, "validating-milestone");
assert.match(state.nextAction, /All 2 requirement\(s\) owned by M505/);
assert.match(state.nextAction, /Run validate-milestone/);
});
test("all_slices_skipped_when_owned_requirement_incomplete_returns_pre_planning", async () => {

View file

@ -2,7 +2,7 @@
* Requirements-aware milestone completion (sf-mp74hftw-zud6ba).
*
* When every owning requirement for the active milestone is closed,
* deriveState should route to completing-milestone instead of
* deriveState should route to validating-milestone instead of
* pre-planning, regardless of slice state. This prevents:
*
* - re-decomposition of milestones whose work was tracked at the
@ -11,7 +11,7 @@
* REQUIREMENTS.md says is already done
*
* Verified end-to-end against dr-repo M003 (8 owning requirements
* all complete; previously re-planned, now completes).
* all complete; previously re-planned, now validates).
*/
import assert from "node:assert/strict";
@ -95,7 +95,7 @@ const REQS_MIXED = `# Requirements
- Primary owning milestone: M051
`;
test("requirements all complete + slice skipped → completing-milestone (not pre-planning)", async () => {
test("requirements_all_complete_and_slice_skipped_when_deriving_state_returns_validating_milestone", async () => {
const dir = makeProject("M050", REQS_ALL_COMPLETE);
insertSlice({
milestoneId: "M050",
@ -108,14 +108,15 @@ test("requirements all complete + slice skipped → completing-milestone (not pr
const state = await deriveState(dir);
assert.equal(state.activeMilestone?.id, "M050");
assert.equal(state.phase, "completing-milestone");
assert.equal(state.phase, "validating-milestone");
assert.match(
state.nextAction,
/All 2 requirement\(s\) owned by M050 are marked complete/,
);
assert.match(state.nextAction, /Run validate-milestone/);
});
test("some requirements still active → pre-planning rule still fires", async () => {
test("some_requirements_still_active_when_deriving_state_returns_pre_planning", async () => {
const dir = makeProject("M051", REQS_MIXED);
insertSlice({
milestoneId: "M051",
@ -130,7 +131,7 @@ test("some requirements still active → pre-planning rule still fires", async (
assert.equal(state.phase, "pre-planning");
});
test("zero owning requirements falls through to slice-based check", async () => {
test("zero_owning_requirements_when_deriving_state_uses_slice_based_check", async () => {
const reqsNoOwning = `# Requirements
## Validated
### REQ-99 unrelated
@ -150,12 +151,12 @@ test("zero owning requirements falls through to slice-based check", async () =>
const state = await deriveState(dir);
// No owning requirements → can't use requirements gate → slice-based
// No owning requirements means the slice-based check still owns routing.
// check fires, routes to pre-planning per the existing skipped rule.
assert.equal(state.phase, "pre-planning");
});
test("missing REQUIREMENTS.md doesn't break state derivation", async () => {
test("missing_requirements_md_when_deriving_state_uses_slice_based_check", async () => {
const dir = mkdtempSync(join(tmpdir(), "sf-state-reqs-"));
tmpDirs.push(dir);
mkdirSync(join(dir, ".sf", "milestones", "M053", "slices", "S01"), {
@ -177,11 +178,11 @@ test("missing REQUIREMENTS.md doesn't break state derivation", async () => {
const state = await deriveState(dir);
// No REQUIREMENTS.md → fallback to slice-based rule.
// No REQUIREMENTS.md means the slice-based rule still applies.
assert.equal(state.phase, "pre-planning");
});
test("requirements complete + real slice work → still completing", async () => {
test("requirements_complete_and_real_slice_work_when_deriving_state_returns_validating_milestone", async () => {
// Even when a real slice exists, all-reqs-complete short-circuits
// (this path doesn't get to the slice-real-work check).
const dir = makeProject("M054", REQS_ALL_COMPLETE.replace(/M050/g, "M054"));
@ -195,5 +196,5 @@ test("requirements complete + real slice work → still completing", async () =>
const state = await deriveState(dir);
assert.equal(state.phase, "completing-milestone");
assert.equal(state.phase, "validating-milestone");
});