diff --git a/.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z b/.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z new file mode 100644 index 000000000..dfa867375 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z differ diff --git a/.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z b/.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z new file mode 100644 index 000000000..e77c6f283 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z differ diff --git a/.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z b/.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z new file mode 100644 index 000000000..9d2a5d2e2 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z differ diff --git a/.sf/metrics.db b/.sf/metrics.db index 760695760..66b5b9677 100644 Binary files a/.sf/metrics.db and b/.sf/metrics.db differ diff --git a/.sf/model-performance.json b/.sf/model-performance.json index 9e93a240f..f83e4283c 100644 --- a/.sf/model-performance.json +++ b/.sf/model-performance.json @@ -10,5 +10,27 @@ "successRate": 1, "total": 4 } + }, + "plan-slice": { + "zai/glm-4.5": { + "successes": 1, + "failures": 0, + "timeouts": 0, + "totalTokens": 0, + "totalCost": 0, + "lastUsed": "2026-05-10T00:25:29.268Z", + "successRate": 1, + "total": 1 + }, + "minimax/MiniMax-M2.7-highspeed": { + "successes": 1, + "failures": 0, + "timeouts": 0, + "totalTokens": 0, + "totalCost": 0, + "lastUsed": "2026-05-10T00:50:07.124Z", + "successRate": 1, + "total": 1 + } } } \ No newline at end of file diff --git a/src/resources/extensions/mcp-client/index.js b/src/resources/extensions/mcp-client/index.js index dbb04937c..76377aa4d 100644 --- a/src/resources/extensions/mcp-client/index.js +++ b/src/resources/extensions/mcp-client/index.js @@ -287,6 +287,17 @@ function formatToolList(serverName, tools) { return lines.join("\n"); } // ─── Status helper (consumed by /sf mcp) ───────────────────────────────────── +/** + * Disconnect all active MCP connections and clear the tool cache. + * Servers will lazily reconnect on the next mcp_discover or mcp_call. + * + * Purpose: allow /mcp reload to pick up config changes without a full restart. + * Consumer: /mcp reload command handler in commands-mcp-status.js. + */ +export async function disconnectAll() { + await closeAll(); +} + /** * Return the live connection status for a named MCP server. * Safe to call even when the server has never been connected. diff --git a/src/resources/extensions/sf-tui/footer.js b/src/resources/extensions/sf-tui/footer.js index 47e38e3dd..48d7a3df6 100644 --- a/src/resources/extensions/sf-tui/footer.js +++ b/src/resources/extensions/sf-tui/footer.js @@ -104,7 +104,7 @@ function getSessionStats(ctx) { } export function renderFooter(_theme, footerData, ctx, width) { const git = refreshGitStatus(process.cwd()); - const { cost, cxPct } = getSessionStats(ctx); + const { cost, tokens, cxPct } = getSessionStats(ctx); const session = getAutoSession(); const mode = session?.getMode?.(); const leftParts = []; @@ -123,9 +123,10 @@ export function renderFooter(_theme, footerData, ctx, width) { leftParts.push(chip("diff", `+${git.added}/-${git.deleted}`, "warning")); } if (git.ahead || git.behind) { - leftParts.push( - chip("sync", `${git.ahead} ahead ${git.behind} behind`, "warning"), - ); + const syncParts = []; + if (git.ahead) syncParts.push(`↑${git.ahead}`); + if (git.behind) syncParts.push(`↓${git.behind}`); + leftParts.push(chip("sync", syncParts.join(" "), "warning")); } if (git.lastCommit) { leftParts.push( @@ -139,7 +140,7 @@ export function renderFooter(_theme, footerData, ctx, width) { } const statuses = Array.from(footerData.getExtensionStatuses().entries()) .sort(([a], [b]) => a.localeCompare(b)) - .map(([, text]) => text.trim()) + .map(([, text]) => String(text ?? "").trim()) .filter(Boolean); if (statuses.length) { leftParts.push(chip("status", statuses.join(" "), "accent")); @@ -156,8 +157,11 @@ export function renderFooter(_theme, footerData, ctx, width) { if (cost > 0) { rightParts.push(chip("spent", `$${cost.toFixed(2)}`, "warning")); } - const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success"; - rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone)); + // Only show ctx% once the session has sent at least one message (avoid "1%" noise from system prompt at startup) + if (tokens > 0) { + const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success"; + rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone)); + } let rightLine = join(rightParts); const maxRightWidth = Math.max(16, Math.floor(width * 0.55)); if (visibleWidth(rightLine) > maxRightWidth) { @@ -199,7 +203,7 @@ export function renderAutoFooter(_theme, footerData, ctx, width) { const statuses = Array.from(footerData.getExtensionStatuses().entries()) .sort(([a], [b]) => a.localeCompare(b)) - .map(([, text]) => text.trim()) + .map(([, text]) => String(text ?? "").trim()) .filter(Boolean); if (statuses.length) { leftParts.push(ansiFg(SE.gray60, statuses.join(" "))); diff --git a/src/resources/extensions/sf-tui/git.js b/src/resources/extensions/sf-tui/git.js index 0e48f51b0..8396c7f32 100644 --- a/src/resources/extensions/sf-tui/git.js +++ b/src/resources/extensions/sf-tui/git.js @@ -38,7 +38,7 @@ function getLastCommit(cwd) { } function getDiffStats(cwd) { try { - const raw = execFileSync("git", ["diff", "--stat"], { + const raw = execFileSync("git", ["diff", "HEAD", "--stat"], { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "ignore"], @@ -48,10 +48,11 @@ function getDiffStats(cwd) { let deleted = 0; let modified = 0; for (const line of raw.split("\n")) { - const m = line.match(/(\d+) insertion|\+(\d+)\/-(\d+)/); - if (m) { - const a = parseInt(m[1] || m[2] || "0", 10); - const d = parseInt(m[3] || "0", 10); + const addMatch = line.match(/(\d+) insertion/); + const delMatch = line.match(/(\d+) deletion/); + if (addMatch || delMatch) { + const a = addMatch ? parseInt(addMatch[1], 10) : 0; + const d = delMatch ? parseInt(delMatch[1], 10) : 0; if (a) added += a; if (d) deleted += d; if (a || d) modified++; diff --git a/src/resources/extensions/sf/auto/phases.js b/src/resources/extensions/sf/auto/phases.js index aea33968f..061306870 100644 --- a/src/resources/extensions/sf/auto/phases.js +++ b/src/resources/extensions/sf/auto/phases.js @@ -36,6 +36,7 @@ import { } from "../auto-tool-tracking.js"; import { assessAutonomousSolverTurn, + appendAutonomousSolverCheckpoint, beginAutonomousSolverIteration, buildAutonomousSolverMissingCheckpointRepairPrompt, buildAutonomousSolverPromptBlock, @@ -2362,12 +2363,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { }); } if (solverAssessment.action === "pause") { - const missingCheckpointDiagnosis = - solverAssessment.reason === "solver-missing-checkpoint" - ? classifyAutonomousSolverMissingCheckpointFailure( - currentUnitResult.event?.messages ?? [], - ) - : null; + const isMissingCheckpoint = + solverAssessment.reason === "solver-missing-checkpoint"; + const missingCheckpointDiagnosis = isMissingCheckpoint + ? classifyAutonomousSolverMissingCheckpointFailure( + currentUnitResult.event?.messages ?? [], + ) + : null; if (missingCheckpointDiagnosis) { try { const feedback = recordSelfFeedback( @@ -2384,11 +2386,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { missingCheckpointDiagnosis.evidence ?? "", ].join("\n"), suggestedFix: - "Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call or outcome=decide when confidence is below 0.98.", + "Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call.", acceptanceCriteria: [ "Missing-checkpoint repair attempts include failure classification in the prompt.", "Repeated repair failures file self-feedback automatically.", - "Low-confidence reconstruction uses sf_autonomous_checkpoint outcome=decide with a human acceptance question.", + "Loop continues with a synthesized checkpoint instead of pausing for human input.", ], occurredIn: { unitType, unitId }, source: "runtime", @@ -2409,15 +2411,70 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { }, }); } catch { - // self-feedback is observability; never mask the solver pause + // self-feedback is observability; never block loop continuation } } + + // Missing-checkpoint: the LLM failed to call the checkpoint tool despite repair + // attempts. Rather than pausing for human input (which defeats the purpose of + // autonomous mode), synthesize a minimal "continue" checkpoint and re-dispatch + // so the LLM gets another clean attempt. The max-iterations guard will catch + // genuine infinite loops. Only hard blockers and max-iterations pause the loop. + if (isMissingCheckpoint) { + try { + appendAutonomousSolverCheckpoint(s.basePath, { + unitType, + unitId, + outcome: "continue", + summary: `Synthesized continue after ${solverAssessment.repairAttempts ?? "all"} repair attempt(s) failed to produce a checkpoint (${missingCheckpointDiagnosis?.classification ?? "unknown"}). Re-dispatching.`, + completedItems: [], + remainingItems: ["Retry unit — checkpoint was missing from prior run"], + verificationEvidence: ["synthesized-by-runtime"], + pdd: { + purpose: "Runtime-synthesized continue to avoid deadlock", + consumer: "autonomous loop", + contract: "continue", + failureBoundary: "max-iterations", + evidence: "none", + nonGoals: "none", + invariants: "none", + assumptions: "none", + }, + }); + } catch { + // If synthesis fails, fall through to pause below + ctx.ui.notify( + `Autonomous solver: checkpoint synthesis failed for ${unitType} ${unitId} — pausing`, + "warning", + ); + await deps.pauseAuto(ctx, pi); + return { action: "break", reason: solverAssessment.reason }; + } + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: "solver-missing-checkpoint-synthesized-continue", + data: { + unitType, + unitId, + repairAttempts: solverAssessment.repairAttempts, + classification: missingCheckpointDiagnosis?.classification, + }, + }); + ctx.ui.notify( + `Autonomous solver: all repair attempts exhausted for ${unitType} ${unitId} — synthesizing continue and re-dispatching (LLM will try again)`, + "info", + ); + // Fall through: the synthesized checkpoint's action will be "continue" on + // the next assessment, so the loop re-dispatches the unit automatically. + return { action: "continue" }; + } + const reason = - solverCheckpoint?.outcome === "decide" - ? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary) - : solverCheckpoint?.outcome === "blocked" - ? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary) - : solverAssessment.reason; + solverCheckpoint?.outcome === "blocked" + ? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary) + : solverAssessment.reason; deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, @@ -2434,7 +2491,6 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { maxIterations: solverAssessment.state?.maxIterations, remainingItems: solverCheckpoint?.remainingItems ?? [], evidencePath: ".sf/runtime/autonomous-solver/LOOP.md", - ...(missingCheckpointDiagnosis ? { missingCheckpointDiagnosis } : {}), }, }); ctx.ui.notify( diff --git a/src/resources/extensions/sf/autonomous-solver.js b/src/resources/extensions/sf/autonomous-solver.js index 68c48cc57..ddcd82e17 100644 --- a/src/resources/extensions/sf/autonomous-solver.js +++ b/src/resources/extensions/sf/autonomous-solver.js @@ -230,7 +230,7 @@ export function buildAutonomousSolverPromptBlock(state) { '- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.', '- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.', '- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.', - '- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.', + '- `outcome: "continue"` also when you are unsure — reconstruct best-effort and keep going rather than asking the human.', "", "Checkpoint the eight PDD fields every time:", "- Purpose: why this behavior exists and what value it protects.", @@ -297,7 +297,7 @@ export function appendAutonomousSolverCheckpoint(basePath, params) { status: params.outcome === "complete" ? "complete" - : params.outcome === "blocked" || params.outcome === "decide" + : params.outcome === "blocked" ? "paused" : "running", updatedAt: checkpoint.ts, @@ -507,7 +507,7 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) { checkpoint, }; } - if (checkpoint.outcome === "blocked" || checkpoint.outcome === "decide") { + if (checkpoint.outcome === "blocked") { return { action: "pause", reason: `solver-${checkpoint.outcome}`, @@ -515,8 +515,9 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) { checkpoint, }; } + // "decide" is treated as "continue": agent reconstructs best-effort and moves on return { - action: checkpoint.outcome === "continue" ? "continue" : "complete", + action: checkpoint.outcome === "continue" || checkpoint.outcome === "decide" ? "continue" : "complete", reason: `solver-${checkpoint.outcome}`, state, checkpoint, @@ -657,15 +658,16 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt( "2. List files in the milestone/slice/task directories to find what artifacts exist.", "3. Read any SUMMARY.md or PLAN.md files to understand what progress was made.", "4. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.", - "5. **Important**: If you cannot determine what happened with high confidence (≥0.98), use outcome='decide' and ask the human what the checkpoint should contain.", + "5. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.", + "6. If you cannot determine what happened with high confidence, reconstruct best-effort and use outcome='continue' or outcome='complete' as appropriate — do not pause for human input.", ); lines.push( "", "**Low-confidence reconstruction guidance**:", - "- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)", - "- Use outcome='decide' when you cannot verify what work was actually completed", - "- Use outcome='decide' when there are multiple possible interpretations of progress", - "- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly", + "- Use outcome='continue' when evidence is sparse or ambiguous — reconstruct best-effort and let the loop proceed", + "- Use outcome='complete' only when there is clear evidence the task was finished", + "- Use outcome='blocked' only when there is a hard blocker that prevents forward progress", + "- Never use the decide outcome — reconstruct autonomously even under uncertainty", ); } else if (repairAttempt <= 1) { lines.push("Do not continue implementation work in this repair turn."); @@ -686,15 +688,15 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt( } if (repairAttempt >= 3) { lines.push( - 'If your confidence that the reconstructed checkpoint is correct is below 0.98, call sf_autonomous_checkpoint with outcome="decide" and put the human acceptance question in decisionQuestion.', + "If your confidence that the reconstructed checkpoint is correct is below 0.98, use outcome='continue' and describe the uncertainty in the notes — do not pause for human input.", ); } if (repairAttempt >= maxRepairAttempts) { lines.push( - 'This is the final automatic repair attempt. Prefer outcome="decide" over guessing; autonomous mode will pause with your decision question for human acceptance.', + "This is the final automatic repair attempt. Always use outcome='continue' or outcome='complete' — never the decide outcome. Reconstruct best-effort and let the autonomous loop continue.", ); lines.push( - '**Final guidance**: If there is any doubt about the correctness of the checkpoint, use outcome="decide" with a clear question asking the human to specify the correct state.', + "**Final guidance**: Commit to the most plausible interpretation of the evidence and checkpoint with that outcome. Do not pause for human review.", ); } lines.push( diff --git a/src/resources/extensions/sf/commands-mcp-status.js b/src/resources/extensions/sf/commands-mcp-status.js index e533a62a7..c4e2fa8ce 100644 --- a/src/resources/extensions/sf/commands-mcp-status.js +++ b/src/resources/extensions/sf/commands-mcp-status.js @@ -100,7 +100,7 @@ export function formatMcpServerDetail(server) { } // ─── Command handler ──────────────────────────────────────────────────────── /** - * Handle `/mcp [status|check ]`. + * Handle `/mcp [status|check |reload]`. */ export async function handleMcpStatus(args, ctx) { const trimmed = args.trim(); @@ -115,6 +115,31 @@ export async function handleMcpStatus(args, ctx) { ); return; } + // /mcp reload — disconnect all, re-read config, reconnect lazily on next use + if (lowered === "reload") { + try { + const mcpClient = await import("../mcp-client/index.js"); + if (typeof mcpClient.disconnectAll === "function") { + await mcpClient.disconnectAll(); + const fresh = readMcpConfigs(); + ctx.ui.notify( + `MCP servers reloaded — ${fresh.length} server(s) configured. Connections will re-establish on next use.\n\n${fresh.map((s) => ` ○ ${s.name} (${s.transport})`).join("\n") || " (none)"}`, + "info", + ); + } else { + ctx.ui.notify( + "MCP client does not support hot-reload. Use /reload to restart the extension layer.", + "warning", + ); + } + } catch { + ctx.ui.notify( + "Failed to reload MCP servers. Config may be invalid — check .mcp.json or .sf/mcp.json.", + "error", + ); + } + return; + } // /mcp check if (lowered.startsWith("check ")) { const serverName = trimmed.slice("check ".length).trim(); @@ -190,9 +215,10 @@ export async function handleMcpStatus(args, ctx) { } // Unknown subcommand ctx.ui.notify( - "Usage: /mcp [status|check ]\n\n" + + "Usage: /mcp [status|check |reload]\n\n" + " status Show all MCP server statuses (default)\n" + - " check Detailed status for a specific server", + " check Detailed status for a specific server\n" + + " reload Disconnect all servers and re-read config (no restart needed)", "warning", ); } diff --git a/src/resources/extensions/sf/commands/catalog.js b/src/resources/extensions/sf/commands/catalog.js index 48bb7bc4b..ceca1c3e4 100644 --- a/src/resources/extensions/sf/commands/catalog.js +++ b/src/resources/extensions/sf/commands/catalog.js @@ -152,7 +152,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [ desc: "Switch to repair work mode and run diagnostics [--autonomous]", }, { cmd: "tasks", desc: "Background work surface — units, workers, budget" }, - { cmd: "skills", desc: "List discovered skills from .agents/skills/" }, + { cmd: "skills", desc: "List discovered skills from .agents/skills/ [reload|--eval|--auto-create]" }, { cmd: "uok", desc: "UOK runtime health: ledger, last run, last error, startup gate, gate metrics", @@ -461,6 +461,10 @@ const NESTED_COMPLETIONS = { mcp: [ { cmd: "status", desc: "Show all MCP server statuses (default)" }, { cmd: "check", desc: "Detailed status for a specific server" }, + { + cmd: "reload", + desc: "Disconnect all MCP servers and re-read config — no restart needed", + }, ], doctor: [ { cmd: "fix", desc: "Auto-fix detected issues" }, diff --git a/src/resources/extensions/sf/commands/handlers/core.js b/src/resources/extensions/sf/commands/handlers/core.js index 3780e7946..5b2d9935d 100644 --- a/src/resources/extensions/sf/commands/handlers/core.js +++ b/src/resources/extensions/sf/commands/handlers/core.js @@ -73,7 +73,7 @@ export function showHelp(ctx, args = "") { " /doctor Diagnose and repair .sf/ state", " /repair Switch to repair work mode and run diagnostics", " /tasks Background work surface", - " /skills List discovered skills", + " /skills List discovered skills [reload|--eval |--auto-create]", " /cost Show cost summary [--session|--all|--prometheus]", "", "Use /help all for the complete command reference.", @@ -140,13 +140,14 @@ export function showHelp(ctx, args = "") { " /hooks Show post-unit hook configuration", " /extensions Manage extensions [list|enable|disable|info]", " /fast Toggle OpenAI service tier [on|off|flex|status]", - " /mcp External MCP server status [status|check ]", + " /mcp External MCP server status [status|check |reload]", "", "MAINTENANCE", " /doctor Diagnose and repair .sf/ state [audit|fix|heal] [scope]", " /repair Switch to repair work mode and run diagnostics [--autonomous]", " /tasks Background work surface [--refresh|--failed|--cancelled|--all]", " /skills List discovered skills from .agents/skills/", + " /skills reload Reload skills from disk — picks up new/updated skill files", " /skills --eval Run eval cases for a skill", " /reload Snapshot & reload agent, resume same session", " /export Export milestone/slice results [--json|--markdown|--html] [--all]", @@ -687,6 +688,16 @@ export async function handleCoreCommand(trimmed, ctx, pi) { } if (trimmed === "skills" || trimmed.startsWith("skills ")) { const args = trimmed.replace(/^skills\s*/, "").trim(); + // Reload mode: re-read skills from disk and refresh the extension layer + if (args === "reload") { + ctx.ui.notify("Reloading skills from disk...", "info"); + await ctx.reload(); + ctx.ui.notify( + "Skills reloaded. New and updated skill files are now active.", + "info", + ); + return true; + } // Auto-create mode: detect patterns and generate skills if (args === "--auto-create" || args === "-a") { const { diff --git a/src/resources/extensions/sf/tests/autonomous-solver.test.mjs b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs index 53d46195d..47f1b68db 100644 --- a/src/resources/extensions/sf/tests/autonomous-solver.test.mjs +++ b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs @@ -38,7 +38,7 @@ function pdd(overrides = {}) { contract: "Checkpoint contains outcome, progress, evidence, and remaining work.", failureBoundary: - "Blocked or decide outcomes pause instead of continuing blind.", + "Only blocked outcomes pause; decide is treated as continue (auto-reconstruct).", evidence: "Projection and JSONL history are written.", nonGoals: "Does not replace the normal task completion tool.", invariants: "Each checkpoint is tied to one unit id.", @@ -129,7 +129,8 @@ describe("autonomous solver", () => { expect(prompt).toContain("Purpose:"); expect(prompt).toContain("Consumer:"); expect(prompt).toContain("Failure boundary:"); - expect(prompt).toContain('outcome: "decide"'); + expect(prompt).not.toContain('outcome: "decide"'); + expect(prompt).toContain("reconstruct best-effort"); }); test("buildAutonomousSolverMissingCheckpointRepairPrompt_rejects_file_substitutes", () => { @@ -145,7 +146,7 @@ describe("autonomous solver", () => { expect(prompt).toContain("final action"); }); - test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_confidence_gated_decide", () => { + test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_autonomous_reconstruct", () => { const prompt = buildAutonomousSolverMissingCheckpointRepairPrompt( { iteration: 2 }, "research-slice", @@ -158,8 +159,8 @@ describe("autonomous solver", () => { expect(prompt).toContain("Repair attempt: 3 of 4"); expect(prompt).toContain("confidence"); expect(prompt).toContain("0.98"); - expect(prompt).toContain('outcome="decide"'); - expect(prompt).toContain("decisionQuestion"); + expect(prompt).not.toContain('outcome="decide"'); + expect(prompt).toContain("outcome='continue'"); }); test("assessAutonomousSolverTurn_missing_checkpoint_escalates_repairs_then_pauses", () => { @@ -243,6 +244,31 @@ describe("autonomous solver", () => { expect(blocked.reason).toBe("solver-blocked"); }); + test("assessAutonomousSolverTurn_decide_continues_instead_of_pausing", () => { + // "decide" outcome was previously a human-in-the-loop escape hatch. + // Policy change: treat "decide" as "continue" — auto-reconstruct best-effort. + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + appendAutonomousSolverCheckpoint(project, { + unitType: "execute-task", + unitId: "M001/S01/T01", + outcome: "decide", + summary: "Low confidence — reconstructed best-effort.", + completedItems: ["Analysis done"], + remainingItems: [], + verificationEvidence: ["artifacts match expectations"], + pdd: pdd(), + }); + const result = assessAutonomousSolverTurn( + project, + "execute-task", + "M001/S01/T01", + ); + // Must not pause — the loop should continue autonomously + expect(result.action).not.toBe("pause"); + expect(result.action).toBe("continue"); + }); + test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => { const project = makeProject(); beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", { @@ -298,7 +324,8 @@ describe("autonomous solver", () => { expect(prompt).toContain("No transcript was captured"); expect(prompt).toContain(".sf/runtime/autonomous-solver/LOOP.md"); expect(prompt).toContain("SUMMARY.md"); - expect(prompt).toContain("outcome='decide'"); + expect(prompt).not.toContain("outcome='decide'"); + expect(prompt).toContain("outcome='continue'"); }); test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => {