diff --git a/src/resources/extensions/sf/autonomous-solver.js b/src/resources/extensions/sf/autonomous-solver.js index c5c3da9b6..9f4a51714 100644 --- a/src/resources/extensions/sf/autonomous-solver.js +++ b/src/resources/extensions/sf/autonomous-solver.js @@ -18,6 +18,7 @@ import { dirname, join } from "node:path"; import { atomicWriteSync } from "./atomic-write.js"; import { sfRoot } from "./paths.js"; import { emitJournalEvent } from "./journal.js"; +import { loadPrompt } from "./prompt-loader.js"; export const AUTONOMOUS_SOLVER_OUTCOMES = [ "continue", @@ -323,9 +324,10 @@ export function recordExecutorRefusalEscalation(basePath, unitType, unitId) { * (middle) on implementation; CLOSE (final 3) on verifying and wrapping up. * Stall/loop signals are injected when the system detects no progress. * - * Consumer: runUnitPhase prompt injection. + * @param {object} state - Active solver state + * @returns {object} Variables for the template */ -function _buildAutonomousLoopPromptPrefix(state, header) { +function buildAutonomousLoopVars(state) { const phase = getSolverPhase(state.iteration, state.maxIterations); const stalled = Number(state.iterationsSinceProgress) >= STALL_THRESHOLD_ITERATIONS; @@ -334,34 +336,35 @@ function _buildAutonomousLoopPromptPrefix(state, header) { // ── Phase header ──────────────────────────────────────────────────────── const phaseHeaders = { orient: - `ORIENT PHASE (iterations 1-2): Your priority is to read, understand, and plan — not to write code.\n` + - `Read all relevant artifacts: task plans, slice plans, DECISIONS.md, REQUIREMENTS.md, CONTEXT.md.\n` + - `Identify what already exists, what must be built, and what the acceptance criteria are.\n` + - `End this iteration with a concrete plan of action recorded in your checkpoint's remainingItems.`, + "ORIENT PHASE (iterations 1-2): Your priority is to read, understand, and plan — not to write code.\n" + + "Read all relevant artifacts: task plans, slice plans, DECISIONS.md, 
REQUIREMENTS.md, CONTEXT.md.\n" + + "Identify what already exists, what must be built, and what the acceptance criteria are.\n" + + "End this iteration with a concrete plan of action recorded in your checkpoint's remainingItems.", execute: - `EXECUTE PHASE: You are in the implementation stretch. Make concrete, verifiable progress each iteration.\n` + - `Each iteration must produce at least one new artifact, passing test, or measurable change.\n` + - `Record what you completed and what remains — do not repeat the same actions as prior iterations.`, + "EXECUTE PHASE: You are in the implementation stretch. Make concrete, verifiable progress each iteration.\n" + + "Each iteration must produce at least one new artifact, passing test, or measurable change.\n" + + "Record what you completed and what remains — do not repeat the same actions as prior iterations.", close: `CLOSE PHASE (final ${CLOSE_PHASE_LOOKAHEAD} iterations): You are approaching the iteration budget.\n` + - `Priority: verify all acceptance criteria, run the test suite, and confirm the unit is complete.\n` + - `If the unit cannot be completed in the remaining iterations, checkpoint with outcome="blocked" and a precise reason.\n` + - `Do NOT start new work — finish and verify existing work.`, + "Priority: verify all acceptance criteria, run the test suite, and confirm the unit is complete.\n" + + "If the unit cannot be completed in the remaining iterations, checkpoint with outcome=\"blocked\" and a precise reason.\n" + + "Do NOT start new work — finish and verify existing work.", }; - const lines = [ - `## ${header}`, - "", - `You are inside /autonomous iteration ${state.iteration} of ${state.maxIterations} for ${state.unitType} ${state.unitId}.`, - "", - phaseHeaders[phase], - "", - "This is SF's built-in solver loop. It is not a separate Ralph workflow. 
Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing.", - ]; + const vars = { + unitType: state.unitType, + unitId: state.unitId, + iteration: state.iteration, + maxIterations: state.maxIterations, + phaseHeader: phaseHeaders[phase], + stallWarning: "", + loopWarning: "", + recentHistory: "", + }; // ── Stall injection ───────────────────────────────────────────────────── if (stalled) { - lines.push( + vars.stallWarning = [ "", `⚠️ STALL DETECTED: ${state.iterationsSinceProgress} iterations without new completedItems recorded.`, "You are repeating work without making measurable progress. Before continuing:", @@ -369,13 +372,13 @@ function _buildAutonomousLoopPromptPrefix(state, header) { "2. Are you blocked by something that requires a different approach?", "3. Try a DIFFERENT strategy from previous iterations — do not repeat the same steps.", "If there is a concrete blocker, use outcome='blocked' with a precise blockerReason.", - ); + ].join("\n"); } // ── Loop detection injection ───────────────────────────────────────────── if (looping) { const lastSummary = state.recentSummaryHashes?.slice(-1)[0] ?? ""; - lines.push( + vars.loopWarning = [ "", `🔁 LOOP DETECTED: Your last ${LOOP_DETECTION_WINDOW} checkpoint summaries are identical or nearly identical.`, `Pattern: "${lastSummary.slice(0, 80)}..."`, @@ -384,7 +387,7 @@ function _buildAutonomousLoopPromptPrefix(state, header) { "- Re-read the task plan from scratch — are you solving the right problem?", "- If the task is actually done, checkpoint with outcome='complete'.", "- If you cannot break the loop, checkpoint with outcome='blocked' and name the specific obstacle.", - ); + ].join("\n"); } // ── Rolling summary of recent iterations ──────────────────────────────── @@ -392,16 +395,16 @@ function _buildAutonomousLoopPromptPrefix(state, header) { ? 
state.recentCheckpointSummaries.filter(Boolean) : []; if (summaries.length > 0) { - lines.push( + vars.recentHistory = [ "", `## Recent Iteration History (last ${summaries.length})`, ...summaries.map( (s, i) => `- Iter ${state.iteration - summaries.length + i}: ${s}`, ), - ); + ].join("\n"); } - return lines; + return vars; } /** @@ -415,42 +418,13 @@ function _buildAutonomousLoopPromptPrefix(state, header) { * * Consumer: runUnitPhase prompt injection (solver pass). */ -export function buildAutonomousSolverPromptBlock(state) { - const lines = _buildAutonomousLoopPromptPrefix( - state, - "Autonomous Solver Loop Contract", - ); - lines.push( - "", - "## CHECKPOINT REQUIREMENT", - "", - "`checkpoint` is ALWAYS available in autonomous mode. It is registered unconditionally at startup.", - "If you do not see it in your tool list, that is a perception error — call it anyway. It will work.", - "Do NOT conclude it is missing or phantom based on a codebase search. It is registered at runtime by the extension bootstrap, not as a standalone file.", - "", - "Hard requirement: before ending the turn, call the actual `checkpoint` tool. 
Writing SUMMARY.md, LOOP.md, task files, chat prose, or any other artifact is useful evidence, but it is not a checkpoint and does not satisfy this requirement.", - "", - "Call `checkpoint` with:", - '- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.', - '- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.', - '- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.', - '- `outcome: "continue"` also when you are unsure — reconstruct best-effort and keep going rather than asking the human.', - "", - "Checkpoint the eight PDD fields every time:", - "- Purpose: why this behavior exists and what value it protects.", - "- Consumer: who or what uses it in production.", - "- Contract: the observable behavior or artifact boundary.", - "- Failure boundary: what failures must be contained or surfaced.", - "- Evidence: commands, files, tests, or runtime observations proving progress.", - "- Non-goals: what you intentionally did not solve this iteration.", - "- Invariants: rules that must remain true across iterations.", - "- Assumptions: uncertain facts you relied on and how to falsify them later.", - "", - "If you are executing an `execute-task` unit and the task is finished, `complete_task` remains mandatory; `checkpoint` does not replace it.", - "If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.", - "Your final autonomous action should be the checkpoint tool call unless a required completion tool such as complete_task must be called immediately before it.", - ); - return lines.join("\n"); +export function buildAutonomousSolverPromptBlock(state, vars = {}) { + const templateVars = { + workingDirectory: "/repo", // Fallback for tests or missing context + ...buildAutonomousLoopVars(state), + ...vars, + }; + return loadPrompt("autonomous-solver-contract", 
templateVars); } /** @@ -461,23 +435,13 @@ export function buildAutonomousSolverPromptBlock(state) { * * Consumer: runUnitPhase prompt injection (executor pass). */ -export function buildAutonomousExecutorPromptBlock(state) { - const lines = _buildAutonomousLoopPromptPrefix( - state, - "Autonomous Executor Contract", - ); - lines.push( - "", - "## EXECUTOR ROLE", - "", - "Your job is to do the unit work: read files, run tests, edit code, and produce concrete artifacts.", - "You do NOT need to call the `checkpoint` tool. A separate solver pass will observe your work and emit the canonical checkpoint.", - "Focus entirely on making verifiable progress toward the task goal.", - "", - "If you are executing an `execute-task` unit and the task is finished, `complete_task` remains mandatory.", - "End your turn when the bounded work is done or when you have made meaningful progress and need to wait for the next iteration.", - ); - return lines.join("\n"); +export function buildAutonomousExecutorPromptBlock(state, vars = {}) { + const templateVars = { + workingDirectory: "/repo", // Fallback for tests or missing context + ...buildAutonomousLoopVars(state), + ...vars, + }; + return loadPrompt("autonomous-executor-contract", templateVars); } /** @@ -497,46 +461,22 @@ export function buildSolverPassPrompt( const transcriptText = stringifyMessages(executorTranscript); const refusal = classifyExecutorRefusal(executorTranscript); - const lines = [ - "## Autonomous Solver Pass", - "", - `You are the protocol solver for ${unitType} ${unitId} · iteration ${state?.iteration ?? "unknown"} of ${state?.maxIterations ?? "unknown"}.`, - "", - "Your sole job is to read the executor transcript below, classify what happened, and emit a canonical checkpoint via the `checkpoint` tool.", - "Do NOT edit files, run commands, or propose code changes. Observe and classify only.", - "", - "## Classification Rubric", - "", - "Apply these in order; emit the FIRST one that matches.", - "", - "1. 
`executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools', 'outside my capabilities'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`.", - "2. `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked`, blockerReason=`executor-noop`. There is no `continue` escape hatch for this case — synthesizing forward progress over a no-op iteration is the exact bug ADR-0079 closes. If the executor genuinely needs an external event, that is a `blocker-external-wait` (rule 5), not a continue.", - "3. `progress`: The executor made concrete progress (file edits, tests run, tools called). → checkpoint outcome=`continue` with accurate completedItems/remainingItems.", - "4. `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`.", - "5. `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment, external wait). → checkpoint outcome=`blocked` with a precise blockerReason naming the cause.", - "", - "## Executor Transcript", - "", - "```", - transcriptText, - "```", - "", - ]; + const vars = { + unitType, + unitId, + iteration: state?.iteration ?? "unknown", + maxIterations: state?.maxIterations ?? "unknown", + executorTranscript: transcriptText, + refusalMarker: refusal + ? [ + `⚠️ Refusal pattern detected: ${refusal.pattern}.`, + "The executor refused the task. Emit outcome='blocked' with blockerReason='executor-refused'.", + "", + ].join("\n") + : "", + }; - if (refusal) { - lines.push( - `⚠️ Refusal pattern detected: ${refusal.pattern}.`, - "The executor refused the task. 
Emit outcome='blocked' with blockerReason='executor-refused'.", - "", - ); - } - - lines.push( - "Call `checkpoint` with all eight PDD fields and accurate completedItems / remainingItems.", - "Your final action MUST be the checkpoint tool call.", - ); - - return lines.join("\n"); + return loadPrompt("autonomous-solver-pass", vars); } /** diff --git a/src/resources/extensions/sf/prompts/autonomous-executor-contract.md b/src/resources/extensions/sf/prompts/autonomous-executor-contract.md new file mode 100644 index 000000000..4b8f38e39 --- /dev/null +++ b/src/resources/extensions/sf/prompts/autonomous-executor-contract.md @@ -0,0 +1,22 @@ +{{include:working-directory}} + +## Autonomous Executor Contract + +You are inside /autonomous iteration {{iteration}} of {{maxIterations}} for {{unitType}} {{unitId}}. + +{{phaseHeader}} + +This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing. + +{{stallWarning}} +{{loopWarning}} +{{recentHistory}} + +## EXECUTOR ROLE + +Your job is to do the unit work: read files, run tests, edit code, and produce concrete artifacts. +You do NOT need to call the `checkpoint` tool. A separate solver pass will observe your work and emit the canonical checkpoint. +Focus entirely on making verifiable progress toward the task goal. + +If you are executing an `execute-task` unit and the task is finished, `complete_task` remains mandatory. +End your turn when the bounded work is done or when you have made meaningful progress and need to wait for the next iteration. 
diff --git a/src/resources/extensions/sf/prompts/autonomous-solver-contract.md b/src/resources/extensions/sf/prompts/autonomous-solver-contract.md new file mode 100644 index 000000000..b73f5f539 --- /dev/null +++ b/src/resources/extensions/sf/prompts/autonomous-solver-contract.md @@ -0,0 +1,41 @@ +{{include:working-directory}} + +## Autonomous Solver Loop Contract + +You are inside /autonomous iteration {{iteration}} of {{maxIterations}} for {{unitType}} {{unitId}}. + +{{phaseHeader}} + +This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing. + +{{stallWarning}} +{{loopWarning}} +{{recentHistory}} + +## CHECKPOINT REQUIREMENT + +`checkpoint` is ALWAYS available in autonomous mode. It is registered unconditionally at startup. +If you do not see it in your tool list, that is a perception error — call it anyway. It will work. +Do NOT conclude it is missing or phantom based on a codebase search. It is registered at runtime by the extension bootstrap, not as a standalone file. + +Hard requirement: before ending the turn, call the actual `checkpoint` tool. Writing SUMMARY.md, LOOP.md, task files, chat prose, or any other artifact is useful evidence, but it is not a checkpoint and does not satisfy this requirement. + +Call `checkpoint` with: +- `outcome: "complete"` only when this unit's normal completion tool/artifact is also done. +- `outcome: "continue"` when you made real progress but more autonomous iterations are needed. +- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment. +- `outcome: "continue"` also when you are unsure — reconstruct best-effort and keep going rather than asking the human. + +Checkpoint the eight PDD fields every time: +- Purpose: why this behavior exists and what value it protects. +- Consumer: who or what uses it in production. 
+- Contract: the observable behavior or artifact boundary. +- Failure boundary: what failures must be contained or surfaced. +- Evidence: commands, files, tests, or runtime observations proving progress. +- Non-goals: what you intentionally did not solve this iteration. +- Invariants: rules that must remain true across iterations. +- Assumptions: uncertain facts you relied on and how to falsify them later. + +If you are executing an `execute-task` unit and the task is finished, `complete_task` remains mandatory; `checkpoint` does not replace it. +If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose. +Your final autonomous action should be the checkpoint tool call unless a required completion tool such as complete_task must be called immediately before it. diff --git a/src/resources/extensions/sf/prompts/autonomous-solver-pass.md b/src/resources/extensions/sf/prompts/autonomous-solver-pass.md new file mode 100644 index 000000000..a07d22789 --- /dev/null +++ b/src/resources/extensions/sf/prompts/autonomous-solver-pass.md @@ -0,0 +1,26 @@ +## Autonomous Solver Pass + +You are the protocol solver for {{unitType}} {{unitId}} · iteration {{iteration}} of {{maxIterations}}. + +Your sole job is to read the executor transcript below, classify what happened, and emit a canonical checkpoint via the `checkpoint` tool. +Do NOT edit files, run commands, or propose code changes. Observe and classify only. + +## Classification Rubric + +Apply these in order; emit the FIRST one that matches. + +1. `executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools', 'outside my capabilities'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`. +2. `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked`, blockerReason=`executor-noop`. 
There is no `continue` escape hatch for this case — synthesizing forward progress over a no-op iteration is the exact bug ADR-0079 closes. If the executor genuinely needs an external event, that is an external-wait blocker under rule 5 (`blocker-other`), not a `continue`. +3. `progress`: The executor made concrete progress (file edits, tests run, tools called) but the unit is not yet complete (rule 4 does not apply). → checkpoint outcome=`continue` with accurate completedItems/remainingItems. +4. `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`. +5. `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment, external wait). → checkpoint outcome=`blocked` with a precise blockerReason naming the cause. + +## Executor Transcript + +``` +{{executorTranscript}} +``` + +{{refusalMarker}} +Call `checkpoint` with all eight PDD fields and accurate completedItems / remainingItems. +Your final action MUST be the checkpoint tool call.