diff --git a/.sf/model-performance.json b/.sf/model-performance.json
new file mode 100644
index 000000000..9e93a240f
--- /dev/null
+++ b/.sf/model-performance.json
@@ -0,0 +1,14 @@
+{
+  "research-slice": {
+    "kimi-coding/kimi-k2.6": {
+      "successes": 4,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 1590810,
+      "totalCost": 0.22167976,
+      "lastUsed": "2026-05-08T13:36:05.865Z",
+      "successRate": 1,
+      "total": 4
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/resources/extensions/sf/autonomous-solver.js b/src/resources/extensions/sf/autonomous-solver.js
index 052ce55d8..4c3fb2de0 100644
--- a/src/resources/extensions/sf/autonomous-solver.js
+++ b/src/resources/extensions/sf/autonomous-solver.js
@@ -657,7 +657,15 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
 			"2. List files in the milestone/slice/task directories to find what artifacts exist.",
 			"3. Read any SUMMARY.md or PLAN.md files to understand what progress was made.",
 			"4. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
-			"5. If you cannot determine what happened, use outcome='decide' and ask the human what the checkpoint should contain.",
+			"5. **Important**: If you cannot determine what happened with high confidence (≥0.98), use outcome='decide' and ask the human what the checkpoint should contain.",
+		);
+		lines.push(
+			"",
+			"**Low-confidence reconstruction guidance**:",
+			"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
+			"- Use outcome='decide' when you cannot verify what work was actually completed", 
+			"- Use outcome='decide' when there are multiple possible interpretations of progress",
+			"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
 		);
 	} else if (repairAttempt <= 1) {
 		lines.push("Do not continue implementation work in this repair turn.");
@@ -685,6 +693,9 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
 		lines.push(
 			'This is the final automatic repair attempt. Prefer outcome="decide" over guessing; autonomous mode will pause with your decision question for human acceptance.',
 		);
+		lines.push(
+			'**Final guidance**: If there is any doubt about the correctness of the checkpoint, use outcome="decide" with a clear question asking the human to specify the correct state.',
+		);
 	}
 	lines.push(
 		"If no useful progress happened, use outcome=blocked and explain why.",