fix: enhance missing-checkpoint repair with better low-confidence guidance
- Add explicit low-confidence reconstruction guidance for no-transcript cases
- Clarify that outcome='decide' should be used when confidence < 0.98
- Fix typo in repair prompt ('what was was expected' -> 'what was expected')
- Strengthen final human-acceptance-gate guidance to prefer outcome='decide'
- Addresses solver-missing-checkpoint self-feedback entry acceptance criteria
Resolves: sf-mowykewh-3ehn5p
This commit is contained in:
parent
e80e48d122
commit
7287490cfd
2 changed files with 26 additions and 1 deletion
14
.sf/model-performance.json
Normal file
14
.sf/model-performance.json
Normal file
|
|
@@ -0,0 +1,14 @@
|
|||
{
|
||||
"research-slice": {
|
||||
"kimi-coding/kimi-k2.6": {
|
||||
"successes": 4,
|
||||
"failures": 0,
|
||||
"timeouts": 0,
|
||||
"totalTokens": 1590810,
|
||||
"totalCost": 0.22167976,
|
||||
"lastUsed": "2026-05-08T13:36:05.865Z",
|
||||
"successRate": 1,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@@ -657,7 +657,15 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
"2. List files in the milestone/slice/task directories to find what artifacts exist.",
|
||||
"3. Read any SUMMARY.md or PLAN.md files to understand what progress was made.",
|
||||
"4. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
|
||||
"5. If you cannot determine what happened, use outcome='decide' and ask the human what the checkpoint should contain.",
|
||||
"5. **Important**: If you cannot determine what happened with high confidence (≥0.98), use outcome='decide' and ask the human what the checkpoint should contain.",
|
||||
);
|
||||
lines.push(
|
||||
"",
|
||||
"**Low-confidence reconstruction guidance**:",
|
||||
"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
|
||||
"- Use outcome='decide' when you cannot verify what work was actually completed",
|
||||
"- Use outcome='decide' when there are multiple possible interpretations of progress",
|
||||
"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
|
||||
);
|
||||
} else if (repairAttempt <= 1) {
|
||||
lines.push("Do not continue implementation work in this repair turn.");
|
||||
|
|
@@ -685,6 +693,9 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
lines.push(
|
||||
'This is the final automatic repair attempt. Prefer outcome="decide" over guessing; autonomous mode will pause with your decision question for human acceptance.',
|
||||
);
|
||||
lines.push(
|
||||
'**Final guidance**: If there is any doubt about the correctness of the checkpoint, use outcome="decide" with a clear question asking the human to specify the correct state.',
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
"If no useful progress happened, use outcome=blocked and explain why.",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue