diff --git a/.sf/backups/db/maintenance.json b/.sf/backups/db/maintenance.json index 65d977734..6ed72f3c8 100644 --- a/.sf/backups/db/maintenance.json +++ b/.sf/backups/db/maintenance.json @@ -1,3 +1,3 @@ { - "lastFullVacuumAt": "2026-05-09T23:40:22.903Z" + "lastFullVacuumAt": "2026-05-10T05:57:58.807Z" } diff --git a/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z b/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z new file mode 100644 index 000000000..43ca86cfe Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z differ diff --git a/.sf/backups/db/sf.db.2026-05-10T07-05-24-192Z b/.sf/backups/db/sf.db.2026-05-10T07-05-24-192Z new file mode 100644 index 000000000..8be3fcec4 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T07-05-24-192Z differ diff --git a/.sf/metrics.db b/.sf/metrics.db index 52f417f8a..ef50db614 100644 Binary files a/.sf/metrics.db and b/.sf/metrics.db differ diff --git a/.sf/metrics.db-shm b/.sf/metrics.db-shm index a7c929dfa..f6111da4d 100644 Binary files a/.sf/metrics.db-shm and b/.sf/metrics.db-shm differ diff --git a/.sf/metrics.db-wal b/.sf/metrics.db-wal index 91b3c05e2..246215312 100644 Binary files a/.sf/metrics.db-wal and b/.sf/metrics.db-wal differ diff --git a/.sf/model-performance.json b/.sf/model-performance.json index ba649f294..9127cf7a6 100644 --- a/.sf/model-performance.json +++ b/.sf/model-performance.json @@ -9,6 +9,16 @@ "lastUsed": "2026-05-08T13:36:05.865Z", "successRate": 1, "total": 4 + }, + "minimax/MiniMax-M2.7": { + "successes": 1, + "failures": 0, + "timeouts": 0, + "totalTokens": 1101124, + "totalCost": 0.6158798199999999, + "lastUsed": "2026-05-10T07:19:50.702Z", + "successRate": 1, + "total": 1 } }, "plan-slice": { diff --git a/src/resources/extensions/sf/prompts/complete-slice.md b/src/resources/extensions/sf/prompts/complete-slice.md index 5d2edaf75..393b5f3be 100644 --- a/src/resources/extensions/sf/prompts/complete-slice.md +++ b/src/resources/extensions/sf/prompts/complete-slice.md 
@@ -31,6 +31,15 @@ Then: 6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `update_requirement` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.sf/REQUIREMENTS.md` directly — the engine renders it from the database. 7. Prepare the slice completion content you will pass to `complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts. 8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built. + + **Whenever the slice uses a mode other than `artifact-driven`, start `uatContent` with a parsable `## UAT Type` section** (`extractUatType` must recognise the bullet). Typical shape: + ``` + ## UAT Type + + - UAT mode: artifact-driven | browser-executable | runtime-executable | live-runtime | human-experience | mixed + - Why this mode is sufficient: + ``` + Write exactly one mode after `UAT mode:` — the `|`-separated list above only enumerates the allowed values — and complete the justification bullet with a one-sentence reason. The mode determines how the run-uat agent executes checks. For slices verified only by build commands, grep checks, and automated tests you may omit this block — `complete_slice` then injects a default `artifact-driven` section ahead of your body so parsers still classify the artifact. 9. Review task summaries for `key_decisions`. Append any significant decisions to `.sf/DECISIONS.md` if missing. 10. Review task summaries for patterns, gotchas, or non-obvious lessons learned. If any would save future agents from repeating investigation or hitting the same issues, append them to `.sf/KNOWLEDGE.md`. 
Only add entries that are genuinely useful — don't pad with obvious observations. 10b. Scan task summaries and the slice's activity log for sf-internal anomalies that the per-task agents may not have reported individually — repeated `Git stage failed`, `Verification failed … advisory`, `Safety: N unexpected file change(s)`, brittle gate predicates, etc. For any genuine sf-the-tool defect that surfaced during this slice but was NOT already filed via `report_issue`, file it now via `report_issue` with appropriate severity. This is the slice-level sweep — task-level agents file individual reports during execution; the slice-close agent catches systemic issues only visible across multiple tasks. diff --git a/src/resources/extensions/sf/tests/complete-slice-evidence.test.mjs b/src/resources/extensions/sf/tests/complete-slice-evidence.test.mjs index c81008e5e..93ee1a7b8 100644 --- a/src/resources/extensions/sf/tests/complete-slice-evidence.test.mjs +++ b/src/resources/extensions/sf/tests/complete-slice-evidence.test.mjs @@ -82,3 +82,92 @@ test("handleCompleteSlice_when_successful_records_completion_summary_evidence", assert.match(trail[0].content, /Slice finished with evidence/); assert.match(trail[0].content, /Keep slice evidence in DB/); }); + +test("handleCompleteSlice_writes_uat_with_UAT_Type_header_so_downstream_can_parse_mode", async () => { + const project = makeProject(); + const uatPath = join( + project, + ".sf", + "milestones", + "M001", + "slices", + "S01", + "S01-UAT.md", + ); + + const result = await handleCompleteSlice( + { + milestoneId: "M001", + sliceId: "S01", + sliceTitle: "Slice", + verification: "Verification passed.", + uatContent: "UAT passed.", + oneLiner: "Slice finished with evidence", + narrative: "All tasks are closed.", + keyDecisions: [], + keyFiles: [], + }, + project, + ); + + assert.equal(result.error, undefined); + assert.equal(result.uatPath, uatPath); + const { readFileSync } = await import("node:fs"); + const diskUat = 
readFileSync(uatPath, "utf8"); + assert.match( + diskUat, + /^## UAT Type$/m, + "UAT.md must contain ## UAT Type section", + ); + assert.match(diskUat, /- UAT mode: artifact-driven/m); + assert.match(diskUat, /## UAT Type\n\n- UAT mode: artifact-driven/m); + const { extractUatType } = await import("../files.js"); + assert.equal(extractUatType(diskUat), "artifact-driven"); +}); + +test("handleCompleteSlice_when_uat_declares_non_artifact_mode_does_not_duplicate_UAT_Type", async () => { + const project = makeProject(); + const uatPath = join( + project, + ".sf", + "milestones", + "M001", + "slices", + "S01", + "S01-UAT.md", + ); + const agentBody = `## UAT Type + +- UAT mode: browser-executable +- Why this mode is sufficient: Playwright validates the UX flow end-to-end + +## Steps + +1. Run the headed Playwright smoke and confirm the dashboard renders.`; + + const result = await handleCompleteSlice( + { + milestoneId: "M001", + sliceId: "S01", + sliceTitle: "Slice", + verification: "Verification passed.", + uatContent: agentBody, + oneLiner: "Slice finished with evidence", + narrative: "All tasks are closed.", + keyDecisions: [], + keyFiles: [], + }, + project, + ); + + assert.equal(result.error, undefined); + const { readFileSync } = await import("node:fs"); + const diskUat = readFileSync(uatPath, "utf8"); + assert.equal( + (diskUat.match(/^## UAT Type$/gm) ?? 
[]).length, + 1, + "handler must not prepend a second ## UAT Type when the agent already declared one", + ); + const { extractUatType } = await import("../files.js"); + assert.equal(extractUatType(diskUat), "browser-executable"); +}); diff --git a/src/resources/extensions/sf/tools/complete-slice.js b/src/resources/extensions/sf/tools/complete-slice.js index 3f523dc08..e997012d5 100644 --- a/src/resources/extensions/sf/tools/complete-slice.js +++ b/src/resources/extensions/sf/tools/complete-slice.js @@ -8,7 +8,7 @@ import { promises as fs, constants as fsConstants, mkdirSync } from "node:fs"; import { dirname, join } from "node:path"; import { atomicWriteAsync } from "../atomic-write.js"; -import { clearParseCache } from "../files.js"; +import { clearParseCache, extractUatType } from "../files.js"; import { getGatesForTurn } from "../gate-registry.js"; import { renderRoadmapCheckboxes } from "../markdown-renderer.js"; import { clearPathCache, resolveSlicePath } from "../paths.js"; @@ -341,15 +341,34 @@ ${filesMod} } /** * Render UAT markdown matching the template format. + * + * When `uatContent` already contains a parsable `## UAT Type` block (validated + * via `extractUatType`), the handler does **not** inject a second section so + * the agent-chosen mode is what downstream tools observe. Otherwise the + * handler prepends canonical `artifact-driven` defaults before `uatContent`. + * + * Purpose: preserve single-source truth for run-uat / verdict parsers without + * forcing every caller to duplicate boilerplate when `artifact-driven` applies. */ function renderUatMarkdown(params) { + const now = new Date().toISOString(); + const uatBodyRaw = + typeof params.uatContent === "string" ? params.uatContent : ""; + const injectDefaultType = extractUatType(uatBodyRaw.trim()) === undefined; + const typePreface = injectDefaultType + ? 
`## UAT Type + +- UAT mode: artifact-driven +- Why this mode is sufficient: automated build + test verification + +` + : ""; return `# ${params.sliceId}: ${params.sliceTitle} — UAT **Milestone:** ${params.milestoneId} -**Written:** ${new Date().toISOString()} +**Written:** ${now} -${params.uatContent} -`; +${typePreface}${uatBodyRaw}`; } /** * Handle the complete_slice operation end-to-end.