diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index acdd5b0e7..d0d767498 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -1021,19 +1021,18 @@ export const DISPATCH_RULES: DispatchRule[] = [ match: async ({ state, mid, midTitle, basePath }) => { if (state.phase !== "completing-milestone") return null; - // Safety guard (#2675): block completion when VALIDATION verdict is - // needs-remediation. The state machine treats needs-remediation as - // terminal (to prevent validate-milestone loops per #832), but - // completing-milestone should NOT proceed — remediation work is needed. + // Safety guard (#2675): completion is only automatic after a pass verdict. + // Non-pass terminal verdicts are still terminal for validation loops, but + // they are not a license to close the milestone. const validationFile = resolveMilestoneFile(basePath, mid, "VALIDATION"); if (validationFile) { const validationContent = await loadFile(validationFile); if (validationContent) { const verdict = extractVerdict(validationContent); - if (verdict === "needs-remediation") { + if (verdict && verdict !== "pass") { return { action: "stop", - reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "needs-remediation". Address the remediation findings and re-run validation, or update the verdict manually.`, + reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "${verdict}". Only verdict "pass" may enter automatic milestone completion. Address or explicitly defer the findings and re-run validation.`, level: "warning", }; } diff --git a/src/resources/extensions/sf/auto-model-selection.ts b/src/resources/extensions/sf/auto-model-selection.ts index b76e68c43..5344da079 100644 --- a/src/resources/extensions/sf/auto-model-selection.ts +++ b/src/resources/extensions/sf/auto-model-selection.ts @@ -162,15 +162,50 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{ }, ]; +function preferredBareModelIds(modelId: string): readonly string[] | undefined { + const lower = modelId.toLowerCase(); + if ( + lower === "kimi-for-coding" || + lower === "kimi-k2.6" || + lower === "kimi-k2.6:cloud" || + lower === "moonshotai/kimi-k2.6" + ) { + return [ + "kimi-for-coding", + "kimi-k2.6", + "kimi-k2.6:cloud", + "moonshotai/kimi-k2.6", + ]; + } + if ( + lower === "k2p5" || + lower === "kimi-k2.5" || + lower === "moonshotai/kimi-k2.5" + ) { + return ["k2p5", "kimi-k2.5", "moonshotai/kimi-k2.5"]; + } + return undefined; +} + function resolveFamilyPreferredBareModel< T extends { id: string; provider: string }, >(modelId: string, candidates: T[]): T | undefined { const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId)); if (!rule) return undefined; + const preferredModelIds = preferredBareModelIds(modelId); for (const provider of rule.providers) { - const match = candidates.find( + const providerCandidates = candidates.filter( (m) => m.provider.toLowerCase() === provider.toLowerCase(), ); + if (preferredModelIds) { + for (const preferredId of preferredModelIds) { + const match = providerCandidates.find( + (m) => m.id.toLowerCase() === preferredId.toLowerCase(), + ); + if (match) return match; + } + } + const match = providerCandidates[0]; if (match) return match; } return undefined; @@ -179,10 +214,25 @@ function resolveFamilyPreferredBareModel< function bareModelIdAliases(modelId: string): Set { const lower = modelId.toLowerCase(); const aliases = new Set([lower]); + if (lower === "kimi-for-coding" || lower === "kimi-k2.6") { + aliases.add("kimi-for-coding"); + aliases.add("kimi-k2.6"); + aliases.add("kimi-k2.6:cloud"); + aliases.add("moonshotai/kimi-k2.6"); + } + if (lower === "kimi-k2.6:cloud" || lower === "moonshotai/kimi-k2.6") { + aliases.add("kimi-for-coding"); + aliases.add("kimi-k2.6"); + aliases.add("kimi-k2.6:cloud"); + aliases.add("moonshotai/kimi-k2.6"); + } if (lower === "kimi-k2.5" || lower === "moonshotai/kimi-k2.5") { aliases.add("k2p5"); + aliases.add("kimi-k2.5"); + aliases.add("moonshotai/kimi-k2.5"); } if (lower === "k2p5") { + aliases.add("k2p5"); aliases.add("kimi-k2.5"); aliases.add("moonshotai/kimi-k2.5"); } diff --git a/src/resources/extensions/sf/bootstrap/system-context.ts b/src/resources/extensions/sf/bootstrap/system-context.ts index d11e5e434..e373c8173 100644 --- a/src/resources/extensions/sf/bootstrap/system-context.ts +++ b/src/resources/extensions/sf/bootstrap/system-context.ts @@ -25,6 +25,7 @@ import { parseSummary, } from "../files.js"; import { readForensicsMarker } from "../forensics.js"; +import { formatModelIdentity } from "../model-identity.js"; import { relSliceFile, relSlicePath, @@ -264,13 +265,16 @@ export async function buildBeforeAgentStartResult( : null; const worktreeBlock = buildWorktreeContextBlock(); + const modelIdentityBlock = ctx.model + ? `\n\n## Active Model Identity\n\nCurrent executor model: ${formatModelIdentity(ctx.model)}. Treat the model name as the capability identity and the provider/model route as the wire ID. Do not substitute one Kimi version for another.` + : ""; const subagentModelConfig = resolveModelWithFallbacksForUnit("subagent"); const subagentModelBlock = subagentModelConfig ? `\n\n## Subagent Model\n\nWhen spawning subagents via the \`subagent\` tool, always pass \`model: "${subagentModelConfig.primary}"\` in the tool call parameters. Never omit this — always specify it explicitly.` : ""; - const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${subagentModelBlock}`; + const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${modelIdentityBlock}${subagentModelBlock}`; stopContextTimer({ systemPromptSize: fullSystem.length, diff --git a/src/resources/extensions/sf/model-identity.ts b/src/resources/extensions/sf/model-identity.ts new file mode 100644 index 000000000..43553c538 --- /dev/null +++ b/src/resources/extensions/sf/model-identity.ts @@ -0,0 +1,48 @@ +/** + * model-identity.ts - user-facing names for provider wire model IDs. + * + * Purpose: keep provider-specific wire IDs stable while showing SF agents the + * actual model identity they should reason about. + * + * Consumer: bootstrap/system-context.ts and dashboard surfaces. + */ + +/** + * Return the human-readable model identity for a provider route. + * + * Purpose: let prompts and dashboards discuss model capability/version without + * rewriting the provider's required wire ID. + * + * Consumer: formatModelIdentity() and SF prompt context injection. + */ +export function normalizedModelName(model: { + provider?: string; + id: string; + name?: string; +}): string { + const provider = model.provider?.toLowerCase(); + const id = model.id.toLowerCase(); + if (provider === "kimi-coding" && id === "kimi-for-coding") + return "Kimi K2.6"; + if (provider === "kimi-coding" && id === "k2p5") return "Kimi K2.5"; + if (model.name?.trim()) return model.name.trim(); + return model.id; +} + +/** + * Return a display label that preserves both model identity and wire route. + * + * Purpose: avoid ambiguity for provider aliases such as Kimi Code's + * `kimi-for-coding`, which is the wire ID for Kimi K2.6. + * + * Consumer: SF system context and UI surfaces that need stable model labels. + */ +export function formatModelIdentity(model: { + provider?: string; + id: string; + name?: string; +}): string { + const route = model.provider ? `${model.provider}/${model.id}` : model.id; + const name = normalizedModelName(model); + return name === model.id ? route : `${name} (${route})`; +} diff --git a/src/resources/extensions/sf/prompts/complete-milestone.md b/src/resources/extensions/sf/prompts/complete-milestone.md index 85ca40167..02e65a0c0 100644 --- a/src/resources/extensions/sf/prompts/complete-milestone.md +++ b/src/resources/extensions/sf/prompts/complete-milestone.md @@ -16,13 +16,14 @@ All relevant context has been preloaded below — the roadmap, all slice summari Then: 1. Use the **Milestone Summary** output template from the inlined context above -2. {{skillActivation}} -3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**. -4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**. -5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**. -6. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary. -7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone. -8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof. +2. Inspect the inlined or linked **MILESTONE VALIDATION** verdict. If the verdict is not `pass` (including `needs-attention` or `needs-remediation`), record it as a **verification failure** and follow the failure path. Do NOT call `sf_complete_milestone` for a non-pass validation verdict. +3. {{skillActivation}} +4. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**. +5. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**. +6. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**. +7. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary. +8. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone. +9. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof. ### Parallel Follow-up Classification @@ -36,19 +37,20 @@ If work falls into the second bucket, do not fail the milestone just because it ### Verification Gate — STOP if verification failed -**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 10.** +**If ANY verification failure was recorded in steps 2, 4, 5, or 6, you MUST follow the failure path below. Do NOT proceed to step 11.** **Failure path** (verification failed): - Do NOT call `sf_complete_milestone` — the milestone must not be marked as complete. - Do NOT update `.sf/PROJECT.md` to reflect completion. - Do NOT update `.sf/REQUIREMENTS.md` to mark requirements as validated. +- A non-pass validation verdict is a verification failure, even if it is terminal for validation-loop purposes. - Write a clear summary of what failed and why to help the next attempt. - Say: "Milestone {{milestoneId}} verification FAILED — not complete." and stop. -**Success path** (all verifications passed — continue with steps 9–13): +**Success path** (all verifications passed — continue with steps 10–14): -9. For each requirement whose status changed in step 8, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted. -10. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. +10. For each requirement whose status changed in step 9, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted. +11. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding. **Required parameters:** - `milestoneId` (string) — Milestone ID (e.g. M001) @@ -66,11 +68,11 @@ If work falls into the second bucket, do not fail the milestone just because it **Optional parameters:** - `followUps` (string) — Follow-up items for future milestones - `deviations` (string) — Deviations from the original plan -11. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh. -12. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`. -13. Do not commit manually — the system auto-commits your changes after this unit completes. +12. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh. +13. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`. +14. Do not commit manually — the system auto-commits your changes after this unit completes. - Say: "Milestone {{milestoneId}} complete." -**Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify. +**Important:** Do NOT skip validation verdict, code change, success criteria, or definition-of-done verification (steps 2 and 4-6). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify. **File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories. diff --git a/src/resources/extensions/sf/tests/auto-model-selection.test.ts b/src/resources/extensions/sf/tests/auto-model-selection.test.ts index 311268d7b..5f4594918 100644 --- a/src/resources/extensions/sf/tests/auto-model-selection.test.ts +++ b/src/resources/extensions/sf/tests/auto-model-selection.test.ts @@ -317,10 +317,24 @@ test("resolveModelId: bare GLM IDs fall back when zai lacks that exact model", ( assert.equal(result.provider, "opencode-go"); }); -test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () => { +test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggregators", () => { const availableModels = [ - { id: "kimi-k2.5", provider: "opencode-go" }, + { id: "kimi-k2.6:cloud", provider: "ollama" }, + { id: "kimi-for-coding", provider: "kimi-coding" }, + { id: "kimi-k2.6", provider: "opencode-go" }, + ]; + + const result = resolveModelId("kimi-k2.6", availableModels, "ollama"); + assert.ok(result, "should resolve a Kimi model"); + assert.equal(result.provider, "kimi-coding"); + assert.equal(result.id, "kimi-for-coding"); +}); + +test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => { + const availableModels = [ + { id: "kimi-for-coding", provider: "kimi-coding" }, { id: "k2p5", provider: "kimi-coding" }, + { id: "kimi-k2.5", provider: "opencode-go" }, ]; const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go"); @@ -329,6 +343,18 @@ test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () => assert.equal(result.id, "k2p5"); }); +test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => { + const availableModels = [ + { id: "kimi-k2.5", provider: "opencode-go" }, + { id: "kimi-for-coding", provider: "kimi-coding" }, + ]; + + const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go"); + assert.ok(result, "should resolve a Kimi model"); + assert.equal(result.provider, "opencode-go"); + assert.equal(result.id, "kimi-k2.5"); +}); + test("resolveModelId: bare MiniMax IDs prefer minimax over minimax-cn and aggregators", () => { const availableModels = [ { id: "MiniMax-M2.7", provider: "opencode-go" }, diff --git a/src/resources/extensions/sf/tests/complete-milestone.test.ts b/src/resources/extensions/sf/tests/complete-milestone.test.ts index 32029a157..7c6cded9e 100644 --- a/src/resources/extensions/sf/tests/complete-milestone.test.ts +++ b/src/resources/extensions/sf/tests/complete-milestone.test.ts @@ -205,6 +205,32 @@ describe("complete-milestone", () => { ); }); + test("prompt treats non-pass validation verdict as a completion blocker", () => { + const prompt = loadPromptFromWorktree("complete-milestone", { + workingDirectory: "/tmp/test-project", + milestoneId: "M001", + milestoneTitle: "Validation Gate Test", + roadmapPath: ".sf/milestones/M001/M001-ROADMAP.md", + inlinedContext: "context", + }); + + assert.ok( + prompt.includes("MILESTONE VALIDATION"), + "prompt tells the closer to inspect milestone validation", + ); + assert.ok( + prompt.includes("needs-attention") && + prompt.includes("needs-remediation"), + "prompt names non-pass validation verdicts", + ); + assert.ok( + prompt.includes( + "A non-pass validation verdict is a verification failure", + ), + "failure path treats non-pass validation verdicts as verification failures", + ); + }); + test("handleCompleteMilestone rejects when verificationPassed is false", async () => { const { handleCompleteMilestone } = await import( "../tools/complete-milestone.ts" @@ -284,6 +310,49 @@ describe("complete-milestone", () => { } }); + test("handleCompleteMilestone rejects when validation verdict is needs-attention", async () => { + const { handleCompleteMilestone } = await import( + "../tools/complete-milestone.ts" + ); + const base = createFixtureBase(); + try { + writeMilestoneValidation(base, "M001", "needs-attention"); + const result = await handleCompleteMilestone( + { + milestoneId: "M001", + title: "Test Milestone", + oneLiner: "Test", + narrative: "Test narrative", + successCriteriaResults: "Results", + definitionOfDoneResults: "Done results", + requirementOutcomes: "Outcomes", + keyDecisions: [], + keyFiles: [], + lessonsLearned: [], + followUps: "", + deviations: "", + verificationPassed: true, + }, + base, + ); + + assert.ok( + "error" in result, + "returns error when validation verdict is non-pass", + ); + assert.ok( + (result as { error: string }).error.includes("needs-attention"), + "error message mentions needs-attention", + ); + assert.ok( + (result as { error: string }).error.includes('"pass"'), + "error message says pass is required", + ); + } finally { + cleanup(base); + } + }); + test("diagnoseExpectedArtifact logic for complete-milestone", async () => { // Import the path helpers used by diagnoseExpectedArtifact const { relMilestoneFile } = await import("../paths.ts"); diff --git a/src/resources/extensions/sf/tests/model-identity.test.ts b/src/resources/extensions/sf/tests/model-identity.test.ts new file mode 100644 index 000000000..da5d12b20 --- /dev/null +++ b/src/resources/extensions/sf/tests/model-identity.test.ts @@ -0,0 +1,23 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { formatModelIdentity, normalizedModelName } from "../model-identity.ts"; + +test("model identity: Kimi Code wire id displays as Kimi K2.6", () => { + const model = { + provider: "kimi-coding", + id: "kimi-for-coding", + name: "Kimi K2.6", + }; + assert.equal(normalizedModelName(model), "Kimi K2.6"); + assert.equal( + formatModelIdentity(model), + "Kimi K2.6 (kimi-coding/kimi-for-coding)", + ); +}); + +test("model identity: K2.5 remains distinct from K2.6", () => { + const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" }; + assert.equal(normalizedModelName(model), "Kimi K2.5"); + assert.equal(formatModelIdentity(model), "Kimi K2.5 (kimi-coding/k2p5)"); +}); diff --git a/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts b/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts index b12e23284..124f28882 100644 --- a/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts +++ b/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts @@ -1,10 +1,10 @@ /** - * Regression test for #2675: completing-milestone dispatch rule must - * block completion when VALIDATION verdict is "needs-remediation". + * Regression test for #2675: completing-milestone dispatch rule must block + * completion when VALIDATION verdict is not "pass". * - * Without this guard, needs-remediation + allSlicesDone causes a loop: - * complete-milestone dispatched → agent refuses (correct) → no SUMMARY - * → re-dispatch → repeat until stuck detection fires. + * Without this guard, non-pass validation + allSlicesDone can dispatch + * complete-milestone and incorrectly close a milestone that still needs + * attention or remediation. */ import assert from "node:assert/strict"; @@ -24,23 +24,25 @@ test("completing-milestone dispatch rule exists", () => { assert.ok(completingRule, "rule should exist in DISPATCH_RULES"); }); -test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => { +async function assertNonPassVerdictBlocksCompletion( + verdict: string, +): Promise { const base = mkdtempSync(join(tmpdir(), "sf-remediation-")); mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true }); try { - // Write a VALIDATION file with needs-remediation verdict + // Write a VALIDATION file with a non-pass verdict. writeFileSync( join(base, ".sf", "milestones", "M001", "M001-VALIDATION.md"), [ "---", - "verdict: needs-remediation", + `verdict: ${verdict}`, "remediation_round: 0", "---", "", "# Validation Report", "", - "3 success criteria failed. Remediation required.", + "Success criteria need follow-up.", ].join("\n"), ); @@ -64,13 +66,25 @@ test("completing-milestone blocks when VALIDATION verdict is needs-remediation ( "should be warning level (pausable)", ); assert.ok( - result!.reason.includes("needs-remediation"), - "reason should mention needs-remediation", + result!.reason.includes(verdict), + `reason should mention ${verdict}`, + ); + assert.ok( + result!.reason.includes('"pass"'), + "reason should mention pass as the only automatic completion verdict", ); } } finally { rmSync(base, { recursive: true, force: true }); } +} + +test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => { + await assertNonPassVerdictBlocksCompletion("needs-remediation"); +}); + +test("completing-milestone blocks when VALIDATION verdict is needs-attention", async () => { + await assertNonPassVerdictBlocksCompletion("needs-attention"); }); test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2675 guard)", async () => { @@ -107,8 +121,8 @@ test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2 // a different reason (e.g. missing SUMMARY files, no implementation) if (result && result.action === "stop") { assert.ok( - !result.reason.includes("needs-remediation"), - "pass verdict should NOT trigger the remediation guard", + !result.reason.includes("Only verdict"), + "pass verdict should NOT trigger the non-pass validation guard", ); } } finally { diff --git a/src/resources/extensions/sf/tools/complete-milestone.ts b/src/resources/extensions/sf/tools/complete-milestone.ts index b0fb7b986..346a2403e 100644 --- a/src/resources/extensions/sf/tools/complete-milestone.ts +++ b/src/resources/extensions/sf/tools/complete-milestone.ts @@ -6,7 +6,7 @@ * for recovery, and invalidates caches. */ -import { mkdirSync } from "node:fs"; +import { existsSync, mkdirSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { clearParseCache, saveFile } from "../files.js"; import { clearPathCache, resolveMilestonePath } from "../paths.js"; @@ -19,6 +19,7 @@ import { } from "../sf-db.js"; import { invalidateStateCache } from "../state.js"; import { isClosedStatus } from "../status-guards.js"; +import { extractVerdict } from "../verdict-parser.js"; import { appendEvent } from "../workflow-events.js"; import { logError, logWarning } from "../workflow-logger.js"; import { writeManifest } from "../workflow-manifest.js"; @@ -157,6 +158,25 @@ export async function handleCompleteMilestone( }; } + const milestoneDir = resolveMilestonePath(basePath, params.milestoneId); + const validationPath = milestoneDir + ? join(milestoneDir, `${params.milestoneId}-VALIDATION.md`) + : join( + basePath, + ".sf", + "milestones", + params.milestoneId, + `${params.milestoneId}-VALIDATION.md`, + ); + if (existsSync(validationPath)) { + const verdict = extractVerdict(readFileSync(validationPath, "utf-8")); + if (verdict && verdict !== "pass") { + return { + error: `milestone validation verdict is "${verdict}" — only "pass" may be completed automatically`, + }; + } + } + // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ─── const completedAt = new Date().toISOString(); let guardError: string | null = null; @@ -214,7 +234,6 @@ export async function handleCompleteMilestone( const summaryMd = renderMilestoneSummaryMarkdown(params); let summaryPath: string; - const milestoneDir = resolveMilestonePath(basePath, params.milestoneId); if (milestoneDir) { summaryPath = join(milestoneDir, `${params.milestoneId}-SUMMARY.md`); } else {