fix(sf): keep kimi versions exact

2026-04-29 21:17:00 +02:00 · 2026-04-29 21:17:00 +02:00 · f78c3fb2b8
commit f78c3fb2b8
parent ab57548f2b
10 changed files with 294 additions and 40 deletions
--- a/src/resources/extensions/sf/auto-dispatch.ts
+++ b/src/resources/extensions/sf/auto-dispatch.ts
@ -1021,19 +1021,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
 		match: async ({ state, mid, midTitle, basePath }) => {
 			if (state.phase !== "completing-milestone") return null;

-			// Safety guard (#2675): block completion when VALIDATION verdict is
-			// needs-remediation. The state machine treats needs-remediation as
-			// terminal (to prevent validate-milestone loops per #832), but
-			// completing-milestone should NOT proceed — remediation work is needed.
+			// Safety guard (#2675): completion is only automatic after a pass verdict.
+			// Non-pass terminal verdicts are still terminal for validation loops, but
+			// they are not a license to close the milestone.
 			const validationFile = resolveMilestoneFile(basePath, mid, "VALIDATION");
 			if (validationFile) {
 				const validationContent = await loadFile(validationFile);
 				if (validationContent) {
 					const verdict = extractVerdict(validationContent);
-					if (verdict === "needs-remediation") {
+					if (verdict && verdict !== "pass") {
 						return {
 							action: "stop",
-							reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "needs-remediation". Address the remediation findings and re-run validation, or update the verdict manually.`,
+							reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "${verdict}". Only verdict "pass" may enter automatic milestone completion. Address or explicitly defer the findings and re-run validation.`,
 							level: "warning",
 						};
 					}
--- a/src/resources/extensions/sf/auto-model-selection.ts
+++ b/src/resources/extensions/sf/auto-model-selection.ts
@ -162,15 +162,50 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{
 	},
 ];

+function preferredBareModelIds(modelId: string): readonly string[] | undefined {
+	const lower = modelId.toLowerCase();
+	if (
+		lower === "kimi-for-coding" ||
+		lower === "kimi-k2.6" ||
+		lower === "kimi-k2.6:cloud" ||
+		lower === "moonshotai/kimi-k2.6"
+	) {
+		return [
+			"kimi-for-coding",
+			"kimi-k2.6",
+			"kimi-k2.6:cloud",
+			"moonshotai/kimi-k2.6",
+		];
+	}
+	if (
+		lower === "k2p5" ||
+		lower === "kimi-k2.5" ||
+		lower === "moonshotai/kimi-k2.5"
+	) {
+		return ["k2p5", "kimi-k2.5", "moonshotai/kimi-k2.5"];
+	}
+	return undefined;
+}
+
 function resolveFamilyPreferredBareModel<
 	T extends { id: string; provider: string },
 >(modelId: string, candidates: T[]): T | undefined {
 	const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId));
 	if (!rule) return undefined;
+	const preferredModelIds = preferredBareModelIds(modelId);
 	for (const provider of rule.providers) {
-		const match = candidates.find(
+		const providerCandidates = candidates.filter(
 			(m) => m.provider.toLowerCase() === provider.toLowerCase(),
 		);
+		if (preferredModelIds) {
+			for (const preferredId of preferredModelIds) {
+				const match = providerCandidates.find(
+					(m) => m.id.toLowerCase() === preferredId.toLowerCase(),
+				);
+				if (match) return match;
+			}
+		}
+		const match = providerCandidates[0];
 		if (match) return match;
 	}
 	return undefined;
@ -179,10 +214,25 @@ function resolveFamilyPreferredBareModel<
 function bareModelIdAliases(modelId: string): Set<string> {
 	const lower = modelId.toLowerCase();
 	const aliases = new Set([lower]);
+	if (lower === "kimi-for-coding" || lower === "kimi-k2.6") {
+		aliases.add("kimi-for-coding");
+		aliases.add("kimi-k2.6");
+		aliases.add("kimi-k2.6:cloud");
+		aliases.add("moonshotai/kimi-k2.6");
+	}
+	if (lower === "kimi-k2.6:cloud" || lower === "moonshotai/kimi-k2.6") {
+		aliases.add("kimi-for-coding");
+		aliases.add("kimi-k2.6");
+		aliases.add("kimi-k2.6:cloud");
+		aliases.add("moonshotai/kimi-k2.6");
+	}
 	if (lower === "kimi-k2.5" || lower === "moonshotai/kimi-k2.5") {
 		aliases.add("k2p5");
+		aliases.add("kimi-k2.5");
+		aliases.add("moonshotai/kimi-k2.5");
 	}
 	if (lower === "k2p5") {
+		aliases.add("k2p5");
 		aliases.add("kimi-k2.5");
 		aliases.add("moonshotai/kimi-k2.5");
 	}
--- a/src/resources/extensions/sf/bootstrap/system-context.ts
+++ b/src/resources/extensions/sf/bootstrap/system-context.ts
@ -25,6 +25,7 @@ import {
 	parseSummary,
 } from "../files.js";
 import { readForensicsMarker } from "../forensics.js";
+import { formatModelIdentity } from "../model-identity.js";
 import {
 	relSliceFile,
 	relSlicePath,
@ -264,13 +265,16 @@ export async function buildBeforeAgentStartResult(
 		: null;

 	const worktreeBlock = buildWorktreeContextBlock();
+	const modelIdentityBlock = ctx.model
+		? `\n\n## Active Model Identity\n\nCurrent executor model: ${formatModelIdentity(ctx.model)}. Treat the model name as the capability identity and the provider/model route as the wire ID. Do not substitute one Kimi version for another.`
+		: "";

 	const subagentModelConfig = resolveModelWithFallbacksForUnit("subagent");
 	const subagentModelBlock = subagentModelConfig
 		? `\n\n## Subagent Model\n\nWhen spawning subagents via the \`subagent\` tool, always pass \`model: "${subagentModelConfig.primary}"\` in the tool call parameters. Never omit this — always specify it explicitly.`
 		: "";

-	const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${subagentModelBlock}`;
+	const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${modelIdentityBlock}${subagentModelBlock}`;

 	stopContextTimer({
 		systemPromptSize: fullSystem.length,
--- a/src/resources/extensions/sf/model-identity.ts
+++ b/src/resources/extensions/sf/model-identity.ts
@ -0,0 +1,48 @@
+/**
+ * model-identity.ts - user-facing names for provider wire model IDs.
+ *
+ * Purpose: keep provider-specific wire IDs stable while showing SF agents the
+ * actual model identity they should reason about.
+ *
+ * Consumer: bootstrap/system-context.ts and dashboard surfaces.
+ */
+
+/**
+ * Return the human-readable model identity for a provider route.
+ *
+ * Purpose: let prompts and dashboards discuss model capability/version without
+ * rewriting the provider's required wire ID.
+ *
+ * Consumer: formatModelIdentity() and SF prompt context injection.
+ */
+export function normalizedModelName(model: {
+	provider?: string;
+	id: string;
+	name?: string;
+}): string {
+	const provider = model.provider?.toLowerCase();
+	const id = model.id.toLowerCase();
+	if (provider === "kimi-coding" && id === "kimi-for-coding")
+		return "Kimi K2.6";
+	if (provider === "kimi-coding" && id === "k2p5") return "Kimi K2.5";
+	if (model.name?.trim()) return model.name.trim();
+	return model.id;
+}
+
+/**
+ * Return a display label that preserves both model identity and wire route.
+ *
+ * Purpose: avoid ambiguity for provider aliases such as Kimi Code's
+ * `kimi-for-coding`, which is the wire ID for Kimi K2.6.
+ *
+ * Consumer: SF system context and UI surfaces that need stable model labels.
+ */
+export function formatModelIdentity(model: {
+	provider?: string;
+	id: string;
+	name?: string;
+}): string {
+	const route = model.provider ? `${model.provider}/${model.id}` : model.id;
+	const name = normalizedModelName(model);
+	return name === model.id ? route : `${name} (${route})`;
+}
--- a/src/resources/extensions/sf/prompts/complete-milestone.md
+++ b/src/resources/extensions/sf/prompts/complete-milestone.md
@ -16,13 +16,14 @@ All relevant context has been preloaded below — the roadmap, all slice summari

 Then:
 1. Use the **Milestone Summary** output template from the inlined context above
-2. {{skillActivation}}
-3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
-4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
-5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
-6. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary.
-7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
-8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
+2. Inspect the inlined or linked **MILESTONE VALIDATION** verdict. If the verdict is not `pass` (including `needs-attention` or `needs-remediation`), record it as a **verification failure** and follow the failure path. Do NOT call `sf_complete_milestone` for a non-pass validation verdict.
+3. {{skillActivation}}
+4. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
+5. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
+6. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
+7. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary.
+8. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
+9. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.

 ### Parallel Follow-up Classification

@ -36,19 +37,20 @@ If work falls into the second bucket, do not fail the milestone just because it

 ### Verification Gate — STOP if verification failed

-**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 10.**
+**If ANY verification failure was recorded in steps 2, 4, 5, or 6, you MUST follow the failure path below. Do NOT proceed to step 11.**

 **Failure path** (verification failed):
 - Do NOT call `sf_complete_milestone` — the milestone must not be marked as complete.
 - Do NOT update `.sf/PROJECT.md` to reflect completion.
 - Do NOT update `.sf/REQUIREMENTS.md` to mark requirements as validated.
+- A non-pass validation verdict is a verification failure, even if it is terminal for validation-loop purposes.
 - Write a clear summary of what failed and why to help the next attempt.
 - Say: "Milestone {{milestoneId}} verification FAILED — not complete." and stop.

-**Success path** (all verifications passed — continue with steps 9–13):
+**Success path** (all verifications passed — continue with steps 10–14):

-9. For each requirement whose status changed in step 8, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
-10. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
+10. For each requirement whose status changed in step 9, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
+11. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.

   **Required parameters:**
   - `milestoneId` (string) — Milestone ID (e.g. M001)
@ -66,11 +68,11 @@ If work falls into the second bucket, do not fail the milestone just because it
   **Optional parameters:**
   - `followUps` (string) — Follow-up items for future milestones
   - `deviations` (string) — Deviations from the original plan
-11. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
-12. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`.
-13. Do not commit manually — the system auto-commits your changes after this unit completes.
+12. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
+13. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`.
+14. Do not commit manually — the system auto-commits your changes after this unit completes.
 - Say: "Milestone {{milestoneId}} complete."

-**Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify.
+**Important:** Do NOT skip validation verdict, code change, success criteria, or definition-of-done verification (steps 2 and 4-6). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify.

 **File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.
--- a/src/resources/extensions/sf/tests/auto-model-selection.test.ts
+++ b/src/resources/extensions/sf/tests/auto-model-selection.test.ts
@ -317,10 +317,24 @@ test("resolveModelId: bare GLM IDs fall back when zai lacks that exact model", (
 	assert.equal(result.provider, "opencode-go");
 });

-test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () => {
+test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggregators", () => {
 	const availableModels = [
-		{ id: "kimi-k2.5", provider: "opencode-go" },
+		{ id: "kimi-k2.6:cloud", provider: "ollama" },
+		{ id: "kimi-for-coding", provider: "kimi-coding" },
+		{ id: "kimi-k2.6", provider: "opencode-go" },
+	];
+
+	const result = resolveModelId("kimi-k2.6", availableModels, "ollama");
+	assert.ok(result, "should resolve a Kimi model");
+	assert.equal(result.provider, "kimi-coding");
+	assert.equal(result.id, "kimi-for-coding");
+});
+
+test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => {
+	const availableModels = [
+		{ id: "kimi-for-coding", provider: "kimi-coding" },
 		{ id: "k2p5", provider: "kimi-coding" },
+		{ id: "kimi-k2.5", provider: "opencode-go" },
 	];

 	const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go");
@ -329,6 +343,18 @@ test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () =>
 	assert.equal(result.id, "k2p5");
 });

+test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => {
+	const availableModels = [
+		{ id: "kimi-k2.5", provider: "opencode-go" },
+		{ id: "kimi-for-coding", provider: "kimi-coding" },
+	];
+
+	const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go");
+	assert.ok(result, "should resolve a Kimi model");
+	assert.equal(result.provider, "opencode-go");
+	assert.equal(result.id, "kimi-k2.5");
+});
+
 test("resolveModelId: bare MiniMax IDs prefer minimax over minimax-cn and aggregators", () => {
 	const availableModels = [
 		{ id: "MiniMax-M2.7", provider: "opencode-go" },
--- a/src/resources/extensions/sf/tests/complete-milestone.test.ts
+++ b/src/resources/extensions/sf/tests/complete-milestone.test.ts
@ -205,6 +205,32 @@ describe("complete-milestone", () => {
 		);
 	});

+	test("prompt treats non-pass validation verdict as a completion blocker", () => {
+		const prompt = loadPromptFromWorktree("complete-milestone", {
+			workingDirectory: "/tmp/test-project",
+			milestoneId: "M001",
+			milestoneTitle: "Validation Gate Test",
+			roadmapPath: ".sf/milestones/M001/M001-ROADMAP.md",
+			inlinedContext: "context",
+		});
+
+		assert.ok(
+			prompt.includes("MILESTONE VALIDATION"),
+			"prompt tells the closer to inspect milestone validation",
+		);
+		assert.ok(
+			prompt.includes("needs-attention") &&
+				prompt.includes("needs-remediation"),
+			"prompt names non-pass validation verdicts",
+		);
+		assert.ok(
+			prompt.includes(
+				"A non-pass validation verdict is a verification failure",
+			),
+			"failure path treats non-pass validation verdicts as verification failures",
+		);
+	});
+
 	test("handleCompleteMilestone rejects when verificationPassed is false", async () => {
 		const { handleCompleteMilestone } = await import(
 			"../tools/complete-milestone.ts"
@ -284,6 +310,49 @@ describe("complete-milestone", () => {
 		}
 	});

+	test("handleCompleteMilestone rejects when validation verdict is needs-attention", async () => {
+		const { handleCompleteMilestone } = await import(
+			"../tools/complete-milestone.ts"
+		);
+		const base = createFixtureBase();
+		try {
+			writeMilestoneValidation(base, "M001", "needs-attention");
+			const result = await handleCompleteMilestone(
+				{
+					milestoneId: "M001",
+					title: "Test Milestone",
+					oneLiner: "Test",
+					narrative: "Test narrative",
+					successCriteriaResults: "Results",
+					definitionOfDoneResults: "Done results",
+					requirementOutcomes: "Outcomes",
+					keyDecisions: [],
+					keyFiles: [],
+					lessonsLearned: [],
+					followUps: "",
+					deviations: "",
+					verificationPassed: true,
+				},
+				base,
+			);
+
+			assert.ok(
+				"error" in result,
+				"returns error when validation verdict is non-pass",
+			);
+			assert.ok(
+				(result as { error: string }).error.includes("needs-attention"),
+				"error message mentions needs-attention",
+			);
+			assert.ok(
+				(result as { error: string }).error.includes('"pass"'),
+				"error message says pass is required",
+			);
+		} finally {
+			cleanup(base);
+		}
+	});
+
 	test("diagnoseExpectedArtifact logic for complete-milestone", async () => {
 		// Import the path helpers used by diagnoseExpectedArtifact
 		const { relMilestoneFile } = await import("../paths.ts");
--- a/src/resources/extensions/sf/tests/model-identity.test.ts
+++ b/src/resources/extensions/sf/tests/model-identity.test.ts
@ -0,0 +1,23 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { formatModelIdentity, normalizedModelName } from "../model-identity.ts";
+
+test("model identity: Kimi Code wire id displays as Kimi K2.6", () => {
+	const model = {
+		provider: "kimi-coding",
+		id: "kimi-for-coding",
+		name: "Kimi K2.6",
+	};
+	assert.equal(normalizedModelName(model), "Kimi K2.6");
+	assert.equal(
+		formatModelIdentity(model),
+		"Kimi K2.6 (kimi-coding/kimi-for-coding)",
+	);
+});
+
+test("model identity: K2.5 remains distinct from K2.6", () => {
+	const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" };
+	assert.equal(normalizedModelName(model), "Kimi K2.5");
+	assert.equal(formatModelIdentity(model), "Kimi K2.5 (kimi-coding/k2p5)");
+});
--- a/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts
+++ b/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts
@ -1,10 +1,10 @@
 /**
- * Regression test for #2675: completing-milestone dispatch rule must
- * block completion when VALIDATION verdict is "needs-remediation".
+ * Regression test for #2675: completing-milestone dispatch rule must block
+ * completion when VALIDATION verdict is not "pass".
 *
- * Without this guard, needs-remediation + allSlicesDone causes a loop:
- * complete-milestone dispatched → agent refuses (correct) → no SUMMARY
- * → re-dispatch → repeat until stuck detection fires.
+ * Without this guard, non-pass validation + allSlicesDone can dispatch
+ * complete-milestone and incorrectly close a milestone that still needs
+ * attention or remediation.
 */

 import assert from "node:assert/strict";
@ -24,23 +24,25 @@ test("completing-milestone dispatch rule exists", () => {
 	assert.ok(completingRule, "rule should exist in DISPATCH_RULES");
 });

-test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => {
+async function assertNonPassVerdictBlocksCompletion(
+	verdict: string,
+): Promise<void> {
 	const base = mkdtempSync(join(tmpdir(), "sf-remediation-"));
 	mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });

 	try {
-		// Write a VALIDATION file with needs-remediation verdict
+		// Write a VALIDATION file with a non-pass verdict.
 		writeFileSync(
 			join(base, ".sf", "milestones", "M001", "M001-VALIDATION.md"),
 			[
 				"---",
-				"verdict: needs-remediation",
+				`verdict: ${verdict}`,
 				"remediation_round: 0",
 				"---",
 				"",
 				"# Validation Report",
 				"",
-				"3 success criteria failed. Remediation required.",
+				"Success criteria need follow-up.",
 			].join("\n"),
 		);

@ -64,13 +66,25 @@ test("completing-milestone blocks when VALIDATION verdict is needs-remediation (
 				"should be warning level (pausable)",
 			);
 			assert.ok(
-				result!.reason.includes("needs-remediation"),
-				"reason should mention needs-remediation",
+				result!.reason.includes(verdict),
+				`reason should mention ${verdict}`,
+			);
+			assert.ok(
+				result!.reason.includes('"pass"'),
+				"reason should mention pass as the only automatic completion verdict",
 			);
 		}
 	} finally {
 		rmSync(base, { recursive: true, force: true });
 	}
+}
+
+test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => {
+	await assertNonPassVerdictBlocksCompletion("needs-remediation");
+});
+
+test("completing-milestone blocks when VALIDATION verdict is needs-attention", async () => {
+	await assertNonPassVerdictBlocksCompletion("needs-attention");
 });

 test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2675 guard)", async () => {
@ -107,8 +121,8 @@ test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2
 		// a different reason (e.g. missing SUMMARY files, no implementation)
 		if (result && result.action === "stop") {
 			assert.ok(
-				!result.reason.includes("needs-remediation"),
-				"pass verdict should NOT trigger the remediation guard",
+				!result.reason.includes("Only verdict"),
+				"pass verdict should NOT trigger the non-pass validation guard",
 			);
 		}
 	} finally {
--- a/src/resources/extensions/sf/tools/complete-milestone.ts
+++ b/src/resources/extensions/sf/tools/complete-milestone.ts
@ -6,7 +6,7 @@
 * for recovery, and invalidates caches.
 */

-import { mkdirSync } from "node:fs";
+import { existsSync, mkdirSync, readFileSync } from "node:fs";
 import { join } from "node:path";
 import { clearParseCache, saveFile } from "../files.js";
 import { clearPathCache, resolveMilestonePath } from "../paths.js";
@ -19,6 +19,7 @@ import {
 } from "../sf-db.js";
 import { invalidateStateCache } from "../state.js";
 import { isClosedStatus } from "../status-guards.js";
+import { extractVerdict } from "../verdict-parser.js";
 import { appendEvent } from "../workflow-events.js";
 import { logError, logWarning } from "../workflow-logger.js";
 import { writeManifest } from "../workflow-manifest.js";
@ -157,6 +158,25 @@ export async function handleCompleteMilestone(
 		};
 	}

+	const milestoneDir = resolveMilestonePath(basePath, params.milestoneId);
+	const validationPath = milestoneDir
+		? join(milestoneDir, `${params.milestoneId}-VALIDATION.md`)
+		: join(
+				basePath,
+				".sf",
+				"milestones",
+				params.milestoneId,
+				`${params.milestoneId}-VALIDATION.md`,
+			);
+	if (existsSync(validationPath)) {
+		const verdict = extractVerdict(readFileSync(validationPath, "utf-8"));
+		if (verdict && verdict !== "pass") {
+			return {
+				error: `milestone validation verdict is "${verdict}" — only "pass" may be completed automatically`,
+			};
+		}
+	}
+
 	// ── Guards + DB writes inside a single transaction (prevents TOCTOU) ───
 	const completedAt = new Date().toISOString();
 	let guardError: string | null = null;
@ -214,7 +234,6 @@ export async function handleCompleteMilestone(
 	const summaryMd = renderMilestoneSummaryMarkdown(params);

 	let summaryPath: string;
-	const milestoneDir = resolveMilestonePath(basePath, params.milestoneId);
 	if (milestoneDir) {
 		summaryPath = join(milestoneDir, `${params.milestoneId}-SUMMARY.md`);
 	} else {