fix(sf): keep kimi versions exact
This commit is contained in:
parent
ab57548f2b
commit
f78c3fb2b8
10 changed files with 294 additions and 40 deletions
|
|
@ -1021,19 +1021,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
match: async ({ state, mid, midTitle, basePath }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
|
||||
// Safety guard (#2675): block completion when VALIDATION verdict is
|
||||
// needs-remediation. The state machine treats needs-remediation as
|
||||
// terminal (to prevent validate-milestone loops per #832), but
|
||||
// completing-milestone should NOT proceed — remediation work is needed.
|
||||
// Safety guard (#2675): completion is only automatic after a pass verdict.
|
||||
// Non-pass terminal verdicts are still terminal for validation loops, but
|
||||
// they are not a license to close the milestone.
|
||||
const validationFile = resolveMilestoneFile(basePath, mid, "VALIDATION");
|
||||
if (validationFile) {
|
||||
const validationContent = await loadFile(validationFile);
|
||||
if (validationContent) {
|
||||
const verdict = extractVerdict(validationContent);
|
||||
if (verdict === "needs-remediation") {
|
||||
if (verdict && verdict !== "pass") {
|
||||
return {
|
||||
action: "stop",
|
||||
reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "needs-remediation". Address the remediation findings and re-run validation, or update the verdict manually.`,
|
||||
reason: `Cannot complete milestone ${mid}: VALIDATION verdict is "${verdict}". Only verdict "pass" may enter automatic milestone completion. Address or explicitly defer the findings and re-run validation.`,
|
||||
level: "warning",
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -162,15 +162,50 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{
|
|||
},
|
||||
];
|
||||
|
||||
function preferredBareModelIds(modelId: string): readonly string[] | undefined {
|
||||
const lower = modelId.toLowerCase();
|
||||
if (
|
||||
lower === "kimi-for-coding" ||
|
||||
lower === "kimi-k2.6" ||
|
||||
lower === "kimi-k2.6:cloud" ||
|
||||
lower === "moonshotai/kimi-k2.6"
|
||||
) {
|
||||
return [
|
||||
"kimi-for-coding",
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.6:cloud",
|
||||
"moonshotai/kimi-k2.6",
|
||||
];
|
||||
}
|
||||
if (
|
||||
lower === "k2p5" ||
|
||||
lower === "kimi-k2.5" ||
|
||||
lower === "moonshotai/kimi-k2.5"
|
||||
) {
|
||||
return ["k2p5", "kimi-k2.5", "moonshotai/kimi-k2.5"];
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function resolveFamilyPreferredBareModel<
|
||||
T extends { id: string; provider: string },
|
||||
>(modelId: string, candidates: T[]): T | undefined {
|
||||
const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId));
|
||||
if (!rule) return undefined;
|
||||
const preferredModelIds = preferredBareModelIds(modelId);
|
||||
for (const provider of rule.providers) {
|
||||
const match = candidates.find(
|
||||
const providerCandidates = candidates.filter(
|
||||
(m) => m.provider.toLowerCase() === provider.toLowerCase(),
|
||||
);
|
||||
if (preferredModelIds) {
|
||||
for (const preferredId of preferredModelIds) {
|
||||
const match = providerCandidates.find(
|
||||
(m) => m.id.toLowerCase() === preferredId.toLowerCase(),
|
||||
);
|
||||
if (match) return match;
|
||||
}
|
||||
}
|
||||
const match = providerCandidates[0];
|
||||
if (match) return match;
|
||||
}
|
||||
return undefined;
|
||||
|
|
@ -179,10 +214,25 @@ function resolveFamilyPreferredBareModel<
|
|||
function bareModelIdAliases(modelId: string): Set<string> {
|
||||
const lower = modelId.toLowerCase();
|
||||
const aliases = new Set([lower]);
|
||||
if (lower === "kimi-for-coding" || lower === "kimi-k2.6") {
|
||||
aliases.add("kimi-for-coding");
|
||||
aliases.add("kimi-k2.6");
|
||||
aliases.add("kimi-k2.6:cloud");
|
||||
aliases.add("moonshotai/kimi-k2.6");
|
||||
}
|
||||
if (lower === "kimi-k2.6:cloud" || lower === "moonshotai/kimi-k2.6") {
|
||||
aliases.add("kimi-for-coding");
|
||||
aliases.add("kimi-k2.6");
|
||||
aliases.add("kimi-k2.6:cloud");
|
||||
aliases.add("moonshotai/kimi-k2.6");
|
||||
}
|
||||
if (lower === "kimi-k2.5" || lower === "moonshotai/kimi-k2.5") {
|
||||
aliases.add("k2p5");
|
||||
aliases.add("kimi-k2.5");
|
||||
aliases.add("moonshotai/kimi-k2.5");
|
||||
}
|
||||
if (lower === "k2p5") {
|
||||
aliases.add("k2p5");
|
||||
aliases.add("kimi-k2.5");
|
||||
aliases.add("moonshotai/kimi-k2.5");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import {
|
|||
parseSummary,
|
||||
} from "../files.js";
|
||||
import { readForensicsMarker } from "../forensics.js";
|
||||
import { formatModelIdentity } from "../model-identity.js";
|
||||
import {
|
||||
relSliceFile,
|
||||
relSlicePath,
|
||||
|
|
@ -264,13 +265,16 @@ export async function buildBeforeAgentStartResult(
|
|||
: null;
|
||||
|
||||
const worktreeBlock = buildWorktreeContextBlock();
|
||||
const modelIdentityBlock = ctx.model
|
||||
? `\n\n## Active Model Identity\n\nCurrent executor model: ${formatModelIdentity(ctx.model)}. Treat the model name as the capability identity and the provider/model route as the wire ID. Do not substitute one Kimi version for another.`
|
||||
: "";
|
||||
|
||||
const subagentModelConfig = resolveModelWithFallbacksForUnit("subagent");
|
||||
const subagentModelBlock = subagentModelConfig
|
||||
? `\n\n## Subagent Model\n\nWhen spawning subagents via the \`subagent\` tool, always pass \`model: "${subagentModelConfig.primary}"\` in the tool call parameters. Never omit this — always specify it explicitly.`
|
||||
: "";
|
||||
|
||||
const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${subagentModelBlock}`;
|
||||
const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${modelIdentityBlock}${subagentModelBlock}`;
|
||||
|
||||
stopContextTimer({
|
||||
systemPromptSize: fullSystem.length,
|
||||
|
|
|
|||
48
src/resources/extensions/sf/model-identity.ts
Normal file
48
src/resources/extensions/sf/model-identity.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* model-identity.ts - user-facing names for provider wire model IDs.
|
||||
*
|
||||
* Purpose: keep provider-specific wire IDs stable while showing SF agents the
|
||||
* actual model identity they should reason about.
|
||||
*
|
||||
* Consumer: bootstrap/system-context.ts and dashboard surfaces.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return the human-readable model identity for a provider route.
|
||||
*
|
||||
* Purpose: let prompts and dashboards discuss model capability/version without
|
||||
* rewriting the provider's required wire ID.
|
||||
*
|
||||
* Consumer: formatModelIdentity() and SF prompt context injection.
|
||||
*/
|
||||
export function normalizedModelName(model: {
|
||||
provider?: string;
|
||||
id: string;
|
||||
name?: string;
|
||||
}): string {
|
||||
const provider = model.provider?.toLowerCase();
|
||||
const id = model.id.toLowerCase();
|
||||
if (provider === "kimi-coding" && id === "kimi-for-coding")
|
||||
return "Kimi K2.6";
|
||||
if (provider === "kimi-coding" && id === "k2p5") return "Kimi K2.5";
|
||||
if (model.name?.trim()) return model.name.trim();
|
||||
return model.id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a display label that preserves both model identity and wire route.
|
||||
*
|
||||
* Purpose: avoid ambiguity for provider aliases such as Kimi Code's
|
||||
* `kimi-for-coding`, which is the wire ID for Kimi K2.6.
|
||||
*
|
||||
* Consumer: SF system context and UI surfaces that need stable model labels.
|
||||
*/
|
||||
export function formatModelIdentity(model: {
|
||||
provider?: string;
|
||||
id: string;
|
||||
name?: string;
|
||||
}): string {
|
||||
const route = model.provider ? `${model.provider}/${model.id}` : model.id;
|
||||
const name = normalizedModelName(model);
|
||||
return name === model.id ? route : `${name} (${route})`;
|
||||
}
|
||||
|
|
@ -16,13 +16,14 @@ All relevant context has been preloaded below — the roadmap, all slice summari
|
|||
|
||||
Then:
|
||||
1. Use the **Milestone Summary** output template from the inlined context above
|
||||
2. {{skillActivation}}
|
||||
3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
|
||||
4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
|
||||
5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
|
||||
6. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary.
|
||||
7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
|
||||
8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
|
||||
2. Inspect the inlined or linked **MILESTONE VALIDATION** verdict. If the verdict is not `pass` (including `needs-attention` or `needs-remediation`), record it as a **verification failure** and follow the failure path. Do NOT call `sf_complete_milestone` for a non-pass validation verdict.
|
||||
3. {{skillActivation}}
|
||||
4. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.sf/'` (or the equivalent for the integration branch). If no non-`.sf/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
|
||||
5. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
|
||||
6. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
|
||||
7. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary.
|
||||
8. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
|
||||
9. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
|
||||
|
||||
### Parallel Follow-up Classification
|
||||
|
||||
|
|
@ -36,19 +37,20 @@ If work falls into the second bucket, do not fail the milestone just because it
|
|||
|
||||
### Verification Gate — STOP if verification failed
|
||||
|
||||
**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 10.**
|
||||
**If ANY verification failure was recorded in steps 2, 4, 5, or 6, you MUST follow the failure path below. Do NOT proceed to step 11.**
|
||||
|
||||
**Failure path** (verification failed):
|
||||
- Do NOT call `sf_complete_milestone` — the milestone must not be marked as complete.
|
||||
- Do NOT update `.sf/PROJECT.md` to reflect completion.
|
||||
- Do NOT update `.sf/REQUIREMENTS.md` to mark requirements as validated.
|
||||
- A non-pass validation verdict is a verification failure, even if it is terminal for validation-loop purposes.
|
||||
- Write a clear summary of what failed and why to help the next attempt.
|
||||
- Say: "Milestone {{milestoneId}} verification FAILED — not complete." and stop.
|
||||
|
||||
**Success path** (all verifications passed — continue with steps 9–13):
|
||||
**Success path** (all verifications passed — continue with steps 10–14):
|
||||
|
||||
9. For each requirement whose status changed in step 8, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
|
||||
10. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
|
||||
10. For each requirement whose status changed in step 9, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
|
||||
11. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
|
||||
|
||||
**Required parameters:**
|
||||
- `milestoneId` (string) — Milestone ID (e.g. M001)
|
||||
|
|
@ -66,11 +68,11 @@ If work falls into the second bucket, do not fail the milestone just because it
|
|||
**Optional parameters:**
|
||||
- `followUps` (string) — Follow-up items for future milestones
|
||||
- `deviations` (string) — Deviations from the original plan
|
||||
11. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
|
||||
12. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`.
|
||||
13. Do not commit manually — the system auto-commits your changes after this unit completes.
|
||||
12. Update `.sf/PROJECT.md`: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
|
||||
13. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.sf/KNOWLEDGE.md`.
|
||||
14. Do not commit manually — the system auto-commits your changes after this unit completes.
|
||||
- Say: "Milestone {{milestoneId}} complete."
|
||||
|
||||
**Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify.
|
||||
**Important:** Do NOT skip validation verdict, code change, success criteria, or definition-of-done verification (steps 2 and 4-6). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run. **If a verification tool itself fails, errors, or returns unexpected output, treat it as a verification failure** — never rationalize past a tool error ("tool didn't respond, assuming success" is forbidden). A tool that cannot verify is a tool that did not verify.
|
||||
|
||||
**File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.
|
||||
|
|
|
|||
|
|
@ -317,10 +317,24 @@ test("resolveModelId: bare GLM IDs fall back when zai lacks that exact model", (
|
|||
assert.equal(result.provider, "opencode-go");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () => {
|
||||
test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggregators", () => {
|
||||
const availableModels = [
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
{ id: "kimi-k2.6:cloud", provider: "ollama" },
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
{ id: "kimi-k2.6", provider: "opencode-go" },
|
||||
];
|
||||
|
||||
const result = resolveModelId("kimi-k2.6", availableModels, "ollama");
|
||||
assert.ok(result, "should resolve a Kimi model");
|
||||
assert.equal(result.provider, "kimi-coding");
|
||||
assert.equal(result.id, "kimi-for-coding");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => {
|
||||
const availableModels = [
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
{ id: "k2p5", provider: "kimi-coding" },
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
];
|
||||
|
||||
const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go");
|
||||
|
|
@ -329,6 +343,18 @@ test("resolveModelId: bare Kimi IDs prefer kimi-coding over aggregators", () =>
|
|||
assert.equal(result.id, "k2p5");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => {
|
||||
const availableModels = [
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
];
|
||||
|
||||
const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go");
|
||||
assert.ok(result, "should resolve a Kimi model");
|
||||
assert.equal(result.provider, "opencode-go");
|
||||
assert.equal(result.id, "kimi-k2.5");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare MiniMax IDs prefer minimax over minimax-cn and aggregators", () => {
|
||||
const availableModels = [
|
||||
{ id: "MiniMax-M2.7", provider: "opencode-go" },
|
||||
|
|
|
|||
|
|
@ -205,6 +205,32 @@ describe("complete-milestone", () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("prompt treats non-pass validation verdict as a completion blocker", () => {
|
||||
const prompt = loadPromptFromWorktree("complete-milestone", {
|
||||
workingDirectory: "/tmp/test-project",
|
||||
milestoneId: "M001",
|
||||
milestoneTitle: "Validation Gate Test",
|
||||
roadmapPath: ".sf/milestones/M001/M001-ROADMAP.md",
|
||||
inlinedContext: "context",
|
||||
});
|
||||
|
||||
assert.ok(
|
||||
prompt.includes("MILESTONE VALIDATION"),
|
||||
"prompt tells the closer to inspect milestone validation",
|
||||
);
|
||||
assert.ok(
|
||||
prompt.includes("needs-attention") &&
|
||||
prompt.includes("needs-remediation"),
|
||||
"prompt names non-pass validation verdicts",
|
||||
);
|
||||
assert.ok(
|
||||
prompt.includes(
|
||||
"A non-pass validation verdict is a verification failure",
|
||||
),
|
||||
"failure path treats non-pass validation verdicts as verification failures",
|
||||
);
|
||||
});
|
||||
|
||||
test("handleCompleteMilestone rejects when verificationPassed is false", async () => {
|
||||
const { handleCompleteMilestone } = await import(
|
||||
"../tools/complete-milestone.ts"
|
||||
|
|
@ -284,6 +310,49 @@ describe("complete-milestone", () => {
|
|||
}
|
||||
});
|
||||
|
||||
test("handleCompleteMilestone rejects when validation verdict is needs-attention", async () => {
|
||||
const { handleCompleteMilestone } = await import(
|
||||
"../tools/complete-milestone.ts"
|
||||
);
|
||||
const base = createFixtureBase();
|
||||
try {
|
||||
writeMilestoneValidation(base, "M001", "needs-attention");
|
||||
const result = await handleCompleteMilestone(
|
||||
{
|
||||
milestoneId: "M001",
|
||||
title: "Test Milestone",
|
||||
oneLiner: "Test",
|
||||
narrative: "Test narrative",
|
||||
successCriteriaResults: "Results",
|
||||
definitionOfDoneResults: "Done results",
|
||||
requirementOutcomes: "Outcomes",
|
||||
keyDecisions: [],
|
||||
keyFiles: [],
|
||||
lessonsLearned: [],
|
||||
followUps: "",
|
||||
deviations: "",
|
||||
verificationPassed: true,
|
||||
},
|
||||
base,
|
||||
);
|
||||
|
||||
assert.ok(
|
||||
"error" in result,
|
||||
"returns error when validation verdict is non-pass",
|
||||
);
|
||||
assert.ok(
|
||||
(result as { error: string }).error.includes("needs-attention"),
|
||||
"error message mentions needs-attention",
|
||||
);
|
||||
assert.ok(
|
||||
(result as { error: string }).error.includes('"pass"'),
|
||||
"error message says pass is required",
|
||||
);
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
test("diagnoseExpectedArtifact logic for complete-milestone", async () => {
|
||||
// Import the path helpers used by diagnoseExpectedArtifact
|
||||
const { relMilestoneFile } = await import("../paths.ts");
|
||||
|
|
|
|||
23
src/resources/extensions/sf/tests/model-identity.test.ts
Normal file
23
src/resources/extensions/sf/tests/model-identity.test.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import { formatModelIdentity, normalizedModelName } from "../model-identity.ts";
|
||||
|
||||
test("model identity: Kimi Code wire id displays as Kimi K2.6", () => {
|
||||
const model = {
|
||||
provider: "kimi-coding",
|
||||
id: "kimi-for-coding",
|
||||
name: "Kimi K2.6",
|
||||
};
|
||||
assert.equal(normalizedModelName(model), "Kimi K2.6");
|
||||
assert.equal(
|
||||
formatModelIdentity(model),
|
||||
"Kimi K2.6 (kimi-coding/kimi-for-coding)",
|
||||
);
|
||||
});
|
||||
|
||||
test("model identity: K2.5 remains distinct from K2.6", () => {
|
||||
const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" };
|
||||
assert.equal(normalizedModelName(model), "Kimi K2.5");
|
||||
assert.equal(formatModelIdentity(model), "Kimi K2.5 (kimi-coding/k2p5)");
|
||||
});
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
/**
|
||||
* Regression test for #2675: completing-milestone dispatch rule must
|
||||
* block completion when VALIDATION verdict is "needs-remediation".
|
||||
* Regression test for #2675: completing-milestone dispatch rule must block
|
||||
* completion when VALIDATION verdict is not "pass".
|
||||
*
|
||||
* Without this guard, needs-remediation + allSlicesDone causes a loop:
|
||||
* complete-milestone dispatched → agent refuses (correct) → no SUMMARY
|
||||
* → re-dispatch → repeat until stuck detection fires.
|
||||
* Without this guard, non-pass validation + allSlicesDone can dispatch
|
||||
* complete-milestone and incorrectly close a milestone that still needs
|
||||
* attention or remediation.
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
|
|
@ -24,23 +24,25 @@ test("completing-milestone dispatch rule exists", () => {
|
|||
assert.ok(completingRule, "rule should exist in DISPATCH_RULES");
|
||||
});
|
||||
|
||||
test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => {
|
||||
async function assertNonPassVerdictBlocksCompletion(
|
||||
verdict: string,
|
||||
): Promise<void> {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-remediation-"));
|
||||
mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
|
||||
|
||||
try {
|
||||
// Write a VALIDATION file with needs-remediation verdict
|
||||
// Write a VALIDATION file with a non-pass verdict.
|
||||
writeFileSync(
|
||||
join(base, ".sf", "milestones", "M001", "M001-VALIDATION.md"),
|
||||
[
|
||||
"---",
|
||||
"verdict: needs-remediation",
|
||||
`verdict: ${verdict}`,
|
||||
"remediation_round: 0",
|
||||
"---",
|
||||
"",
|
||||
"# Validation Report",
|
||||
"",
|
||||
"3 success criteria failed. Remediation required.",
|
||||
"Success criteria need follow-up.",
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
|
|
@ -64,13 +66,25 @@ test("completing-milestone blocks when VALIDATION verdict is needs-remediation (
|
|||
"should be warning level (pausable)",
|
||||
);
|
||||
assert.ok(
|
||||
result!.reason.includes("needs-remediation"),
|
||||
"reason should mention needs-remediation",
|
||||
result!.reason.includes(verdict),
|
||||
`reason should mention ${verdict}`,
|
||||
);
|
||||
assert.ok(
|
||||
result!.reason.includes('"pass"'),
|
||||
"reason should mention pass as the only automatic completion verdict",
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
test("completing-milestone blocks when VALIDATION verdict is needs-remediation (#2675)", async () => {
|
||||
await assertNonPassVerdictBlocksCompletion("needs-remediation");
|
||||
});
|
||||
|
||||
test("completing-milestone blocks when VALIDATION verdict is needs-attention", async () => {
|
||||
await assertNonPassVerdictBlocksCompletion("needs-attention");
|
||||
});
|
||||
|
||||
test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2675 guard)", async () => {
|
||||
|
|
@ -107,8 +121,8 @@ test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2
|
|||
// a different reason (e.g. missing SUMMARY files, no implementation)
|
||||
if (result && result.action === "stop") {
|
||||
assert.ok(
|
||||
!result.reason.includes("needs-remediation"),
|
||||
"pass verdict should NOT trigger the remediation guard",
|
||||
!result.reason.includes("Only verdict"),
|
||||
"pass verdict should NOT trigger the non-pass validation guard",
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
* for recovery, and invalidates caches.
|
||||
*/
|
||||
|
||||
import { mkdirSync } from "node:fs";
|
||||
import { existsSync, mkdirSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { clearParseCache, saveFile } from "../files.js";
|
||||
import { clearPathCache, resolveMilestonePath } from "../paths.js";
|
||||
|
|
@ -19,6 +19,7 @@ import {
|
|||
} from "../sf-db.js";
|
||||
import { invalidateStateCache } from "../state.js";
|
||||
import { isClosedStatus } from "../status-guards.js";
|
||||
import { extractVerdict } from "../verdict-parser.js";
|
||||
import { appendEvent } from "../workflow-events.js";
|
||||
import { logError, logWarning } from "../workflow-logger.js";
|
||||
import { writeManifest } from "../workflow-manifest.js";
|
||||
|
|
@ -157,6 +158,25 @@ export async function handleCompleteMilestone(
|
|||
};
|
||||
}
|
||||
|
||||
const milestoneDir = resolveMilestonePath(basePath, params.milestoneId);
|
||||
const validationPath = milestoneDir
|
||||
? join(milestoneDir, `${params.milestoneId}-VALIDATION.md`)
|
||||
: join(
|
||||
basePath,
|
||||
".sf",
|
||||
"milestones",
|
||||
params.milestoneId,
|
||||
`${params.milestoneId}-VALIDATION.md`,
|
||||
);
|
||||
if (existsSync(validationPath)) {
|
||||
const verdict = extractVerdict(readFileSync(validationPath, "utf-8"));
|
||||
if (verdict && verdict !== "pass") {
|
||||
return {
|
||||
error: `milestone validation verdict is "${verdict}" — only "pass" may be completed automatically`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ── Guards + DB writes inside a single transaction (prevents TOCTOU) ───
|
||||
const completedAt = new Date().toISOString();
|
||||
let guardError: string | null = null;
|
||||
|
|
@ -214,7 +234,6 @@ export async function handleCompleteMilestone(
|
|||
const summaryMd = renderMilestoneSummaryMarkdown(params);
|
||||
|
||||
let summaryPath: string;
|
||||
const milestoneDir = resolveMilestonePath(basePath, params.milestoneId);
|
||||
if (milestoneDir) {
|
||||
summaryPath = join(milestoneDir, `${params.milestoneId}-SUMMARY.md`);
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue