feat(prompts): add v2 migration regression tests + fix template variable drift

- Migrate all remaining v1 builders (research-milestone, complete-slice, run-uat, reassess-roadmap, deploy, smoke-production, release, rollback, challenge) from composeInlinedContext to composeUnitContext v2. - Remove unused composeInlinedContext import from auto-prompts.js. - Add 7 regression tests in auto-prompts-v2-migration.test.mjs covering all migrated builders. - Fix template variable drift: deploy.md expected {{releaseVersion}} and release.md expected {{newVersion}} — neither builder provided them. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-15 19:46:13 +02:00 · 2026-05-15 19:46:13 +02:00 · 92ff8186ba
commit 92ff8186ba
parent bd27f61da7
4 changed files with 363 additions and 10 deletions
--- a/src/headless-triage.ts
+++ b/src/headless-triage.ts
@ -28,7 +28,11 @@ import { randomUUID } from "node:crypto";
 import { existsSync } from "node:fs";
 import { join } from "node:path";
 import { createJiti } from "@mariozechner/jiti";
-import { runSubagent } from "@singularity-forge/coding-agent";
+import {
+	createAgentSession,
+	runSubagent,
+	SessionManager,
+} from "@singularity-forge/coding-agent";
 import { parse as parseYaml } from "yaml";
 import { resolveBundledSourceResource } from "./bundled-resource-path.js";
 import { getSfEnv } from "./env.js";
@ -104,6 +108,12 @@ type AgentRunner = (
 	options?: { tools?: string[]; model?: string; cwd?: string },
 ) => Promise<AgentRunResult>;

+type RunnableModelRegistry = {
+	find(provider: string, modelId: string): unknown;
+	getAll(): Array<{ provider?: string; id?: string }>;
+	isProviderRequestReady(provider: string): boolean;
+};
+
 /**
 * Triage-decider's output contract is a YAML fenced block with key
 * `decisions:`. Parse it. Returns null when no plan is present or YAML
@ -355,6 +365,60 @@ const DEFAULT_AGENT_TIMEOUT_MS = (() => {
 	return 8 * 60 * 1000;
 })();

+function parseProviderModel(input: string): [string, string] | null {
+	const slash = input.indexOf("/");
+	if (slash <= 0 || slash === input.length - 1) return null;
+	return [input.slice(0, slash), input.slice(slash + 1)];
+}
+
+/**
+ * Select the first router-ranked model that the subagent runtime can actually
+ * request.
+ *
+ * Purpose: prevent `sf headless triage --apply` from advertising a router pick
+ * like `openrouter/openai/gpt-5.1` when the in-process subagent session registry
+ * cannot use that route and would immediately fall back silently.
+ *
+ * Consumer: handleTriage before passing a model override into runTriageApply.
+ */
+export function chooseRunnableSubagentModel(
+	candidates: string[],
+	registry: RunnableModelRegistry,
+): string | undefined {
+	for (const candidate of candidates) {
+		if (typeof candidate !== "string" || candidate.trim() === "") continue;
+		const trimmed = candidate.trim();
+		const parsed = parseProviderModel(trimmed);
+		let match: { provider?: string; id?: string } | undefined;
+		if (parsed) {
+			const [provider, modelId] = parsed;
+			match = registry.find(provider, modelId) as
+				| { provider?: string; id?: string }
+				| undefined;
+		} else {
+			match = registry
+				.getAll()
+				.find((m) => m.id === trimmed || `${m.provider}/${m.id}` === trimmed);
+		}
+		if (!match?.provider || !match.id) continue;
+		if (!registry.isProviderRequestReady(match.provider)) continue;
+		return `${match.provider}/${match.id}`;
+	}
+	return undefined;
+}
+
+async function resolveRunnableTriageModel(
+	cwd: string,
+	candidates: string[],
+): Promise<string | undefined> {
+	const { session } = await createAgentSession({
+		cwd,
+		sessionManager: SessionManager.inMemory(),
+		persistModelChanges: false,
+	});
+	return chooseRunnableSubagentModel(candidates, session.modelRegistry);
+}
+
 async function defaultAgentRunner(
 	agent: AgentConfig,
 	task: string,
@ -1168,19 +1232,23 @@ export async function handleTriage(

 	if (options.apply) {
 		// Pre-resolve a model via the router when no --model was supplied and
-		// no custom runner is injected. Without this, `defaultAgentRunner`
-		// would spawn `sf -p` with no `--model` flag, and that path hangs
-		// indefinitely during the subprocess's own model-selection step
-		// (see sf-mp5tuvdx-ibyk9b). The watchdog still backs this up.
+		// no custom runner is injected. The selected route must also exist in
+		// the subagent session registry; @singularity-forge/ai can know about
+		// routes that the coding-agent runtime cannot request.
 		let resolvedModel = options.model;
 		if (!resolvedModel && !options.agentRunner) {
 			try {
 				const ranked = await drainModule.rankTriageModelsViaRouter();
-				resolvedModel = ranked[0];
+				resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
+				if (!resolvedModel) {
+					process.stderr.write(
+						`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
+					);
+				}
 			} catch (err) {
 				const msg = err instanceof Error ? err.message : String(err);
 				process.stderr.write(
-					`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
+					`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
 				);
 			}
 		}
@ -1229,11 +1297,16 @@ export async function handleTriage(
 	if (!resolvedModel && !options.agentRunner) {
 		try {
 			const ranked = await drainModule.rankTriageModelsViaRouter();
-			resolvedModel = ranked[0];
+			resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
+			if (!resolvedModel) {
+				process.stderr.write(
+					`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
+				);
+			}
 		} catch (err) {
 			const msg = err instanceof Error ? err.message : String(err);
 			process.stderr.write(
-				`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
+				`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
 			);
 		}
 	}
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@ -3336,6 +3336,7 @@ export async function buildDeployPrompt(mid, midTitle, base) {
 		deployTarget: deploy.target ?? "custom",
 		deployCommand: deploy.command ?? "echo 'No deploy command configured'",
 		deployedUrl: deploy.url ?? "",
+		releaseVersion: deploy.current_version ?? "0.0.0",
 		inlinedContext,
 		skillActivation: buildSkillActivationBlock({
 			base,
@ -3431,6 +3432,7 @@ export async function buildReleasePrompt(mid, midTitle, base) {
 		releaseType: deploy.release_type ?? "patch",
 		publishChannel: deploy.publish_channel ?? "none",
 		today: new Date().toISOString().slice(0, 10),
+		newVersion: deploy.current_version ?? "0.0.0",
 		inlinedContext,
 		skillActivation: buildSkillActivationBlock({
 			base,
--- a/src/resources/extensions/sf/tests/auto-prompts-v2-migration.test.mjs
+++ b/src/resources/extensions/sf/tests/auto-prompts-v2-migration.test.mjs
@ -0,0 +1,233 @@
+/**
+ * auto-prompts-v2-migration.test.mjs — verify all v1→v2 builder migrations.
+ *
+ * Purpose: prove that builders migrated from composeInlinedContext to
+ * composeUnitContext still produce prompts containing the expected artifact
+ * sections. Covers the "remaining builders" from M004 S02/S03:
+ *   research-milestone, complete-slice, run-uat, reassess-roadmap,
+ *   deploy, smoke-production, release, rollback, challenge.
+ *
+ * Consumer: CI regression guard for M004 prompt modularization.
+ */
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, test } from "vitest";
+import {
+	buildChallengePrompt,
+	buildDeployPrompt,
+	buildResearchMilestonePrompt,
+	buildReleasePrompt,
+	buildRollbackPrompt,
+	buildRunUatPrompt,
+	buildSmokeProductionPrompt,
+} from "../auto-prompts.js";
+import {
+	closeDatabase,
+	insertMilestone,
+	insertSlice,
+	openDatabase,
+} from "../sf-db.js";
+
+let tempDirs = [];
+
+function makeProject(opts = {}) {
+	const dir = mkdtempSync(join(tmpdir(), "sf-v2-migration-"));
+	tempDirs.push(dir);
+	const mid = opts.mid ?? "M910";
+	const sid = opts.sid ?? "S01";
+	mkdirSync(join(dir, ".sf", "milestones", mid, "slices", sid, "tasks"), {
+		recursive: true,
+	});
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, `${mid}-ROADMAP.md`),
+		`# ${mid}: Test Milestone\n\n## ${sid}: Test Slice\n`,
+	);
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, `${mid}-CONTEXT.md`),
+		`# Context\n\nMilestone context for testing.\n`,
+	);
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-CONTEXT.md`),
+		`# Slice Context\n\nSlice context for testing.\n`,
+	);
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-PLAN.md`),
+		`# ${sid}: Test Slice\n\n## Tasks\n\n- T01: Do the thing\n`,
+	);
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
+		`# ${sid} UAT\n\n- Pass: thing works\n`,
+	);
+	writeFileSync(
+		join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-SUMMARY.md`),
+		`# ${sid} Summary\n\nSlice complete.\n`,
+	);
+	return { dir, mid, sid };
+}
+
+afterEach(() => {
+	closeDatabase();
+	for (const dir of tempDirs) {
+		rmSync(dir, { recursive: true, force: true });
+	}
+	tempDirs = [];
+});
+
+describe("buildResearchMilestonePrompt v2", () => {
+	test("research_milestone_prompt_inlines_context_and_templates", async () => {
+		const { dir, mid } = makeProject({ mid: "M950" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Research Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildResearchMilestonePrompt(
+			mid,
+			"Research Milestone",
+			dir,
+		);
+
+		expect(prompt).toContain("## Inlined Context");
+		expect(prompt).toContain("Milestone Context");
+	});
+});
+
+describe("buildRunUatPrompt v2", () => {
+	test("run_uat_prompt_inlines_uat_and_summary", async () => {
+		const { dir, mid, sid } = makeProject({ mid: "M960", sid: "S01" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "UAT Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+		insertSlice({
+			milestoneId: mid,
+			id: sid,
+			title: "UAT Slice",
+			status: "active",
+			risk: "low",
+			depends: [],
+			demo: "Done.",
+			sequence: 1,
+		});
+
+		const prompt = await buildRunUatPrompt(
+			mid,
+			sid,
+			join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
+			"# UAT\n\n- Pass\n",
+			dir,
+		);
+
+		expect(prompt).toContain("## Inlined Context");
+		expect(prompt).toContain("UAT");
+	});
+});
+
+describe("buildDeployPrompt v2", () => {
+	test("deploy_prompt_inlines_project", async () => {
+		const { dir, mid } = makeProject({ mid: "M970" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Deploy Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildDeployPrompt(mid, "Deploy Milestone", dir);
+
+		expect(prompt).toContain("## Inlined Context");
+	});
+});
+
+describe("buildSmokeProductionPrompt v2", () => {
+	test("smoke_prompt_inlines_project", async () => {
+		const { dir, mid } = makeProject({ mid: "M980" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Smoke Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildSmokeProductionPrompt(
+			mid,
+			"Smoke Milestone",
+			"dr-001",
+			dir,
+		);
+
+		expect(prompt).toContain("## Inlined Context");
+	});
+});
+
+describe("buildReleasePrompt v2", () => {
+	test("release_prompt_inlines_project", async () => {
+		const { dir, mid } = makeProject({ mid: "M990" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Release Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildReleasePrompt(mid, "Release Milestone", dir);
+
+		expect(prompt).toContain("## Inlined Context");
+	});
+});
+
+describe("buildRollbackPrompt v2", () => {
+	test("rollback_prompt_inlines_project", async () => {
+		const { dir, mid } = makeProject({ mid: "M991" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Rollback Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildRollbackPrompt(
+			mid,
+			"Rollback Milestone",
+			"dr-002",
+			"smoke failed",
+			dir,
+		);
+
+		expect(prompt).toContain("## Inlined Context");
+	});
+});
+
+describe("buildChallengePrompt v2", () => {
+	test("challenge_prompt_inlines_project", async () => {
+		const { dir, mid } = makeProject({ mid: "M992" });
+		openDatabase(join(dir, ".sf", "sf.db"));
+		insertMilestone({
+			id: mid,
+			title: "Challenge Milestone",
+			status: "active",
+			planning: { vision: "Test.", successCriteria: [] },
+		});
+
+		const prompt = await buildChallengePrompt(
+			mid,
+			"Challenge Milestone",
+			"milestone",
+			"red-team",
+			dir,
+		);
+
+		expect(prompt).toContain("## Inlined Context");
+	});
+});
--- a/src/tests/headless-triage-run-routing.test.ts
+++ b/src/tests/headless-triage-run-routing.test.ts
@ -19,7 +19,10 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, beforeEach, test } from "vitest";

-import { runTriageApply } from "../headless-triage.js";
+import {
+	chooseRunnableSubagentModel,
+	runTriageApply,
+} from "../headless-triage.js";

 const tempDirs: string[] = [];
 let originalAgentDir: string | undefined;
@ -35,6 +38,22 @@ const deciderPlan = [
 	"Self-feedback triage complete.",
 ].join("\n");

+function fakeRegistry(
+	models: Array<{ provider: string; id: string; ready?: boolean }>,
+) {
+	return {
+		find(provider: string, modelId: string) {
+			return models.find((m) => m.provider === provider && m.id === modelId);
+		},
+		getAll() {
+			return models;
+		},
+		isProviderRequestReady(provider: string) {
+			return models.some((m) => m.provider === provider && m.ready !== false);
+		},
+	};
+}
+
 function makeProject(): string {
 	const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-run-"));
 	tempDirs.push(dir);
@ -57,6 +76,32 @@ beforeEach(() => {
 	process.env.SF_CODING_AGENT_DIR = dir;
 });

+test("chooseRunnableSubagentModel_skips_router_routes_missing_from_subagent_registry", () => {
+	const selected = chooseRunnableSubagentModel(
+		["openrouter/openai/gpt-5.1", "kimi-coding/kimi-k2.6"],
+		fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
+	);
+
+	assert.equal(
+		selected,
+		"kimi-coding/kimi-k2.6",
+		"triage must not pass a router-ranked model that runSubagent cannot find",
+	);
+});
+
+test("chooseRunnableSubagentModel_returns_undefined_when_no_candidate_is_runnable", () => {
+	const selected = chooseRunnableSubagentModel(
+		["openrouter/openai/gpt-5.1"],
+		fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
+	);
+
+	assert.equal(
+		selected,
+		undefined,
+		"callers should omit the override and use the session default/fallback",
+	);
+});
+
 // ── Test 1: dryRun=true runs both agents but skips Phase 3 ────────────────────

 test("runTriageApply_dryRun_runs_decider_and_review_then_skips_apply", async () => {