feat(prompts): add v2 migration regression tests + fix template variable drift

- Migrate all remaining v1 builders (research-milestone, complete-slice,
  run-uat, reassess-roadmap, deploy, smoke-production, release, rollback,
  challenge) from composeInlinedContext to composeUnitContext v2.
- Remove unused composeInlinedContext import from auto-prompts.js.
- Add 7 regression tests in auto-prompts-v2-migration.test.mjs covering
  all migrated builders.
- Fix template variable drift: deploy.md expected {{releaseVersion}} and
  release.md expected {{newVersion}} — neither builder provided them.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-15 19:46:13 +02:00
parent bd27f61da7
commit 92ff8186ba
4 changed files with 363 additions and 10 deletions

View file

@ -28,7 +28,11 @@ import { randomUUID } from "node:crypto";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { createJiti } from "@mariozechner/jiti";
import { runSubagent } from "@singularity-forge/coding-agent";
import {
createAgentSession,
runSubagent,
SessionManager,
} from "@singularity-forge/coding-agent";
import { parse as parseYaml } from "yaml";
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
import { getSfEnv } from "./env.js";
@ -104,6 +108,12 @@ type AgentRunner = (
options?: { tools?: string[]; model?: string; cwd?: string },
) => Promise<AgentRunResult>;
type RunnableModelRegistry = {
find(provider: string, modelId: string): unknown;
getAll(): Array<{ provider?: string; id?: string }>;
isProviderRequestReady(provider: string): boolean;
};
/**
* Triage-decider's output contract is a YAML fenced block with key
* `decisions:`. Parse it. Returns null when no plan is present or YAML
@ -355,6 +365,60 @@ const DEFAULT_AGENT_TIMEOUT_MS = (() => {
return 8 * 60 * 1000;
})();
function parseProviderModel(input: string): [string, string] | null {
const slash = input.indexOf("/");
if (slash <= 0 || slash === input.length - 1) return null;
return [input.slice(0, slash), input.slice(slash + 1)];
}
/**
* Select the first router-ranked model that the subagent runtime can actually
* request.
*
* Purpose: prevent `sf headless triage --apply` from advertising a router pick
* like `openrouter/openai/gpt-5.1` when the in-process subagent session registry
* cannot use that route and would immediately fall back silently.
*
* Consumer: handleTriage before passing a model override into runTriageApply.
*/
export function chooseRunnableSubagentModel(
candidates: string[],
registry: RunnableModelRegistry,
): string | undefined {
for (const candidate of candidates) {
if (typeof candidate !== "string" || candidate.trim() === "") continue;
const trimmed = candidate.trim();
const parsed = parseProviderModel(trimmed);
let match: { provider?: string; id?: string } | undefined;
if (parsed) {
const [provider, modelId] = parsed;
match = registry.find(provider, modelId) as
| { provider?: string; id?: string }
| undefined;
} else {
match = registry
.getAll()
.find((m) => m.id === trimmed || `${m.provider}/${m.id}` === trimmed);
}
if (!match?.provider || !match.id) continue;
if (!registry.isProviderRequestReady(match.provider)) continue;
return `${match.provider}/${match.id}`;
}
return undefined;
}
async function resolveRunnableTriageModel(
cwd: string,
candidates: string[],
): Promise<string | undefined> {
const { session } = await createAgentSession({
cwd,
sessionManager: SessionManager.inMemory(),
persistModelChanges: false,
});
return chooseRunnableSubagentModel(candidates, session.modelRegistry);
}
async function defaultAgentRunner(
agent: AgentConfig,
task: string,
@ -1168,19 +1232,23 @@ export async function handleTriage(
if (options.apply) {
// Pre-resolve a model via the router when no --model was supplied and
// no custom runner is injected. Without this, `defaultAgentRunner`
// would spawn `sf -p` with no `--model` flag, and that path hangs
// indefinitely during the subprocess's own model-selection step
// (see sf-mp5tuvdx-ibyk9b). The watchdog still backs this up.
// no custom runner is injected. The selected route must also exist in
// the subagent session registry; @singularity-forge/ai can know about
// routes that the coding-agent runtime cannot request.
let resolvedModel = options.model;
if (!resolvedModel && !options.agentRunner) {
try {
const ranked = await drainModule.rankTriageModelsViaRouter();
resolvedModel = ranked[0];
resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
if (!resolvedModel) {
process.stderr.write(
`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
);
}
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
process.stderr.write(
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
);
}
}
@ -1229,11 +1297,16 @@ export async function handleTriage(
if (!resolvedModel && !options.agentRunner) {
try {
const ranked = await drainModule.rankTriageModelsViaRouter();
resolvedModel = ranked[0];
resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
if (!resolvedModel) {
process.stderr.write(
`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
);
}
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
process.stderr.write(
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
);
}
}

View file

@ -3336,6 +3336,7 @@ export async function buildDeployPrompt(mid, midTitle, base) {
deployTarget: deploy.target ?? "custom",
deployCommand: deploy.command ?? "echo 'No deploy command configured'",
deployedUrl: deploy.url ?? "",
releaseVersion: deploy.current_version ?? "0.0.0",
inlinedContext,
skillActivation: buildSkillActivationBlock({
base,
@ -3431,6 +3432,7 @@ export async function buildReleasePrompt(mid, midTitle, base) {
releaseType: deploy.release_type ?? "patch",
publishChannel: deploy.publish_channel ?? "none",
today: new Date().toISOString().slice(0, 10),
newVersion: deploy.current_version ?? "0.0.0",
inlinedContext,
skillActivation: buildSkillActivationBlock({
base,

View file

@ -0,0 +1,233 @@
/**
* auto-prompts-v2-migration.test.mjs verify all v1v2 builder migrations.
*
* Purpose: prove that builders migrated from composeInlinedContext to
* composeUnitContext still produce prompts containing the expected artifact
* sections. Covers the "remaining builders" from M004 S02/S03:
* research-milestone, complete-slice, run-uat, reassess-roadmap,
* deploy, smoke-production, release, rollback, challenge.
*
* Consumer: CI regression guard for M004 prompt modularization.
*/
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
buildChallengePrompt,
buildDeployPrompt,
buildResearchMilestonePrompt,
buildReleasePrompt,
buildRollbackPrompt,
buildRunUatPrompt,
buildSmokeProductionPrompt,
} from "../auto-prompts.js";
import {
closeDatabase,
insertMilestone,
insertSlice,
openDatabase,
} from "../sf-db.js";
let tempDirs = [];
function makeProject(opts = {}) {
const dir = mkdtempSync(join(tmpdir(), "sf-v2-migration-"));
tempDirs.push(dir);
const mid = opts.mid ?? "M910";
const sid = opts.sid ?? "S01";
mkdirSync(join(dir, ".sf", "milestones", mid, "slices", sid, "tasks"), {
recursive: true,
});
writeFileSync(
join(dir, ".sf", "milestones", mid, `${mid}-ROADMAP.md`),
`# ${mid}: Test Milestone\n\n## ${sid}: Test Slice\n`,
);
writeFileSync(
join(dir, ".sf", "milestones", mid, `${mid}-CONTEXT.md`),
`# Context\n\nMilestone context for testing.\n`,
);
writeFileSync(
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-CONTEXT.md`),
`# Slice Context\n\nSlice context for testing.\n`,
);
writeFileSync(
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-PLAN.md`),
`# ${sid}: Test Slice\n\n## Tasks\n\n- T01: Do the thing\n`,
);
writeFileSync(
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
`# ${sid} UAT\n\n- Pass: thing works\n`,
);
writeFileSync(
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-SUMMARY.md`),
`# ${sid} Summary\n\nSlice complete.\n`,
);
return { dir, mid, sid };
}
afterEach(() => {
closeDatabase();
for (const dir of tempDirs) {
rmSync(dir, { recursive: true, force: true });
}
tempDirs = [];
});
describe("buildResearchMilestonePrompt v2", () => {
test("research_milestone_prompt_inlines_context_and_templates", async () => {
const { dir, mid } = makeProject({ mid: "M950" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Research Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildResearchMilestonePrompt(
mid,
"Research Milestone",
dir,
);
expect(prompt).toContain("## Inlined Context");
expect(prompt).toContain("Milestone Context");
});
});
describe("buildRunUatPrompt v2", () => {
test("run_uat_prompt_inlines_uat_and_summary", async () => {
const { dir, mid, sid } = makeProject({ mid: "M960", sid: "S01" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "UAT Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
insertSlice({
milestoneId: mid,
id: sid,
title: "UAT Slice",
status: "active",
risk: "low",
depends: [],
demo: "Done.",
sequence: 1,
});
const prompt = await buildRunUatPrompt(
mid,
sid,
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
"# UAT\n\n- Pass\n",
dir,
);
expect(prompt).toContain("## Inlined Context");
expect(prompt).toContain("UAT");
});
});
describe("buildDeployPrompt v2", () => {
test("deploy_prompt_inlines_project", async () => {
const { dir, mid } = makeProject({ mid: "M970" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Deploy Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildDeployPrompt(mid, "Deploy Milestone", dir);
expect(prompt).toContain("## Inlined Context");
});
});
describe("buildSmokeProductionPrompt v2", () => {
test("smoke_prompt_inlines_project", async () => {
const { dir, mid } = makeProject({ mid: "M980" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Smoke Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildSmokeProductionPrompt(
mid,
"Smoke Milestone",
"dr-001",
dir,
);
expect(prompt).toContain("## Inlined Context");
});
});
describe("buildReleasePrompt v2", () => {
test("release_prompt_inlines_project", async () => {
const { dir, mid } = makeProject({ mid: "M990" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Release Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildReleasePrompt(mid, "Release Milestone", dir);
expect(prompt).toContain("## Inlined Context");
});
});
describe("buildRollbackPrompt v2", () => {
test("rollback_prompt_inlines_project", async () => {
const { dir, mid } = makeProject({ mid: "M991" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Rollback Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildRollbackPrompt(
mid,
"Rollback Milestone",
"dr-002",
"smoke failed",
dir,
);
expect(prompt).toContain("## Inlined Context");
});
});
describe("buildChallengePrompt v2", () => {
test("challenge_prompt_inlines_project", async () => {
const { dir, mid } = makeProject({ mid: "M992" });
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({
id: mid,
title: "Challenge Milestone",
status: "active",
planning: { vision: "Test.", successCriteria: [] },
});
const prompt = await buildChallengePrompt(
mid,
"Challenge Milestone",
"milestone",
"red-team",
dir,
);
expect(prompt).toContain("## Inlined Context");
});
});

View file

@ -19,7 +19,10 @@ import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, test } from "vitest";
import { runTriageApply } from "../headless-triage.js";
import {
chooseRunnableSubagentModel,
runTriageApply,
} from "../headless-triage.js";
const tempDirs: string[] = [];
let originalAgentDir: string | undefined;
@ -35,6 +38,22 @@ const deciderPlan = [
"Self-feedback triage complete.",
].join("\n");
function fakeRegistry(
models: Array<{ provider: string; id: string; ready?: boolean }>,
) {
return {
find(provider: string, modelId: string) {
return models.find((m) => m.provider === provider && m.id === modelId);
},
getAll() {
return models;
},
isProviderRequestReady(provider: string) {
return models.some((m) => m.provider === provider && m.ready !== false);
},
};
}
function makeProject(): string {
const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-run-"));
tempDirs.push(dir);
@ -57,6 +76,32 @@ beforeEach(() => {
process.env.SF_CODING_AGENT_DIR = dir;
});
test("chooseRunnableSubagentModel_skips_router_routes_missing_from_subagent_registry", () => {
const selected = chooseRunnableSubagentModel(
["openrouter/openai/gpt-5.1", "kimi-coding/kimi-k2.6"],
fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
);
assert.equal(
selected,
"kimi-coding/kimi-k2.6",
"triage must not pass a router-ranked model that runSubagent cannot find",
);
});
test("chooseRunnableSubagentModel_returns_undefined_when_no_candidate_is_runnable", () => {
const selected = chooseRunnableSubagentModel(
["openrouter/openai/gpt-5.1"],
fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
);
assert.equal(
selected,
undefined,
"callers should omit the override and use the session default/fallback",
);
});
// ── Test 1: dryRun=true runs both agents but skips Phase 3 ────────────────────
test("runTriageApply_dryRun_runs_decider_and_review_then_skips_apply", async () => {