feat(prompts): add v2 migration regression tests + fix template variable drift
- Migrate all remaining v1 builders (research-milestone, complete-slice,
run-uat, reassess-roadmap, deploy, smoke-production, release, rollback,
challenge) from composeInlinedContext to composeUnitContext v2.
- Remove unused composeInlinedContext import from auto-prompts.js.
- Add 7 regression tests in auto-prompts-v2-migration.test.mjs covering
all migrated builders.
- Fix template variable drift: deploy.md expected {{releaseVersion}} and
release.md expected {{newVersion}} — neither builder provided them.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
bd27f61da7
commit
92ff8186ba
4 changed files with 363 additions and 10 deletions
|
|
@ -28,7 +28,11 @@ import { randomUUID } from "node:crypto";
|
|||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { createJiti } from "@mariozechner/jiti";
|
||||
import { runSubagent } from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
createAgentSession,
|
||||
runSubagent,
|
||||
SessionManager,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
import { parse as parseYaml } from "yaml";
|
||||
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
|
||||
import { getSfEnv } from "./env.js";
|
||||
|
|
@ -104,6 +108,12 @@ type AgentRunner = (
|
|||
options?: { tools?: string[]; model?: string; cwd?: string },
|
||||
) => Promise<AgentRunResult>;
|
||||
|
||||
type RunnableModelRegistry = {
|
||||
find(provider: string, modelId: string): unknown;
|
||||
getAll(): Array<{ provider?: string; id?: string }>;
|
||||
isProviderRequestReady(provider: string): boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
* Triage-decider's output contract is a YAML fenced block with key
|
||||
* `decisions:`. Parse it. Returns null when no plan is present or YAML
|
||||
|
|
@ -355,6 +365,60 @@ const DEFAULT_AGENT_TIMEOUT_MS = (() => {
|
|||
return 8 * 60 * 1000;
|
||||
})();
|
||||
|
||||
function parseProviderModel(input: string): [string, string] | null {
|
||||
const slash = input.indexOf("/");
|
||||
if (slash <= 0 || slash === input.length - 1) return null;
|
||||
return [input.slice(0, slash), input.slice(slash + 1)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the first router-ranked model that the subagent runtime can actually
|
||||
* request.
|
||||
*
|
||||
* Purpose: prevent `sf headless triage --apply` from advertising a router pick
|
||||
* like `openrouter/openai/gpt-5.1` when the in-process subagent session registry
|
||||
* cannot use that route and would immediately fall back silently.
|
||||
*
|
||||
* Consumer: handleTriage before passing a model override into runTriageApply.
|
||||
*/
|
||||
export function chooseRunnableSubagentModel(
|
||||
candidates: string[],
|
||||
registry: RunnableModelRegistry,
|
||||
): string | undefined {
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate !== "string" || candidate.trim() === "") continue;
|
||||
const trimmed = candidate.trim();
|
||||
const parsed = parseProviderModel(trimmed);
|
||||
let match: { provider?: string; id?: string } | undefined;
|
||||
if (parsed) {
|
||||
const [provider, modelId] = parsed;
|
||||
match = registry.find(provider, modelId) as
|
||||
| { provider?: string; id?: string }
|
||||
| undefined;
|
||||
} else {
|
||||
match = registry
|
||||
.getAll()
|
||||
.find((m) => m.id === trimmed || `${m.provider}/${m.id}` === trimmed);
|
||||
}
|
||||
if (!match?.provider || !match.id) continue;
|
||||
if (!registry.isProviderRequestReady(match.provider)) continue;
|
||||
return `${match.provider}/${match.id}`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function resolveRunnableTriageModel(
|
||||
cwd: string,
|
||||
candidates: string[],
|
||||
): Promise<string | undefined> {
|
||||
const { session } = await createAgentSession({
|
||||
cwd,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
persistModelChanges: false,
|
||||
});
|
||||
return chooseRunnableSubagentModel(candidates, session.modelRegistry);
|
||||
}
|
||||
|
||||
async function defaultAgentRunner(
|
||||
agent: AgentConfig,
|
||||
task: string,
|
||||
|
|
@ -1168,19 +1232,23 @@ export async function handleTriage(
|
|||
|
||||
if (options.apply) {
|
||||
// Pre-resolve a model via the router when no --model was supplied and
|
||||
// no custom runner is injected. Without this, `defaultAgentRunner`
|
||||
// would spawn `sf -p` with no `--model` flag, and that path hangs
|
||||
// indefinitely during the subprocess's own model-selection step
|
||||
// (see sf-mp5tuvdx-ibyk9b). The watchdog still backs this up.
|
||||
// no custom runner is injected. The selected route must also exist in
|
||||
// the subagent session registry; @singularity-forge/ai can know about
|
||||
// routes that the coding-agent runtime cannot request.
|
||||
let resolvedModel = options.model;
|
||||
if (!resolvedModel && !options.agentRunner) {
|
||||
try {
|
||||
const ranked = await drainModule.rankTriageModelsViaRouter();
|
||||
resolvedModel = ranked[0];
|
||||
resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
|
||||
if (!resolvedModel) {
|
||||
process.stderr.write(
|
||||
`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
process.stderr.write(
|
||||
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
|
||||
`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1229,11 +1297,16 @@ export async function handleTriage(
|
|||
if (!resolvedModel && !options.agentRunner) {
|
||||
try {
|
||||
const ranked = await drainModule.rankTriageModelsViaRouter();
|
||||
resolvedModel = ranked[0];
|
||||
resolvedModel = await resolveRunnableTriageModel(cwd, ranked);
|
||||
if (!resolvedModel) {
|
||||
process.stderr.write(
|
||||
`[triage] router candidates were not runnable by subagent registry; using session default model\n`,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
process.stderr.write(
|
||||
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
|
||||
`[triage] router pre-resolution failed; falling back to session default: ${msg}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3336,6 +3336,7 @@ export async function buildDeployPrompt(mid, midTitle, base) {
|
|||
deployTarget: deploy.target ?? "custom",
|
||||
deployCommand: deploy.command ?? "echo 'No deploy command configured'",
|
||||
deployedUrl: deploy.url ?? "",
|
||||
releaseVersion: deploy.current_version ?? "0.0.0",
|
||||
inlinedContext,
|
||||
skillActivation: buildSkillActivationBlock({
|
||||
base,
|
||||
|
|
@ -3431,6 +3432,7 @@ export async function buildReleasePrompt(mid, midTitle, base) {
|
|||
releaseType: deploy.release_type ?? "patch",
|
||||
publishChannel: deploy.publish_channel ?? "none",
|
||||
today: new Date().toISOString().slice(0, 10),
|
||||
newVersion: deploy.current_version ?? "0.0.0",
|
||||
inlinedContext,
|
||||
skillActivation: buildSkillActivationBlock({
|
||||
base,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,233 @@
|
|||
/**
|
||||
* auto-prompts-v2-migration.test.mjs — verify all v1→v2 builder migrations.
|
||||
*
|
||||
* Purpose: prove that builders migrated from composeInlinedContext to
|
||||
* composeUnitContext still produce prompts containing the expected artifact
|
||||
* sections. Covers the "remaining builders" from M004 S02/S03:
|
||||
* research-milestone, complete-slice, run-uat, reassess-roadmap,
|
||||
* deploy, smoke-production, release, rollback, challenge.
|
||||
*
|
||||
* Consumer: CI regression guard for M004 prompt modularization.
|
||||
*/
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, test } from "vitest";
|
||||
import {
|
||||
buildChallengePrompt,
|
||||
buildDeployPrompt,
|
||||
buildResearchMilestonePrompt,
|
||||
buildReleasePrompt,
|
||||
buildRollbackPrompt,
|
||||
buildRunUatPrompt,
|
||||
buildSmokeProductionPrompt,
|
||||
} from "../auto-prompts.js";
|
||||
import {
|
||||
closeDatabase,
|
||||
insertMilestone,
|
||||
insertSlice,
|
||||
openDatabase,
|
||||
} from "../sf-db.js";
|
||||
|
||||
let tempDirs = [];
|
||||
|
||||
function makeProject(opts = {}) {
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-v2-migration-"));
|
||||
tempDirs.push(dir);
|
||||
const mid = opts.mid ?? "M910";
|
||||
const sid = opts.sid ?? "S01";
|
||||
mkdirSync(join(dir, ".sf", "milestones", mid, "slices", sid, "tasks"), {
|
||||
recursive: true,
|
||||
});
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, `${mid}-ROADMAP.md`),
|
||||
`# ${mid}: Test Milestone\n\n## ${sid}: Test Slice\n`,
|
||||
);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, `${mid}-CONTEXT.md`),
|
||||
`# Context\n\nMilestone context for testing.\n`,
|
||||
);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-CONTEXT.md`),
|
||||
`# Slice Context\n\nSlice context for testing.\n`,
|
||||
);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-PLAN.md`),
|
||||
`# ${sid}: Test Slice\n\n## Tasks\n\n- T01: Do the thing\n`,
|
||||
);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
|
||||
`# ${sid} UAT\n\n- Pass: thing works\n`,
|
||||
);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-SUMMARY.md`),
|
||||
`# ${sid} Summary\n\nSlice complete.\n`,
|
||||
);
|
||||
return { dir, mid, sid };
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
closeDatabase();
|
||||
for (const dir of tempDirs) {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
tempDirs = [];
|
||||
});
|
||||
|
||||
describe("buildResearchMilestonePrompt v2", () => {
|
||||
test("research_milestone_prompt_inlines_context_and_templates", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M950" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Research Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildResearchMilestonePrompt(
|
||||
mid,
|
||||
"Research Milestone",
|
||||
dir,
|
||||
);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
expect(prompt).toContain("Milestone Context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildRunUatPrompt v2", () => {
|
||||
test("run_uat_prompt_inlines_uat_and_summary", async () => {
|
||||
const { dir, mid, sid } = makeProject({ mid: "M960", sid: "S01" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "UAT Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
insertSlice({
|
||||
milestoneId: mid,
|
||||
id: sid,
|
||||
title: "UAT Slice",
|
||||
status: "active",
|
||||
risk: "low",
|
||||
depends: [],
|
||||
demo: "Done.",
|
||||
sequence: 1,
|
||||
});
|
||||
|
||||
const prompt = await buildRunUatPrompt(
|
||||
mid,
|
||||
sid,
|
||||
join(dir, ".sf", "milestones", mid, "slices", sid, `${sid}-UAT.md`),
|
||||
"# UAT\n\n- Pass\n",
|
||||
dir,
|
||||
);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
expect(prompt).toContain("UAT");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildDeployPrompt v2", () => {
|
||||
test("deploy_prompt_inlines_project", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M970" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Deploy Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildDeployPrompt(mid, "Deploy Milestone", dir);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildSmokeProductionPrompt v2", () => {
|
||||
test("smoke_prompt_inlines_project", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M980" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Smoke Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildSmokeProductionPrompt(
|
||||
mid,
|
||||
"Smoke Milestone",
|
||||
"dr-001",
|
||||
dir,
|
||||
);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildReleasePrompt v2", () => {
|
||||
test("release_prompt_inlines_project", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M990" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Release Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildReleasePrompt(mid, "Release Milestone", dir);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildRollbackPrompt v2", () => {
|
||||
test("rollback_prompt_inlines_project", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M991" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Rollback Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildRollbackPrompt(
|
||||
mid,
|
||||
"Rollback Milestone",
|
||||
"dr-002",
|
||||
"smoke failed",
|
||||
dir,
|
||||
);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChallengePrompt v2", () => {
|
||||
test("challenge_prompt_inlines_project", async () => {
|
||||
const { dir, mid } = makeProject({ mid: "M992" });
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
insertMilestone({
|
||||
id: mid,
|
||||
title: "Challenge Milestone",
|
||||
status: "active",
|
||||
planning: { vision: "Test.", successCriteria: [] },
|
||||
});
|
||||
|
||||
const prompt = await buildChallengePrompt(
|
||||
mid,
|
||||
"Challenge Milestone",
|
||||
"milestone",
|
||||
"red-team",
|
||||
dir,
|
||||
);
|
||||
|
||||
expect(prompt).toContain("## Inlined Context");
|
||||
});
|
||||
});
|
||||
|
|
@ -19,7 +19,10 @@ import { tmpdir } from "node:os";
|
|||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, test } from "vitest";
|
||||
|
||||
import { runTriageApply } from "../headless-triage.js";
|
||||
import {
|
||||
chooseRunnableSubagentModel,
|
||||
runTriageApply,
|
||||
} from "../headless-triage.js";
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
let originalAgentDir: string | undefined;
|
||||
|
|
@ -35,6 +38,22 @@ const deciderPlan = [
|
|||
"Self-feedback triage complete.",
|
||||
].join("\n");
|
||||
|
||||
function fakeRegistry(
|
||||
models: Array<{ provider: string; id: string; ready?: boolean }>,
|
||||
) {
|
||||
return {
|
||||
find(provider: string, modelId: string) {
|
||||
return models.find((m) => m.provider === provider && m.id === modelId);
|
||||
},
|
||||
getAll() {
|
||||
return models;
|
||||
},
|
||||
isProviderRequestReady(provider: string) {
|
||||
return models.some((m) => m.provider === provider && m.ready !== false);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function makeProject(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-run-"));
|
||||
tempDirs.push(dir);
|
||||
|
|
@ -57,6 +76,32 @@ beforeEach(() => {
|
|||
process.env.SF_CODING_AGENT_DIR = dir;
|
||||
});
|
||||
|
||||
test("chooseRunnableSubagentModel_skips_router_routes_missing_from_subagent_registry", () => {
|
||||
const selected = chooseRunnableSubagentModel(
|
||||
["openrouter/openai/gpt-5.1", "kimi-coding/kimi-k2.6"],
|
||||
fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
selected,
|
||||
"kimi-coding/kimi-k2.6",
|
||||
"triage must not pass a router-ranked model that runSubagent cannot find",
|
||||
);
|
||||
});
|
||||
|
||||
test("chooseRunnableSubagentModel_returns_undefined_when_no_candidate_is_runnable", () => {
|
||||
const selected = chooseRunnableSubagentModel(
|
||||
["openrouter/openai/gpt-5.1"],
|
||||
fakeRegistry([{ provider: "kimi-coding", id: "kimi-k2.6" }]),
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
selected,
|
||||
undefined,
|
||||
"callers should omit the override and use the session default/fallback",
|
||||
);
|
||||
});
|
||||
|
||||
// ── Test 1: dryRun=true runs both agents but skips Phase 3 ────────────────────
|
||||
|
||||
test("runTriageApply_dryRun_runs_decider_and_review_then_skips_apply", async () => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue