port gsd2: reassess-roadmap opt-in (ADR-003 §4) + prefer toolDefinition.label

reassess-roadmap: flip default from true → false. Most reassess units
conclude "roadmap is fine", burning a session for no change; the
plan-slice prompt now carries a JIT reassessment preamble at zero
extra cost. (#4778)

tool-execution: always prefer toolDefinition.label when non-empty,
even when label === name — allows tools to display their canonical
name explicitly. (#4758)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-25 08:33:50 +02:00
parent d4cdcb582d
commit 2911d3b93d
2 changed files with 47 additions and 28 deletions

View file

@ -72,7 +72,7 @@ function parseMcpToolName(name: string): { server: string; tool: string } | null
* prefix and converts snake_case to Title Case.
*/
function prettifyToolName(name: string, label?: string): string {
if (label && label.trim().length > 0 && label !== name) return label;
if (label && label.trim().length > 0) return label;
const stripped = name.replace(/^sf_/, "");
if (stripped.length === 0) return name;
return stripped

View file

@ -12,7 +12,7 @@
import type { SFState } from "./types.js";
import type { SFPreferences } from "./preferences.js";
import type { UatType } from "./files.js";
import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements } from "./files.js";
import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements, resolveAllOverrides } from "./files.js";
import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, getSliceTasks } from "./sf-db.js";
import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
@ -80,6 +80,8 @@ export interface DispatchContext {
state: SFState;
prefs: SFPreferences | undefined;
session?: import("./auto/session.js").AutoSession;
/** Cached pipeline variant for this dispatch cycle — set once by resolveDispatch. */
pipelineVariant?: string | null;
}
export interface DispatchRule {
@ -201,7 +203,6 @@ export const DISPATCH_RULES: DispatchRule[] = [
if (pendingOverrides.length === 0) return null;
const count = getRewriteCount(basePath);
if (count >= MAX_REWRITE_ATTEMPTS) {
const { resolveAllOverrides } = await import("./files.js");
await resolveAllOverrides(basePath);
setRewriteCount(basePath, 0);
return null;
@ -321,18 +322,21 @@ export const DISPATCH_RULES: DispatchRule[] = [
return null;
}
for (const sliceId of completedSliceIds) {
const resultFile = resolveSliceFile(basePath, mid, sliceId, "UAT");
if (!resultFile) continue;
const content = await loadFile(resultFile);
if (!content) continue;
const verdict = extractVerdict(content);
const uatType = extractUatType(content);
if (verdict && !isAcceptableUatVerdict(verdict, uatType)) {
const uatChecks = await Promise.all(
completedSliceIds.map(async (sliceId) => {
const resultFile = resolveSliceFile(basePath, mid, sliceId, "UAT");
if (!resultFile) return null;
const content = await loadFile(resultFile);
if (!content) return null;
return { sliceId, verdict: extractVerdict(content), uatType: extractUatType(content) };
}),
);
for (const check of uatChecks) {
if (!check) continue;
if (check.verdict && !isAcceptableUatVerdict(check.verdict, check.uatType)) {
return {
action: "stop" as const,
reason: `UAT verdict for ${sliceId} is "${verdict}" — blocking progression until resolved.\nReview the UAT result and update the verdict to PASS, or re-run /sf auto after fixing.`,
reason: `UAT verdict for ${check.sliceId} is "${check.verdict}" — blocking progression until resolved.\nReview the UAT result and update the verdict to PASS, or re-run /sf auto after fixing.`,
level: "warning" as const,
};
}
@ -344,9 +348,13 @@ export const DISPATCH_RULES: DispatchRule[] = [
name: "reassess-roadmap (post-completion)",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (prefs?.phases?.skip_reassess) return null;
// Default reassess_after_slice to true — reassessment after slice completion
// is essential for roadmap integrity. Opt-out via explicit `false`.
const reassessEnabled = prefs?.phases?.reassess_after_slice ?? true;
// Default reassess_after_slice to false per ADR-003 §4 — most reassess
// units conclude "roadmap is fine" and burn a session for no change.
// The plan-slice prompt now carries a reassessment preamble so the
// next slice's planner does JIT roadmap verification at zero extra
// cost. Opt-in via explicit `reassess_after_slice: true` (e.g.
// burn-max profile) when you want the dedicated reassess session.
const reassessEnabled = prefs?.phases?.reassess_after_slice ?? false;
if (!reassessEnabled) return null;
const needsReassess = await checkNeedsReassessment(basePath, mid, state, prefs);
if (!needsReassess) return null;
@ -413,12 +421,12 @@ export const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "pre-planning (no research) → research-milestone",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
match: async ({ state, mid, midTitle, basePath, prefs, pipelineVariant }) => {
if (state.phase !== "pre-planning") return null;
// Phase skip: skip research when preference or profile says so
if (prefs?.phases?.skip_research) return null;
// #4781 phase 2: trivial-scope milestones skip dedicated milestone research
if (await getMilestonePipelineVariant(mid) === "trivial") return null;
if (pipelineVariant === "trivial") return null;
const researchFile = resolveMilestoneFile(basePath, mid, "RESEARCH");
if (researchFile) return null; // has research, fall through
return {
@ -457,11 +465,11 @@ export const DISPATCH_RULES: DispatchRule[] = [
// Keep this rule before the single-slice research rule so the multi-slice
// path wins whenever 2+ slices are ready.
name: "planning (multiple slices need research) → parallel-research-slices",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
match: async ({ state, mid, midTitle, basePath, prefs, pipelineVariant }) => {
if (state.phase !== "planning") return null;
if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research) return null;
// #4781 phase 2: trivial-scope milestones skip dedicated slice research
if (await getMilestonePipelineVariant(mid) === "trivial") return null;
if (pipelineVariant === "trivial") return null;
// Load roadmap to find all slices
const roadmapFile = resolveMilestoneFile(basePath, mid, "ROADMAP");
@ -473,6 +481,13 @@ export const DISPATCH_RULES: DispatchRule[] = [
const milestoneResearchFile = resolveMilestoneFile(basePath, mid, "RESEARCH");
const researchReadySlices: Array<{ id: string; title: string }> = [];
// Pre-compute which slices have SUMMARY files to avoid O(N×M) existsSync calls
const slicesWithSummary = new Set(
roadmap.slices
.filter((s) => !!resolveSliceFile(basePath, mid, s.id, "SUMMARY"))
.map((s) => s.id),
);
for (const slice of roadmap.slices) {
if (slice.done) continue;
// Skip S01 when milestone research exists
@ -480,9 +495,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
// Skip if already has research
if (resolveSliceFile(basePath, mid, slice.id, "RESEARCH")) continue;
// Skip if dependencies aren't done (check for SUMMARY files)
const depsComplete = (slice.depends ?? []).every((depId) =>
!!resolveSliceFile(basePath, mid, depId, "SUMMARY"),
);
const depsComplete = (slice.depends ?? []).every((depId) => slicesWithSummary.has(depId));
if (!depsComplete) continue;
researchReadySlices.push({ id: slice.id, title: slice.title });
@ -513,13 +526,13 @@ export const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "planning (no research, not S01) → research-slice",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
match: async ({ state, mid, midTitle, basePath, prefs, pipelineVariant }) => {
if (state.phase !== "planning") return null;
// Phase skip: skip research when preference or profile says so
if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research)
return null;
// #4781 phase 2: trivial-scope milestones skip dedicated slice research
if (await getMilestonePipelineVariant(mid) === "trivial") return null;
if (pipelineVariant === "trivial") return null;
if (!state.activeSlice) return missingSliceStop(mid, state.phase);
const sid = state.activeSlice!.id;
const sTitle = state.activeSlice!.title;
@ -649,6 +662,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
getReadyTasks,
chooseNonConflictingSubset,
graphMetrics,
saveReactiveState,
} = await import("./reactive-graph.js");
const taskIO = await loadSliceTaskIO(basePath, mid, sid);
@ -690,7 +704,6 @@ export const DISPATCH_RULES: DispatchRule[] = [
// Persist dispatched batch so verification and recovery can check
// exactly which tasks were sent.
const { saveReactiveState } = await import("./reactive-graph.js");
saveReactiveState(basePath, mid, sid, {
sliceId: sid,
completed: [...completed],
@ -808,7 +821,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "validating-milestone → validate-milestone",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
match: async ({ state, mid, midTitle, basePath, prefs, pipelineVariant }) => {
if (state.phase !== "validating-milestone") return null;
// Safety guard (#1368): verify all roadmap slices have SUMMARY files before
@ -823,7 +836,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
}
// Skip preference or trivial-scope pipeline variant: write a minimal pass-through VALIDATION file
const trivialVariant = await getMilestonePipelineVariant(mid) === "trivial";
const trivialVariant = pipelineVariant === "trivial";
const skipSource = trivialVariant
? "trivial-scope pipeline variant (#4781)"
: "`skip_milestone_validation` preference";
@ -992,6 +1005,12 @@ import { getRegistry, hasRegistry } from "./rule-registry.js";
export async function resolveDispatch(
ctx: DispatchContext,
): Promise<DispatchAction> {
// Fetch pipeline variant once per dispatch cycle so rules can read ctx.pipelineVariant
// without triggering redundant DB queries + heuristic evaluations.
if (ctx.pipelineVariant === undefined) {
ctx.pipelineVariant = await getMilestonePipelineVariant(ctx.mid);
}
// Delegate to registry when available. Callers that run outside auto-mode
// (e.g. `sf headless query`, `sf headless status`) never initialize the
// registry — falling through to inline rules is the intended behavior,