fix(sf): remove legacy completion tool aliases
This commit is contained in:
parent
1891ccbdcd
commit
61485c5bef
31 changed files with 80 additions and 202 deletions
|
|
@ -22,7 +22,7 @@ The core SF workflow tools are internal extension tools. Examples include:
|
|||
- `sf_plan_milestone`
|
||||
- `sf_plan_slice`
|
||||
- `sf_plan_task`
|
||||
- `sf_task_complete` / `sf_complete_task`
|
||||
- `sf_task_complete`
|
||||
- `sf_slice_complete`
|
||||
- `sf_complete_milestone`
|
||||
- `sf_validate_milestone`
|
||||
|
|
@ -44,7 +44,7 @@ The Claude Code CLI provider uses the Anthropic Agent SDK through `src/resources
|
|||
|
||||
As a result:
|
||||
|
||||
- prompts tell the model to call tools like `sf_complete_task`
|
||||
- prompts tell the model to call tools like `sf_task_complete`
|
||||
- the tools exist in SF
|
||||
- but Claude Code sessions do not actually receive those tools
|
||||
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@
|
|||
|
||||
## MCP workflow mutations — read-only only
|
||||
|
||||
**Impact:** External providers (Claude Code CLI, remote orchestrators) that route through MCP can query SF state but cannot advance it. `sf_complete_task`, `sf_plan_milestone`, etc. are in-process only.
|
||||
**Impact:** External providers (Claude Code CLI, remote orchestrators) that route through MCP can query SF state but cannot advance it. `sf_task_complete`, `sf_plan_milestone`, etc. are in-process only.
|
||||
|
||||
**Proposed fix:** Extract shared handlers from native tools; expose over MCP server (ADR-008 Phase 1).
|
||||
|
||||
**Verification:** Claude Code provider session completes a task via MCP `sf_complete_task` and produces identical STATE.md outcome.
|
||||
**Verification:** Claude Code provider session completes a task via MCP `sf_task_complete` and produces identical STATE.md outcome.
|
||||
|
||||
**Tracked in:** [active/index.md — ADR-008 Phase 1](./active/index.md) · [ADR-008](../dev/ADR-008-IMPLEMENTATION-PLAN.md)
|
||||
|
||||
|
|
|
|||
|
|
@ -294,7 +294,7 @@ auto_supervisor:
|
|||
soft_timeout_minutes: 20 # warn LLM to wrap up
|
||||
idle_timeout_minutes: 10 # detect stalls
|
||||
hard_timeout_minutes: 30 # pause auto mode
|
||||
completion_nudge_after: 10 # complete-slice tool calls before nudging sf_complete_slice
|
||||
completion_nudge_after: 10 # complete-slice tool calls before nudging sf_slice_complete
|
||||
```
|
||||
|
||||
### `budget_ceiling`
|
||||
|
|
|
|||
|
|
@ -94,9 +94,7 @@ The workflow MCP surface includes:
|
|||
- `sf_replan_slice`
|
||||
- `sf_slice_replan`
|
||||
- `sf_task_complete`
|
||||
- `sf_complete_task`
|
||||
- `sf_slice_complete`
|
||||
- `sf_complete_slice`
|
||||
- `sf_skip_slice`
|
||||
- `sf_validate_milestone`
|
||||
- `sf_milestone_validate`
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ function makeToolCall(overrides: Record<string, unknown>) {
|
|||
return {
|
||||
type: "toolCall" as const,
|
||||
id: "call-1",
|
||||
name: "sf_complete_task",
|
||||
name: "sf_task_complete",
|
||||
arguments: {
|
||||
projectDir: "/tmp/sf-project",
|
||||
taskId: "T01",
|
||||
|
|
@ -57,9 +57,9 @@ function makeToolCall(overrides: Record<string, unknown>) {
|
|||
|
||||
describe("string-array schema coercion", () => {
|
||||
const sfCompleteTaskTool = {
|
||||
name: "sf_complete_task",
|
||||
name: "sf_task_complete",
|
||||
description: "Record a completed task to the SF database and render its SUMMARY.md.",
|
||||
parameters: workflowToolSchema("sf_complete_task") as any,
|
||||
parameters: workflowToolSchema("sf_task_complete") as any,
|
||||
};
|
||||
|
||||
it("coerces a bare string keyDecisions value before validation", () => {
|
||||
|
|
|
|||
|
|
@ -92,6 +92,8 @@ describe("workflow MCP tools", () => {
|
|||
|
||||
assert.equal(server.tools.length, WORKFLOW_TOOL_NAMES.length);
|
||||
assert.deepEqual(server.tools.map((t) => t.name), [...WORKFLOW_TOOL_NAMES]);
|
||||
assert.ok(!server.tools.some((t) => t.name === "sf_complete_task"));
|
||||
assert.ok(!server.tools.some((t) => t.name === "sf_complete_slice"));
|
||||
});
|
||||
|
||||
it("sf_summary_save writes artifact through the shared executor", async () => {
|
||||
|
|
@ -305,40 +307,6 @@ describe("workflow MCP tools", () => {
|
|||
}
|
||||
});
|
||||
|
||||
it("sf_complete_task alias delegates to sf_task_complete behavior", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
mkdirSync(join(base, ".sf", "milestones", "M002", "slices", "S02"), { recursive: true });
|
||||
writeFileSync(
|
||||
join(base, ".sf", "milestones", "M002", "slices", "S02", "S02-PLAN.md"),
|
||||
"# S02\n\n- [ ] **T02: Demo** `est:5m`\n",
|
||||
);
|
||||
|
||||
const server = makeMockServer();
|
||||
registerWorkflowTools(server as any);
|
||||
const aliasTool = server.tools.find((t) => t.name === "sf_complete_task");
|
||||
assert.ok(aliasTool, "task completion alias should be registered");
|
||||
|
||||
const result = await aliasTool!.handler({
|
||||
projectDir: base,
|
||||
taskId: "T02",
|
||||
sliceId: "S02",
|
||||
milestoneId: "M002",
|
||||
oneLiner: "Completed task via alias",
|
||||
narrative: "Did the work through alias",
|
||||
verification: "npm test",
|
||||
});
|
||||
|
||||
assert.match((result as any).content[0].text as string, /Completed task T02/);
|
||||
assert.ok(
|
||||
existsSync(join(base, ".sf", "milestones", "M002", "slices", "S02", "tasks", "T02-SUMMARY.md")),
|
||||
"alias should write task summary to disk",
|
||||
);
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
it("sf_plan_milestone and sf_plan_slice work end-to-end", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
|
|
@ -662,7 +630,7 @@ describe("workflow MCP tools", () => {
|
|||
}
|
||||
});
|
||||
|
||||
it("sf_slice_complete and sf_complete_slice work end-to-end", async () => {
|
||||
it("sf_slice_complete works end-to-end", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
const server = makeMockServer();
|
||||
|
|
@ -671,12 +639,10 @@ describe("workflow MCP tools", () => {
|
|||
const sliceTool = server.tools.find((t) => t.name === "sf_plan_slice");
|
||||
const taskTool = server.tools.find((t) => t.name === "sf_task_complete");
|
||||
const canonicalTool = server.tools.find((t) => t.name === "sf_slice_complete");
|
||||
const aliasTool = server.tools.find((t) => t.name === "sf_complete_slice");
|
||||
assert.ok(milestoneTool, "milestone planning tool should be registered");
|
||||
assert.ok(sliceTool, "slice planning tool should be registered");
|
||||
assert.ok(taskTool, "task completion tool should be registered");
|
||||
assert.ok(canonicalTool, "slice completion tool should be registered");
|
||||
assert.ok(aliasTool, "slice completion alias should be registered");
|
||||
|
||||
await milestoneTool!.handler({
|
||||
projectDir: base,
|
||||
|
|
@ -738,74 +704,13 @@ describe("workflow MCP tools", () => {
|
|||
uatContent: "## UAT\n\nPASS",
|
||||
});
|
||||
assert.match((canonicalResult as any).content[0].text as string, /Completed slice S03/);
|
||||
|
||||
await milestoneTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M004",
|
||||
title: "Alias milestone",
|
||||
vision: "Prepare alias slice completion state.",
|
||||
slices: [
|
||||
{
|
||||
sliceId: "S04",
|
||||
title: "Alias Slice",
|
||||
risk: "medium",
|
||||
depends: [],
|
||||
demo: "Alias slice completes through MCP.",
|
||||
goal: "Seed alias workflow state.",
|
||||
successCriteria: "Alias summary and UAT files are written.",
|
||||
proofLevel: "integration",
|
||||
integrationClosure: "Alias reaches the shared slice executor.",
|
||||
observabilityImpact: "Workflow tests cover alias completion.",
|
||||
},
|
||||
],
|
||||
});
|
||||
await sliceTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M004",
|
||||
sliceId: "S04",
|
||||
goal: "Complete alias slice over MCP.",
|
||||
planningMeeting: validPlanningMeeting(),
|
||||
tasks: [
|
||||
{
|
||||
taskId: "T04",
|
||||
title: "Alias task",
|
||||
description: "Seed a completed task for alias slice completion.",
|
||||
estimate: "5m",
|
||||
files: ["packages/mcp-server/src/workflow-tools.ts"],
|
||||
verify: "node --test",
|
||||
inputs: ["M004-ROADMAP.md"],
|
||||
expectedOutput: ["S04-SUMMARY.md", "S04-UAT.md"],
|
||||
},
|
||||
],
|
||||
});
|
||||
await taskTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M004",
|
||||
sliceId: "S04",
|
||||
taskId: "T04",
|
||||
oneLiner: "Completed alias task",
|
||||
narrative: "Prepared the alias slice for completion.",
|
||||
verification: "node --test",
|
||||
});
|
||||
|
||||
const aliasResult = await aliasTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M004",
|
||||
sliceId: "S04",
|
||||
sliceTitle: "Alias Slice",
|
||||
oneLiner: "Completed alias slice",
|
||||
narrative: "Did the slice work via alias",
|
||||
verification: "npm test",
|
||||
uatContent: "## UAT\n\nPASS",
|
||||
});
|
||||
assert.match((aliasResult as any).content[0].text as string, /Completed slice S04/);
|
||||
assert.ok(
|
||||
existsSync(join(base, ".sf", "milestones", "M004", "slices", "S04", "S04-SUMMARY.md")),
|
||||
"alias should write slice summary to disk",
|
||||
existsSync(join(base, ".sf", "milestones", "M003", "slices", "S03", "S03-SUMMARY.md")),
|
||||
"canonical tool should write slice summary to disk",
|
||||
);
|
||||
assert.ok(
|
||||
existsSync(join(base, ".sf", "milestones", "M004", "slices", "S04", "S04-UAT.md")),
|
||||
"alias should write slice UAT to disk",
|
||||
existsSync(join(base, ".sf", "milestones", "M003", "slices", "S03", "S03-UAT.md")),
|
||||
"canonical tool should write slice UAT to disk",
|
||||
);
|
||||
} finally {
|
||||
cleanup(base);
|
||||
|
|
|
|||
|
|
@ -501,7 +501,6 @@ export const WORKFLOW_TOOL_NAMES = [
|
|||
"sf_replan_slice",
|
||||
"sf_slice_replan",
|
||||
"sf_slice_complete",
|
||||
"sf_complete_slice",
|
||||
"sf_skip_slice",
|
||||
"sf_complete_milestone",
|
||||
"sf_milestone_complete",
|
||||
|
|
@ -512,7 +511,6 @@ export const WORKFLOW_TOOL_NAMES = [
|
|||
"sf_save_gate_result",
|
||||
"sf_summary_save",
|
||||
"sf_task_complete",
|
||||
"sf_complete_task",
|
||||
"sf_milestone_status",
|
||||
"sf_journal_query",
|
||||
] as const;
|
||||
|
|
@ -1313,16 +1311,6 @@ export function registerWorkflowTools(server: McpToolServer): void {
|
|||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"sf_complete_slice",
|
||||
"Alias for sf_slice_complete. Record a completed slice to the SF database and render summary/UAT artifacts.",
|
||||
sliceCompleteParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(sliceCompleteSchema, args);
|
||||
return handleSliceComplete(parsed.projectDir, parsed);
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"sf_skip_slice",
|
||||
"Mark a slice as skipped so auto-mode advances past it without executing.",
|
||||
|
|
@ -1455,17 +1443,6 @@ export function registerWorkflowTools(server: McpToolServer): void {
|
|||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"sf_complete_task",
|
||||
"Alias for sf_task_complete. Record a completed task to the SF database and render its SUMMARY.md.",
|
||||
taskCompleteParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(taskCompleteSchema, args);
|
||||
const { projectDir, ...taskArgs } = parsed;
|
||||
return handleTaskComplete(projectDir, taskArgs);
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"sf_milestone_status",
|
||||
"Read the current status of a milestone and all its slices from the SF database.",
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import type { AgentMessage } from "@singularity-forge/pi-agent-core";
|
|||
|
||||
export const DEFAULT_COMPLETION_NUDGE_AFTER = 10;
|
||||
export const COMPLETION_NUDGE_TOOL_NAMES = new Set([
|
||||
"sf_complete_slice",
|
||||
"sf_slice_complete",
|
||||
]);
|
||||
|
||||
|
|
@ -117,12 +116,12 @@ function nextCompletionNudgeMessage(): string | null {
|
|||
state.reminderSent = true;
|
||||
state.strongSent = true;
|
||||
state.lowerTemperatureForNextRequest = true;
|
||||
return `You've performed ${state.toolCalls} tool calls without calling sf_complete_slice. Stop further investigation unless there is a specific blocker. Call sf_complete_slice now with your summary.`;
|
||||
return `You've performed ${state.toolCalls} tool calls without calling sf_slice_complete. Stop further investigation unless there is a specific blocker. Call sf_slice_complete now with your summary.`;
|
||||
}
|
||||
|
||||
if (!state.reminderSent && state.toolCalls >= firstThreshold) {
|
||||
state.reminderSent = true;
|
||||
return `You've performed ${state.toolCalls} tool calls of investigation. Per the slice plan you should now call sf_complete_slice with your summary. If you genuinely need more context, say so explicitly; otherwise call the tool now.`;
|
||||
return `You've performed ${state.toolCalls} tool calls of investigation. Per the slice plan you should now call sf_slice_complete with your summary. If you genuinely need more context, say so explicitly; otherwise call the tool now.`;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -2592,7 +2592,7 @@ export async function buildCompleteSlicePrompt(
|
|||
// Gates owned by complete-slice (e.g. Q8). Pull from the DB so the
|
||||
// prompt only prompts for gates the plan actually seeded. The tool
|
||||
// handler closes each gate based on the SUMMARY.md section content
|
||||
// after the assistant calls sf_complete_slice.
|
||||
// after the assistant calls sf_slice_complete.
|
||||
const csPending = getPendingGatesForTurn(mid, sid, "complete-slice");
|
||||
// coverage check: every pending row must be owned by complete-slice.
|
||||
// requireAll:false because a slice may have already closed some gates.
|
||||
|
|
|
|||
|
|
@ -400,7 +400,7 @@ export function verifyExpectedArtifact(
|
|||
} else if (!isDbAvailable()) {
|
||||
// LEGACY: Pre-migration fallback for projects without DB.
|
||||
// Require a CHECKED checkbox — a bare heading or unchecked checkbox
|
||||
// does not prove sf_complete_task ran. Summary file on disk alone
|
||||
// does not prove sf_task_complete ran. Summary file on disk alone
|
||||
// is not sufficient evidence (could be a rogue write) (#3607).
|
||||
const planAbs = resolveSliceFile(base, mid, sid, "PLAN");
|
||||
if (planAbs && existsSync(planAbs)) {
|
||||
|
|
|
|||
|
|
@ -526,7 +526,6 @@ export function markToolEnd(toolCallId: string): void {
|
|||
|
||||
const TASK_COMPLETE_TOOL_NAMES = new Set([
|
||||
"sf_task_complete",
|
||||
"sf_complete_task",
|
||||
]);
|
||||
|
||||
function normalizeTaskCompleteFailure(errorMsg: string): string {
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ export class AutoSession {
|
|||
/** Last turn-level git action status captured during finalize. */
|
||||
lastGitActionStatus: "ok" | "failed" | null = null;
|
||||
/**
|
||||
* Last sf_task_complete/sf_complete_task execution error for the current turn.
|
||||
* Last sf_task_complete execution error for the current turn.
|
||||
* Unlike malformed tool invocation errors, these are normal tool execution
|
||||
* failures (for example a transient SUMMARY.md write failure) and should be
|
||||
* retried in-flow instead of pausing auto-mode.
|
||||
|
|
|
|||
|
|
@ -1344,7 +1344,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
pi.registerTool(planTaskTool);
|
||||
registerAlias(pi, planTaskTool, "sf_task_plan", "sf_plan_task");
|
||||
|
||||
// ─── sf_task_complete (sf_complete_task alias) ────────────────────────
|
||||
// ─── sf_task_complete ─────────────────────────────────────────────────
|
||||
|
||||
const taskCompleteExecute = async (
|
||||
_toolCallId: string,
|
||||
|
|
@ -1365,7 +1365,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
promptSnippet:
|
||||
"Complete a SF task (DB write + summary render + checkbox toggle)",
|
||||
promptGuidelines: [
|
||||
"Use sf_task_complete (or sf_complete_task) when a task is finished and needs to be recorded.",
|
||||
"Use sf_task_complete when a task is finished and needs to be recorded.",
|
||||
"All string fields are required. verificationEvidence is an array of objects with command, exitCode, verdict, durationMs.",
|
||||
"The tool validates required fields and returns an error message if any are missing.",
|
||||
"On success, returns the summaryPath where the SUMMARY.md was written.",
|
||||
|
|
@ -1441,9 +1441,8 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
};
|
||||
|
||||
pi.registerTool(taskCompleteTool);
|
||||
registerAlias(pi, taskCompleteTool, "sf_complete_task", "sf_task_complete");
|
||||
|
||||
// ─── sf_slice_complete (sf_complete_slice alias) ─────────────────────
|
||||
// ─── sf_slice_complete ────────────────────────────────────────────────
|
||||
|
||||
const sliceCompleteExecute = async (
|
||||
_toolCallId: string,
|
||||
|
|
@ -1464,7 +1463,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
promptSnippet:
|
||||
"Complete a SF slice (DB write + summary/UAT render + roadmap checkbox toggle)",
|
||||
promptGuidelines: [
|
||||
"Use sf_slice_complete (or sf_complete_slice) when all tasks in a slice are finished and the slice needs to be recorded.",
|
||||
"Use sf_slice_complete when all tasks in a slice are finished and the slice needs to be recorded.",
|
||||
"All tasks in the slice must have status 'complete' — the handler validates this before proceeding.",
|
||||
"On success, returns summaryPath and uatPath where the files were written.",
|
||||
"Idempotent — calling with the same params twice will not crash.",
|
||||
|
|
@ -1607,12 +1606,6 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
};
|
||||
|
||||
pi.registerTool(sliceCompleteTool);
|
||||
registerAlias(
|
||||
pi,
|
||||
sliceCompleteTool,
|
||||
"sf_complete_slice",
|
||||
"sf_slice_complete",
|
||||
);
|
||||
|
||||
// ─── sf_skip_slice (#3477 / #3487) ───────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
|
|||
- `soft_timeout_minutes`: minutes before the supervisor issues a soft warning (default: 20).
|
||||
- `idle_timeout_minutes`: minutes of inactivity before the supervisor intervenes (default: 10).
|
||||
- `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30).
|
||||
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_complete_slice` (default: 10; set `0` to disable).
|
||||
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_slice_complete` (default: 10; set `0` to disable).
|
||||
- `runaway_guard_enabled`: enable active-loop diagnosis for long-running units (default: `true`).
|
||||
- `runaway_tool_call_warning`: unit tool calls before a runaway warning (default: `60`; set `0` to disable this signal).
|
||||
- `runaway_token_warning`: unit tokens before a runaway warning (default: `1000000`; set `0` to disable this signal).
|
||||
|
|
|
|||
|
|
@ -248,7 +248,7 @@ export interface AutoSupervisorConfig {
|
|||
idle_timeout_minutes?: number;
|
||||
hard_timeout_minutes?: number;
|
||||
phase_timeout_minutes?: number;
|
||||
/** Tool-call count before nudging complete-slice agents to call sf_complete_slice. Default: 10. Set 0 to disable. */
|
||||
/** Tool-call count before nudging complete-slice agents to call sf_slice_complete. Default: 10. Set 0 to disable. */
|
||||
completion_nudge_after?: number;
|
||||
/** Enable the runaway guard that asks long-running active units for diagnosis before pausing. Default: true. */
|
||||
runaway_guard_enabled?: boolean;
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ This skill does NOT decide whether a slice is shippable in isolation — that's
|
|||
|
||||
- After `spec-first-tdd` reaches GREEN on the contract test.
|
||||
- After a debugging fix from `systematic-debugging`.
|
||||
- Before calling `sf_complete_task` or `sf_complete_slice`.
|
||||
- Before calling `sf_task_complete` or `sf_slice_complete`.
|
||||
- Inside an autonomous iteration loop, between slices.
|
||||
|
||||
If used inside an autonomous iteration loop and the user goal is still in progress:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ function baseMessages(): AgentMessage[] {
|
|||
];
|
||||
}
|
||||
|
||||
test("completion nudge injects reminder after N tool calls without sf_complete_slice", () => {
|
||||
test("completion nudge injects reminder after N tool calls without sf_slice_complete", () => {
|
||||
resetCompletionNudgeState("complete-slice", "M001/S01", 3);
|
||||
recordCompletionNudgeToolCall("read");
|
||||
recordCompletionNudgeToolCall("grep");
|
||||
|
|
@ -40,14 +40,14 @@ test("completion nudge injects reminder after N tool calls without sf_complete_s
|
|||
String(nudge.content),
|
||||
/You've performed 3 tool calls of investigation/,
|
||||
);
|
||||
assert.match(String(nudge.content), /call sf_complete_slice/);
|
||||
assert.match(String(nudge.content), /call sf_slice_complete/);
|
||||
});
|
||||
|
||||
test("completion nudge does not inject after sf_complete_slice is called", () => {
|
||||
test("completion nudge does not inject after sf_slice_complete is called", () => {
|
||||
resetCompletionNudgeState("complete-slice", "M001/S01", 3);
|
||||
recordCompletionNudgeToolCall("read");
|
||||
recordCompletionNudgeToolCall("grep");
|
||||
recordCompletionNudgeToolCall("sf_complete_slice");
|
||||
recordCompletionNudgeToolCall("sf_slice_complete");
|
||||
recordCompletionNudgeToolCall("bash");
|
||||
|
||||
const messages = maybeInjectCompletionNudgeMessage(baseMessages());
|
||||
|
|
@ -66,7 +66,7 @@ test("completion nudge injects stronger message at 2N and lowers temperature", (
|
|||
const messages = maybeInjectCompletionNudgeMessage(baseMessages());
|
||||
assert.equal(messages.length, 2);
|
||||
const nudge = messages[1] as CustomTestMessage;
|
||||
assert.match(String(nudge.content), /without calling sf_complete_slice/);
|
||||
assert.match(String(nudge.content), /without calling sf_slice_complete/);
|
||||
|
||||
const payload = { temperature: 0.9, generationConfig: { temperature: 0.8 } };
|
||||
applyCompletionNudgeTemperature(payload);
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
*
|
||||
* Bug #2936: When the LLM calls ask_user_questions during auto-mode units
|
||||
* (plan-slice, execute-task, complete-slice), the interactive tool queues a
|
||||
* user response which causes the subsequent sf_plan_slice / sf_complete_task
|
||||
* user response which causes the subsequent sf_plan_slice / sf_task_complete
|
||||
* call to fail with "Skipped due to queued user message." The canonical SF
|
||||
* tool call is never recorded, verifyExpectedArtifact finds no artifact, and
|
||||
* the dispatch loop re-dispatches the same unit 2-4x.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* Regression test for #3674 — block direct writes to sf.db
|
||||
*
|
||||
* When sf_complete_task was unavailable, agents fell back to shell-based
|
||||
* When sf_task_complete was unavailable, agents fell back to shell-based
|
||||
* sqlite3 writes, corrupting the WAL-backed database. The fix extends
|
||||
* write-intercept to block file writes and bash commands targeting sf.db.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -39,9 +39,7 @@ const HEAVY_TOOLS = [
|
|||
"sf_plan_task",
|
||||
"sf_task_plan",
|
||||
"sf_task_complete",
|
||||
"sf_complete_task",
|
||||
"sf_slice_complete",
|
||||
"sf_complete_slice",
|
||||
"sf_complete_milestone",
|
||||
"sf_milestone_complete",
|
||||
"sf_validate_milestone",
|
||||
|
|
|
|||
|
|
@ -702,7 +702,7 @@ test("verifyExpectedArtifact execute-task rejects heading-style plan without che
|
|||
);
|
||||
writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "# T01 Summary\n\nDone.");
|
||||
// Heading-style entries no longer count as verified — only checked
|
||||
// checkboxes prove sf_complete_task ran (#3607).
|
||||
// checkboxes prove sf_task_complete ran (#3607).
|
||||
assert.strictEqual(
|
||||
verifyExpectedArtifact("execute-task", "M001/S01/T01", base),
|
||||
false,
|
||||
|
|
|
|||
|
|
@ -301,17 +301,17 @@ test("research prompts stop after saving and forbid planning tools", () => {
|
|||
}
|
||||
});
|
||||
|
||||
// ─── Prompt migration: execute-task → sf_complete_task ───────────────
|
||||
// ─── Prompt migration: execute-task → sf_task_complete ───────────────
|
||||
|
||||
test("execute-task prompt references sf_complete_task tool", () => {
|
||||
test("execute-task prompt references sf_task_complete tool", () => {
|
||||
const prompt = readPrompt("execute-task");
|
||||
assert.match(prompt, /sf_complete_task/);
|
||||
assert.match(prompt, /sf_task_complete/);
|
||||
});
|
||||
|
||||
test("execute-task prompt uses sf_complete_task as canonical summary write path", () => {
|
||||
test("execute-task prompt uses sf_task_complete as canonical summary write path", () => {
|
||||
const prompt = readPrompt("execute-task");
|
||||
assert.match(prompt, /\{\{taskSummaryPath\}\}/);
|
||||
assert.match(prompt, /sf_complete_task/);
|
||||
assert.match(prompt, /sf_task_complete/);
|
||||
assert.match(prompt, /DB-backed tool is the canonical write path/i);
|
||||
assert.match(
|
||||
prompt,
|
||||
|
|
@ -348,11 +348,11 @@ test("guided-execute-task prompt does not instruct manual file write", () => {
|
|||
);
|
||||
});
|
||||
|
||||
// ─── Prompt migration: complete-slice → sf_complete_slice ────────────
|
||||
// ─── Prompt migration: complete-slice → sf_slice_complete ────────────
|
||||
|
||||
test("complete-slice prompt references sf_complete_slice tool", () => {
|
||||
test("complete-slice prompt references sf_slice_complete tool", () => {
|
||||
const prompt = readPrompt("complete-slice");
|
||||
assert.match(prompt, /sf_complete_slice/);
|
||||
assert.match(prompt, /sf_slice_complete/);
|
||||
});
|
||||
|
||||
test("complete-slice prompt does not instruct LLM to toggle checkboxes manually", () => {
|
||||
|
|
@ -369,7 +369,7 @@ test("complete-slice prompt instructs writing summary and UAT files before tool
|
|||
const prompt = readPrompt("complete-slice");
|
||||
assert.match(prompt, /\{\{sliceSummaryPath\}\}/);
|
||||
assert.match(prompt, /\{\{sliceUatPath\}\}/);
|
||||
assert.match(prompt, /sf_complete_slice/);
|
||||
assert.match(prompt, /sf_slice_complete/);
|
||||
assert.match(prompt, /DB-backed tool is the canonical write path/i);
|
||||
assert.match(
|
||||
prompt,
|
||||
|
|
@ -529,14 +529,14 @@ test("reassess-roadmap prompt names sf_reassess_roadmap as the tool to use", ()
|
|||
|
||||
test("execute-task prompt uses camelCase parameter names matching TypeBox schema", () => {
|
||||
const prompt = readPrompt("execute-task");
|
||||
// The sf_complete_task tool schema uses camelCase: milestoneId, sliceId, taskId
|
||||
// The sf_task_complete tool schema uses camelCase: milestoneId, sliceId, taskId
|
||||
// Prompts must NOT tell the LLM to use snake_case (milestone_id, slice_id, task_id)
|
||||
const toolCallLine = prompt
|
||||
.split("\n")
|
||||
.find((l) => /sf_complete_task/.test(l) || /sf_task_complete/.test(l));
|
||||
.find((l) => /sf_task_complete/.test(l));
|
||||
assert.ok(
|
||||
toolCallLine,
|
||||
"prompt must contain a sf_complete_task or sf_task_complete tool call line",
|
||||
"prompt must contain a sf_task_complete tool call line",
|
||||
);
|
||||
assert.doesNotMatch(
|
||||
toolCallLine!,
|
||||
|
|
@ -557,13 +557,13 @@ test("execute-task prompt uses camelCase parameter names matching TypeBox schema
|
|||
|
||||
test("complete-slice prompt uses camelCase parameter names matching TypeBox schema", () => {
|
||||
const prompt = readPrompt("complete-slice");
|
||||
// The sf_complete_slice tool schema uses camelCase: milestoneId, sliceId
|
||||
// The sf_slice_complete tool schema uses camelCase: milestoneId, sliceId
|
||||
const toolCallLine = prompt
|
||||
.split("\n")
|
||||
.find((l) => /sf_complete_slice/.test(l) || /sf_slice_complete/.test(l));
|
||||
.find((l) => /sf_slice_complete/.test(l));
|
||||
assert.ok(
|
||||
toolCallLine,
|
||||
"prompt must contain a sf_complete_slice or sf_slice_complete tool call line",
|
||||
"prompt must contain a sf_slice_complete tool call line",
|
||||
);
|
||||
assert.doesNotMatch(
|
||||
toolCallLine!,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Regression tests for #2883: sf_complete_slice tool invocation fails with
|
||||
* Regression tests for #2883: sf_slice_complete tool invocation fails with
|
||||
* JSON truncation, causing stuck retry loop.
|
||||
*
|
||||
* When a SF tool is invoked with malformed/truncated JSON arguments, the tool
|
||||
|
|
@ -29,7 +29,7 @@ describe("#2883: tool invocation error tracking on AutoSession", () => {
|
|||
|
||||
test("lastToolInvocationError is cleared on reset()", () => {
|
||||
const s = new AutoSession();
|
||||
s.lastToolInvocationError = "Validation failed for tool sf_complete_slice";
|
||||
s.lastToolInvocationError = "Validation failed for tool sf_slice_complete";
|
||||
assert.ok(s.lastToolInvocationError);
|
||||
s.reset();
|
||||
assert.equal(s.lastToolInvocationError, null);
|
||||
|
|
@ -54,7 +54,7 @@ describe("#2883: isToolInvocationError classification", () => {
|
|||
test("detects JSON validation failure pattern", () => {
|
||||
assert.equal(
|
||||
isToolInvocationError(
|
||||
"Validation failed for tool sf_complete_slice: Expected ',' or '}' in JSON",
|
||||
"Validation failed for tool sf_slice_complete: Expected ',' or '}' in JSON",
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
// tool-naming — Verifies canonical + alias tool registration for SF DB tools.
|
||||
// tool-naming — Verifies canonical + supported alias tool registration for SF DB tools.
|
||||
//
|
||||
// Each DB tool must register under its canonical sf_concept_action name
|
||||
// AND under a backward-compatible alias name.
|
||||
// The alias must share the exact same execute function reference as the canonical tool.
|
||||
// DB tools with supported aliases must register under both names.
|
||||
// Completion tools are canonical-only; do not reintroduce legacy aliases.
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { registerDbTools } from "../bootstrap/db-tools.ts";
|
||||
|
|
@ -28,8 +27,6 @@ const RENAME_MAP: Array<{ canonical: string; alias: string }> = [
|
|||
{ canonical: "sf_requirement_save", alias: "sf_save_requirement" },
|
||||
{ canonical: "sf_summary_save", alias: "sf_save_summary" },
|
||||
{ canonical: "sf_milestone_generate_id", alias: "sf_generate_milestone_id" },
|
||||
{ canonical: "sf_task_complete", alias: "sf_complete_task" },
|
||||
{ canonical: "sf_slice_complete", alias: "sf_complete_slice" },
|
||||
{ canonical: "sf_plan_milestone", alias: "sf_milestone_plan" },
|
||||
{ canonical: "sf_plan_slice", alias: "sf_slice_plan" },
|
||||
{ canonical: "sf_plan_task", alias: "sf_task_plan" },
|
||||
|
|
@ -43,6 +40,13 @@ const EXTRA_DB_TOOLS = [
|
|||
"sf_self_report",
|
||||
"sf_skip_slice",
|
||||
"sf_save_gate_result",
|
||||
"sf_task_complete",
|
||||
"sf_slice_complete",
|
||||
] as const;
|
||||
|
||||
const REMOVED_COMPLETION_ALIASES = [
|
||||
"sf_complete_task",
|
||||
"sf_complete_slice",
|
||||
] as const;
|
||||
|
||||
// ─── Registration count ──────────────────────────────────────────────────────
|
||||
|
|
@ -83,6 +87,13 @@ for (const name of EXTRA_DB_TOOLS) {
|
|||
);
|
||||
}
|
||||
|
||||
for (const name of REMOVED_COMPLETION_ALIASES) {
|
||||
assert.ok(
|
||||
!pi.tools.some((t: any) => t.name === name),
|
||||
`Removed completion alias "${name}" should not be registered`,
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Execute function identity ───────────────────────────────────────────────
|
||||
|
||||
console.log("\n── Tool naming: execute function identity (===) ──");
|
||||
|
|
@ -196,4 +207,4 @@ console.log("\n── Tool naming: milestone planning renderer summarizes work
|
|||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
*
|
||||
* The legacy (pre-migration) fallback in verifyExpectedArtifact previously
|
||||
* accepted either a heading match (### T01 --) or a checked checkbox as proof
|
||||
* that sf_complete_task ran. A heading alone does not prove completion —
|
||||
* that sf_task_complete ran. A heading alone does not prove completion —
|
||||
* it could result from a rogue write.
|
||||
*
|
||||
* The fix removes the hdRe heading regex and requires only a checked checkbox
|
||||
|
|
|
|||
|
|
@ -81,8 +81,8 @@ test("write-intercept: BLOCKED_WRITE_ERROR is a non-empty string", () => {
|
|||
|
||||
test("write-intercept: BLOCKED_WRITE_ERROR mentions engine tool calls", () => {
|
||||
assert.ok(
|
||||
BLOCKED_WRITE_ERROR.includes("sf_complete_task"),
|
||||
"should mention sf_complete_task",
|
||||
BLOCKED_WRITE_ERROR.includes("sf_task_complete"),
|
||||
"should mention sf_task_complete",
|
||||
);
|
||||
assert.ok(
|
||||
BLOCKED_WRITE_ERROR.includes("engine tool calls"),
|
||||
|
|
|
|||
|
|
@ -587,7 +587,7 @@ export async function handleCompleteSlice(
|
|||
);
|
||||
invalidateStateCache();
|
||||
return {
|
||||
error: `database update failed after SUMMARY.md/UAT.md write succeeded at ${summaryPath}: ${msg}. Files were kept; retry sf_complete_slice after fixing the DB.`,
|
||||
error: `database update failed after SUMMARY.md/UAT.md write succeeded at ${summaryPath}: ${msg}. Files were kept; retry sf_slice_complete after fixing the DB.`,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* complete-task handler — the core operation behind sf_complete_task.
|
||||
* complete-task handler — the core operation behind sf_task_complete.
|
||||
*
|
||||
* Validates inputs, atomically renders SUMMARY.md to disk, then writes the
|
||||
* task row to DB in a transaction, toggles the plan checkbox, and invalidates
|
||||
|
|
|
|||
|
|
@ -537,7 +537,7 @@ export interface BrowserFlowResult {
|
|||
duration: number;
|
||||
}
|
||||
|
||||
// ─── Complete Task Params (sf_complete_task tool input) ─────────────────
|
||||
// ─── Complete Task Params (sf_task_complete tool input) ─────────────────
|
||||
|
||||
export interface CompleteTaskParams {
|
||||
taskId: string;
|
||||
|
|
@ -587,7 +587,7 @@ export interface CompleteTaskParams {
|
|||
triggerReason?: string;
|
||||
}
|
||||
|
||||
// ─── Complete Slice Params (sf_complete_slice tool input) ───────────────
|
||||
// ─── Complete Slice Params (sf_slice_complete tool input) ───────────────
|
||||
|
||||
export interface CompleteSliceParams {
|
||||
sliceId: string;
|
||||
|
|
|
|||
|
|
@ -24,8 +24,6 @@ const MCP_WORKFLOW_TOOL_SURFACE = new Set([
|
|||
"ask_user_questions",
|
||||
"sf_decision_save",
|
||||
"sf_complete_milestone",
|
||||
"sf_complete_task",
|
||||
"sf_complete_slice",
|
||||
"sf_generate_milestone_id",
|
||||
"sf_journal_query",
|
||||
"sf_milestone_complete",
|
||||
|
|
|
|||
|
|
@ -96,8 +96,8 @@ function matchesBlockedPattern(path: string): boolean {
|
|||
* Directs the agent to use engine tool calls instead.
|
||||
*/
|
||||
export const BLOCKED_WRITE_ERROR = `Direct writes to .sf/STATE.md and .sf/sf.db are blocked. Use engine tool calls instead:
|
||||
- To complete a task: call sf_complete_task(milestone_id, slice_id, task_id, summary)
|
||||
- To complete a slice: call sf_complete_slice(milestone_id, slice_id, summary, uat_result)
|
||||
- To complete a task: call sf_task_complete(milestone_id, slice_id, task_id, summary)
|
||||
- To complete a slice: call sf_slice_complete(milestone_id, slice_id, summary, uat_result)
|
||||
- To save a decision: call sf_save_decision(scope, decision, choice, rationale)
|
||||
- To start a task: call sf_start_task(milestone_id, slice_id, task_id)
|
||||
- To record verification: call sf_record_verification(milestone_id, slice_id, task_id, evidence)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue