fix(sf): contain research unit dispatch

This commit is contained in:
Mikael Hugo 2026-05-02 14:23:01 +02:00
parent 64b46fcb8a
commit a8f0c63b0a
10 changed files with 242 additions and 53 deletions

View file

@ -20,6 +20,7 @@ import {
buildResearchSlicePrompt,
buildRunUatPrompt,
} from "./auto-prompts.js";
import { scopeActiveToolsForUnitType } from "./constants.js";
import { loadFile } from "./files.js";
import { parseRoadmap } from "./parsers.js";
import {
@ -336,8 +337,24 @@ export async function dispatchDirectPhase(
ctx.ui.notify("Session creation cancelled.", "warning");
return;
}
pi.sendMessage(
{ customType: "sf-dispatch", content: prompt, display: false },
{ triggerTurn: true },
);
let savedTools: string[] | null = null;
if (
typeof pi.getActiveTools === "function" &&
typeof pi.setActiveTools === "function"
) {
const currentTools = pi.getActiveTools();
const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools);
if (scopedTools.length !== currentTools.length) {
savedTools = currentTools;
pi.setActiveTools(scopedTools);
}
}
try {
pi.sendMessage(
{ customType: "sf-dispatch", content: prompt, display: false },
{ triggerTurn: true },
);
} finally {
if (savedTools) pi.setActiveTools(savedTools);
}
}

View file

@ -8,13 +8,24 @@ import type {
ExtensionAPI,
ExtensionContext,
} from "@singularity-forge/pi-coding-agent";
import {
collectSessionTokenUsage,
collectWorktreeFingerprint,
countChangedFiles,
resetRunawayGuardState,
} from "../auto-runaway-guard.js";
import { scopeActiveToolsForUnitType } from "../constants.js";
import { debugLog } from "../debug-logger.js";
import {
resolveAutoSupervisorConfig,
resolvePersistModelChanges,
} from "../preferences.js";
import { logWarning } from "../workflow-logger.js";
import { _clearCurrentResolve, _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
import {
_clearCurrentResolve,
_setCurrentResolve,
_setSessionSwitchInFlight,
} from "./resolve.js";
import type { AutoSession } from "./session.js";
import { NEW_SESSION_TIMEOUT_MS } from "./session.js";
import {
@ -89,13 +100,10 @@ export async function runUnit(
}
});
const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => {
sessionTimeoutHandle = setTimeout(
() => {
sessionAbortController.abort();
resolve({ cancelled: true });
},
NEW_SESSION_TIMEOUT_MS,
);
sessionTimeoutHandle = setTimeout(() => {
sessionAbortController.abort();
resolve({ cancelled: true });
}, NEW_SESSION_TIMEOUT_MS);
});
sessionResult = await Promise.race([sessionPromise, timeoutPromise]);
} catch (sessionErr) {
@ -203,6 +211,15 @@ export async function runUnit(
}
}
// Refresh the runaway baseline after newSession(). Resumed sessions recover
// old context during session creation; taking the baseline before that makes
// historical tokens look like budget spent by this unit.
resetRunawayGuardState(unitType, unitId, {
sessionTokens: collectSessionTokenUsage(ctx),
changedFiles: countChangedFiles(s.basePath),
worktreeFingerprint: collectWorktreeFingerprint(s.basePath),
});
// ── Send the prompt ──
debugLog("runUnit", { phase: "send-message", unitType, unitId });
@ -213,10 +230,34 @@ export async function runUnit(
const capturedTurnGen = getCurrentTurnGeneration();
const requestDispatchedAt = Date.now();
pi.sendMessage(
{ customType: "sf-auto", content: prompt, display: s.verbose },
{ triggerTurn: true },
);
let savedTools: string[] | null = null;
if (
typeof pi.getActiveTools === "function" &&
typeof pi.setActiveTools === "function"
) {
const currentTools = pi.getActiveTools();
const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools);
if (scopedTools.length !== currentTools.length) {
savedTools = currentTools;
pi.setActiveTools(scopedTools);
debugLog("unit-tool-scoping", {
unitType,
before: currentTools.length,
after: scopedTools.length,
removed: currentTools.length - scopedTools.length,
});
}
}
try {
pi.sendMessage(
{ customType: "sf-auto", content: prompt, display: s.verbose },
{ triggerTurn: true },
);
} finally {
if (savedTools) {
pi.setActiveTools(savedTools);
}
}
// ── Await agent_end with absolute timeout (H4 fix) ──
// If supervision fails to resolve unitPromise within 30s, treat as cancelled.

View file

@ -63,3 +63,61 @@ export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [
"sf_requirement_update",
"sf_update_requirement",
];
/**
* SF tools allowed during research units.
*
* Purpose: keep research turns in their lane. A research agent writes a
* RESEARCH artifact and may report SF friction, but planning tools belong to
* later planner units. Allowing milestone/slice planning tools in research
* turns lets a saved research artifact drift into speculative planning and
* can keep the unit alive until runaway supervision pauses it.
*
* Consumer: guided-flow.ts and auto/run-unit.ts when narrowing SF tools for
* research-milestone and research-slice turns.
*/
export const RESEARCH_TOOLS_ALLOWLIST: readonly string[] = [
"sf_summary_save",
"sf_save_summary",
"sf_self_report",
];
/**
* Return the SF tool allowlist for a workflow unit, or null when the full SF
* tool set is appropriate.
*
* Purpose: centralize per-unit active-tool narrowing so guided and auto
* dispatches enforce the same phase boundaries.
*
* Consumer: guided-flow.ts and auto/run-unit.ts before sending a unit prompt.
*/
export function getSfToolsAllowlistForUnitType(
unitType: string | undefined,
): readonly string[] | null {
if (!unitType) return null;
if (unitType.startsWith("discuss-")) return DISCUSS_TOOLS_ALLOWLIST;
if (unitType === "research-milestone" || unitType === "research-slice") {
return RESEARCH_TOOLS_ALLOWLIST;
}
return null;
}
/**
* Narrow active tools to the SF tools allowed for the current unit while
* preserving all non-SF tools.
*
* Purpose: hide out-of-phase SF mutation tools from the model without
* removing builtin or third-party tools needed to complete the unit.
*
* Consumer: guided-flow.ts and auto/run-unit.ts tool-scope guards.
*/
export function scopeActiveToolsForUnitType(
unitType: string | undefined,
currentTools: readonly string[],
): string[] {
const allowlist = getSfToolsAllowlistForUnitType(unitType);
if (!allowlist) return [...currentTools];
return currentTools.filter(
(t) => !t.startsWith("sf_") || allowlist.includes(t),
);
}

View file

@ -21,13 +21,13 @@ import type {
ExtensionContext,
} from "@singularity-forge/pi-coding-agent";
import { showConfirm, showNextAction } from "../shared/tui.js";
import { resolveExpectedArtifactPath, startAutoDetached } from "./auto.js";
import { ensureAgenticDocsScaffold } from "./agentic-docs-scaffold.js";
import { resolveExpectedArtifactPath, startAutoDetached } from "./auto.js";
import { selectAndApplyModel } from "./auto-model-selection.js";
import { buildSkillActivationBlock } from "./auto-prompts.js";
import { invalidateAllCaches } from "./cache.js";
import { ensureSiftIndexWarmup } from "./code-intelligence.js";
import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js";
import { scopeActiveToolsForUnitType } from "./constants.js";
import { clearLock } from "./crash-recovery.js";
import { debugLog } from "./debug-logger.js";
import { detectProjectState } from "./detection.js";
@ -522,27 +522,25 @@ async function dispatchWorkflow(
}
}
// Scope tools for discuss flows (#2949).
// Scope tools for bounded workflow phases (#2949, research containment).
// Providers with grammar-based constrained decoding (xAI/Grok) return
// "Grammar is too complex" when the combined tool schema is too large.
// Discuss flows only need a small subset of SF tools — strip the heavy
// planning/execution/completion tools to keep the grammar within limits.
// Some phases also only need a small subset of SF tools; strip
// out-of-phase planning/execution/completion tools to keep turns bounded.
let savedTools: string[] | null = null;
if (unitType?.startsWith("discuss-")) {
if (unitType) {
const currentTools = pi.getActiveTools();
savedTools = currentTools;
// Keep all non-SF tools (builtins, other extensions) and only the
// SF tools on the discuss allowlist.
const scopedTools = currentTools.filter(
(t) => !t.startsWith("sf_") || DISCUSS_TOOLS_ALLOWLIST.includes(t),
);
pi.setActiveTools(scopedTools);
debugLog("discuss-tool-scoping", {
unitType,
before: currentTools.length,
after: scopedTools.length,
removed: currentTools.length - scopedTools.length,
});
const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools);
if (scopedTools.length !== currentTools.length) {
savedTools = currentTools;
pi.setActiveTools(scopedTools);
debugLog("unit-tool-scoping", {
unitType,
before: currentTools.length,
after: scopedTools.length,
removed: currentTools.length - scopedTools.length,
});
}
}
const workflowPath =
@ -786,7 +784,10 @@ export function bootstrapProject(basePath: string): void {
ensureGitignore(basePath);
ensurePreferences(basePath);
ensureAgenticDocsScaffold(basePath);
ensureSiftIndexWarmup(basePath, loadEffectiveSFPreferences()?.preferences?.codebase);
ensureSiftIndexWarmup(
basePath,
loadEffectiveSFPreferences()?.preferences?.codebase,
);
untrackRuntimeFiles(basePath);
}

View file

@ -1,4 +1,4 @@
Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB.
Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB. After `sf_summary_save` succeeds, stop immediately; do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool.
**You are the scout.** A planner agent reads your output in a fresh context to decompose this slice into tasks. Write for the planner — surface key files, where the work divides naturally, what to build first, and how to verify. If the research doc is vague, the planner re-explores code you already read. If it's precise, the planner decomposes immediately.

View file

@ -52,6 +52,7 @@ Then research the codebase and relevant technologies. Narrate key findings and s
**Research is advisory, not auto-binding.** Surface candidate requirements clearly instead of silently expanding scope.
**You MUST call `sf_summary_save` with the research content before finishing.**
After `sf_summary_save` succeeds, do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
After saving research, update `.sf/PM-STRATEGY.md` — append new findings to the Opportunity Map and Guiding Policies sections. If the file doesn't exist yet, create it. This is the project's persistent PM memory — research findings that shaped planning decisions belong here.
@ -59,4 +60,4 @@ After saving research, update `.sf/PM-STRATEGY.md` — append new findings to th
This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit.
When done, say: "Milestone {{milestoneId}} researched."
When done, say only: "Milestone {{milestoneId}} researched."

View file

@ -54,9 +54,10 @@ Research what this slice needs. Narrate key findings and surprises as you go —
The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir.
**You MUST call `sf_summary_save` with the research content before finishing.**
After `sf_summary_save` succeeds, stop immediately. Do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
### Report sf-internal observations
This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit.
When done, say: "Slice {{sliceId}} researched."
When done, say only: "Slice {{sliceId}} researched."

View file

@ -18,10 +18,13 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { describe, test } from 'vitest';
import { fileURLToPath } from "node:url";
import { describe, test } from "vitest";
import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts";
import {
DISCUSS_TOOLS_ALLOWLIST,
RESEARCH_TOOLS_ALLOWLIST,
} from "../constants.ts";
const __dirname = dirname(fileURLToPath(import.meta.url));
const guidedFlowSource = readFileSync(
@ -37,11 +40,10 @@ describe("#3616 — discuss tool scoping must not leak across sessions", () => {
);
});
test("tool scoping only activates for discuss-* unit types", () => {
// The guard must be: if (unitType?.startsWith("discuss-"))
test("tool scoping activates through the shared per-unit helper", () => {
assert.ok(
guidedFlowSource.includes('unitType?.startsWith("discuss-")'),
"tool scoping should only trigger for discuss-* unit types",
guidedFlowSource.includes("scopeActiveToolsForUnitType"),
"tool scoping should go through the shared per-unit helper",
);
});
@ -54,6 +56,17 @@ describe("#3616 — discuss tool scoping must not leak across sessions", () => {
);
});
test("research scope excludes planning tools", () => {
assert.ok(
!RESEARCH_TOOLS_ALLOWLIST.includes("sf_plan_milestone"),
"sf_plan_milestone should be excluded from research scope",
);
assert.ok(
!RESEARCH_TOOLS_ALLOWLIST.includes("sf_milestone_generate_id"),
"sf_milestone_generate_id should be excluded from research scope",
);
});
test("newSession() in agent-session.ts has defense against tool narrowing persistence", () => {
const agentSessionSource = readFileSync(
join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"),

View file

@ -17,10 +17,14 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { describe, test } from 'vitest';
import { fileURLToPath } from "node:url";
import { describe, test } from "vitest";
import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts";
import {
DISCUSS_TOOLS_ALLOWLIST,
RESEARCH_TOOLS_ALLOWLIST,
scopeActiveToolsForUnitType,
} from "../constants.ts";
const __dirname = dirname(fileURLToPath(import.meta.url));
const promptsDir = join(__dirname, "..", "prompts");
@ -118,16 +122,54 @@ describe("discuss tool scoping (#2949)", () => {
);
});
test("dispatchWorkflow source code scopes tools for discuss unit types", () => {
test("dispatchWorkflow source code scopes tools for bounded unit types", () => {
const source = readFileSync(guidedFlowPath, "utf-8");
// Verify that dispatchWorkflow references the allowlist for tool scoping
assert.ok(
source.includes("DISCUSS_TOOLS_ALLOWLIST"),
"guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping",
source.includes("scopeActiveToolsForUnitType"),
"guided-flow.ts should use shared per-unit tool scoping",
);
assert.ok(
source.includes("setActiveTools"),
"guided-flow.ts should call setActiveTools to scope tools during discuss",
"guided-flow.ts should call setActiveTools to scope tools",
);
});
});
describe("research tool scoping", () => {
test("RESEARCH_TOOLS_ALLOWLIST permits only summary save and self-report SF tools", () => {
assert.deepEqual(RESEARCH_TOOLS_ALLOWLIST, [
"sf_summary_save",
"sf_save_summary",
"sf_self_report",
]);
for (const planningTool of [
"sf_milestone_generate_id",
"sf_plan_milestone",
"sf_plan_slice",
"sf_plan_task",
]) {
assert.ok(
!RESEARCH_TOOLS_ALLOWLIST.includes(planningTool),
`research scope should exclude ${planningTool}`,
);
}
});
test("scopeActiveToolsForUnitType removes planning tools from research turns", () => {
const scoped = scopeActiveToolsForUnitType("research-slice", [
"read",
"bash",
"sf_summary_save",
"sf_self_report",
"sf_milestone_generate_id",
"sf_plan_milestone",
"sf_plan_slice",
]);
assert.deepEqual(scoped, [
"read",
"bash",
"sf_summary_save",
"sf_self_report",
]);
});
});

View file

@ -1,7 +1,7 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { test } from 'vitest';
import { test } from "vitest";
const promptsDir = join(process.cwd(), "src/resources/extensions/sf/prompts");
const skillsDir = join(process.cwd(), "src/resources/extensions/sf/skills");
@ -286,6 +286,21 @@ test("guided-resume-task prompt preserves recovery state until work is supersede
assert.doesNotMatch(prompt, /Delete the continue file after reading it/i);
});
test("research prompts stop after saving and forbid planning tools", () => {
for (const name of [
"research-milestone",
"research-slice",
"guided-research-slice",
] as const) {
const prompt = readPrompt(name);
assert.match(prompt, /After `sf_summary_save` succeeds/i);
assert.match(prompt, /do \*\*not\*\* call `sf_milestone_generate_id`/i);
assert.match(prompt, /`sf_plan_milestone`/);
assert.match(prompt, /`sf_plan_slice`/);
assert.match(prompt, /planning\/creation tool/i);
}
});
// ─── Prompt migration: execute-task → sf_complete_task ───────────────
test("execute-task prompt references sf_complete_task tool", () => {