diff --git a/src/resources/extensions/sf/auto-direct-dispatch.ts b/src/resources/extensions/sf/auto-direct-dispatch.ts index f017d1b87..11f8de4c0 100644 --- a/src/resources/extensions/sf/auto-direct-dispatch.ts +++ b/src/resources/extensions/sf/auto-direct-dispatch.ts @@ -20,6 +20,7 @@ import { buildResearchSlicePrompt, buildRunUatPrompt, } from "./auto-prompts.js"; +import { scopeActiveToolsForUnitType } from "./constants.js"; import { loadFile } from "./files.js"; import { parseRoadmap } from "./parsers.js"; import { @@ -336,8 +337,24 @@ export async function dispatchDirectPhase( ctx.ui.notify("Session creation cancelled.", "warning"); return; } - pi.sendMessage( - { customType: "sf-dispatch", content: prompt, display: false }, - { triggerTurn: true }, - ); + let savedTools: string[] | null = null; + if ( + typeof pi.getActiveTools === "function" && + typeof pi.setActiveTools === "function" + ) { + const currentTools = pi.getActiveTools(); + const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools); + if (scopedTools.length !== currentTools.length) { + savedTools = currentTools; + pi.setActiveTools(scopedTools); + } + } + try { + pi.sendMessage( + { customType: "sf-dispatch", content: prompt, display: false }, + { triggerTurn: true }, + ); + } finally { + if (savedTools) pi.setActiveTools(savedTools); + } } diff --git a/src/resources/extensions/sf/auto/run-unit.ts b/src/resources/extensions/sf/auto/run-unit.ts index eb775699a..95ca2fafe 100644 --- a/src/resources/extensions/sf/auto/run-unit.ts +++ b/src/resources/extensions/sf/auto/run-unit.ts @@ -8,13 +8,24 @@ import type { ExtensionAPI, ExtensionContext, } from "@singularity-forge/pi-coding-agent"; +import { + collectSessionTokenUsage, + collectWorktreeFingerprint, + countChangedFiles, + resetRunawayGuardState, +} from "../auto-runaway-guard.js"; +import { scopeActiveToolsForUnitType } from "../constants.js"; import { debugLog } from "../debug-logger.js"; import { resolveAutoSupervisorConfig, resolvePersistModelChanges, } from "../preferences.js"; import { logWarning } from "../workflow-logger.js"; -import { _clearCurrentResolve, _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js"; +import { + _clearCurrentResolve, + _setCurrentResolve, + _setSessionSwitchInFlight, +} from "./resolve.js"; import type { AutoSession } from "./session.js"; import { NEW_SESSION_TIMEOUT_MS } from "./session.js"; import { @@ -89,13 +100,10 @@ export async function runUnit( } }); const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => { - sessionTimeoutHandle = setTimeout( - () => { - sessionAbortController.abort(); - resolve({ cancelled: true }); - }, - NEW_SESSION_TIMEOUT_MS, - ); + sessionTimeoutHandle = setTimeout(() => { + sessionAbortController.abort(); + resolve({ cancelled: true }); + }, NEW_SESSION_TIMEOUT_MS); }); sessionResult = await Promise.race([sessionPromise, timeoutPromise]); } catch (sessionErr) { @@ -203,6 +211,15 @@ export async function runUnit( } } + // Refresh the runaway baseline after newSession(). Resumed sessions recover + // old context during session creation; taking the baseline before that makes + // historical tokens look like budget spent by this unit. + resetRunawayGuardState(unitType, unitId, { + sessionTokens: collectSessionTokenUsage(ctx), + changedFiles: countChangedFiles(s.basePath), + worktreeFingerprint: collectWorktreeFingerprint(s.basePath), + }); + // ── Send the prompt ── debugLog("runUnit", { phase: "send-message", unitType, unitId }); @@ -213,10 +230,34 @@ export async function runUnit( const capturedTurnGen = getCurrentTurnGeneration(); const requestDispatchedAt = Date.now(); - pi.sendMessage( - { customType: "sf-auto", content: prompt, display: s.verbose }, - { triggerTurn: true }, - ); + let savedTools: string[] | null = null; + if ( + typeof pi.getActiveTools === "function" && + typeof pi.setActiveTools === "function" + ) { + const currentTools = pi.getActiveTools(); + const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools); + if (scopedTools.length !== currentTools.length) { + savedTools = currentTools; + pi.setActiveTools(scopedTools); + debugLog("unit-tool-scoping", { + unitType, + before: currentTools.length, + after: scopedTools.length, + removed: currentTools.length - scopedTools.length, + }); + } + } + try { + pi.sendMessage( + { customType: "sf-auto", content: prompt, display: s.verbose }, + { triggerTurn: true }, + ); + } finally { + if (savedTools) { + pi.setActiveTools(savedTools); + } + } // ── Await agent_end with absolute timeout (H4 fix) ── // If supervision fails to resolve unitPromise within 30s, treat as cancelled. diff --git a/src/resources/extensions/sf/constants.ts b/src/resources/extensions/sf/constants.ts index 8175af4c4..5acf7b9be 100644 --- a/src/resources/extensions/sf/constants.ts +++ b/src/resources/extensions/sf/constants.ts @@ -63,3 +63,61 @@ export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [ "sf_requirement_update", "sf_update_requirement", ]; + +/** + * SF tools allowed during research units. + * + * Purpose: keep research turns in their lane. A research agent writes a + * RESEARCH artifact and may report SF friction, but planning tools belong to + * later planner units. Allowing milestone/slice planning tools in research + * turns lets a saved research artifact drift into speculative planning and + * can keep the unit alive until runaway supervision pauses it. + * + * Consumer: guided-flow.ts and auto/run-unit.ts when narrowing SF tools for + * research-milestone and research-slice turns. + */ +export const RESEARCH_TOOLS_ALLOWLIST: readonly string[] = [ + "sf_summary_save", + "sf_save_summary", + "sf_self_report", +]; + +/** + * Return the SF tool allowlist for a workflow unit, or null when the full SF + * tool set is appropriate. + * + * Purpose: centralize per-unit active-tool narrowing so guided and auto + * dispatches enforce the same phase boundaries. + * + * Consumer: guided-flow.ts and auto/run-unit.ts before sending a unit prompt. + */ +export function getSfToolsAllowlistForUnitType( + unitType: string | undefined, +): readonly string[] | null { + if (!unitType) return null; + if (unitType.startsWith("discuss-")) return DISCUSS_TOOLS_ALLOWLIST; + if (unitType === "research-milestone" || unitType === "research-slice") { + return RESEARCH_TOOLS_ALLOWLIST; + } + return null; +} + +/** + * Narrow active tools to the SF tools allowed for the current unit while + * preserving all non-SF tools. + * + * Purpose: hide out-of-phase SF mutation tools from the model without + * removing builtin or third-party tools needed to complete the unit. + * + * Consumer: guided-flow.ts and auto/run-unit.ts tool-scope guards. + */ +export function scopeActiveToolsForUnitType( + unitType: string | undefined, + currentTools: readonly string[], +): string[] { + const allowlist = getSfToolsAllowlistForUnitType(unitType); + if (!allowlist) return [...currentTools]; + return currentTools.filter( + (t) => !t.startsWith("sf_") || allowlist.includes(t), + ); +} diff --git a/src/resources/extensions/sf/guided-flow.ts b/src/resources/extensions/sf/guided-flow.ts index 968307ea1..d0bbf1b60 100644 --- a/src/resources/extensions/sf/guided-flow.ts +++ b/src/resources/extensions/sf/guided-flow.ts @@ -21,13 +21,13 @@ import type { ExtensionContext, } from "@singularity-forge/pi-coding-agent"; import { showConfirm, showNextAction } from "../shared/tui.js"; -import { resolveExpectedArtifactPath, startAutoDetached } from "./auto.js"; import { ensureAgenticDocsScaffold } from "./agentic-docs-scaffold.js"; +import { resolveExpectedArtifactPath, startAutoDetached } from "./auto.js"; import { selectAndApplyModel } from "./auto-model-selection.js"; import { buildSkillActivationBlock } from "./auto-prompts.js"; import { invalidateAllCaches } from "./cache.js"; import { ensureSiftIndexWarmup } from "./code-intelligence.js"; -import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js"; +import { scopeActiveToolsForUnitType } from "./constants.js"; import { clearLock } from "./crash-recovery.js"; import { debugLog } from "./debug-logger.js"; import { detectProjectState } from "./detection.js"; @@ -522,27 +522,25 @@ async function dispatchWorkflow( } } - // Scope tools for discuss flows (#2949). + // Scope tools for bounded workflow phases (#2949, research containment). // Providers with grammar-based constrained decoding (xAI/Grok) return // "Grammar is too complex" when the combined tool schema is too large. - // Discuss flows only need a small subset of SF tools — strip the heavy - // planning/execution/completion tools to keep the grammar within limits. + // Some phases also only need a small subset of SF tools; strip + // out-of-phase planning/execution/completion tools to keep turns bounded. let savedTools: string[] | null = null; - if (unitType?.startsWith("discuss-")) { + if (unitType) { const currentTools = pi.getActiveTools(); - savedTools = currentTools; - // Keep all non-SF tools (builtins, other extensions) and only the - // SF tools on the discuss allowlist. - const scopedTools = currentTools.filter( - (t) => !t.startsWith("sf_") || DISCUSS_TOOLS_ALLOWLIST.includes(t), - ); - pi.setActiveTools(scopedTools); - debugLog("discuss-tool-scoping", { - unitType, - before: currentTools.length, - after: scopedTools.length, - removed: currentTools.length - scopedTools.length, - }); + const scopedTools = scopeActiveToolsForUnitType(unitType, currentTools); + if (scopedTools.length !== currentTools.length) { + savedTools = currentTools; + pi.setActiveTools(scopedTools); + debugLog("unit-tool-scoping", { + unitType, + before: currentTools.length, + after: scopedTools.length, + removed: currentTools.length - scopedTools.length, + }); + } } const workflowPath = @@ -786,7 +784,10 @@ export function bootstrapProject(basePath: string): void { ensureGitignore(basePath); ensurePreferences(basePath); ensureAgenticDocsScaffold(basePath); - ensureSiftIndexWarmup(basePath, loadEffectiveSFPreferences()?.preferences?.codebase); + ensureSiftIndexWarmup( + basePath, + loadEffectiveSFPreferences()?.preferences?.codebase, + ); untrackRuntimeFiles(basePath); } diff --git a/src/resources/extensions/sf/prompts/guided-research-slice.md b/src/resources/extensions/sf/prompts/guided-research-slice.md index ee3e6b7fd..e5be157e6 100644 --- a/src/resources/extensions/sf/prompts/guided-research-slice.md +++ b/src/resources/extensions/sf/prompts/guided-research-slice.md @@ -1,4 +1,4 @@ -Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB. +Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB. After `sf_summary_save` succeeds, stop immediately; do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. **You are the scout.** A planner agent reads your output in a fresh context to decompose this slice into tasks. Write for the planner — surface key files, where the work divides naturally, what to build first, and how to verify. If the research doc is vague, the planner re-explores code you already read. If it's precise, the planner decomposes immediately. diff --git a/src/resources/extensions/sf/prompts/research-milestone.md b/src/resources/extensions/sf/prompts/research-milestone.md index 2354743b2..2dd9a3c6c 100644 --- a/src/resources/extensions/sf/prompts/research-milestone.md +++ b/src/resources/extensions/sf/prompts/research-milestone.md @@ -52,6 +52,7 @@ Then research the codebase and relevant technologies. Narrate key findings and s **Research is advisory, not auto-binding.** Surface candidate requirements clearly instead of silently expanding scope. **You MUST call `sf_summary_save` with the research content before finishing.** +After `sf_summary_save` succeeds, do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research. After saving research, update `.sf/PM-STRATEGY.md` — append new findings to the Opportunity Map and Guiding Policies sections. If the file doesn't exist yet, create it. This is the project's persistent PM memory — research findings that shaped planning decisions belong here. @@ -59,4 +60,4 @@ After saving research, update `.sf/PM-STRATEGY.md` — append new findings to th This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. -When done, say: "Milestone {{milestoneId}} researched." +When done, say only: "Milestone {{milestoneId}} researched." diff --git a/src/resources/extensions/sf/prompts/research-slice.md b/src/resources/extensions/sf/prompts/research-slice.md index 1d9f096fa..33c645cec 100644 --- a/src/resources/extensions/sf/prompts/research-slice.md +++ b/src/resources/extensions/sf/prompts/research-slice.md @@ -54,9 +54,10 @@ Research what this slice needs. Narrate key findings and surprises as you go — The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir. **You MUST call `sf_summary_save` with the research content before finishing.** +After `sf_summary_save` succeeds, stop immediately. Do **not** call `sf_milestone_generate_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research. ### Report sf-internal observations This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. -When done, say: "Slice {{sliceId}} researched." +When done, say only: "Slice {{sliceId}} researched." diff --git a/src/resources/extensions/sf/tests/discuss-tool-scope-leak.test.ts b/src/resources/extensions/sf/tests/discuss-tool-scope-leak.test.ts index c97b90607..ea3501364 100644 --- a/src/resources/extensions/sf/tests/discuss-tool-scope-leak.test.ts +++ b/src/resources/extensions/sf/tests/discuss-tool-scope-leak.test.ts @@ -18,10 +18,13 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { dirname, join } from "node:path"; -import { describe, test } from 'vitest'; import { fileURLToPath } from "node:url"; +import { describe, test } from "vitest"; -import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts"; +import { + DISCUSS_TOOLS_ALLOWLIST, + RESEARCH_TOOLS_ALLOWLIST, +} from "../constants.ts"; const __dirname = dirname(fileURLToPath(import.meta.url)); const guidedFlowSource = readFileSync( @@ -37,11 +40,10 @@ describe("#3616 — discuss tool scoping must not leak across sessions", () => { ); }); - test("tool scoping only activates for discuss-* unit types", () => { - // The guard must be: if (unitType?.startsWith("discuss-")) + test("tool scoping activates through the shared per-unit helper", () => { assert.ok( - guidedFlowSource.includes('unitType?.startsWith("discuss-")'), - "tool scoping should only trigger for discuss-* unit types", + guidedFlowSource.includes("scopeActiveToolsForUnitType"), + "tool scoping should go through the shared per-unit helper", ); }); @@ -54,6 +56,17 @@ describe("#3616 — discuss tool scoping must not leak across sessions", () => { ); }); + test("research scope excludes planning tools", () => { + assert.ok( + !RESEARCH_TOOLS_ALLOWLIST.includes("sf_plan_milestone"), + "sf_plan_milestone should be excluded from research scope", + ); + assert.ok( + !RESEARCH_TOOLS_ALLOWLIST.includes("sf_milestone_generate_id"), + "sf_milestone_generate_id should be excluded from research scope", + ); + }); + test("newSession() in agent-session.ts has defense against tool narrowing persistence", () => { const agentSessionSource = readFileSync( join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), diff --git a/src/resources/extensions/sf/tests/discuss-tool-scoping.test.ts b/src/resources/extensions/sf/tests/discuss-tool-scoping.test.ts index ac466fcf5..2ce31b246 100644 --- a/src/resources/extensions/sf/tests/discuss-tool-scoping.test.ts +++ b/src/resources/extensions/sf/tests/discuss-tool-scoping.test.ts @@ -17,10 +17,14 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { dirname, join } from "node:path"; -import { describe, test } from 'vitest'; import { fileURLToPath } from "node:url"; +import { describe, test } from "vitest"; -import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts"; +import { + DISCUSS_TOOLS_ALLOWLIST, + RESEARCH_TOOLS_ALLOWLIST, + scopeActiveToolsForUnitType, +} from "../constants.ts"; const __dirname = dirname(fileURLToPath(import.meta.url)); const promptsDir = join(__dirname, "..", "prompts"); @@ -118,16 +122,54 @@ describe("discuss tool scoping (#2949)", () => { ); }); - test("dispatchWorkflow source code scopes tools for discuss unit types", () => { + test("dispatchWorkflow source code scopes tools for bounded unit types", () => { const source = readFileSync(guidedFlowPath, "utf-8"); - // Verify that dispatchWorkflow references the allowlist for tool scoping assert.ok( - source.includes("DISCUSS_TOOLS_ALLOWLIST"), - "guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping", + source.includes("scopeActiveToolsForUnitType"), + "guided-flow.ts should use shared per-unit tool scoping", ); assert.ok( source.includes("setActiveTools"), - "guided-flow.ts should call setActiveTools to scope tools during discuss", + "guided-flow.ts should call setActiveTools to scope tools", ); }); }); + +describe("research tool scoping", () => { + test("RESEARCH_TOOLS_ALLOWLIST permits only summary save and self-report SF tools", () => { + assert.deepEqual(RESEARCH_TOOLS_ALLOWLIST, [ + "sf_summary_save", + "sf_save_summary", + "sf_self_report", + ]); + for (const planningTool of [ + "sf_milestone_generate_id", + "sf_plan_milestone", + "sf_plan_slice", + "sf_plan_task", + ]) { + assert.ok( + !RESEARCH_TOOLS_ALLOWLIST.includes(planningTool), + `research scope should exclude ${planningTool}`, + ); + } + }); + + test("scopeActiveToolsForUnitType removes planning tools from research turns", () => { + const scoped = scopeActiveToolsForUnitType("research-slice", [ + "read", + "bash", + "sf_summary_save", + "sf_self_report", + "sf_milestone_generate_id", + "sf_plan_milestone", + "sf_plan_slice", + ]); + assert.deepEqual(scoped, [ + "read", + "bash", + "sf_summary_save", + "sf_self_report", + ]); + }); +}); diff --git a/src/resources/extensions/sf/tests/prompt-contracts.test.ts b/src/resources/extensions/sf/tests/prompt-contracts.test.ts index f9e0439e0..831ce818a 100644 --- a/src/resources/extensions/sf/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/sf/tests/prompt-contracts.test.ts @@ -1,7 +1,7 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { join } from "node:path"; -import { test } from 'vitest'; +import { test } from "vitest"; const promptsDir = join(process.cwd(), "src/resources/extensions/sf/prompts"); const skillsDir = join(process.cwd(), "src/resources/extensions/sf/skills"); @@ -286,6 +286,21 @@ test("guided-resume-task prompt preserves recovery state until work is supersede assert.doesNotMatch(prompt, /Delete the continue file after reading it/i); }); +test("research prompts stop after saving and forbid planning tools", () => { + for (const name of [ + "research-milestone", + "research-slice", + "guided-research-slice", + ] as const) { + const prompt = readPrompt(name); + assert.match(prompt, /After `sf_summary_save` succeeds/i); + assert.match(prompt, /do \*\*not\*\* call `sf_milestone_generate_id`/i); + assert.match(prompt, /`sf_plan_milestone`/); + assert.match(prompt, /`sf_plan_slice`/); + assert.match(prompt, /planning\/creation tool/i); + } +}); + // ─── Prompt migration: execute-task → sf_complete_task ─────────────── test("execute-task prompt references sf_complete_task tool", () => {