singularity-forge/src/resources/extensions/sf/bootstrap/register-hooks.js
Mikael Hugo e50321b62b feat(selection): thread unitType + failure_mode into fallback outcome records
- FallbackResolver.setUnitContext() stores {unitType,unitId} from autonomous dispatch
- run-unit.js calls pi.setFallbackUnitContext() before/after each unit
- _findAnyAvailableFallback uses real unitType/unitId from context, not sentinel
- Schema v59: failure_mode column in llm_task_outcomes
- insertLlmTaskOutcome accepts failure_mode (rate_limit, quota_exhausted, auth_error)
- register-hooks.js passes event.classification.reason as failure_mode
- register-hooks.js uses real event.unitId when available
- ExtensionRuntimeActions.setFallbackUnitContext added to pi API surface

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-10 23:14:22 +02:00

1564 lines
54 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { spawnSync } from "node:child_process";
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import { join, relative, resolve } from "node:path";
import { isToolCallEventType } from "@singularity-forge/coding-agent";
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
import { formatTokenCount } from "@singularity-forge/coding-agent";
import { saveActivityLog } from "../activity-log.js";
import {
getAutoDashboardData,
hasResearchTerminalTransition,
isAutoActive,
isAutoPaused,
isCanAskUser,
markResearchTerminalTransition,
markToolEnd,
markToolStart,
recordToolInvocationError,
startAutoDetached,
} from "../auto.js";
import {
applyCompletionNudgeTemperature,
maybeInjectCompletionNudgeMessage,
recordCompletionNudgeToolCall,
} from "../auto-completion-nudge.js";
import { recordToolCallName } from "../auto-tool-tracking.js";
import { loadToolApiKeys } from "../commands-config.js";
import { getEcosystemReadyPromise } from "../ecosystem/loader.js";
import { updateSnapshot } from "../ecosystem/sf-extension-api.js";
import {
buildExecutionPolicyJournalEntry,
classifyExecutionPolicyCall,
} from "../execution-policy.js";
import { formatContinue, loadFile, saveFile } from "../files.js";
import { getDiscussionMilestoneId } from "../guided-flow.js";
import { initHealthWidget } from "../health-widget.js";
import { emitJournalEvent } from "../journal.js";
import {
initializeLearningRuntime,
resetLearningRuntime,
selectLearnedModel,
} from "../learning/runtime.js";
import {
observeToolResult,
resetToolWatchdog,
} from "../tool-watchdog.js";
import { NOTICE_KIND, initNotificationStore } from "../notification-store.js";
import { initNotificationWidget } from "../notification-widget.js";
import {
isParallelActive,
shutdownParallel,
} from "../parallel-orchestrator.js";
import {
buildMilestoneFileName,
resolveMilestonePath,
resolveSliceFile,
resolveSlicePath,
} from "../paths.js";
import { cleanupQuickBranch } from "../quick.js";
import {
applyRemoteSteeringDirectives,
formatRemoteSteeringResults,
parseRemoteSteeringDirectives,
} from "../remote-steering.js";
import { classifyCommand } from "../safety/destructive-guard.js";
import {
recordToolCall as safetyRecordToolCall,
recordToolResult as safetyRecordToolResult,
saveEvidenceToDisk,
} from "../safety/evidence-collector.js";
import { initSessionRecorder } from "../session-recorder.js";
import { deriveState } from "../state.js";
import { countGoogleGeminiCliTokens } from "../token-counter.js";
import { getSessionTodoCompactionBlock } from "../tools/session-todo-tool.js";
import { parseUnitId } from "../unit-id.js";
import { logWarning as safetyLogWarning } from "../workflow-logger.js";
import {
BLOCKED_WRITE_ERROR,
isBashWriteToStateFile,
isBlockedStateFile,
} from "../write-intercept.js";
import { handleAgentEnd } from "./agent-end-recovery.js";
import { installNotifyInterceptor } from "./notify-interceptor.js";
import { buildBeforeAgentStartResult } from "./system-context.js";
import {
checkToolCallLoop,
resetToolCallLoopGuard,
} from "./tool-call-loop-guard.js";
import {
clearDiscussionFlowState,
clearPendingGate,
extractDepthVerificationMilestoneId,
getPendingGate,
getSelectedGateAnswer,
isDepthConfirmationAnswer,
isGateQuestionId,
isQueuePhaseActive,
markDepthVerified,
resetWriteGateState,
setPendingGate,
shouldBlockContextWrite,
shouldBlockPendingGate,
shouldBlockPendingGateBash,
shouldBlockQueueExecution,
} from "./write-gate.js";
// Skip the welcome screen on the very first session_start — cli.ts already
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
let isFirstSession = true;
// Module-level slot that the session_start and session_switch handlers reset
// to undefined at the beginning of every session.
// NOTE(review): presumably remembers the most recent Gemini preflight warning
// so it is not repeated within one session — the code that assigns it is not
// visible near this declaration; confirm against its writer.
let lastGeminiPreflightWarning;
// File name of the marker that records "autonomous mode was running when the
// session was compacted"; it lives under <basePath>/.sf/runtime/.
const COMPACTION_AUTO_RESUME_FILE = "compaction-auto-resume.json";

/**
 * Build the path of the compaction auto-resume marker for a project root.
 *
 * @param {string} basePath - Project root that contains the `.sf` directory.
 * @returns {string} `<basePath>/.sf/runtime/compaction-auto-resume.json`.
 */
function compactionResumePath(basePath) {
  const runtimeSegments = [".sf", "runtime", COMPACTION_AUTO_RESUME_FILE];
  return join(basePath, ...runtimeSegments);
}
/**
 * Persist the compaction auto-resume marker as pretty-printed JSON
 * (2-space indent, trailing newline) under `<basePath>/.sf/runtime/`,
 * creating the runtime directory first when it does not exist.
 *
 * Best-effort: any failure (directory creation, serialisation, write) is
 * swallowed so the caller is never interrupted.
 *
 * @param {string} basePath - Project root that contains the `.sf` directory.
 * @param {object} marker - Marker payload to serialise.
 * @returns {void}
 */
function writeCompactionResumeMarker(basePath, marker) {
  try {
    mkdirSync(join(basePath, ".sf", "runtime"), { recursive: true });
    const targetFile = compactionResumePath(basePath);
    const payload = `${JSON.stringify(marker, null, 2)}\n`;
    writeFileSync(targetFile, payload, "utf-8");
  } catch {
    // Non-fatal: compaction autoresume marker is best-effort.
  }
}
/**
 * Read and delete the compaction auto-resume marker (one-shot consumption).
 *
 * @param {string} basePath - Project root that contains the `.sf` directory.
 * @returns {*} The parsed marker, or null when the file is absent or when it
 *   could not be read, parsed, or removed.
 */
function consumeCompactionResumeMarker(basePath) {
  const markerFile = compactionResumePath(basePath);
  if (!existsSync(markerFile)) {
    return null;
  }

  let parsed;
  try {
    const raw = readFileSync(markerFile, "utf-8");
    parsed = JSON.parse(raw);
    unlinkSync(markerFile);
  } catch {
    // Unreadable or corrupt marker (or the delete itself failed): try once
    // more to remove it so it cannot linger, then report "nothing to resume".
    try {
      unlinkSync(markerFile);
    } catch {
      // Ignore cleanup errors.
    }
    return null;
  }
  return parsed;
}
/**
 * Run the memory-embedding backfill without ever letting it fail the caller.
 *
 * @param {(() => unknown)=} runBackfill - Optional runner to invoke instead of
 *   the default; when null/undefined, `runEmbeddingBackfill` is loaded lazily
 *   from `../memory-embeddings.js`.
 * @returns {Promise<void>} Always resolves; errors from loading or running the
 *   backfill are swallowed.
 */
export async function runAgentEndMemoryBackfill(runBackfill) {
  try {
    let backfill = runBackfill;
    if (backfill === undefined || backfill === null) {
      const { runEmbeddingBackfill } = await import("../memory-embeddings.js");
      backfill = runEmbeddingBackfill;
    }
    await backfill();
  } catch {
    // Never break agent_end on backfill issues.
  }
}
/**
 * Run the doctor in fix mode against the current working directory and
 * notify the user about applied fixes and remaining blocking issues.
 *
 * Does nothing when the environment variable SF_DISABLE_STARTUP_DOCTOR is
 * exactly "1". Failures are logged as a warning under the "startup-doctor"
 * tag rather than propagated.
 *
 * @param {object} ctx - Hook context; `ctx.ui.notify` is used when present.
 * @returns {Promise<void>}
 */
async function runSessionStartupDoctorFix(ctx) {
  const doctorDisabled = process.env.SF_DISABLE_STARTUP_DOCTOR === "1";
  if (doctorDisabled) {
    return;
  }

  try {
    const { runSFDoctor: runDoctor, summarizeDoctorIssues: summarize } =
      await import("../doctor.js");
    const report = await runDoctor(process.cwd(), { fix: true });

    const appliedCount = report.fixesApplied.length;
    if (appliedCount > 0) {
      ctx.ui?.notify?.(
        `Startup doctor: applied ${appliedCount} fix(es).`,
        "info",
        { noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
      );
    }

    const blockingCount = summarize(report.issues).errors;
    if (blockingCount > 0) {
      ctx.ui?.notify?.(
        `Startup doctor found ${blockingCount} blocking issue(s). Run /doctor audit for details.`,
        "warning",
        { noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
      );
    }
  } catch (error) {
    let detail;
    if (error instanceof Error) {
      detail = error.message;
    } else {
      detail = String(error);
    }
    safetyLogWarning("startup-doctor", detail);
  }
}
/**
 * Publish the effective service tier in the UI footer under the "sf-fast"
 * status key.
 *
 * @param {object} ctx - Hook context; `ctx.ui.setStatus` is called and
 *   `ctx.model?.id` is passed to the footer formatter.
 * @returns {Promise<void>}
 */
async function syncServiceTierStatus(ctx) {
  const {
    isServiceTierDisabled: isDisabled,
    getEffectiveServiceTier: currentTier,
    formatServiceTierFooterStatus: renderFooter,
  } = await import("../service-tier.js");

  // Skip the footer event entirely when the feature is explicitly disabled —
  // no setStatus call, no RPC traffic, no leak into headless stderr even if
  // the TUI_FOOTER_STATUS_KEYS filter is bypassed.
  if (!isDisabled()) {
    ctx.ui.setStatus("sf-fast", renderFooter(currentTier(), ctx.model?.id));
  }
}
/**
 * Run all *.sh scripts found in .sf/hooks/<phase>/ (relative to the current
 * working directory), in sorted filename order, with the given JSON payload
 * piped to stdin. Returns a block result if a blocking hook denies the call,
 * otherwise null (allow).
 *
 * Purpose: Copilot-style user-defined shell hooks that can approve or deny
 * individual tool calls. Scripts receive `{"tool":"...","input":{...}}` on
 * stdin and signal denial by exiting non-zero (stdout becomes the reason).
 *
 * Consumer: tool_call handler (pre-tool, blocking) and tool_result handler
 * (post-tool, non-blocking). Phase names: "pre-tool", "post-tool",
 * "session-start", "session-end".
 *
 * Failure policy:
 * - Missing or unreadable hooks directory: null.
 * - A script that could not be started at all is skipped as non-fatal.
 *   spawnSync reports this through `result.error` with neither an exit
 *   status nor a signal (e.g. `sh` is not on PATH) instead of throwing.
 * - In blocking mode, a script that started but did not exit 0 blocks. This
 *   includes scripts killed by the 5 s timeout or by another signal; those
 *   get an explicit fallback reason when they printed nothing to stdout.
 *
 * @param {string} phase - Hook phase directory name under .sf/hooks/.
 * @param {unknown} payload - Value serialised with JSON.stringify for stdin.
 * @param {boolean} [blocking=false] - When true, a failing script blocks.
 * @returns {{ block: true, reason: string } | null}
 */
function runShellHooks(phase, payload, blocking = false) {
  const hooksDir = join(process.cwd(), ".sf", "hooks", phase);
  if (!existsSync(hooksDir)) return null;

  let scripts;
  try {
    scripts = readdirSync(hooksDir)
      .filter((f) => f.endsWith(".sh"))
      .sort()
      .map((f) => join(hooksDir, f));
  } catch {
    return null;
  }

  const timeoutMs = 5_000;
  const stdinJson = JSON.stringify(payload);

  for (const script of scripts) {
    let result;
    try {
      result = spawnSync("sh", [script], {
        input: stdinJson,
        encoding: "utf-8",
        timeout: timeoutMs,
        stdio: ["pipe", "pipe", "pipe"],
      });
    } catch {
      continue; // non-fatal: script invocation error
    }

    // Non-blocking phases only run the script; its outcome is ignored.
    if (!blocking) continue;

    // The process never started: no exit status and no signal, only an error.
    // Treat it like the thrown-invocation case above rather than as a denial.
    const neverStarted =
      Boolean(result.error) && result.status == null && result.signal == null;
    if (neverStarted) continue;

    if (result.status === 0) continue;

    const stdoutReason = (result.stdout || "").trim();
    if (stdoutReason) {
      return { block: true, reason: stdoutReason };
    }

    let fallbackReason;
    if (result.error?.code === "ETIMEDOUT") {
      fallbackReason = `Shell hook ${script} timed out after ${timeoutMs} ms`;
    } else if (result.signal) {
      fallbackReason = `Shell hook ${script} was terminated by signal ${result.signal}`;
    } else {
      fallbackReason = `Shell hook ${script} exited ${result.status}`;
    }
    return { block: true, reason: fallbackReason };
  }
  return null;
}
export function registerHooks(pi, ecosystemHandlers = []) {
pi.on("session_start", async (_event, ctx) => {
lastGeminiPreflightWarning = undefined;
resetLearningRuntime();
resetToolWatchdog();
try {
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
const _sfile = ctx.sessionManager?.getSessionFile?.() ?? "";
if (sid) {
process.stderr.write(`[forge] session ${sid.slice(0, 8)}\n`);
}
// Establish the session row so all subsequent turns have a parent.
// Git context (repo, branch) is patched in before_agent_start once the
// DB is open and the cwd is confirmed.
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
} catch {
/* non-fatal */
}
initNotificationStore(process.cwd());
installNotifyInterceptor(ctx);
initNotificationWidget(ctx);
initHealthWidget(ctx);
resetWriteGateState();
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await syncServiceTierStatus(ctx);
await initializeLearningRuntime();
await runSessionStartupDoctorFix(ctx);
// Initialize metrics-central with database adapter
try {
const { initMetricsCentral } = await import("../metrics-central.js");
const { getDatabase } = await import("../sf-db.js");
const dbAdapter = getDatabase();
const sessionId = ctx.sessionManager?.getSessionId?.() || "";
initMetricsCentral(process.cwd(), {
sessionId,
dbAdapter,
});
} catch (err) {
// Non-fatal: metrics should not block session start
const { logWarning } = await import("../workflow-logger.js");
logWarning(
"session-start",
`Failed to initialize metrics-central: ${err instanceof Error ? err.message : String(err)}`,
);
}
// Apply show_token_cost preference (#1515)
try {
const { loadEffectiveSFPreferences } = await import("../preferences.js");
const prefs = loadEffectiveSFPreferences();
process.env.SF_SHOW_TOKEN_COST = prefs?.preferences.show_token_cost
? "1"
: "";
} catch {
/* non-fatal */
}
if (isFirstSession) {
isFirstSession = false;
} else {
try {
const sfBinPath = process.env.SF_BIN_PATH;
if (sfBinPath) {
const { dirname } = await import("node:path");
const { printWelcomeScreen } = await import(
join(dirname(sfBinPath), "welcome-screen.js")
);
let remoteChannel;
try {
const { resolveRemoteConfig } = await import(
"../../remote-questions/config.js"
);
const rc = resolveRemoteConfig();
if (rc) remoteChannel = rc.channel;
} catch {
/* non-fatal */
}
printWelcomeScreen({
version: process.env.SF_VERSION || "0.0.0",
remoteChannel,
});
}
} catch {
/* non-fatal */
}
}
loadToolApiKeys();
// Flow audit cleans over-budget optional child processes automatically and
// only warns for real blockers such as stale dispatch or recent errors.
try {
const { runFlowAudit } = await import("../doctor.js");
const flow = await runFlowAudit(process.cwd(), {
killOverBudgetChildren: true,
});
if (!flow.ok) {
ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning", {
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
dedupe_key: "flow-audit-recommended-action",
});
}
} catch {
/* non-fatal — flow audit must never block session start */
}
// Drain self-feedback: auto-resolve entries whose blocking
// sf-version constraint has been satisfied by the current sf bump,
// and surface entries that remain blocked to the operator. Done after
// other init so notifications appear in the same session-start sweep.
try {
const {
compactSelfFeedbackMarkdown,
markResolved,
migrateLegacyBacklogFilename,
resolveFeedbackForCompletedMilestones,
triageBlockedEntries,
} = await import("../self-feedback.js");
migrateLegacyBacklogFilename(process.cwd());
compactSelfFeedbackMarkdown(process.cwd());
// Auto-resolve blocking entries for milestones that already completed
const autoResolved = resolveFeedbackForCompletedMilestones(process.cwd());
for (const id of autoResolved) {
ctx.ui?.notify?.(
`Self-feedback ${id} auto-resolved — milestone is complete.`,
"info",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
const triage = triageBlockedEntries(process.cwd());
const currentSfVersion = process.env.SF_VERSION || "unknown";
for (const e of triage.retry) {
markResolved(
e.id,
{
reason: `sf bumped past ${e.sfVersion} (was blocking on this version)`,
evidence: {
kind: "auto-version-bump",
fromVersion: e.sfVersion,
toVersion: currentSfVersion,
},
},
process.cwd(),
);
const occ = e.occurredIn;
const unit = occ
? [occ.milestone, occ.slice, occ.task].filter(Boolean).join("/") ||
occ.unitType ||
"(unknown unit)"
: "(unknown unit)";
ctx.ui?.notify?.(
`Self-feedback ${e.id} (${e.kind}) auto-resolved — sf bumped past ${e.sfVersion}. Originating unit ${unit} should be re-run.`,
"info",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
if (triage.stillBlocked.length > 0) {
ctx.ui?.notify?.(
`${triage.stillBlocked.length} unresolved self-feedback entr${triage.stillBlocked.length === 1 ? "y" : "ies"} require sf fixes. See .sf/SELF-FEEDBACK.md or ~/.sf/agent/upstream-feedback.jsonl.`,
"warning",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
// Forge-only: high/critical entries are queued as hidden follow-up repair
// work on startup, even outside /autonomous. The drain helper owns claim TTL
// and delivery failure retry, so this is safe to call opportunistically.
const highBlocked = triage.stillBlocked.filter(
(e) => e.severity === "high" || e.severity === "critical",
);
if (highBlocked.length > 0) {
const ids = highBlocked.map((e) => `${e.id} (${e.kind})`).join(", ");
ctx.ui?.notify?.(
`${highBlocked.length} high/critical inline-fix candidate${highBlocked.length === 1 ? "" : "s"} pending in .sf/SELF-FEEDBACK.md: ${ids}`,
"warning",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
const { dispatchSelfFeedbackInlineFixIfNeeded } = await import(
"../self-feedback-drain.js"
);
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
}
} catch {
/* non-fatal — self-feedback drain must never block session start */
}
// Run gap audit to detect orphaned prompts, handlers, native modules, commands
try {
const { runGapAudit } = await import("../gap-audit.js");
const filed = runGapAudit(process.cwd());
if (filed > 0) {
const { selfFeedbackDestinationLabel } = await import(
"../self-feedback.js"
);
ctx.ui?.notify?.(
`Gap audit filed ${filed} new finding${filed === 1 ? "" : "s"} in ${selfFeedbackDestinationLabel(process.cwd())}`,
"info",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
} catch {
/* non-fatal — gap audit must never block session start */
}
// Summarise the last UOK parity report so the operator can act on
// divergences/fallbacks before starting any new work.
try {
const { summarizeParityReport } = await import(
"../uok-parity-summary.js"
);
await summarizeParityReport(process.cwd(), ctx);
} catch {
/* non-fatal — parity summary must never block session start */
}
// Bridge upstream feedback into forge-local self-feedback
try {
const { bridgeUpstreamFeedback } = await import("../upstream-bridge.js");
const filed = bridgeUpstreamFeedback(process.cwd());
if (filed > 0) {
ctx.ui?.notify?.(
`Upstream bridge filed ${filed} rollup${filed === 1 ? "" : "s"} in .sf/SELF-FEEDBACK.md`,
"info",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
} catch {
/* non-fatal — upstream bridge must never block session start */
}
// Promote recurring feedback clusters to REQUIREMENTS.md
try {
const { promoteFeedbackToRequirements } = await import(
"../requirement-promoter.js"
);
const { promoted, requirementIds } = promoteFeedbackToRequirements(
process.cwd(),
);
if (promoted > 0) {
ctx.ui?.notify?.(
`Promoted ${promoted} cluster${promoted === 1 ? "" : "s"} to requirements: ${requirementIds.join(", ")}`,
"info",
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
);
}
} catch {
/* non-fatal — requirement promoter must never block session start */
}
// Refresh model catalogs for discoverable providers in the background.
try {
const { scheduleModelCatalogRefresh } = await import(
"../model-catalog-cache.js"
);
const { getKeyManagerAuthStorage } = await import("../key-manager.js");
scheduleModelCatalogRefresh(process.cwd(), getKeyManagerAuthStorage());
} catch {
/* non-fatal — model catalog refresh must never block session start */
}
// Compaction should never behave like a stop boundary. If autonomous mode
// was active when compaction happened, continue automatically on session start.
try {
if (!isAutoActive() && !isAutoPaused()) {
const marker = consumeCompactionResumeMarker(process.cwd());
if (
marker?.reason === "session_before_compact" &&
marker?.autoActive === true &&
marker?.basePath === process.cwd()
) {
ctx.ui?.notify?.(
"Resuming autonomous mode after compaction.",
"info",
{
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
dedupe_key: "autonomous-resume-after-compaction",
},
);
startAutoDetached(ctx, pi, process.cwd(), marker?.verbose === true, {
step: marker?.stepMode === true,
canAskUser: marker?.canAskUser !== false,
milestoneLock:
typeof marker?.milestoneLock === "string"
? marker.milestoneLock
: null,
});
}
}
} catch {
/* non-fatal — autoresume must never block session start */
}
});
pi.on("session_switch", async (_event, ctx) => {
lastGeminiPreflightWarning = undefined;
resetLearningRuntime();
resetToolWatchdog();
initNotificationStore(process.cwd());
installNotifyInterceptor(ctx);
resetWriteGateState();
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
clearDiscussionFlowState();
await syncServiceTierStatus(ctx);
await initializeLearningRuntime();
loadToolApiKeys();
// Re-establish session recorder for the new session so turn recording
// continues under the correct session_id without contaminating the
// previous session's rows.
try {
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
} catch {
/* non-fatal */
}
});
pi.on("before_agent_start", async (event, ctx) => {
// Refresh the ecosystem snapshot BEFORE running ecosystem handlers so they
// see current phase/unit state (#3338).
try {
const { ensureDbOpen } = await import("./dynamic-tools.js");
await ensureDbOpen();
const basePath = process.cwd();
const state = await deriveState(basePath);
updateSnapshot(state);
// Patch git context on the first turn now that the DB is confirmed open.
// Best-effort: git may be absent (e.g. tmp dirs), so we swallow errors.
try {
const { execFileSync } = await import("node:child_process");
const branch = execFileSync(
"git",
["rev-parse", "--abbrev-ref", "HEAD"],
{
cwd: basePath,
encoding: "utf-8",
stdio: ["ignore", "pipe", "ignore"],
timeout: 3_000,
},
).trim();
const remoteUrl = execFileSync(
"git",
["config", "--get", "remote.origin.url"],
{
cwd: basePath,
encoding: "utf-8",
stdio: ["ignore", "pipe", "ignore"],
timeout: 3_000,
},
).trim();
patchSessionGitContext(remoteUrl || null, branch || null);
} catch {
/* non-fatal: git absent or not a repo */
}
} catch {
updateSnapshot(null);
}
// Record user message as the start of a new turn. Done after ensureDbOpen
// so the turns row lands in the DB immediately; agent_end will patch the
// assistant_response onto it once the model finishes.
try {
recordTurnStart(typeof event.prompt === "string" ? event.prompt : null);
} catch {
/* non-fatal: turn recording must never block the agent */
}
// Await ecosystem loading, then dispatch any registered handlers.
await getEcosystemReadyPromise();
for (const handler of ecosystemHandlers) {
try {
await handler(event, ctx);
} catch {
// Non-fatal: don't break the SF turn if a third-party handler throws.
}
}
return buildBeforeAgentStartResult(event, ctx);
});
pi.on("agent_end", async (event, ctx) => {
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await handleAgentEnd(pi, event, ctx);
// Complete the pending turn row with the assistant's text response.
// event.messages is an array; the last entry is the model's reply.
// Its .content is an array of content blocks — extract the first text block.
try {
const msgs = Array.isArray(event.messages) ? event.messages : [];
const lastMsg = msgs[msgs.length - 1];
const textBlock = Array.isArray(lastMsg?.content)
? lastMsg.content.find((b) => b.type === "text" && b.text)
: null;
recordTurnEnd(textBlock?.text ?? null);
} catch {
/* non-fatal: turn recording must never block agent teardown */
}
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
// gateway has an embed worker online, embed any memories that don't yet
// have a vector. Bounded per invocation; logs once-per-minute when the
// gateway is unavailable so we don't spam the journal.
await runAgentEndMemoryBackfill();
});
// Squash-merge quick-task branch back to the original branch after the
// agent turn completes (#2668). cleanupQuickBranch is a no-op when no
// quick-return state is pending, so this is safe to call on every turn.
pi.on("turn_end", async (_event, ctx) => {
try {
cleanupQuickBranch();
} catch {
// Best-effort: don't break the turn lifecycle if cleanup fails.
}
try {
const {
consumeCompletedInlineFixClaim,
dispatchSelfFeedbackInlineFixIfNeeded,
} = await import("../self-feedback-drain.js");
const resolvedIds = consumeCompletedInlineFixClaim(process.cwd());
if (resolvedIds.length > 0) {
const requestReload = ctx.requestReload;
requestReload?.(
`self-feedback inline fix resolved ${resolvedIds.length} entr${resolvedIds.length === 1 ? "y" : "ies"}`,
);
return;
}
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
} catch {
// Best-effort: stale code should not break normal turn completion.
}
});
pi.on("session_before_compact", async () => {
// Never cancel compaction — instead provide a custom compaction summary
// that preserves work state. Cancelling compaction causes context overflow
// which degrades performance and can hit hard limits. The custom summary
// ensures the agent retains critical context after compaction.
const basePath = process.cwd();
const { ensureDbOpen } = await import("./dynamic-tools.js");
await ensureDbOpen();
const state = await deriveState(basePath);
// Build work state summary for compaction context preservation
const workState = [];
if (state.activeMilestone) {
workState.push(
`Active milestone: ${state.activeMilestone.id} (${state.activeMilestone.title})`,
);
}
if (state.activeSlice) {
workState.push(
`Active slice: ${state.activeSlice.id} (${state.activeSlice.title})`,
);
}
if (state.activeTask) {
workState.push(
`Active task: ${state.activeTask.id} (${state.activeTask.title})`,
);
}
if (state.phase) {
workState.push(`Current phase: ${state.phase}`);
}
// Include mode state in compaction summary
const { getAutoSession } = await import("../auto/session.js");
const session = getAutoSession();
const mode = session?.getMode?.();
if (mode) {
workState.push(
`Work mode: ${mode.workMode}, Run control: ${mode.runControl}, Permission: ${mode.permissionProfile}, Model: ${mode.modelMode}`,
);
}
// If autonomous mode is active, include current unit details
if (isAutoActive()) {
const dash = getAutoDashboardData();
if (dash.currentUnit) {
workState.push(
`Current unit: ${dash.currentUnit.type} ${dash.currentUnit.id}`,
);
}
}
// Write CONTINUE file for task recovery if in executing phase
if (
state.activeMilestone &&
state.activeSlice &&
state.activeTask &&
state.phase === "executing"
) {
const sliceDir = resolveSlicePath(
basePath,
state.activeMilestone.id,
state.activeSlice.id,
);
if (sliceDir) {
const existingFile = resolveSliceFile(
basePath,
state.activeMilestone.id,
state.activeSlice.id,
"CONTINUE",
);
const hasExisting = existingFile && (await loadFile(existingFile));
const legacyContinue = join(sliceDir, "continue.md");
const hasLegacy = await loadFile(legacyContinue);
if (!hasExisting && !hasLegacy) {
const continuePath = join(
sliceDir,
`${state.activeSlice.id}-CONTINUE.md`,
);
await saveFile(
continuePath,
formatContinue({
frontmatter: {
milestone: state.activeMilestone.id,
slice: state.activeSlice.id,
task: state.activeTask.id,
step: 0,
totalSteps: 0,
status: "compacted",
savedAt: new Date().toISOString(),
},
completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`,
remainingWork: "Check the task plan for remaining steps.",
decisions: "Check task summary files for prior decisions.",
context: `Session was compacted. Work state: ${workState.join("; ")}`,
nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`,
}),
);
}
}
}
// Return custom compaction summary that preserves work state
// instead of cancelling compaction
const todoBlock = getSessionTodoCompactionBlock(basePath);
const baseSummary =
workState.length > 0
? `Work in progress: ${workState.join(". ")}.`
: "Session compacted. No active work state.";
const summary = todoBlock ? `${baseSummary}\n\n${todoBlock}` : baseSummary;
const result = {
compaction: {
summary,
firstKeptEntryId: undefined, // Let Pi decide
tokensBefore: undefined, // Let Pi measure
details: {
workState,
isAutoActive: isAutoActive(),
mode: mode || null,
},
},
};
if (isAutoActive()) {
writeCompactionResumeMarker(basePath, {
reason: "session_before_compact",
ts: new Date().toISOString(),
autoActive: true,
basePath,
stepMode: session?.stepMode === true,
canAskUser: isCanAskUser(),
milestoneLock:
typeof session?.sessionMilestoneLock === "string"
? session.sessionMilestoneLock
: null,
verbose: session?.verbose === true,
});
}
// Persist compaction summary as the session's most recent work description
// so memory-pipeline ingestion has a compact semantic handle for retrieval.
try {
updateSessionSummary(result.compaction.summary);
} catch {
/* non-fatal */
}
return result;
});
pi.on("session_shutdown", async (_event, ctx) => {
// Flush any in-flight turn (e.g. interrupted agent) and clear session state
// so the recorder doesn't carry stale IDs into a subsequent process reuse.
try {
resetSessionRecorder();
} catch {
/* non-fatal */
}
resetLearningRuntime();
// Stop metrics-central on session shutdown
try {
const { stopMetricsCentral } = await import("../metrics-central.js");
stopMetricsCentral();
} catch {
// Non-fatal: cleanup should not block shutdown
}
if (isParallelActive()) {
try {
await shutdownParallel(process.cwd());
} catch {
// best-effort
}
}
if (!isAutoActive() && !isAutoPaused()) return;
const dash = getAutoDashboardData();
if (dash.currentUnit) {
saveActivityLog(
ctx,
dash.basePath,
dash.currentUnit.type,
dash.currentUnit.id,
);
}
});
pi.on("tool_call", async (event) => {
const discussionBasePath = process.cwd();
// ── Shell pre-tool hooks (.sf/hooks/pre-tool/*.sh) ────────────────────
// User-authored scripts that can approve or deny a tool call.
// Exit 0 = approve; non-zero = block with stdout as the reason.
const hookBlock = runShellHooks(
"pre-tool",
{ tool: event.toolName, input: event.input ?? {} },
true,
);
if (hookBlock) return hookBlock;
// ── Loop guard: block repeated identical tool calls ──
const loopCheck = checkToolCallLoop(event.toolName, event.input);
if (loopCheck.block) {
return { block: true, reason: loopCheck.reason };
}
// ── Session file-touch recording ──────────────────────────────────────
// Best-effort: path may be absent for non-file tools; recordFileTouch
// no-ops on non-write tools and when no session is active.
try {
recordFileTouch(event.toolName, event.input?.path ?? null);
} catch {
/* non-fatal */
}
// ── Research unit terminal transition enforcement ─────────────────────
// After a research unit (research-slice/research-milestone) successfully
// saves its RESEARCH artifact via save_summary, the tool returns
// terminal_transition: true. We track this and block subsequent planning
// tool calls to prevent post-artifact drift (e.g. calling plan_milestone
// after research is complete). This addresses sf-moocx6m5-ij630a.
if (isAutoActive()) {
const dash = getAutoDashboardData();
const currentUnit = dash.currentUnit;
if (
currentUnit &&
(currentUnit.type === "research-slice" ||
currentUnit.type === "research-milestone")
) {
if (hasResearchTerminalTransition()) {
const planningTools = new Set([
"plan_milestone",
"plan_slice",
"plan_task",
"new_milestone_id",
"replan_slice",
"reassess_roadmap",
]);
if (planningTools.has(event.toolName)) {
return {
block: true,
reason:
`Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. ` +
`Post-artifact drift is blocked before runaway supervision treats it as legitimate large research. ` +
`Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`,
};
}
}
}
}
// ── Discussion gate enforcement: track pending gate questions ─────────
// Only gate-shaped ask_user_questions calls should block execution.
// The gate stays pending until the user selects the approval option.
// In autonomous mode (canAskUser=false): block the gate question and
// instruct the agent to investigate and resolve its own uncertainty
// rather than waiting for human input. The question exists because
// confidence is low — the agent must research until it can decide.
if (event.toolName === "ask_user_questions") {
const questions = event.input?.questions ?? [];
const questionId = questions.find(
(question) =>
typeof question?.id === "string" && isGateQuestionId(question.id),
)?.id;
if (typeof questionId === "string") {
if (isAutoActive() && !isCanAskUser()) {
// Do not set a pending gate — the agent must self-resolve.
// The gate was triggered because confidence < 0.98. The agent
// must investigate until confidence reaches 0.98, then proceed.
const questionSummary =
questions.find((q) => q.id === questionId)?.question ?? questionId;
return {
block: true,
reason:
`AUTONOMOUS MODE — gate question blocked. You called ask_user_questions ` +
`because your confidence was below 0.98. Target: reach ≥ 0.98 confidence ` +
`through investigation, then proceed without pausing.\n\n` +
`Gate question (self-resolve this): "${questionSummary}"\n\n` +
`Confidence-building protocol (execute in order, stop when confidence ≥ 0.98):\n` +
`1. Tier 1 — Read artifacts: task plans, slice plans, .sf/DECISIONS.md, ` +
`.sf/REQUIREMENTS.md, .sf/CONTEXT.md, docs/records/, existing test coverage.\n` +
`2. Tier 1 — Run verification: lint, type-check, relevant tests. Read the output.\n` +
`3. Tier 2 — External lookup: WebSearch / WebFetch / Context7 for any factual ` +
`unknowns (API behavior, version compatibility, library defaults).\n` +
`4. Self-assess: rate your confidence 01. If ≥ 0.98 → proceed. ` +
`If still < 0.98 → there is a real, concrete blocker.\n` +
`5. Concrete blocker only: call checkpoint with outcome="blocked" ` +
`and a precise blockerReason naming the specific unresolvable gap ` +
`(missing credentials, impossible contract, safety violation, legal scope).\n\n` +
`Do NOT call ask_user_questions again. Do NOT pause for user input. ` +
`Factual gaps are YOUR job to close via Tier 1 and Tier 2.`,
};
}
setPendingGate(questionId);
}
}
// ── Discussion gate enforcement: block tool calls while gate is pending ──
// If ask_user_questions was called with a gate ID but hasn't been confirmed,
// block all non-read-only tool calls to prevent the model from skipping gates.
// In autonomous mode: auto-clear any stale pending gate so it never blocks.
if (getPendingGate()) {
if (isAutoActive() && !isCanAskUser()) {
clearPendingGate();
} else {
const milestoneId = getDiscussionMilestoneId(discussionBasePath);
if (isToolCallEventType("bash", event)) {
const bashGuard = shouldBlockPendingGateBash(
event.input.command,
milestoneId,
isQueuePhaseActive(),
);
if (bashGuard.block) return bashGuard;
} else {
const gateGuard = shouldBlockPendingGate(
event.toolName,
milestoneId,
isQueuePhaseActive(),
);
if (gateGuard.block) return gateGuard;
}
}
}
// ── Queue-mode execution guard (#2545): block source-code mutations ──
// When /queue is active, the agent should only create milestones,
// not execute work. Block write/edit to non-.sf/ paths and bash commands
// that would modify files.
if (isQueuePhaseActive()) {
let queueInput = "";
if (isToolCallEventType("write", event)) {
queueInput = event.input.path;
} else if (isToolCallEventType("edit", event)) {
queueInput = event.input.path;
} else if (isToolCallEventType("bash", event)) {
queueInput = event.input.command;
}
const queueGuard = shouldBlockQueueExecution(
event.toolName,
queueInput,
true,
);
if (queueGuard.block) return queueGuard;
}
// ── Execution policy enforcement: block based on permission profile ──
// When autonomous mode is active, enforce the session's permission profile
// at the tool boundary. This is the enforcement layer that makes
// /permission-profile restricted|normal|trusted|unrestricted meaningful.
if (isAutoActive()) {
const { getAutoSession } = await import("../auto/session.js");
const session = getAutoSession();
const profile = session?.permissionProfile ?? "normal";
const input =
event.toolName === "bash"
? (event.input?.command ?? "")
: event.toolName === "write" || event.toolName === "edit"
? (event.input?.path ?? "")
: "";
const decision = classifyExecutionPolicyCall(
profile,
event.toolName,
input,
);
if (!decision.allowed) {
return {
block: true,
reason: `Execution policy block: ${decision.reason} (profile: ${profile}, tool: ${event.toolName})`,
};
}
}
// ── Single-writer engine: block direct writes to STATE.md ──────────
// Covers write, edit, and bash tools to prevent bypass vectors.
if (isToolCallEventType("write", event)) {
if (isBlockedStateFile(event.input.path)) {
return { block: true, reason: BLOCKED_WRITE_ERROR };
}
}
if (isToolCallEventType("edit", event)) {
if (isBlockedStateFile(event.input.path)) {
return { block: true, reason: BLOCKED_WRITE_ERROR };
}
}
if (isToolCallEventType("bash", event)) {
if (isBashWriteToStateFile(event.input.command)) {
return { block: true, reason: BLOCKED_WRITE_ERROR };
}
}
if (!isToolCallEventType("write", event)) return;
// ── Worktree isolation: block writes outside the worktree and main .sf/ ──
// Only enforced in autonomous mode — interactive sessions skip this check.
// When SF_WORKTREE is set, process.cwd() is the worktree directory.
// The agent should only write inside the worktree OR inside the main repo's .sf/.
if (isAutoActive() && process.env.SF_WORKTREE) {
const worktreeRoot = process.cwd();
const mainRepoRoot =
process.env.SF_PROJECT_ROOT ?? resolve(worktreeRoot, "..");
const targetPath = resolve(event.input.path);
const worktreeRel = relative(worktreeRoot, targetPath);
const mainSfRel = relative(join(mainRepoRoot, ".sf"), targetPath);
const worktreeOk =
!worktreeRel.startsWith("..") && !worktreeRel.startsWith("/");
const mainSfOk =
!mainSfRel.startsWith("..") && !mainSfRel.startsWith("/");
if (!worktreeOk && !mainSfOk) {
return {
block: true,
reason:
`HARD BLOCK: Worktree isolation is active. Cannot write to "${event.input.path}" — ` +
`path is outside the worktree (${worktreeRoot}) and outside the main repo's .sf/ directory. ` +
`Write only inside the worktree or inside ${join(mainRepoRoot, ".sf")}/milestones/ for planning artifacts.`,
};
}
}
const result = shouldBlockContextWrite(
event.toolName,
event.input.path,
getDiscussionMilestoneId(discussionBasePath),
isQueuePhaseActive(),
);
if (result.block) return result;
});
// ── Trajectory recording: capture tool calls ──
// Appends one TOOL_CALL step per dispatched tool call, tagged with the session id
// (SF_SESSION_ID, or "default") and the id of the dashboard's current unit, if any.
pi.on("tool_call", async (event) => {
  try {
    const { recordTrajectoryStep, STEP_TYPES } = await import(
      "../trajectory-recorder.js"
    );
    const activeUnit = getAutoDashboardData().currentUnit;
    const step = {
      sessionId: process.env.SF_SESSION_ID || "default",
      unitId: activeUnit?.id,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: event.toolName,
      toolParams: event.input,
      metadata: {
        toolCallId: event.toolCallId,
        isAutoActive: isAutoActive(),
      },
    };
    recordTrajectoryStep(step);
  } catch {
    // Best-effort only: a recorder failure must never affect the tool call.
  }
});
// ── Safety harness: evidence collection + destructive command warnings ──
// Autonomous mode only. Records the call as evidence, journals the execution-policy
// context, persists evidence for execute-task units, and warns (never blocks) when a
// bash command is classified as destructive.
pi.on("tool_call", async (event, ctx) => {
  if (!isAutoActive()) return;
  safetyRecordToolCall(event.toolCallId, event.toolName, event.input);
  const dashboard = getAutoDashboardData();
  // Prefer the session's permission profile; otherwise use a queue-aware default.
  const { getAutoSession } = await import("../auto/session.js");
  const profileId =
    getAutoSession()?.permissionProfile ??
    (isQueuePhaseActive() ? "restricted" : "normal");
  if (dashboard.basePath) {
    const journalEntry = buildExecutionPolicyJournalEntry({
      event,
      profileId,
      unit: dashboard.currentUnit,
      flowId: `execution-policy:${event.toolCallId ?? event.toolName}`,
      now: () => new Date().toISOString(),
    });
    emitJournalEvent(dashboard.basePath, journalEntry);
  }
  // Persist evidence immediately at dispatch so a mid-unit session restart
  // (resetEvidence() + loadEvidenceFromDisk()) cannot wipe the entry between
  // tool_call and tool_execution_end. Without this the "no bash calls" false
  // positive fires when the LLM clearly ran a verification command (Bug #4385).
  const isExecuteTask = dashboard.currentUnit?.type === "execute-task";
  if (dashboard.basePath && isExecuteTask) {
    const ids = parseUnitId(dashboard.currentUnit.id);
    if (ids.milestone && ids.slice && ids.task) {
      saveEvidenceToDisk(
        dashboard.basePath,
        ids.milestone,
        ids.slice,
        ids.task,
      );
    }
  }
  // Destructive command classification (warn only, never block).
  if (!isToolCallEventType("bash", event)) return;
  const verdict = classifyCommand(event.input.command);
  if (!verdict.destructive) return;
  const labelSummary = verdict.labels.join(", ");
  safetyLogWarning("safety", `destructive command: ${labelSummary}`, {
    command: String(event.input.command).slice(0, 200),
  });
  ctx.ui.notify(`Destructive command detected: ${labelSummary}`, "warning", {
    noticeKind: NOTICE_KIND.TOOL_NOTICE,
    dedupe_key: `destructive-command:${verdict.labels.join(",")}`,
  });
});
// ── tool_result: post-tool shell hooks, autonomous watchdog steering, and
// ask_user_questions bookkeeping (remote steering, gate clearing, discussion log) ──
pi.on("tool_result", async (event) => {
  // ── Shell post-tool hooks (.sf/hooks/post-tool/*.sh) ─────────────────
  // Fire-and-forget: scripts receive tool name + result text; exit code ignored.
  runShellHooks("post-tool", {
    tool: event.toolName,
    input: event.input ?? {},
    result:
      typeof event.content === "string"
        ? event.content.slice(0, 2_000)
        : null,
  });
  if (isAutoActive()) {
    // A save_summary result flagged as a terminal transition for a research
    // unit is recorded so the rest of the auto loop can observe it.
    if (
      event.toolName === "save_summary" &&
      event.details &&
      typeof event.details === "object" &&
      event.details.terminal_transition === true &&
      event.details.unit_type === "research"
    ) {
      markResearchTerminalTransition();
    }
    // Let the tool watchdog inspect the result; when it yields a steering
    // message, deliver it to the agent as a hidden "steer" message.
    const steer = observeToolResult(event);
    if (steer) {
      pi.sendMessage(
        {
          customType: "sf-tool-watchdog",
          content: steer.content,
          display: false,
          details: {
            key: steer.key,
            severity: steer.severity,
            toolName: event.toolName,
            toolCallId: event.toolCallId,
          },
        },
        { deliverAs: "steer" },
      );
    }
  }
  // Everything below only concerns ask_user_questions results.
  if (event.toolName !== "ask_user_questions") return;
  const milestoneId = getDiscussionMilestoneId(process.cwd());
  const details = event.details;
  // ── Discussion gate enforcement: handle gate question responses ──
  // Single consolidated loop: finds depth_verification questions, verifies the answer,
  // marks the milestone as depth-verified, and clears the pending gate.
  // Also handles the legacy pending-gate path (set by tool_call) for robustness.
  const questions = event.input?.questions ?? [];
  const currentPendingGate = getPendingGate();
  // Nothing to process when the prompt was cancelled or produced no response.
  if (details?.cancelled || !details?.response) return;
  if (details.remote === true) {
    // Remote responses may carry steering directives; apply them and report
    // the outcome back to the agent as a hidden "steer" message.
    const steering = parseRemoteSteeringDirectives(details.response);
    if (steering.steering) {
      const results = applyRemoteSteeringDirectives(steering.directives);
      pi.sendMessage(
        {
          customType: "sf-remote-steering",
          content: formatRemoteSteeringResults(results),
          display: false,
          details: {
            toolName: event.toolName,
            toolCallId: event.toolCallId,
            promptId: details.promptId,
            channel: details.channel,
            results,
          },
        },
        { deliverAs: "steer" },
      );
    }
  }
  for (const question of questions) {
    if (typeof question.id !== "string") continue;
    // Check if this is a depth_verification question (either directly or via pending gate)
    const isDepthQ = question.id.includes("depth_verification");
    const isPendingQ = question.id === currentPendingGate;
    if (!isDepthQ && !isPendingQ) continue;
    const answer = details.response?.answers?.[question.id];
    if (
      isDepthConfirmationAnswer(
        getSelectedGateAnswer(answer),
        question.options,
      )
    ) {
      // Depth questions additionally mark the milestone as depth-verified;
      // any confirmed gate question clears the pending gate. Only the first
      // confirmed question is processed.
      if (isDepthQ) {
        const inferredMilestoneId =
          extractDepthVerificationMilestoneId(question.id) ?? milestoneId;
        markDepthVerified(inferredMilestoneId);
      }
      clearPendingGate();
      break;
    }
  }
  // ── Discussion log: append this exchange to the milestone's DISCUSSION file ──
  // Without a milestone there is no file to append to. (A former queue-phase
  // check here was subsumed by this guard and has been removed.)
  if (!milestoneId) return;
  const basePath = process.cwd();
  const milestoneDir = resolveMilestonePath(basePath, milestoneId);
  if (!milestoneDir) return;
  const discussionPath = join(
    milestoneDir,
    buildMilestoneFileName(milestoneId, "DISCUSSION"),
  );
  const timestamp = new Date().toISOString();
  const lines = [`## Exchange — ${timestamp}`, ""];
  for (const question of questions) {
    lines.push(
      `### ${question.header ?? "Question"}`,
      "",
      question.question ?? "",
    );
    if (Array.isArray(question.options)) {
      lines.push("");
      for (const opt of question.options) {
        lines.push(`- **${opt.label}** — ${opt.description ?? ""}`);
      }
    }
    const answer = details.response?.answers?.[question.id];
    if (answer) {
      lines.push("");
      const selectedValue = getSelectedGateAnswer(answer);
      const selected = Array.isArray(selectedValue)
        ? selectedValue.join(", ")
        : selectedValue;
      lines.push(`**Selected:** ${selected}`);
      if (answer.notes) {
        lines.push(`**Notes:** ${answer.notes}`);
      }
    }
    lines.push("");
  }
  lines.push("---", "");
  // Start a fresh log with a title when the file does not exist yet.
  const existing =
    (await loadFile(discussionPath)) ?? `# ${milestoneId} Discussion Log\n\n`;
  await saveFile(discussionPath, existing + lines.join("\n"));
});
// Autonomous mode only: note that a tool started and feed its name to the
// tool-tracking and completion-nudge recorders.
pi.on("tool_execution_start", async (event) => {
  if (isAutoActive()) {
    const { toolCallId, toolName } = event;
    markToolStart(toolCallId, toolName);
    recordToolCallName(toolName);
    recordCompletionNudgeToolCall(toolName);
  }
});
// Tool execution finished: close tool tracking, record metrics, capture
// deterministic invocation errors, persist safety evidence (autonomous mode),
// and append a trajectory step for the result.
pi.on("tool_execution_end", async (event) => {
  markToolEnd(event.toolCallId);
  // Record tool execution performance metrics
  try {
    const { recordToolExecution } = await import("../metrics-central.js");
    recordToolExecution(
      event.toolName,
      event.durationMs,
      event.isError,
      event.isError
        ? typeof event.result === "string"
          ? event.result
          : "tool_error"
        : undefined,
    );
  } catch {
    // Non-fatal: metrics should not break tool execution
  }
  // #2883/#4974: Capture deterministic invocation/policy errors so
  // postUnitPreVerification can break the retry loop instead of re-dispatching.
  // Covers sf_ tool JSON errors AND write-gate blocks on write/edit/bash tools.
  // The extracted text is reused for the trajectory step below so both records
  // carry the same message rather than "[object Object]".
  let errorText;
  if (event.isError) {
    errorText =
      typeof event.result === "string"
        ? event.result
        : typeof event.result?.content?.[0]?.text === "string"
          ? event.result.content[0].text
          : String(event.result);
    recordToolInvocationError(event.toolName, errorText);
  }
  // Safety harness: record tool execution results for evidence cross-referencing
  if (isAutoActive()) {
    safetyRecordToolResult(
      event.toolCallId,
      event.toolName,
      event.result,
      event.isError,
    );
    // Persist evidence to disk after each tool result so it survives a session
    // restart mid-unit (Bug #4385 — non-persisted evidence false positives).
    const endDash = getAutoDashboardData();
    if (endDash.basePath && endDash.currentUnit?.type === "execute-task") {
      const {
        milestone: pMid,
        slice: pSid,
        task: pTid,
      } = parseUnitId(endDash.currentUnit.id);
      if (pMid && pSid && pTid) {
        saveEvidenceToDisk(endDash.basePath, pMid, pSid, pTid);
      }
    }
  }
  // Trajectory recording: capture every tool execution step
  try {
    const { recordTrajectoryStep, STEP_TYPES } = await import(
      "../trajectory-recorder.js"
    );
    const sessionId = process.env.SF_SESSION_ID || "default";
    const dash = getAutoDashboardData();
    const unitId = dash.currentUnit?.id;
    // Serialize the result defensively: JSON.stringify returns undefined for
    // undefined/function values and throws on circular structures or BigInt.
    // Either case previously aborted this block and silently dropped the step.
    let toolResult;
    if (typeof event.result === "string") {
      toolResult = event.result;
    } else {
      let serialized;
      try {
        serialized = JSON.stringify(event.result);
      } catch {
        // Not serializable; fall back to String() below.
      }
      toolResult = (serialized ?? String(event.result)).slice(0, 2000);
    }
    // Record tool result
    recordTrajectoryStep({
      sessionId,
      unitId,
      stepType: event.isError ? STEP_TYPES.ERROR : STEP_TYPES.TOOL_RESULT,
      toolName: event.toolName,
      toolResult,
      isError: event.isError,
      errorMessage: event.isError ? errorText.slice(0, 500) : undefined,
      metadata: {
        duration_ms: event.durationMs,
        toolCallId: event.toolCallId,
      },
    });
  } catch {
    // Trajectory recording is best-effort; don't fail tool execution
  }
});
// On every model_select event, call syncServiceTierStatus with the handler context.
// NOTE(review): the helper is defined elsewhere; presumably it refreshes the
// service-tier status shown in the UI — confirm against its definition.
pi.on("model_select", async (_event, ctx) => {
await syncServiceTierStatus(ctx);
});
// Autonomous mode only: give the completion-nudge helper a chance to rewrite the
// message list. An override is returned only when the helper produced a
// different array; otherwise the handler returns nothing.
pi.on("context", async (event) => {
  if (!isAutoActive()) return;
  const nudged = maybeInjectCompletionNudgeMessage(event.messages);
  const unchanged = nudged === event.messages;
  return unchanged ? undefined : { messages: nudged };
});
// Last chance to adjust the outgoing provider payload. In order:
//   1. apply the completion-nudge temperature,
//   2. (autonomous mode) mask old observations and truncate tool results,
//   3. inject service_tier when enabled and supported by the model,
//   4. for the google-gemini-cli provider, count tokens and update the
//      "sf-gemini-tokens" status / emit a preflight warning near the context limit.
// Returns the (mutated) payload, or nothing when the payload is not an object.
pi.on("before_provider_request", async (event, ctx) => {
const modelId = event.model?.id;
const payload = event.payload;
// Non-object payloads are left untouched (handler returns undefined).
if (!payload || typeof payload !== "object") return;
applyCompletionNudgeTemperature(payload);
// ── Observation Masking ─────────────────────────────────────────────
// Replace old tool results with placeholders to reduce context bloat.
// Only runs in autonomous mode. Masking is applied unless
// context_management.observation_masking is explicitly set to false.
if (isAutoActive()) {
try {
const { loadEffectiveSFPreferences } = await import(
"../preferences.js"
);
const prefs = loadEffectiveSFPreferences();
const cmConfig = prefs?.preferences.context_management;
// Observation masking: replace old tool results with placeholders
if (cmConfig?.observation_masking !== false) {
// Value passed to createObservationMask; defaults to 8 when not configured.
const keepTurns = cmConfig?.observation_mask_turns ?? 8;
const { createObservationMask } = await import(
"../context-masker.js"
);
const mask = createObservationMask(keepTurns);
const messages = payload.messages;
if (Array.isArray(messages)) {
payload.messages = mask(messages);
}
}
// Tool result truncation: cap individual tool result content length.
// In pi-ai format, toolResult messages have role: "toolResult" and content: TextContent[].
// Creates new objects to avoid mutating shared conversation state.
// The cap defaults to 800 characters per text block.
const maxChars = cmConfig?.tool_result_max_chars ?? 800;
const msgs = payload.messages;
if (Array.isArray(msgs)) {
payload.messages = msgs.map((msg) => {
// Match toolResult messages (role: "toolResult", content is array of content blocks)
if (msg?.role === "toolResult" && Array.isArray(msg.content)) {
const blocks = msg.content;
// Combined length of all text blocks; gates whether this message is rewritten.
const totalLen = blocks.reduce(
(sum, b) =>
sum + (typeof b.text === "string" ? b.text.length : 0),
0,
);
if (totalLen > maxChars) {
// Only blocks that individually exceed the cap are shortened; smaller
// blocks pass through unchanged even when the combined length is over.
const truncated = blocks.map((b) => {
if (typeof b.text === "string" && b.text.length > maxChars) {
return {
...b,
text: b.text.slice(0, maxChars) + "\n…[truncated]",
};
}
return b;
});
return { ...msg, content: truncated };
}
}
return msg;
});
}
} catch {
/* non-fatal */
}
}
// ── Service Tier ────────────────────────────────────────────────────
// Without a model id neither service tier nor Gemini token counting applies:
// clear the Gemini status and return the payload as adjusted so far.
if (!modelId) {
ctx.ui.setStatus("sf-gemini-tokens", undefined);
return payload;
}
const {
getEffectiveServiceTier,
supportsServiceTier,
isServiceTierDisabled,
} = await import("../service-tier.js");
// Short-circuit on explicit disable — never inject service_tier on any
// setup that has opted out, regardless of model.
if (!isServiceTierDisabled()) {
const tier = getEffectiveServiceTier();
if (tier && supportsServiceTier(modelId)) {
payload.service_tier = tier;
}
}
// ── Gemini token preflight (google-gemini-cli provider only) ─────────
// For every other provider the status entry is cleared and we are done.
if (event.model?.provider !== "google-gemini-cli") {
ctx.ui.setStatus("sf-gemini-tokens", undefined);
return payload;
}
try {
// Prefer ctx.model when it matches the event's model; otherwise look the
// model up among the registry's available models.
const resolvedModel =
ctx.model &&
ctx.model.provider === event.model.provider &&
ctx.model.id === event.model.id
? ctx.model
: ctx.modelRegistry
.getAvailable()
.find(
(m) =>
m.provider === event.model?.provider &&
m.id === event.model?.id,
);
if (!resolvedModel) {
ctx.ui.setStatus("sf-gemini-tokens", undefined);
return payload;
}
const apiKey = await ctx.modelRegistry.getApiKey(resolvedModel);
const totalTokens = await countGoogleGeminiCliTokens(payload, apiKey);
// A non-numeric count means no usable result; clear the status.
if (typeof totalTokens !== "number") {
ctx.ui.setStatus("sf-gemini-tokens", undefined);
return payload;
}
const contextWindow = resolvedModel.contextWindow ?? 0;
// Percentage of the context window in use; undefined when the window size is unknown.
const pct =
contextWindow > 0
? Math.round((totalTokens / contextWindow) * 100)
: undefined;
ctx.ui.setStatus(
"sf-gemini-tokens",
pct !== undefined
? `gemini ${formatTokenCount(totalTokens)} (${pct}%)`
: `gemini ${formatTokenCount(totalTokens)}`,
);
// Warn once the request reaches 80% of the context window. The key
// (model id, token count, window size) suppresses repeating the identical warning.
if (contextWindow > 0 && totalTokens >= Math.floor(contextWindow * 0.8)) {
const warningKey = `${resolvedModel.id}:${totalTokens}:${contextWindow}`;
if (lastGeminiPreflightWarning !== warningKey) {
lastGeminiPreflightWarning = warningKey;
ctx.ui.notify(
`Gemini preflight: ${formatTokenCount(totalTokens)} tokens (${pct}% of ${formatTokenCount(contextWindow)} context).`,
"warning",
{
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
dedupe_key: `gemini-preflight:${resolvedModel.id}`,
},
);
}
}
} catch {
// Token counting is best-effort: on any failure just clear the status.
ctx.ui.setStatus("sf-gemini-tokens", undefined);
}
return payload;
});
// Capability-aware model routing hook (ADR-004)
// Extensions can override model selection by returning { modelId: "..." };
// returning undefined lets the built-in capability scoring proceed.
// A "fallback" reason means the current model just failed, so that failure is
// recorded first for the learning system to demote it in future selections.
pi.on("before_model_select", async (event) => {
  const failedModelId = event.currentModelId;
  const isFallback = event.reason === "fallback";
  if (isFallback && failedModelId) {
    const failureOutcome = {
      model_id: failedModelId,
      unit_type: event.unitType ?? "execute-task",
      // Prefer the real unit id set by autonomous dispatch; otherwise use a
      // sentinel so the NOT NULL constraint is always satisfied.
      unit_id: event.unitId || `fallback:${failedModelId}`,
      succeeded: false,
      failure_mode: event.classification?.reason ?? null,
      recorded_at: Date.now(),
    };
    recordLearnedOutcome(failureOutcome);
  }
  const selectionRequest = {
    unitType: event.unitType,
    eligibleModels: event.eligibleModels,
    phaseConfig: event.phaseConfig,
  };
  return selectLearnedModel(selectionRequest);
});
// Tool set adaptation hook (ADR-005 Phase 4)
// A handler may override the tool set after model selection by returning
// { toolNames: [...] }. This default handler always resolves to undefined, so
// the built-in provider compatibility filtering decides the tool set.
pi.on("adjust_tool_set", async (_event) => undefined);
}