- FallbackResolver.setUnitContext() stores {unitType,unitId} from autonomous dispatch
- run-unit.js calls pi.setFallbackUnitContext() before/after each unit
- _findAnyAvailableFallback uses real unitType/unitId from context, not sentinel
- Schema v59: failure_mode column in llm_task_outcomes
- insertLlmTaskOutcome accepts failure_mode (rate_limit, quota_exhausted, auth_error)
- register-hooks.js passes event.classification.reason as failure_mode
- register-hooks.js uses real event.unitId when available
- ExtensionRuntimeActions.setFallbackUnitContext added to pi API surface
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1564 lines
54 KiB
JavaScript
import { spawnSync } from "node:child_process";
|
||
import {
|
||
existsSync,
|
||
mkdirSync,
|
||
readdirSync,
|
||
readFileSync,
|
||
unlinkSync,
|
||
writeFileSync,
|
||
} from "node:fs";
|
||
import { join, relative, resolve } from "node:path";
|
||
import { isToolCallEventType } from "@singularity-forge/coding-agent";
|
||
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
|
||
import { formatTokenCount } from "@singularity-forge/coding-agent";
|
||
import { saveActivityLog } from "../activity-log.js";
|
||
import {
|
||
getAutoDashboardData,
|
||
hasResearchTerminalTransition,
|
||
isAutoActive,
|
||
isAutoPaused,
|
||
isCanAskUser,
|
||
markResearchTerminalTransition,
|
||
markToolEnd,
|
||
markToolStart,
|
||
recordToolInvocationError,
|
||
startAutoDetached,
|
||
} from "../auto.js";
|
||
import {
|
||
applyCompletionNudgeTemperature,
|
||
maybeInjectCompletionNudgeMessage,
|
||
recordCompletionNudgeToolCall,
|
||
} from "../auto-completion-nudge.js";
|
||
import { recordToolCallName } from "../auto-tool-tracking.js";
|
||
import { loadToolApiKeys } from "../commands-config.js";
|
||
import { getEcosystemReadyPromise } from "../ecosystem/loader.js";
|
||
import { updateSnapshot } from "../ecosystem/sf-extension-api.js";
|
||
import {
|
||
buildExecutionPolicyJournalEntry,
|
||
classifyExecutionPolicyCall,
|
||
} from "../execution-policy.js";
|
||
import { formatContinue, loadFile, saveFile } from "../files.js";
|
||
import { getDiscussionMilestoneId } from "../guided-flow.js";
|
||
import { initHealthWidget } from "../health-widget.js";
|
||
import { emitJournalEvent } from "../journal.js";
|
||
import {
|
||
initializeLearningRuntime,
|
||
resetLearningRuntime,
|
||
selectLearnedModel,
|
||
} from "../learning/runtime.js";
|
||
import {
|
||
observeToolResult,
|
||
resetToolWatchdog,
|
||
} from "../tool-watchdog.js";
|
||
import { NOTICE_KIND, initNotificationStore } from "../notification-store.js";
|
||
import { initNotificationWidget } from "../notification-widget.js";
|
||
import {
|
||
isParallelActive,
|
||
shutdownParallel,
|
||
} from "../parallel-orchestrator.js";
|
||
import {
|
||
buildMilestoneFileName,
|
||
resolveMilestonePath,
|
||
resolveSliceFile,
|
||
resolveSlicePath,
|
||
} from "../paths.js";
|
||
import { cleanupQuickBranch } from "../quick.js";
|
||
import {
|
||
applyRemoteSteeringDirectives,
|
||
formatRemoteSteeringResults,
|
||
parseRemoteSteeringDirectives,
|
||
} from "../remote-steering.js";
|
||
import { classifyCommand } from "../safety/destructive-guard.js";
|
||
import {
|
||
recordToolCall as safetyRecordToolCall,
|
||
recordToolResult as safetyRecordToolResult,
|
||
saveEvidenceToDisk,
|
||
} from "../safety/evidence-collector.js";
|
||
import { initSessionRecorder } from "../session-recorder.js";
|
||
import { deriveState } from "../state.js";
|
||
import { countGoogleGeminiCliTokens } from "../token-counter.js";
|
||
import { getSessionTodoCompactionBlock } from "../tools/session-todo-tool.js";
|
||
import { parseUnitId } from "../unit-id.js";
|
||
import { logWarning as safetyLogWarning } from "../workflow-logger.js";
|
||
import {
|
||
BLOCKED_WRITE_ERROR,
|
||
isBashWriteToStateFile,
|
||
isBlockedStateFile,
|
||
} from "../write-intercept.js";
|
||
import { handleAgentEnd } from "./agent-end-recovery.js";
|
||
import { installNotifyInterceptor } from "./notify-interceptor.js";
|
||
import { buildBeforeAgentStartResult } from "./system-context.js";
|
||
import {
|
||
checkToolCallLoop,
|
||
resetToolCallLoopGuard,
|
||
} from "./tool-call-loop-guard.js";
|
||
import {
|
||
clearDiscussionFlowState,
|
||
clearPendingGate,
|
||
extractDepthVerificationMilestoneId,
|
||
getPendingGate,
|
||
getSelectedGateAnswer,
|
||
isDepthConfirmationAnswer,
|
||
isGateQuestionId,
|
||
isQueuePhaseActive,
|
||
markDepthVerified,
|
||
resetWriteGateState,
|
||
setPendingGate,
|
||
shouldBlockContextWrite,
|
||
shouldBlockPendingGate,
|
||
shouldBlockPendingGateBash,
|
||
shouldBlockQueueExecution,
|
||
} from "./write-gate.js";
|
||
|
||
// Skip the welcome screen on the very first session_start — cli.ts already
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
let isFirstSession = true;
// Most recent Gemini preflight warning for this session. Cleared on
// session_start and session_switch so a stale warning from a previous
// session cannot suppress or duplicate a fresh one. (Written elsewhere in
// this file — presumably by the Gemini preflight path; confirm at call site.)
let lastGeminiPreflightWarning;
// File name (under .sf/runtime/) of the marker used to auto-resume
// autonomous mode after a context-compaction restart.
const COMPACTION_AUTO_RESUME_FILE = "compaction-auto-resume.json";
|
||
|
||
/**
 * Absolute path of the compaction auto-resume marker file:
 * `<basePath>/.sf/runtime/compaction-auto-resume.json`.
 *
 * @param {string} basePath - Project root directory.
 * @returns {string} Full path to the marker file.
 */
function compactionResumePath(basePath) {
  const runtimeDir = join(basePath, ".sf", "runtime");
  return join(runtimeDir, COMPACTION_AUTO_RESUME_FILE);
}
|
||
|
||
/**
 * Persist the compaction auto-resume marker as pretty-printed JSON under
 * `<basePath>/.sf/runtime/`, creating the directory if needed.
 *
 * Best-effort: serialization or filesystem failures are swallowed because
 * the marker only enables an optional auto-resume after compaction.
 *
 * @param {string} basePath - Project root directory.
 * @param {object} marker - JSON-serializable resume state.
 */
function writeCompactionResumeMarker(basePath, marker) {
  try {
    mkdirSync(join(basePath, ".sf", "runtime"), { recursive: true });
    const serialized = `${JSON.stringify(marker, null, 2)}\n`;
    writeFileSync(compactionResumePath(basePath), serialized, "utf-8");
  } catch {
    // Non-fatal: compaction auto-resume marker is best-effort.
  }
}
|
||
|
||
/**
 * Consume (read then delete) the compaction auto-resume marker.
 *
 * Returns the parsed marker object on success. Returns null when the file
 * is absent, unparseable, or when deletion fails after a successful parse —
 * in every failure mode the file is still removed (best-effort) so a
 * corrupt marker cannot wedge every subsequent session start.
 *
 * @param {string} basePath - Project root directory.
 * @returns {object | null} Parsed marker, or null.
 */
function consumeCompactionResumeMarker(basePath) {
  const file = compactionResumePath(basePath);
  if (!existsSync(file)) return null;
  try {
    const parsed = JSON.parse(readFileSync(file, "utf-8"));
    unlinkSync(file);
    return parsed;
  } catch {
    // Corrupt or half-consumed marker: delete it and report "no marker".
    try {
      unlinkSync(file);
    } catch {
      // Ignore cleanup errors.
    }
    return null;
  }
}
|
||
|
||
/**
 * Best-effort memory-embedding backfill invoked from the agent_end hook.
 *
 * @param {(() => Promise<void> | void) | undefined} runBackfill - Optional
 *   runner override (used by tests). When nullish, the real
 *   `runEmbeddingBackfill` is loaded lazily from ../memory-embeddings.js.
 * @returns {Promise<void>} Always resolves — errors are swallowed so that
 *   backfill problems can never break agent_end.
 */
export async function runAgentEndMemoryBackfill(runBackfill) {
  try {
    let runner = runBackfill;
    if (runner == null) {
      const mod = await import("../memory-embeddings.js");
      runner = mod.runEmbeddingBackfill;
    }
    await runner();
  } catch {
    // Never break agent_end on backfill issues.
  }
}
|
||
|
||
/**
 * Run the SF doctor in fix mode at session start and surface the outcome
 * through the UI: one info notice when fixes were applied, one warning when
 * blocking issues remain. Disabled entirely via SF_DISABLE_STARTUP_DOCTOR=1.
 *
 * Failures are logged through the workflow logger and never thrown — the
 * doctor must not block session start.
 *
 * @param {object} ctx - Hook context; only `ctx.ui?.notify` is used.
 */
async function runSessionStartupDoctorFix(ctx) {
  if (process.env.SF_DISABLE_STARTUP_DOCTOR === "1") return;
  try {
    const { runSFDoctor, summarizeDoctorIssues } = await import("../doctor.js");
    const report = await runSFDoctor(process.cwd(), { fix: true });
    const fixCount = report.fixesApplied.length;
    if (fixCount > 0) {
      ctx.ui?.notify?.(
        `Startup doctor: applied ${fixCount} fix(es).`,
        "info",
        { noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
      );
    }
    const { errors } = summarizeDoctorIssues(report.issues);
    if (errors > 0) {
      ctx.ui?.notify?.(
        `Startup doctor found ${errors} blocking issue(s). Run /doctor audit for details.`,
        "warning",
        { noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
      );
    }
  } catch (error) {
    safetyLogWarning(
      "startup-doctor",
      error instanceof Error ? error.message : String(error),
    );
  }
}
|
||
|
||
/**
 * Publish the effective service tier into the TUI footer status slot
 * ("sf-fast"), formatted for the current model.
 *
 * @param {object} ctx - Hook context; uses `ctx.ui?.setStatus` and
 *   `ctx.model?.id`.
 */
async function syncServiceTierStatus(ctx) {
  const {
    getEffectiveServiceTier,
    formatServiceTierFooterStatus,
    isServiceTierDisabled,
  } = await import("../service-tier.js");
  // Skip the footer event entirely when the feature is explicitly disabled —
  // no setStatus call, no RPC traffic, no leak into headless stderr even if
  // the TUI_FOOTER_STATUS_KEYS filter is bypassed.
  if (isServiceTierDisabled()) return;
  // Guard the UI surface the same way every other call site in this file
  // does (ctx.ui?.notify?.) — a headless context without a footer must not
  // crash session start with a TypeError.
  ctx.ui?.setStatus?.(
    "sf-fast",
    formatServiceTierFooterStatus(getEffectiveServiceTier(), ctx.model?.id),
  );
}
|
||
/**
 * Run user-authored shell hooks from `.sf/hooks/<phase>/*.sh`, piping
 * `payload` to each script's stdin as JSON.
 *
 * Scripts run in lexicographic order via `sh <script>` with a 5-second
 * timeout each. In blocking mode, the first script that exits non-zero
 * short-circuits the sweep and returns `{ block: true, reason }`, where the
 * reason is the script's trimmed stdout (or a generic exit-status message
 * when stdout is empty). Returns null when the phase directory is missing
 * or unreadable, when invocation of a script itself fails (skipped,
 * non-fatal), or when every script approves.
 *
 * Consumers: tool_call handler (phase "pre-tool", blocking) and tool_result
 * handler (phase "post-tool", non-blocking); also "session-start" and
 * "session-end".
 *
 * @param {string} phase - Hook phase directory name under .sf/hooks/.
 * @param {object} payload - JSON-serializable object written to stdin,
 *   e.g. `{"tool":"...","input":{...}}`.
 * @param {boolean} [blocking=false] - Whether a non-zero exit denies.
 * @returns {{ block: true, reason: string } | null}
 */
function runShellHooks(phase, payload, blocking = false) {
  const hooksDir = join(process.cwd(), ".sf", "hooks", phase);
  if (!existsSync(hooksDir)) return null;

  let entries;
  try {
    entries = readdirSync(hooksDir);
  } catch {
    // Directory vanished or is unreadable — treat as "no hooks installed".
    return null;
  }
  const scripts = entries
    .filter((name) => name.endsWith(".sh"))
    .sort()
    .map((name) => join(hooksDir, name));

  const stdinJson = JSON.stringify(payload);
  for (const script of scripts) {
    let outcome;
    try {
      outcome = spawnSync("sh", [script], {
        input: stdinJson,
        encoding: "utf-8",
        timeout: 5_000,
        stdio: ["pipe", "pipe", "pipe"],
      });
    } catch {
      continue; // non-fatal: script invocation error
    }
    if (!blocking || outcome.status === 0) continue;
    const reason =
      (outcome.stdout || "").trim() ||
      `Shell hook ${script} exited ${outcome.status}`;
    return { block: true, reason };
  }
  return null;
}
|
||
|
||
export function registerHooks(pi, ecosystemHandlers = []) {
|
||
pi.on("session_start", async (_event, ctx) => {
|
||
lastGeminiPreflightWarning = undefined;
|
||
resetLearningRuntime();
|
||
resetToolWatchdog();
|
||
try {
|
||
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
|
||
const _sfile = ctx.sessionManager?.getSessionFile?.() ?? "";
|
||
if (sid) {
|
||
process.stderr.write(`[forge] session ${sid.slice(0, 8)}\n`);
|
||
}
|
||
// Establish the session row so all subsequent turns have a parent.
|
||
// Git context (repo, branch) is patched in before_agent_start once the
|
||
// DB is open and the cwd is confirmed.
|
||
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
initNotificationStore(process.cwd());
|
||
installNotifyInterceptor(ctx);
|
||
initNotificationWidget(ctx);
|
||
initHealthWidget(ctx);
|
||
resetWriteGateState();
|
||
resetToolCallLoopGuard();
|
||
resetAskUserQuestionsCache();
|
||
await syncServiceTierStatus(ctx);
|
||
await initializeLearningRuntime();
|
||
await runSessionStartupDoctorFix(ctx);
|
||
// Initialize metrics-central with database adapter
|
||
try {
|
||
const { initMetricsCentral } = await import("../metrics-central.js");
|
||
const { getDatabase } = await import("../sf-db.js");
|
||
const dbAdapter = getDatabase();
|
||
const sessionId = ctx.sessionManager?.getSessionId?.() || "";
|
||
initMetricsCentral(process.cwd(), {
|
||
sessionId,
|
||
dbAdapter,
|
||
});
|
||
} catch (err) {
|
||
// Non-fatal: metrics should not block session start
|
||
const { logWarning } = await import("../workflow-logger.js");
|
||
logWarning(
|
||
"session-start",
|
||
`Failed to initialize metrics-central: ${err instanceof Error ? err.message : String(err)}`,
|
||
);
|
||
}
|
||
// Apply show_token_cost preference (#1515)
|
||
try {
|
||
const { loadEffectiveSFPreferences } = await import("../preferences.js");
|
||
const prefs = loadEffectiveSFPreferences();
|
||
process.env.SF_SHOW_TOKEN_COST = prefs?.preferences.show_token_cost
|
||
? "1"
|
||
: "";
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
if (isFirstSession) {
|
||
isFirstSession = false;
|
||
} else {
|
||
try {
|
||
const sfBinPath = process.env.SF_BIN_PATH;
|
||
if (sfBinPath) {
|
||
const { dirname } = await import("node:path");
|
||
const { printWelcomeScreen } = await import(
|
||
join(dirname(sfBinPath), "welcome-screen.js")
|
||
);
|
||
let remoteChannel;
|
||
try {
|
||
const { resolveRemoteConfig } = await import(
|
||
"../../remote-questions/config.js"
|
||
);
|
||
const rc = resolveRemoteConfig();
|
||
if (rc) remoteChannel = rc.channel;
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
printWelcomeScreen({
|
||
version: process.env.SF_VERSION || "0.0.0",
|
||
remoteChannel,
|
||
});
|
||
}
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
}
|
||
loadToolApiKeys();
|
||
// Flow audit cleans over-budget optional child processes automatically and
|
||
// only warns for real blockers such as stale dispatch or recent errors.
|
||
try {
|
||
const { runFlowAudit } = await import("../doctor.js");
|
||
const flow = await runFlowAudit(process.cwd(), {
|
||
killOverBudgetChildren: true,
|
||
});
|
||
if (!flow.ok) {
|
||
ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning", {
|
||
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
|
||
dedupe_key: "flow-audit-recommended-action",
|
||
});
|
||
}
|
||
} catch {
|
||
/* non-fatal — flow audit must never block session start */
|
||
}
|
||
// Drain self-feedback: auto-resolve entries whose blocking
|
||
// sf-version constraint has been satisfied by the current sf bump,
|
||
// and surface entries that remain blocked to the operator. Done after
|
||
// other init so notifications appear in the same session-start sweep.
|
||
try {
|
||
const {
|
||
compactSelfFeedbackMarkdown,
|
||
markResolved,
|
||
migrateLegacyBacklogFilename,
|
||
resolveFeedbackForCompletedMilestones,
|
||
triageBlockedEntries,
|
||
} = await import("../self-feedback.js");
|
||
migrateLegacyBacklogFilename(process.cwd());
|
||
compactSelfFeedbackMarkdown(process.cwd());
|
||
// Auto-resolve blocking entries for milestones that already completed
|
||
const autoResolved = resolveFeedbackForCompletedMilestones(process.cwd());
|
||
for (const id of autoResolved) {
|
||
ctx.ui?.notify?.(
|
||
`Self-feedback ${id} auto-resolved — milestone is complete.`,
|
||
"info",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
const triage = triageBlockedEntries(process.cwd());
|
||
const currentSfVersion = process.env.SF_VERSION || "unknown";
|
||
for (const e of triage.retry) {
|
||
markResolved(
|
||
e.id,
|
||
{
|
||
reason: `sf bumped past ${e.sfVersion} (was blocking on this version)`,
|
||
evidence: {
|
||
kind: "auto-version-bump",
|
||
fromVersion: e.sfVersion,
|
||
toVersion: currentSfVersion,
|
||
},
|
||
},
|
||
process.cwd(),
|
||
);
|
||
const occ = e.occurredIn;
|
||
const unit = occ
|
||
? [occ.milestone, occ.slice, occ.task].filter(Boolean).join("/") ||
|
||
occ.unitType ||
|
||
"(unknown unit)"
|
||
: "(unknown unit)";
|
||
ctx.ui?.notify?.(
|
||
`Self-feedback ${e.id} (${e.kind}) auto-resolved — sf bumped past ${e.sfVersion}. Originating unit ${unit} should be re-run.`,
|
||
"info",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
if (triage.stillBlocked.length > 0) {
|
||
ctx.ui?.notify?.(
|
||
`${triage.stillBlocked.length} unresolved self-feedback entr${triage.stillBlocked.length === 1 ? "y" : "ies"} require sf fixes. See .sf/SELF-FEEDBACK.md or ~/.sf/agent/upstream-feedback.jsonl.`,
|
||
"warning",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
// Forge-only: high/critical entries are queued as hidden follow-up repair
|
||
// work on startup, even outside /autonomous. The drain helper owns claim TTL
|
||
// and delivery failure retry, so this is safe to call opportunistically.
|
||
const highBlocked = triage.stillBlocked.filter(
|
||
(e) => e.severity === "high" || e.severity === "critical",
|
||
);
|
||
if (highBlocked.length > 0) {
|
||
const ids = highBlocked.map((e) => `${e.id} (${e.kind})`).join(", ");
|
||
ctx.ui?.notify?.(
|
||
`${highBlocked.length} high/critical inline-fix candidate${highBlocked.length === 1 ? "" : "s"} pending in .sf/SELF-FEEDBACK.md: ${ids}`,
|
||
"warning",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
const { dispatchSelfFeedbackInlineFixIfNeeded } = await import(
|
||
"../self-feedback-drain.js"
|
||
);
|
||
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
|
||
}
|
||
} catch {
|
||
/* non-fatal — self-feedback drain must never block session start */
|
||
}
|
||
// Run gap audit to detect orphaned prompts, handlers, native modules, commands
|
||
try {
|
||
const { runGapAudit } = await import("../gap-audit.js");
|
||
const filed = runGapAudit(process.cwd());
|
||
if (filed > 0) {
|
||
const { selfFeedbackDestinationLabel } = await import(
|
||
"../self-feedback.js"
|
||
);
|
||
ctx.ui?.notify?.(
|
||
`Gap audit filed ${filed} new finding${filed === 1 ? "" : "s"} in ${selfFeedbackDestinationLabel(process.cwd())}`,
|
||
"info",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
} catch {
|
||
/* non-fatal — gap audit must never block session start */
|
||
}
|
||
// Summarise the last UOK parity report so the operator can act on
|
||
// divergences/fallbacks before starting any new work.
|
||
try {
|
||
const { summarizeParityReport } = await import(
|
||
"../uok-parity-summary.js"
|
||
);
|
||
await summarizeParityReport(process.cwd(), ctx);
|
||
} catch {
|
||
/* non-fatal — parity summary must never block session start */
|
||
}
|
||
// Bridge upstream feedback into forge-local self-feedback
|
||
try {
|
||
const { bridgeUpstreamFeedback } = await import("../upstream-bridge.js");
|
||
const filed = bridgeUpstreamFeedback(process.cwd());
|
||
if (filed > 0) {
|
||
ctx.ui?.notify?.(
|
||
`Upstream bridge filed ${filed} rollup${filed === 1 ? "" : "s"} in .sf/SELF-FEEDBACK.md`,
|
||
"info",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
} catch {
|
||
/* non-fatal — upstream bridge must never block session start */
|
||
}
|
||
// Promote recurring feedback clusters to REQUIREMENTS.md
|
||
try {
|
||
const { promoteFeedbackToRequirements } = await import(
|
||
"../requirement-promoter.js"
|
||
);
|
||
const { promoted, requirementIds } = promoteFeedbackToRequirements(
|
||
process.cwd(),
|
||
);
|
||
if (promoted > 0) {
|
||
ctx.ui?.notify?.(
|
||
`Promoted ${promoted} cluster${promoted === 1 ? "" : "s"} to requirements: ${requirementIds.join(", ")}`,
|
||
"info",
|
||
{ noticeKind: NOTICE_KIND.SYSTEM_NOTICE },
|
||
);
|
||
}
|
||
} catch {
|
||
/* non-fatal — requirement promoter must never block session start */
|
||
}
|
||
// Refresh model catalogs for discoverable providers in the background.
|
||
try {
|
||
const { scheduleModelCatalogRefresh } = await import(
|
||
"../model-catalog-cache.js"
|
||
);
|
||
const { getKeyManagerAuthStorage } = await import("../key-manager.js");
|
||
scheduleModelCatalogRefresh(process.cwd(), getKeyManagerAuthStorage());
|
||
} catch {
|
||
/* non-fatal — model catalog refresh must never block session start */
|
||
}
|
||
// Compaction should never behave like a stop boundary. If autonomous mode
|
||
// was active when compaction happened, continue automatically on session start.
|
||
try {
|
||
if (!isAutoActive() && !isAutoPaused()) {
|
||
const marker = consumeCompactionResumeMarker(process.cwd());
|
||
if (
|
||
marker?.reason === "session_before_compact" &&
|
||
marker?.autoActive === true &&
|
||
marker?.basePath === process.cwd()
|
||
) {
|
||
ctx.ui?.notify?.(
|
||
"Resuming autonomous mode after compaction.",
|
||
"info",
|
||
{
|
||
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
|
||
dedupe_key: "autonomous-resume-after-compaction",
|
||
},
|
||
);
|
||
startAutoDetached(ctx, pi, process.cwd(), marker?.verbose === true, {
|
||
step: marker?.stepMode === true,
|
||
canAskUser: marker?.canAskUser !== false,
|
||
milestoneLock:
|
||
typeof marker?.milestoneLock === "string"
|
||
? marker.milestoneLock
|
||
: null,
|
||
});
|
||
}
|
||
}
|
||
} catch {
|
||
/* non-fatal — autoresume must never block session start */
|
||
}
|
||
});
|
||
pi.on("session_switch", async (_event, ctx) => {
|
||
lastGeminiPreflightWarning = undefined;
|
||
resetLearningRuntime();
|
||
resetToolWatchdog();
|
||
initNotificationStore(process.cwd());
|
||
installNotifyInterceptor(ctx);
|
||
resetWriteGateState();
|
||
resetToolCallLoopGuard();
|
||
resetAskUserQuestionsCache();
|
||
clearDiscussionFlowState();
|
||
await syncServiceTierStatus(ctx);
|
||
await initializeLearningRuntime();
|
||
loadToolApiKeys();
|
||
// Re-establish session recorder for the new session so turn recording
|
||
// continues under the correct session_id without contaminating the
|
||
// previous session's rows.
|
||
try {
|
||
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
|
||
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
});
|
||
pi.on("before_agent_start", async (event, ctx) => {
|
||
// Refresh the ecosystem snapshot BEFORE running ecosystem handlers so they
|
||
// see current phase/unit state (#3338).
|
||
try {
|
||
const { ensureDbOpen } = await import("./dynamic-tools.js");
|
||
await ensureDbOpen();
|
||
const basePath = process.cwd();
|
||
const state = await deriveState(basePath);
|
||
updateSnapshot(state);
|
||
// Patch git context on the first turn now that the DB is confirmed open.
|
||
// Best-effort: git may be absent (e.g. tmp dirs), so we swallow errors.
|
||
try {
|
||
const { execFileSync } = await import("node:child_process");
|
||
const branch = execFileSync(
|
||
"git",
|
||
["rev-parse", "--abbrev-ref", "HEAD"],
|
||
{
|
||
cwd: basePath,
|
||
encoding: "utf-8",
|
||
stdio: ["ignore", "pipe", "ignore"],
|
||
timeout: 3_000,
|
||
},
|
||
).trim();
|
||
const remoteUrl = execFileSync(
|
||
"git",
|
||
["config", "--get", "remote.origin.url"],
|
||
{
|
||
cwd: basePath,
|
||
encoding: "utf-8",
|
||
stdio: ["ignore", "pipe", "ignore"],
|
||
timeout: 3_000,
|
||
},
|
||
).trim();
|
||
patchSessionGitContext(remoteUrl || null, branch || null);
|
||
} catch {
|
||
/* non-fatal: git absent or not a repo */
|
||
}
|
||
} catch {
|
||
updateSnapshot(null);
|
||
}
|
||
// Record user message as the start of a new turn. Done after ensureDbOpen
|
||
// so the turns row lands in the DB immediately; agent_end will patch the
|
||
// assistant_response onto it once the model finishes.
|
||
try {
|
||
recordTurnStart(typeof event.prompt === "string" ? event.prompt : null);
|
||
} catch {
|
||
/* non-fatal: turn recording must never block the agent */
|
||
}
|
||
// Await ecosystem loading, then dispatch any registered handlers.
|
||
await getEcosystemReadyPromise();
|
||
for (const handler of ecosystemHandlers) {
|
||
try {
|
||
await handler(event, ctx);
|
||
} catch {
|
||
// Non-fatal: don't break the SF turn if a third-party handler throws.
|
||
}
|
||
}
|
||
return buildBeforeAgentStartResult(event, ctx);
|
||
});
|
||
pi.on("agent_end", async (event, ctx) => {
|
||
resetToolCallLoopGuard();
|
||
resetAskUserQuestionsCache();
|
||
await handleAgentEnd(pi, event, ctx);
|
||
// Complete the pending turn row with the assistant's text response.
|
||
// event.messages is an array; the last entry is the model's reply.
|
||
// Its .content is an array of content blocks — extract the first text block.
|
||
try {
|
||
const msgs = Array.isArray(event.messages) ? event.messages : [];
|
||
const lastMsg = msgs[msgs.length - 1];
|
||
const textBlock = Array.isArray(lastMsg?.content)
|
||
? lastMsg.content.find((b) => b.type === "text" && b.text)
|
||
: null;
|
||
recordTurnEnd(textBlock?.text ?? null);
|
||
} catch {
|
||
/* non-fatal: turn recording must never block agent teardown */
|
||
}
|
||
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
|
||
// gateway has an embed worker online, embed any memories that don't yet
|
||
// have a vector. Bounded per invocation; logs once-per-minute when the
|
||
// gateway is unavailable so we don't spam the journal.
|
||
await runAgentEndMemoryBackfill();
|
||
});
|
||
// Squash-merge quick-task branch back to the original branch after the
|
||
// agent turn completes (#2668). cleanupQuickBranch is a no-op when no
|
||
// quick-return state is pending, so this is safe to call on every turn.
|
||
pi.on("turn_end", async (_event, ctx) => {
|
||
try {
|
||
cleanupQuickBranch();
|
||
} catch {
|
||
// Best-effort: don't break the turn lifecycle if cleanup fails.
|
||
}
|
||
try {
|
||
const {
|
||
consumeCompletedInlineFixClaim,
|
||
dispatchSelfFeedbackInlineFixIfNeeded,
|
||
} = await import("../self-feedback-drain.js");
|
||
const resolvedIds = consumeCompletedInlineFixClaim(process.cwd());
|
||
if (resolvedIds.length > 0) {
|
||
const requestReload = ctx.requestReload;
|
||
requestReload?.(
|
||
`self-feedback inline fix resolved ${resolvedIds.length} entr${resolvedIds.length === 1 ? "y" : "ies"}`,
|
||
);
|
||
return;
|
||
}
|
||
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
|
||
} catch {
|
||
// Best-effort: stale code should not break normal turn completion.
|
||
}
|
||
});
|
||
pi.on("session_before_compact", async () => {
|
||
// Never cancel compaction — instead provide a custom compaction summary
|
||
// that preserves work state. Cancelling compaction causes context overflow
|
||
// which degrades performance and can hit hard limits. The custom summary
|
||
// ensures the agent retains critical context after compaction.
|
||
const basePath = process.cwd();
|
||
const { ensureDbOpen } = await import("./dynamic-tools.js");
|
||
await ensureDbOpen();
|
||
const state = await deriveState(basePath);
|
||
|
||
// Build work state summary for compaction context preservation
|
||
const workState = [];
|
||
if (state.activeMilestone) {
|
||
workState.push(
|
||
`Active milestone: ${state.activeMilestone.id} (${state.activeMilestone.title})`,
|
||
);
|
||
}
|
||
if (state.activeSlice) {
|
||
workState.push(
|
||
`Active slice: ${state.activeSlice.id} (${state.activeSlice.title})`,
|
||
);
|
||
}
|
||
if (state.activeTask) {
|
||
workState.push(
|
||
`Active task: ${state.activeTask.id} (${state.activeTask.title})`,
|
||
);
|
||
}
|
||
if (state.phase) {
|
||
workState.push(`Current phase: ${state.phase}`);
|
||
}
|
||
|
||
// Include mode state in compaction summary
|
||
const { getAutoSession } = await import("../auto/session.js");
|
||
const session = getAutoSession();
|
||
const mode = session?.getMode?.();
|
||
if (mode) {
|
||
workState.push(
|
||
`Work mode: ${mode.workMode}, Run control: ${mode.runControl}, Permission: ${mode.permissionProfile}, Model: ${mode.modelMode}`,
|
||
);
|
||
}
|
||
|
||
// If autonomous mode is active, include current unit details
|
||
if (isAutoActive()) {
|
||
const dash = getAutoDashboardData();
|
||
if (dash.currentUnit) {
|
||
workState.push(
|
||
`Current unit: ${dash.currentUnit.type} ${dash.currentUnit.id}`,
|
||
);
|
||
}
|
||
}
|
||
|
||
// Write CONTINUE file for task recovery if in executing phase
|
||
if (
|
||
state.activeMilestone &&
|
||
state.activeSlice &&
|
||
state.activeTask &&
|
||
state.phase === "executing"
|
||
) {
|
||
const sliceDir = resolveSlicePath(
|
||
basePath,
|
||
state.activeMilestone.id,
|
||
state.activeSlice.id,
|
||
);
|
||
if (sliceDir) {
|
||
const existingFile = resolveSliceFile(
|
||
basePath,
|
||
state.activeMilestone.id,
|
||
state.activeSlice.id,
|
||
"CONTINUE",
|
||
);
|
||
const hasExisting = existingFile && (await loadFile(existingFile));
|
||
const legacyContinue = join(sliceDir, "continue.md");
|
||
const hasLegacy = await loadFile(legacyContinue);
|
||
|
||
if (!hasExisting && !hasLegacy) {
|
||
const continuePath = join(
|
||
sliceDir,
|
||
`${state.activeSlice.id}-CONTINUE.md`,
|
||
);
|
||
await saveFile(
|
||
continuePath,
|
||
formatContinue({
|
||
frontmatter: {
|
||
milestone: state.activeMilestone.id,
|
||
slice: state.activeSlice.id,
|
||
task: state.activeTask.id,
|
||
step: 0,
|
||
totalSteps: 0,
|
||
status: "compacted",
|
||
savedAt: new Date().toISOString(),
|
||
},
|
||
completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`,
|
||
remainingWork: "Check the task plan for remaining steps.",
|
||
decisions: "Check task summary files for prior decisions.",
|
||
context: `Session was compacted. Work state: ${workState.join("; ")}`,
|
||
nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`,
|
||
}),
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Return custom compaction summary that preserves work state
|
||
// instead of cancelling compaction
|
||
const todoBlock = getSessionTodoCompactionBlock(basePath);
|
||
const baseSummary =
|
||
workState.length > 0
|
||
? `Work in progress: ${workState.join(". ")}.`
|
||
: "Session compacted. No active work state.";
|
||
const summary = todoBlock ? `${baseSummary}\n\n${todoBlock}` : baseSummary;
|
||
const result = {
|
||
compaction: {
|
||
summary,
|
||
firstKeptEntryId: undefined, // Let Pi decide
|
||
tokensBefore: undefined, // Let Pi measure
|
||
details: {
|
||
workState,
|
||
isAutoActive: isAutoActive(),
|
||
mode: mode || null,
|
||
},
|
||
},
|
||
};
|
||
if (isAutoActive()) {
|
||
writeCompactionResumeMarker(basePath, {
|
||
reason: "session_before_compact",
|
||
ts: new Date().toISOString(),
|
||
autoActive: true,
|
||
basePath,
|
||
stepMode: session?.stepMode === true,
|
||
canAskUser: isCanAskUser(),
|
||
milestoneLock:
|
||
typeof session?.sessionMilestoneLock === "string"
|
||
? session.sessionMilestoneLock
|
||
: null,
|
||
verbose: session?.verbose === true,
|
||
});
|
||
}
|
||
// Persist compaction summary as the session's most recent work description
|
||
// so memory-pipeline ingestion has a compact semantic handle for retrieval.
|
||
try {
|
||
updateSessionSummary(result.compaction.summary);
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
return result;
|
||
});
|
||
pi.on("session_shutdown", async (_event, ctx) => {
|
||
// Flush any in-flight turn (e.g. interrupted agent) and clear session state
|
||
// so the recorder doesn't carry stale IDs into a subsequent process reuse.
|
||
try {
|
||
resetSessionRecorder();
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
resetLearningRuntime();
|
||
// Stop metrics-central on session shutdown
|
||
try {
|
||
const { stopMetricsCentral } = await import("../metrics-central.js");
|
||
stopMetricsCentral();
|
||
} catch {
|
||
// Non-fatal: cleanup should not block shutdown
|
||
}
|
||
if (isParallelActive()) {
|
||
try {
|
||
await shutdownParallel(process.cwd());
|
||
} catch {
|
||
// best-effort
|
||
}
|
||
}
|
||
if (!isAutoActive() && !isAutoPaused()) return;
|
||
const dash = getAutoDashboardData();
|
||
if (dash.currentUnit) {
|
||
saveActivityLog(
|
||
ctx,
|
||
dash.basePath,
|
||
dash.currentUnit.type,
|
||
dash.currentUnit.id,
|
||
);
|
||
}
|
||
});
|
||
pi.on("tool_call", async (event) => {
|
||
const discussionBasePath = process.cwd();
|
||
// ── Shell pre-tool hooks (.sf/hooks/pre-tool/*.sh) ────────────────────
|
||
// User-authored scripts that can approve or deny a tool call.
|
||
// Exit 0 = approve; non-zero = block with stdout as the reason.
|
||
const hookBlock = runShellHooks(
|
||
"pre-tool",
|
||
{ tool: event.toolName, input: event.input ?? {} },
|
||
true,
|
||
);
|
||
if (hookBlock) return hookBlock;
|
||
// ── Loop guard: block repeated identical tool calls ──
|
||
const loopCheck = checkToolCallLoop(event.toolName, event.input);
|
||
if (loopCheck.block) {
|
||
return { block: true, reason: loopCheck.reason };
|
||
}
|
||
// ── Session file-touch recording ──────────────────────────────────────
|
||
// Best-effort: path may be absent for non-file tools; recordFileTouch
|
||
// no-ops on non-write tools and when no session is active.
|
||
try {
|
||
recordFileTouch(event.toolName, event.input?.path ?? null);
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
// ── Research unit terminal transition enforcement ─────────────────────
|
||
// After a research unit (research-slice/research-milestone) successfully
|
||
// saves its RESEARCH artifact via save_summary, the tool returns
|
||
// terminal_transition: true. We track this and block subsequent planning
|
||
// tool calls to prevent post-artifact drift (e.g. calling plan_milestone
|
||
// after research is complete). This addresses sf-moocx6m5-ij630a.
|
||
if (isAutoActive()) {
|
||
const dash = getAutoDashboardData();
|
||
const currentUnit = dash.currentUnit;
|
||
if (
|
||
currentUnit &&
|
||
(currentUnit.type === "research-slice" ||
|
||
currentUnit.type === "research-milestone")
|
||
) {
|
||
if (hasResearchTerminalTransition()) {
|
||
const planningTools = new Set([
|
||
"plan_milestone",
|
||
"plan_slice",
|
||
"plan_task",
|
||
"new_milestone_id",
|
||
"replan_slice",
|
||
"reassess_roadmap",
|
||
]);
|
||
if (planningTools.has(event.toolName)) {
|
||
return {
|
||
block: true,
|
||
reason:
|
||
`Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. ` +
|
||
`Post-artifact drift is blocked before runaway supervision treats it as legitimate large research. ` +
|
||
`Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`,
|
||
};
|
||
}
|
||
}
|
||
}
|
||
}
|
||
// ── Discussion gate enforcement: track pending gate questions ─────────
|
||
// Only gate-shaped ask_user_questions calls should block execution.
|
||
// The gate stays pending until the user selects the approval option.
|
||
// In autonomous mode (canAskUser=false): block the gate question and
|
||
// instruct the agent to investigate and resolve its own uncertainty
|
||
// rather than waiting for human input. The question exists because
|
||
// confidence is low — the agent must research until it can decide.
|
||
if (event.toolName === "ask_user_questions") {
|
||
const questions = event.input?.questions ?? [];
|
||
const questionId = questions.find(
|
||
(question) =>
|
||
typeof question?.id === "string" && isGateQuestionId(question.id),
|
||
)?.id;
|
||
if (typeof questionId === "string") {
|
||
if (isAutoActive() && !isCanAskUser()) {
|
||
// Do not set a pending gate — the agent must self-resolve.
|
||
// The gate was triggered because confidence < 0.98. The agent
|
||
// must investigate until confidence reaches 0.98, then proceed.
|
||
const questionSummary =
|
||
questions.find((q) => q.id === questionId)?.question ?? questionId;
|
||
return {
|
||
block: true,
|
||
reason:
|
||
`AUTONOMOUS MODE — gate question blocked. You called ask_user_questions ` +
|
||
`because your confidence was below 0.98. Target: reach ≥ 0.98 confidence ` +
|
||
`through investigation, then proceed without pausing.\n\n` +
|
||
`Gate question (self-resolve this): "${questionSummary}"\n\n` +
|
||
`Confidence-building protocol (execute in order, stop when confidence ≥ 0.98):\n` +
|
||
`1. Tier 1 — Read artifacts: task plans, slice plans, .sf/DECISIONS.md, ` +
|
||
`.sf/REQUIREMENTS.md, .sf/CONTEXT.md, docs/records/, existing test coverage.\n` +
|
||
`2. Tier 1 — Run verification: lint, type-check, relevant tests. Read the output.\n` +
|
||
`3. Tier 2 — External lookup: WebSearch / WebFetch / Context7 for any factual ` +
|
||
`unknowns (API behavior, version compatibility, library defaults).\n` +
|
||
`4. Self-assess: rate your confidence 0–1. If ≥ 0.98 → proceed. ` +
|
||
`If still < 0.98 → there is a real, concrete blocker.\n` +
|
||
`5. Concrete blocker only: call checkpoint with outcome="blocked" ` +
|
||
`and a precise blockerReason naming the specific unresolvable gap ` +
|
||
`(missing credentials, impossible contract, safety violation, legal scope).\n\n` +
|
||
`Do NOT call ask_user_questions again. Do NOT pause for user input. ` +
|
||
`Factual gaps are YOUR job to close via Tier 1 and Tier 2.`,
|
||
};
|
||
}
|
||
setPendingGate(questionId);
|
||
}
|
||
}
|
||
// ── Discussion gate enforcement: block tool calls while gate is pending ──
|
||
// If ask_user_questions was called with a gate ID but hasn't been confirmed,
|
||
// block all non-read-only tool calls to prevent the model from skipping gates.
|
||
// In autonomous mode: auto-clear any stale pending gate so it never blocks.
|
||
if (getPendingGate()) {
|
||
if (isAutoActive() && !isCanAskUser()) {
|
||
clearPendingGate();
|
||
} else {
|
||
const milestoneId = getDiscussionMilestoneId(discussionBasePath);
|
||
if (isToolCallEventType("bash", event)) {
|
||
const bashGuard = shouldBlockPendingGateBash(
|
||
event.input.command,
|
||
milestoneId,
|
||
isQueuePhaseActive(),
|
||
);
|
||
if (bashGuard.block) return bashGuard;
|
||
} else {
|
||
const gateGuard = shouldBlockPendingGate(
|
||
event.toolName,
|
||
milestoneId,
|
||
isQueuePhaseActive(),
|
||
);
|
||
if (gateGuard.block) return gateGuard;
|
||
}
|
||
}
|
||
}
|
||
// ── Queue-mode execution guard (#2545): block source-code mutations ──
|
||
// When /queue is active, the agent should only create milestones,
|
||
// not execute work. Block write/edit to non-.sf/ paths and bash commands
|
||
// that would modify files.
|
||
if (isQueuePhaseActive()) {
|
||
let queueInput = "";
|
||
if (isToolCallEventType("write", event)) {
|
||
queueInput = event.input.path;
|
||
} else if (isToolCallEventType("edit", event)) {
|
||
queueInput = event.input.path;
|
||
} else if (isToolCallEventType("bash", event)) {
|
||
queueInput = event.input.command;
|
||
}
|
||
const queueGuard = shouldBlockQueueExecution(
|
||
event.toolName,
|
||
queueInput,
|
||
true,
|
||
);
|
||
if (queueGuard.block) return queueGuard;
|
||
}
|
||
// ── Execution policy enforcement: block based on permission profile ──
|
||
// When autonomous mode is active, enforce the session's permission profile
|
||
// at the tool boundary. This is the enforcement layer that makes
|
||
// /permission-profile restricted|normal|trusted|unrestricted meaningful.
|
||
if (isAutoActive()) {
|
||
const { getAutoSession } = await import("../auto/session.js");
|
||
const session = getAutoSession();
|
||
const profile = session?.permissionProfile ?? "normal";
|
||
const input =
|
||
event.toolName === "bash"
|
||
? (event.input?.command ?? "")
|
||
: event.toolName === "write" || event.toolName === "edit"
|
||
? (event.input?.path ?? "")
|
||
: "";
|
||
const decision = classifyExecutionPolicyCall(
|
||
profile,
|
||
event.toolName,
|
||
input,
|
||
);
|
||
if (!decision.allowed) {
|
||
return {
|
||
block: true,
|
||
reason: `Execution policy block: ${decision.reason} (profile: ${profile}, tool: ${event.toolName})`,
|
||
};
|
||
}
|
||
}
|
||
// ── Single-writer engine: block direct writes to STATE.md ──────────
|
||
// Covers write, edit, and bash tools to prevent bypass vectors.
|
||
if (isToolCallEventType("write", event)) {
|
||
if (isBlockedStateFile(event.input.path)) {
|
||
return { block: true, reason: BLOCKED_WRITE_ERROR };
|
||
}
|
||
}
|
||
if (isToolCallEventType("edit", event)) {
|
||
if (isBlockedStateFile(event.input.path)) {
|
||
return { block: true, reason: BLOCKED_WRITE_ERROR };
|
||
}
|
||
}
|
||
if (isToolCallEventType("bash", event)) {
|
||
if (isBashWriteToStateFile(event.input.command)) {
|
||
return { block: true, reason: BLOCKED_WRITE_ERROR };
|
||
}
|
||
}
|
||
if (!isToolCallEventType("write", event)) return;
|
||
// ── Worktree isolation: block writes outside the worktree and main .sf/ ──
|
||
// Only enforced in autonomous mode — interactive sessions skip this check.
|
||
// When SF_WORKTREE is set, process.cwd() is the worktree directory.
|
||
// The agent should only write inside the worktree OR inside the main repo's .sf/.
|
||
if (isAutoActive() && process.env.SF_WORKTREE) {
|
||
const worktreeRoot = process.cwd();
|
||
const mainRepoRoot =
|
||
process.env.SF_PROJECT_ROOT ?? resolve(worktreeRoot, "..");
|
||
const targetPath = resolve(event.input.path);
|
||
const worktreeRel = relative(worktreeRoot, targetPath);
|
||
const mainSfRel = relative(join(mainRepoRoot, ".sf"), targetPath);
|
||
const worktreeOk =
|
||
!worktreeRel.startsWith("..") && !worktreeRel.startsWith("/");
|
||
const mainSfOk =
|
||
!mainSfRel.startsWith("..") && !mainSfRel.startsWith("/");
|
||
if (!worktreeOk && !mainSfOk) {
|
||
return {
|
||
block: true,
|
||
reason:
|
||
`HARD BLOCK: Worktree isolation is active. Cannot write to "${event.input.path}" — ` +
|
||
`path is outside the worktree (${worktreeRoot}) and outside the main repo's .sf/ directory. ` +
|
||
`Write only inside the worktree or inside ${join(mainRepoRoot, ".sf")}/milestones/ for planning artifacts.`,
|
||
};
|
||
}
|
||
}
|
||
const result = shouldBlockContextWrite(
|
||
event.toolName,
|
||
event.input.path,
|
||
getDiscussionMilestoneId(discussionBasePath),
|
||
isQueuePhaseActive(),
|
||
);
|
||
if (result.block) return result;
|
||
});
|
||
// ── Trajectory recording: capture tool calls ──
// Best-effort recorder — any failure here must never affect tool dispatch.
pi.on("tool_call", async (event) => {
  try {
    const recorder = await import("../trajectory-recorder.js");
    const dashboard = getAutoDashboardData();
    recorder.recordTrajectoryStep({
      sessionId: process.env.SF_SESSION_ID || "default",
      unitId: dashboard.currentUnit?.id,
      stepType: recorder.STEP_TYPES.TOOL_CALL,
      toolName: event.toolName,
      toolParams: event.input,
      metadata: {
        toolCallId: event.toolCallId,
        isAutoActive: isAutoActive(),
      },
    });
  } catch {
    // Trajectory recording is best-effort
  }
});
|
||
// ── Safety harness: evidence collection + destructive command warnings ──
// Runs only in autonomous mode. Records every tool call as evidence, journals
// the execution-policy context, persists evidence at dispatch time, and warns
// (never blocks) on destructive bash commands.
pi.on("tool_call", async (event, ctx) => {
  if (!isAutoActive()) return;
  safetyRecordToolCall(event.toolCallId, event.toolName, event.input);
  const policyDash = getAutoDashboardData();
  // Use session permission profile if available, fall back to queue-aware default
  const { getAutoSession } = await import("../auto/session.js");
  const session = getAutoSession();
  const sessionProfile = session?.permissionProfile;
  const policyProfile =
    sessionProfile ?? (isQueuePhaseActive() ? "restricted" : "normal");
  if (policyDash.basePath) {
    // Journal the policy context for this call (audit trail, not enforcement).
    emitJournalEvent(
      policyDash.basePath,
      buildExecutionPolicyJournalEntry({
        event,
        profileId: policyProfile,
        unit: policyDash.currentUnit,
        flowId: `execution-policy:${event.toolCallId ?? event.toolName}`,
        now: () => new Date().toISOString(),
      }),
    );
  }
  // Persist evidence immediately at dispatch so a mid-unit session restart
  // (resetEvidence() + loadEvidenceFromDisk()) cannot wipe the entry between
  // tool_call and tool_execution_end. Without this the "no bash calls" false
  // positive fires when the LLM clearly ran a verification command (Bug #4385).
  const callDash = policyDash;
  if (callDash.basePath && callDash.currentUnit?.type === "execute-task") {
    const {
      milestone: cMid,
      slice: cSid,
      task: cTid,
    } = parseUnitId(callDash.currentUnit.id);
    // Only persist when the unit id parses into a full milestone/slice/task triple.
    if (cMid && cSid && cTid) {
      saveEvidenceToDisk(callDash.basePath, cMid, cSid, cTid);
    }
  }
  // Destructive command classification (warn only, never block)
  if (isToolCallEventType("bash", event)) {
    const classification = classifyCommand(event.input.command);
    if (classification.destructive) {
      // Log first (truncated command for the record), then surface a deduped
      // UI warning so repeated identical classifications don't spam the user.
      safetyLogWarning(
        "safety",
        `destructive command: ${classification.labels.join(", ")}`,
        {
          command: String(event.input.command).slice(0, 200),
        },
      );
      ctx.ui.notify(
        `Destructive command detected: ${classification.labels.join(", ")}`,
        "warning",
        {
          noticeKind: NOTICE_KIND.TOOL_NOTICE,
          dedupe_key: `destructive-command:${classification.labels.join(",")}`,
        },
      );
    }
  }
});
|
||
pi.on("tool_result", async (event) => {
|
||
// ── Shell post-tool hooks (.sf/hooks/post-tool/*.sh) ─────────────────
|
||
// Fire-and-forget: scripts receive tool name + result text; exit code ignored.
|
||
runShellHooks("post-tool", {
|
||
tool: event.toolName,
|
||
input: event.input ?? {},
|
||
result:
|
||
typeof event.content === "string"
|
||
? event.content.slice(0, 2_000)
|
||
: null,
|
||
});
|
||
if (isAutoActive()) {
|
||
if (
|
||
event.toolName === "save_summary" &&
|
||
event.details &&
|
||
typeof event.details === "object" &&
|
||
event.details.terminal_transition === true &&
|
||
event.details.unit_type === "research"
|
||
) {
|
||
markResearchTerminalTransition();
|
||
}
|
||
const steer = observeToolResult(event);
|
||
if (steer) {
|
||
pi.sendMessage(
|
||
{
|
||
customType: "sf-tool-watchdog",
|
||
content: steer.content,
|
||
display: false,
|
||
details: {
|
||
key: steer.key,
|
||
severity: steer.severity,
|
||
toolName: event.toolName,
|
||
toolCallId: event.toolCallId,
|
||
},
|
||
},
|
||
{ deliverAs: "steer" },
|
||
);
|
||
}
|
||
}
|
||
if (event.toolName !== "ask_user_questions") return;
|
||
const milestoneId = getDiscussionMilestoneId(process.cwd());
|
||
const queueActive = isQueuePhaseActive();
|
||
const details = event.details;
|
||
// ── Discussion gate enforcement: handle gate question responses ──
|
||
// Single consolidated loop: finds depth_verification questions, verifies the answer,
|
||
// marks the milestone as depth-verified, and clears the pending gate.
|
||
// Also handles the legacy pending-gate path (set by tool_call) for robustness.
|
||
const questions = event.input?.questions ?? [];
|
||
const currentPendingGate = getPendingGate();
|
||
if (details?.cancelled || !details?.response) return;
|
||
if (details.remote === true) {
|
||
const steering = parseRemoteSteeringDirectives(details.response);
|
||
if (steering.steering) {
|
||
const results = applyRemoteSteeringDirectives(steering.directives);
|
||
pi.sendMessage(
|
||
{
|
||
customType: "sf-remote-steering",
|
||
content: formatRemoteSteeringResults(results),
|
||
display: false,
|
||
details: {
|
||
toolName: event.toolName,
|
||
toolCallId: event.toolCallId,
|
||
promptId: details.promptId,
|
||
channel: details.channel,
|
||
results,
|
||
},
|
||
},
|
||
{ deliverAs: "steer" },
|
||
);
|
||
}
|
||
}
|
||
for (const question of questions) {
|
||
if (typeof question.id !== "string") continue;
|
||
// Check if this is a depth_verification question (either directly or via pending gate)
|
||
const isDepthQ = question.id.includes("depth_verification");
|
||
const isPendingQ = question.id === currentPendingGate;
|
||
if (!isDepthQ && !isPendingQ) continue;
|
||
const answer = details.response?.answers?.[question.id];
|
||
if (
|
||
isDepthConfirmationAnswer(
|
||
getSelectedGateAnswer(answer),
|
||
question.options,
|
||
)
|
||
) {
|
||
// Always mark depth-verified AND clear the gate
|
||
if (isDepthQ) {
|
||
const inferredMilestoneId =
|
||
extractDepthVerificationMilestoneId(question.id) ?? milestoneId;
|
||
markDepthVerified(inferredMilestoneId);
|
||
}
|
||
clearPendingGate();
|
||
break;
|
||
}
|
||
}
|
||
if (!milestoneId && !queueActive) return;
|
||
if (!milestoneId) return;
|
||
const basePath = process.cwd();
|
||
const milestoneDir = resolveMilestonePath(basePath, milestoneId);
|
||
if (!milestoneDir) return;
|
||
const discussionPath = join(
|
||
milestoneDir,
|
||
buildMilestoneFileName(milestoneId, "DISCUSSION"),
|
||
);
|
||
const timestamp = new Date().toISOString();
|
||
const lines = [`## Exchange — ${timestamp}`, ""];
|
||
for (const question of questions) {
|
||
lines.push(
|
||
`### ${question.header ?? "Question"}`,
|
||
"",
|
||
question.question ?? "",
|
||
);
|
||
if (Array.isArray(question.options)) {
|
||
lines.push("");
|
||
for (const opt of question.options) {
|
||
lines.push(`- **${opt.label}** — ${opt.description ?? ""}`);
|
||
}
|
||
}
|
||
const answer = details.response?.answers?.[question.id];
|
||
if (answer) {
|
||
lines.push("");
|
||
const selectedValue = getSelectedGateAnswer(answer);
|
||
const selected = Array.isArray(selectedValue)
|
||
? selectedValue.join(", ")
|
||
: selectedValue;
|
||
lines.push(`**Selected:** ${selected}`);
|
||
if (answer.notes) {
|
||
lines.push(`**Notes:** ${answer.notes}`);
|
||
}
|
||
}
|
||
lines.push("");
|
||
}
|
||
lines.push("---", "");
|
||
const existing =
|
||
(await loadFile(discussionPath)) ?? `# ${milestoneId} Discussion Log\n\n`;
|
||
await saveFile(discussionPath, existing + lines.join("\n"));
|
||
});
|
||
pi.on("tool_execution_start", async (event) => {
|
||
if (!isAutoActive()) return;
|
||
markToolStart(event.toolCallId, event.toolName);
|
||
recordToolCallName(event.toolName);
|
||
recordCompletionNudgeToolCall(event.toolName);
|
||
});
|
||
pi.on("tool_execution_end", async (event) => {
|
||
markToolEnd(event.toolCallId);
|
||
// Record tool execution performance metrics
|
||
try {
|
||
const { recordToolExecution } = await import("../metrics-central.js");
|
||
recordToolExecution(
|
||
event.toolName,
|
||
event.durationMs,
|
||
event.isError,
|
||
event.isError
|
||
? typeof event.result === "string"
|
||
? event.result
|
||
: "tool_error"
|
||
: undefined,
|
||
);
|
||
} catch {
|
||
// Non-fatal: metrics should not break tool execution
|
||
}
|
||
// #2883/#4974: Capture deterministic invocation/policy errors so
|
||
// postUnitPreVerification can break the retry loop instead of re-dispatching.
|
||
// Covers sf_ tool JSON errors AND write-gate blocks on write/edit/bash tools.
|
||
if (event.isError) {
|
||
const errorText =
|
||
typeof event.result === "string"
|
||
? event.result
|
||
: typeof event.result?.content?.[0]?.text === "string"
|
||
? event.result.content[0].text
|
||
: String(event.result);
|
||
recordToolInvocationError(event.toolName, errorText);
|
||
}
|
||
// Safety harness: record tool execution results for evidence cross-referencing
|
||
if (isAutoActive()) {
|
||
safetyRecordToolResult(
|
||
event.toolCallId,
|
||
event.toolName,
|
||
event.result,
|
||
event.isError,
|
||
);
|
||
// Persist evidence to disk after each tool result so it survives a session
|
||
// restart mid-unit (Bug #4385 — non-persisted evidence false positives).
|
||
const endDash = getAutoDashboardData();
|
||
if (endDash.basePath && endDash.currentUnit?.type === "execute-task") {
|
||
const {
|
||
milestone: pMid,
|
||
slice: pSid,
|
||
task: pTid,
|
||
} = parseUnitId(endDash.currentUnit.id);
|
||
if (pMid && pSid && pTid) {
|
||
saveEvidenceToDisk(endDash.basePath, pMid, pSid, pTid);
|
||
}
|
||
}
|
||
}
|
||
// Trajectory recording: capture every tool execution step
|
||
try {
|
||
const { recordTrajectoryStep, STEP_TYPES } = await import(
|
||
"../trajectory-recorder.js"
|
||
);
|
||
const sessionId = process.env.SF_SESSION_ID || "default";
|
||
const dash = getAutoDashboardData();
|
||
const unitId = dash.currentUnit?.id;
|
||
|
||
// Record tool result
|
||
recordTrajectoryStep({
|
||
sessionId,
|
||
unitId,
|
||
stepType: event.isError ? STEP_TYPES.ERROR : STEP_TYPES.TOOL_RESULT,
|
||
toolName: event.toolName,
|
||
toolResult:
|
||
typeof event.result === "string"
|
||
? event.result
|
||
: JSON.stringify(event.result).slice(0, 2000),
|
||
isError: event.isError,
|
||
errorMessage: event.isError
|
||
? (typeof event.result === "string"
|
||
? event.result
|
||
: String(event.result)
|
||
).slice(0, 500)
|
||
: undefined,
|
||
metadata: {
|
||
duration_ms: event.durationMs,
|
||
toolCallId: event.toolCallId,
|
||
},
|
||
});
|
||
} catch {
|
||
// Trajectory recording is best-effort; don't fail tool execution
|
||
}
|
||
});
|
||
pi.on("model_select", async (_event, ctx) => {
|
||
await syncServiceTierStatus(ctx);
|
||
});
|
||
pi.on("context", async (event) => {
|
||
if (!isAutoActive()) return;
|
||
const messages = maybeInjectCompletionNudgeMessage(event.messages);
|
||
if (messages === event.messages) return;
|
||
return { messages };
|
||
});
|
||
pi.on("before_provider_request", async (event, ctx) => {
|
||
const modelId = event.model?.id;
|
||
const payload = event.payload;
|
||
if (!payload || typeof payload !== "object") return;
|
||
applyCompletionNudgeTemperature(payload);
|
||
// ── Observation Masking ─────────────────────────────────────────────
|
||
// Replace old tool results with placeholders to reduce context bloat.
|
||
// Only active during autonomous mode when context_management.observation_masking is enabled.
|
||
if (isAutoActive()) {
|
||
try {
|
||
const { loadEffectiveSFPreferences } = await import(
|
||
"../preferences.js"
|
||
);
|
||
const prefs = loadEffectiveSFPreferences();
|
||
const cmConfig = prefs?.preferences.context_management;
|
||
// Observation masking: replace old tool results with placeholders
|
||
if (cmConfig?.observation_masking !== false) {
|
||
const keepTurns = cmConfig?.observation_mask_turns ?? 8;
|
||
const { createObservationMask } = await import(
|
||
"../context-masker.js"
|
||
);
|
||
const mask = createObservationMask(keepTurns);
|
||
const messages = payload.messages;
|
||
if (Array.isArray(messages)) {
|
||
payload.messages = mask(messages);
|
||
}
|
||
}
|
||
// Tool result truncation: cap individual tool result content length.
|
||
// In pi-ai format, toolResult messages have role: "toolResult" and content: TextContent[].
|
||
// Creates new objects to avoid mutating shared conversation state.
|
||
const maxChars = cmConfig?.tool_result_max_chars ?? 800;
|
||
const msgs = payload.messages;
|
||
if (Array.isArray(msgs)) {
|
||
payload.messages = msgs.map((msg) => {
|
||
// Match toolResult messages (role: "toolResult", content is array of content blocks)
|
||
if (msg?.role === "toolResult" && Array.isArray(msg.content)) {
|
||
const blocks = msg.content;
|
||
const totalLen = blocks.reduce(
|
||
(sum, b) =>
|
||
sum + (typeof b.text === "string" ? b.text.length : 0),
|
||
0,
|
||
);
|
||
if (totalLen > maxChars) {
|
||
const truncated = blocks.map((b) => {
|
||
if (typeof b.text === "string" && b.text.length > maxChars) {
|
||
return {
|
||
...b,
|
||
text: b.text.slice(0, maxChars) + "\n…[truncated]",
|
||
};
|
||
}
|
||
return b;
|
||
});
|
||
return { ...msg, content: truncated };
|
||
}
|
||
}
|
||
return msg;
|
||
});
|
||
}
|
||
} catch {
|
||
/* non-fatal */
|
||
}
|
||
}
|
||
// ── Service Tier ────────────────────────────────────────────────────
|
||
if (!modelId) {
|
||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||
return payload;
|
||
}
|
||
const {
|
||
getEffectiveServiceTier,
|
||
supportsServiceTier,
|
||
isServiceTierDisabled,
|
||
} = await import("../service-tier.js");
|
||
// Short-circuit on explicit disable — never inject service_tier on any
|
||
// setup that has opted out, regardless of model.
|
||
if (!isServiceTierDisabled()) {
|
||
const tier = getEffectiveServiceTier();
|
||
if (tier && supportsServiceTier(modelId)) {
|
||
payload.service_tier = tier;
|
||
}
|
||
}
|
||
if (event.model?.provider !== "google-gemini-cli") {
|
||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||
return payload;
|
||
}
|
||
try {
|
||
const resolvedModel =
|
||
ctx.model &&
|
||
ctx.model.provider === event.model.provider &&
|
||
ctx.model.id === event.model.id
|
||
? ctx.model
|
||
: ctx.modelRegistry
|
||
.getAvailable()
|
||
.find(
|
||
(m) =>
|
||
m.provider === event.model?.provider &&
|
||
m.id === event.model?.id,
|
||
);
|
||
if (!resolvedModel) {
|
||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||
return payload;
|
||
}
|
||
const apiKey = await ctx.modelRegistry.getApiKey(resolvedModel);
|
||
const totalTokens = await countGoogleGeminiCliTokens(payload, apiKey);
|
||
if (typeof totalTokens !== "number") {
|
||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||
return payload;
|
||
}
|
||
const contextWindow = resolvedModel.contextWindow ?? 0;
|
||
const pct =
|
||
contextWindow > 0
|
||
? Math.round((totalTokens / contextWindow) * 100)
|
||
: undefined;
|
||
ctx.ui.setStatus(
|
||
"sf-gemini-tokens",
|
||
pct !== undefined
|
||
? `gemini ${formatTokenCount(totalTokens)} (${pct}%)`
|
||
: `gemini ${formatTokenCount(totalTokens)}`,
|
||
);
|
||
if (contextWindow > 0 && totalTokens >= Math.floor(contextWindow * 0.8)) {
|
||
const warningKey = `${resolvedModel.id}:${totalTokens}:${contextWindow}`;
|
||
if (lastGeminiPreflightWarning !== warningKey) {
|
||
lastGeminiPreflightWarning = warningKey;
|
||
ctx.ui.notify(
|
||
`Gemini preflight: ${formatTokenCount(totalTokens)} tokens (${pct}% of ${formatTokenCount(contextWindow)} context).`,
|
||
"warning",
|
||
{
|
||
noticeKind: NOTICE_KIND.SYSTEM_NOTICE,
|
||
dedupe_key: `gemini-preflight:${resolvedModel.id}`,
|
||
},
|
||
);
|
||
}
|
||
}
|
||
} catch {
|
||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||
}
|
||
return payload;
|
||
});
|
||
// Capability-aware model routing hook (ADR-004)
// Extensions can override model selection by returning { modelId: "..." }
// Return undefined to let the built-in capability scoring proceed.
// When reason="fallback", the current model just failed — record it as a failure
// so the learning system demotes it in future selections.
pi.on("before_model_select", async (event) => {
  const { reason, currentModelId, unitType, unitId } = event;
  const isFallbackFromKnownModel =
    reason === "fallback" && Boolean(currentModelId);
  if (isFallbackFromKnownModel) {
    // Use the real unit_id when the autonomous dispatch set it; fall back to a
    // sentinel so the NOT NULL constraint is always satisfied.
    const failureOutcome = {
      model_id: currentModelId,
      unit_type: unitType ?? "execute-task",
      unit_id: unitId || `fallback:${currentModelId}`,
      succeeded: false,
      failure_mode: event.classification?.reason ?? null,
      recorded_at: Date.now(),
    };
    recordLearnedOutcome(failureOutcome);
  }
  return selectLearnedModel({
    unitType: event.unitType,
    eligibleModels: event.eligibleModels,
    phaseConfig: event.phaseConfig,
  });
});
|
||
// Tool set adaptation hook (ADR-005 Phase 4)
// Extensions can override tool set after model selection by returning { toolNames: [...] }
// Return undefined to let the built-in provider compatibility filtering proceed.
// Default registration: explicitly decline the override so provider capability
// filtering decides the tool set.
pi.on("adjust_tool_set", async (_event) => undefined);
|
||
}
|