diff --git a/src/resources/extensions/sf/abandon-detect.ts b/src/resources/extensions/sf/abandon-detect.ts new file mode 100644 index 000000000..2f5071148 --- /dev/null +++ b/src/resources/extensions/sf/abandon-detect.ts @@ -0,0 +1,62 @@ +/** + * Abandon-milestone detection for rewrite-docs overrides (#3490). + * + * Isolated from auto-post-unit.ts so behavioral tests can import this module + * without pulling in the full post-unit handler graph (which transitively + * loads model-router, workflow engine, etc.). + */ + +import type { Override } from "./files.js"; + +// Detect when a rewrite-docs override is about abandoning THE CURRENT +// MILESTONE — not just any override containing an abandon verb. Naively +// matching `/\b(abandon|cancel|drop|...)\b/` against override text produces +// false positives on scope-change prose ("cancel the standup reminder", +// "drop the dependency on X", "scrap the v1 design for the landing page"). +// +// To qualify as an abandon-milestone signal, an override must contain both: +// 1. An abandon-family verb (abandon|descope|cancel|shelve|drop|scrap) +// 2. A milestone reference — either the literal word "milestone" or the +// current milestone ID — in the same override text. + +// Verb variants cover both US and UK inflections: +// cancel / canceled / canceling / cancelled / cancelling / cancels +// travel-style "l"-doubling also applies to shelve/drop/scrap. +// "descope" also accepts "de-scope" and "de scope" (hyphen / space forms). +const ABANDON_VERB_RE = /\b(abandon(?:ed|ing|s)?|de[-\s]?scope(?:d|s|ing)?|cancel(?:led|ling|ed|ing|s)?|shelve(?:d|s)?|shelving|drop(?:ped|ping|s)?|scrap(?:ped|ping|s)?)\b/i; + +export interface AbandonDecision { + shouldPark: boolean; + reason: string; + matched: string[]; +} + +/** + * Decide whether a set of active overrides indicates the current milestone + * should be parked. Pure function — no I/O, no imports beyond types. 
+ */ +export function detectAbandonMilestone( + overrides: Override[], + currentMilestoneId: string | null | undefined, +): AbandonDecision { + if (!currentMilestoneId) { + return { shouldPark: false, reason: "", matched: [] }; + } + + const escapedId = currentMilestoneId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const milestoneRefRe = new RegExp(`\\b(?:milestone|${escapedId})\\b`, "i"); + + const matched = overrides + .filter(o => ABANDON_VERB_RE.test(o.change) && milestoneRefRe.test(o.change)) + .map(o => o.change); + + if (matched.length === 0) { + return { shouldPark: false, reason: "", matched: [] }; + } + + return { + shouldPark: true, + reason: matched.join("; "), + matched, + }; +} diff --git a/src/resources/extensions/sf/auto-model-selection.ts b/src/resources/extensions/sf/auto-model-selection.ts index 0a49fbf84..281e4b7e5 100644 --- a/src/resources/extensions/sf/auto-model-selection.ts +++ b/src/resources/extensions/sf/auto-model-selection.ts @@ -84,6 +84,14 @@ export function clearToolBaseline(pi: ExtensionAPI | object): void { TOOL_BASELINE.delete(pi as unknown as object); } +function reapplyThinkingLevel( + pi: ExtensionAPI, + level: ReturnType | null | undefined, +): void { + if (!level) return; + pi.setThinkingLevel(level); +} + function restoreToolBaseline(pi: ExtensionAPI): void { const key = pi as unknown as object; const baseline = TOOL_BASELINE.get(key); @@ -154,6 +162,8 @@ export async function selectAndApplyModel( isAutoMode = true, /** Explicit /sf model pin captured at bootstrap for long-running auto loops. */ sessionModelOverride?: { provider: string; id: string } | null, + /** Thinking level captured at auto-mode start and re-applied after model swaps. 
*/ + autoModeStartThinkingLevel?: ReturnType | null, ): Promise { // ── Restore active-tool baseline before policy evaluation (#4959, #4681, #4850) ── // Per-unit narrowing at the bottom of this function calls @@ -474,6 +484,7 @@ export async function selectAndApplyModel( const ok = await pi.setModel(model, { persist: persistModelChanges }); if (ok) { appliedModel = model; + reapplyThinkingLevel(pi, autoModeStartThinkingLevel); // ADR-005: Adjust active tool set for the selected model's provider capabilities. // Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook. @@ -543,10 +554,14 @@ export async function selectAndApplyModel( const byId = availableModels.find(m => m.id === autoModeStartModel.id); if (byId) { const fallbackOk = await pi.setModel(byId, { persist: persistModelChanges }); - if (fallbackOk) appliedModel = byId; + if (fallbackOk) { + appliedModel = byId; + reapplyThinkingLevel(pi, autoModeStartThinkingLevel); + } } } else { appliedModel = startModel; + reapplyThinkingLevel(pi, autoModeStartThinkingLevel); } } } diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index 59be993cc..bd491b525 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -15,6 +15,7 @@ import type { ExtensionContext, ExtensionAPI } from "@singularity-forge/pi-codin import { deriveState } from "./state.js"; import { logWarning, logError } from "./workflow-logger.js"; import { loadFile, parseSummary, resolveAllOverrides } from "./files.js"; +import { detectAbandonMilestone } from "./abandon-detect.js"; import { loadPrompt } from "./prompt-loader.js"; import { resolveSliceFile, @@ -592,6 +593,35 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV // Rewrite-docs completion if (s.currentUnit.type === "rewrite-docs") { await runSafely("postUnit", "rewrite-docs-resolve", async () => { + // Detect 
abandon/descope overrides BEFORE resolving them (#3490). + // If an override is about abandoning the milestone, park it so the + // state engine skips it. Without this, rewrite-docs only edits + // markdown but the DB still has the milestone as active. + try { + const { loadActiveOverrides } = await import("./files.js"); + const overrides = await loadActiveOverrides(s.basePath); + const decision = detectAbandonMilestone(overrides, s.currentMilestoneId); + if (decision.shouldPark && s.currentMilestoneId) { + const { parkMilestone } = await import("./milestone-actions.js"); + const parked = parkMilestone(s.basePath, s.currentMilestoneId, decision.reason); + if (parked) { + ctx.ui.notify(`Milestone ${s.currentMilestoneId} parked: "${decision.reason}"`, "info"); + } else { + // Park refused: milestone directory missing, milestone already + // completed (SUMMARY present), or PARKED.md already exists. + // resolveAllOverrides below will still consume the override — + // surface this loudly so the user notices state drift rather + // than silently losing the abandon directive. + const msg = `Abandon detected for ${s.currentMilestoneId} but park refused (milestone is completed, already parked, or missing). Override will be resolved anyway — verify state is correct.`; + logError("engine", msg); + ctx.ui.notify(msg, "warning"); + } + } + } catch (err) { + logError("engine", `abandon-detect failed: ${(err as Error).message}`); + ctx.ui.notify(`Abandon detection failed — check logs. Overrides will still be resolved.`, "warning"); + } + await resolveAllOverrides(s.basePath); // Reset both disk and in-memory counters. Disk counter is authoritative // (survives restarts); in-memory is kept in sync for the current session. 
diff --git a/src/resources/extensions/sf/auto/loop-deps.ts b/src/resources/extensions/sf/auto/loop-deps.ts index 5138eafe1..4316cbe40 100644 --- a/src/resources/extensions/sf/auto/loop-deps.ts +++ b/src/resources/extensions/sf/auto/loop-deps.ts @@ -220,6 +220,7 @@ export interface LoopDeps { retryContext?: { isRetry: boolean; previousTier?: string }, isAutoMode?: boolean, sessionModelOverride?: { provider: string; id: string } | null, + autoModeStartThinkingLevel?: unknown, ) => Promise<{ routing: { tier: string; modelDowngraded: boolean } | null; appliedModel: { provider: string; id: string } | null; diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index e186a4a2e..0fec104b5 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -1433,6 +1433,7 @@ export async function runUnitPhase( sidecarItem ? undefined : { isRetry, previousTier }, undefined, s.manualSessionModelOverride, + s.autoModeStartThinkingLevel, ); s.currentUnitRouting = modelResult.routing as AutoSession["currentUnitRouting"]; @@ -1447,6 +1448,9 @@ export async function runUnitPhase( if (match) { const ok = await pi.setModel(match, { persist: resolvePersistModelChanges() }); if (ok) { + if (s.autoModeStartThinkingLevel) { + pi.setThinkingLevel(s.autoModeStartThinkingLevel); + } s.currentUnitModel = match as AutoSession["currentUnitModel"]; ctx.ui.notify(`Hook model override: ${match.provider}/${match.id}`, "info"); } else { diff --git a/src/resources/extensions/sf/auto/run-unit.ts b/src/resources/extensions/sf/auto/run-unit.ts index 9e7a4b13c..8f99edad9 100644 --- a/src/resources/extensions/sf/auto/run-unit.ts +++ b/src/resources/extensions/sf/auto/run-unit.ts @@ -10,6 +10,10 @@ import type { AutoSession } from "./session.js"; import { NEW_SESSION_TIMEOUT_MS } from "./session.js"; import type { UnitResult } from "./types.js"; import { _setCurrentResolve, _setSessionSwitchInFlight } from 
"./resolve.js"; +import { + getCurrentTurnGeneration, + runWithTurnGeneration, +} from "./turn-epoch.js"; import { debugLog } from "../debug-logger.js"; import { logWarning, logError } from "../workflow-logger.js"; import { resolveAutoSupervisorConfig, resolvePersistModelChanges } from "../preferences.js"; @@ -111,6 +115,12 @@ export async function runUnit( // ── Send the prompt ── debugLog("runUnit", { phase: "send-message", unitType, unitId }); + // Capture the turn generation BEFORE sendMessage so any stale-write + // checks reached from within this turn see the same generation we start + // with. bumpTurnGeneration() is called by timeout-recovery when this turn + // is superseded; isStaleWrite() in journal.ts uses it to drop late writes. + const capturedTurnGen = getCurrentTurnGeneration(); + pi.sendMessage( { customType: "sf-auto", content: prompt, display: s.verbose }, { triggerTurn: true }, @@ -131,7 +141,9 @@ export async function runUnit( resolve({ status: "cancelled", errorContext: { message: "Unit hard timeout — supervision may have failed", category: "timeout", isTransient: true } }); }, UNIT_HARD_TIMEOUT_MS); }); - const result = await Promise.race([unitPromise, timeoutResult]); + const result = await runWithTurnGeneration(capturedTurnGen, () => + Promise.race([unitPromise, timeoutResult]), + ); if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle); debugLog("runUnit", { phase: "agent-end-received", diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts index 44160e413..92af98b69 100644 --- a/src/resources/extensions/sf/auto/session.ts +++ b/src/resources/extensions/sf/auto/session.ts @@ -17,13 +17,15 @@ */ import type { Api, Model } from "@singularity-forge/pi-ai"; -import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent"; +import type { ExtensionAPI, ExtensionCommandContext } from "@singularity-forge/pi-coding-agent"; import type { GitServiceImpl, TaskCommitContext } from 
"../git-service.js"; import type { CaptureEntry } from "../captures.js"; import type { BudgetAlertLevel } from "../auto-budget.js"; // ─── Exported Types ────────────────────────────────────────────────────────── +export type ThinkingLevelSnapshot = ReturnType; + export interface CurrentUnit { type: string; id: string; @@ -113,6 +115,8 @@ export class AutoSession { // ── Model state ────────────────────────────────────────────────────────── autoModeStartModel: StartModel | null = null; + autoModeStartThinkingLevel: ThinkingLevelSnapshot | null = null; + originalThinkingLevel: ThinkingLevelSnapshot | null = null; /** Explicit /sf model pin captured at bootstrap (session-scoped policy override). */ manualSessionModelOverride: StartModel | null = null; currentUnitModel: Model | null = null; @@ -261,6 +265,8 @@ export class AutoSession { // Model this.autoModeStartModel = null; + this.autoModeStartThinkingLevel = null; + this.originalThinkingLevel = null; this.manualSessionModelOverride = null; this.currentUnitModel = null; this.currentDispatchedModelId = null; diff --git a/src/resources/extensions/sf/auto/turn-epoch.ts b/src/resources/extensions/sf/auto/turn-epoch.ts new file mode 100644 index 000000000..1e84fb863 --- /dev/null +++ b/src/resources/extensions/sf/auto/turn-epoch.ts @@ -0,0 +1,108 @@ +/** + * auto/turn-epoch.ts — Turn generation counter + AsyncLocalStorage-backed + * capture for stale-turn write dropping. + * + * Problem: when auto-timeout-recovery synthetically resolves a timed-out + * unit so the loop can advance, the original LLM turn keeps running in the + * background. Its subsequent writes (journal events, audit events, tool + * calls that flow through closeout) then race the replacement unit's + * writes. DB-level guards (complete-task/complete-slice) block double + * state transitions, but journal/audit/closeout side-effects still fire + * with fresh identifiers and pollute forensics. 
+ * + * Containment: every time we decide a turn is done (timeout recovery, + * explicit cancellation), bump a module-level generation counter. + * Turn-aware call sites wrap their body in `runWithTurnGeneration`, which + * captures the generation into AsyncLocalStorage. Write sites deep in the + * stack call `isStaleWrite` — if the captured generation is older than + * current, the turn has been superseded and the write is dropped. + * + * Failure mode: if AsyncLocalStorage context is lost across some exotic + * async boundary (e.g. a native-side worker callback), the write site sees + * `no-store` and falls through to current behavior — the write proceeds + * normally. That is a safe default; the correctness regression is only + * "noisier forensics under rare boundary loss," not duplicated state. + */ + +import { AsyncLocalStorage } from "node:async_hooks"; + +import { debugLog } from "../debug-logger.js"; + +let _currentGeneration = 0; + +const turnContext = new AsyncLocalStorage<{ capturedGen: number }>(); + +/** Current turn generation. Mutated only by bumpTurnGeneration. */ +export function getCurrentTurnGeneration(): number { + return _currentGeneration; +} + +/** + * Bump the turn generation and return the new value. Every caller should + * pass a short `reason` string so forensics can reconstruct why a given + * turn was marked stale. + */ +export function bumpTurnGeneration(reason: string): number { + _currentGeneration += 1; + debugLog("turnEpoch.bump", { reason, newGeneration: _currentGeneration }); + return _currentGeneration; +} + +/** + * Run fn() with `capturedGen` attached to AsyncLocalStorage so that any + * write site reached from within fn() can check for staleness without + * parameter threading. 
+ */ +export function runWithTurnGeneration(capturedGen: number, fn: () => T): T { + return turnContext.run({ capturedGen }, fn); +} + +/** + * True when the current async context was started at a turn generation + * older than the current one — meaning the turn has been superseded by + * recovery/cancellation since it began. + * + * Returns false when there is no captured generation (e.g. the write is + * happening outside any wrapped turn). That is the safe default: writes + * proceed as they did before this epoch was introduced. + */ +export function isStaleWrite(component?: string): boolean { + const store = turnContext.getStore(); + if (!store) return false; + const captured = store.capturedGen; + const current = _currentGeneration; + if (captured < current) { + debugLog("turnEpoch.stale", { + component: component ?? "unknown", + captured, + current, + }); + return true; + } + return false; +} + +/** + * Snapshot of both the captured turn generation and the current one. + * Used by closeoutUnit to persist an orphan-marker entry instead of + * silently skipping the full closeout on a stale turn. + */ +export function describeTurnEpoch(): { + captured: number | null; + current: number; + stale: boolean; +} { + const store = turnContext.getStore(); + const captured = store?.capturedGen ?? null; + const current = _currentGeneration; + return { + captured, + current, + stale: captured !== null && captured < current, + }; +} + +/** Test helper — resets module state so tests start from a known baseline. 
*/ +export function _resetTurnEpoch(): void { + _currentGeneration = 0; +} diff --git a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts index 074fed763..852df963a 100644 --- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts +++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts @@ -194,7 +194,10 @@ export async function handleAgentEnd( } // No usable fallback — pause - await pauseAutoForProviderError(pi, `Model unsupported for this account${errorDetail}`); + await pauseAutoForProviderError(ctx.ui, `Model unsupported for this account${errorDetail}`, () => pauseAuto(ctx, pi, { + message: `Model unsupported for this account${errorDetail}`, + category: "provider", + })); return; } diff --git a/src/resources/extensions/sf/bootstrap/exec-tools.ts b/src/resources/extensions/sf/bootstrap/exec-tools.ts new file mode 100644 index 000000000..a2073c0ee --- /dev/null +++ b/src/resources/extensions/sf/bootstrap/exec-tools.ts @@ -0,0 +1,110 @@ +// SF — Exec (context-mode) tool registration. +// +// Exposes the `sf_exec`, `sf_exec_search`, and `sf_resume` tools over MCP. +// Opt-in: sf_exec is disabled unless `context_mode.enabled: true` is set +// (or left unset — enabled by default). + +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent"; + +import { executeSfExec } from "../tools/exec-tool.js"; +import { executeExecSearch } from "../tools/exec-search-tool.js"; +import { executeResume } from "../tools/resume-tool.js"; +import { loadEffectiveSFPreferences } from "../preferences.js"; +import { logWarning } from "../workflow-logger.js"; + +export function registerExecTools(pi: ExtensionAPI): void { + pi.registerTool({ + name: "sf_exec", + label: "Exec (Sandboxed)", + description: + "Run a short script (bash/node/python) in a subprocess. 
Full stdout/stderr persist to " + + ".sf/exec/.{stdout,stderr,meta.json}; only a short digest returns in context. Use " + + "this instead of reading many files or emitting large tool outputs — e.g. have the script " + + "count/grep/summarize and log the finding. Enabled by default; opt out via " + + "preferences.context_mode.enabled=false.", + promptSnippet: + "Run a bash/node/python script in a sandbox; full output is saved to disk and only a digest returns", + promptGuidelines: [ + "Prefer sf_exec for analyses that would otherwise read >3 files or produce large tool output.", + "Write scripts that log the finding (counts, matches, summaries) rather than raw dumps.", + "The digest is the last ~300 chars of stdout — size your log output accordingly.", + "Need the full output? Read the stdout_path returned in details (file on local disk).", + ], + parameters: Type.Object({ + runtime: Type.Union( + [Type.Literal("bash"), Type.Literal("node"), Type.Literal("python")], + { description: "Interpreter: bash (-c), node (-e), or python3 (-c)." }, + ), + script: Type.String({ description: "Script body. Keep output small (log the finding, not the data)." }), + purpose: Type.Optional(Type.String({ description: "Short label recorded in meta.json for later review." })), + timeout_ms: Type.Optional( + Type.Number({ + description: "Per-invocation timeout (ms). Capped at 600000. Default from preferences.", + minimum: 1_000, + maximum: 600_000, + }), + ), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + let prefs: Awaited> | null = null; + try { + prefs = loadEffectiveSFPreferences(); + } catch (err) { + logWarning("tool", `sf_exec could not load preferences: ${err instanceof Error ? err.message : String(err)}`); + } + return executeSfExec(params as Parameters[0], { + baseDir: process.cwd(), + preferences: prefs?.preferences ?? 
null, + }); + }, + }); + + pi.registerTool({ + name: "sf_exec_search", + label: "Search sf_exec History", + description: + "List prior sf_exec runs (most recent first) from .sf/exec/*.meta.json. Useful for " + + "rediscovering the stdout_path of an earlier run without re-executing it. Read-only.", + promptSnippet: "Search prior sf_exec runs by substring, runtime, or failing-only filter", + promptGuidelines: [ + "Use this before re-running an expensive analysis — the prior run's stdout file may still answer.", + "The preview shows the trailing ~300 chars of stdout; read stdout_path for the full transcript.", + ], + parameters: Type.Object({ + query: Type.Optional(Type.String({ description: "Substring matched against id and purpose (case-insensitive)." })), + runtime: Type.Optional( + Type.Union([Type.Literal("bash"), Type.Literal("node"), Type.Literal("python")], { + description: "Restrict to one runtime.", + }), + ), + failing_only: Type.Optional(Type.Boolean({ description: "Only non-zero exit codes and timeouts." })), + limit: Type.Optional(Type.Number({ description: "Max results (default 20, cap 200)", minimum: 1, maximum: 200 })), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + return executeExecSearch(params as Parameters[0], { + baseDir: process.cwd(), + }); + }, + }); + + pi.registerTool({ + name: "sf_resume", + label: "Resume (Read Snapshot)", + description: + "Return the contents of .sf/last-snapshot.md — a ≤2 KB digest of top memories, recent " + + "sf_exec runs, and active context, written automatically on session_before_compact. 
Use " + + "this after compaction or session resume to re-orient quickly.", + promptSnippet: "Read the pre-compaction snapshot to re-orient after context loss", + promptGuidelines: [ + "Call this right after a session resumes if you feel you've lost durable context.", + "The snapshot is a summary — use memory_query or sf_exec_search for detail.", + ], + parameters: Type.Object({}), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + return executeResume(params as Parameters[0], { + baseDir: process.cwd(), + }); + }, + }); +} diff --git a/src/resources/extensions/sf/bootstrap/memory-tools.ts b/src/resources/extensions/sf/bootstrap/memory-tools.ts new file mode 100644 index 000000000..7acdec821 --- /dev/null +++ b/src/resources/extensions/sf/bootstrap/memory-tools.ts @@ -0,0 +1,152 @@ +// SF — Memory tool registration +// +// Exposes the memory-layer tools (capture_thought, memory_query, sf_graph) +// to the LLM over MCP. All three degrade gracefully when the SF database +// is unavailable. + +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent"; + +import { ensureDbOpen } from "./dynamic-tools.js"; +import { + executeSfGraph, + executeMemoryCapture, + executeMemoryQuery, +} from "../tools/memory-tools.js"; + +export function registerMemoryTools(pi: ExtensionAPI): void { + // ─── capture_thought ──────────────────────────────────────────────────── + + pi.registerTool({ + name: "capture_thought", + label: "Capture Thought", + description: + "Record a durable piece of project knowledge (decision, convention, gotcha, pattern, " + + "preference, or environment detail) into the SF memory store. 
Use sparingly — one memory " + + "per genuinely reusable insight, not per task.", + promptSnippet: + "Capture a durable project insight into the SF memory store (categories: architecture, convention, gotcha, pattern, preference, environment)", + promptGuidelines: [ + "Use capture_thought for insights that will remain useful across future sessions.", + "Do NOT capture one-off bug fixes, temporary state, secrets, or task-specific details.", + "Keep content to 1–3 sentences.", + "Set confidence: 0.6 tentative, 0.8 solid, 0.95 well-confirmed (default 0.8).", + ], + parameters: Type.Object({ + category: Type.Union( + [ + Type.Literal("architecture"), + Type.Literal("convention"), + Type.Literal("gotcha"), + Type.Literal("preference"), + Type.Literal("environment"), + Type.Literal("pattern"), + ], + { description: "Memory category" }, + ), + content: Type.String({ description: "The memory text (1–3 sentences, no secrets)" }), + confidence: Type.Optional( + Type.Number({ description: "0.1–0.99, default 0.8", minimum: 0.1, maximum: 0.99 }), + ), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const ok = await ensureDbOpen(); + if (!ok) { + return { + content: [{ type: "text" as const, text: "Error: SF database is not available. Cannot capture memory." }], + details: { operation: "memory_capture", error: "db_unavailable" }, + isError: true, + }; + } + return executeMemoryCapture(params as Parameters[0]); + }, + }); + + // ─── memory_query ─────────────────────────────────────────────────────── + + pi.registerTool({ + name: "memory_query", + label: "Query Memory", + description: + "Search the SF memory store for relevant memories. 
Uses keyword matching ranked " + + "by confidence and reinforcement.", + promptSnippet: + "Search the SF memory store by keyword; returns ranked memories with id, category, and content", + promptGuidelines: [ + "Use memory_query when you need durable project context that may not be in the current prompt.", + "Provide a short keyword-style query — not a full question.", + "Use category to narrow results to gotchas, conventions, architecture notes, etc.", + ], + parameters: Type.Object({ + query: Type.String({ description: "Keyword query (2+ char terms)" }), + k: Type.Optional(Type.Number({ description: "Max results (default 10, max 50)", minimum: 1, maximum: 50 })), + category: Type.Optional( + Type.Union( + [ + Type.Literal("architecture"), + Type.Literal("convention"), + Type.Literal("gotcha"), + Type.Literal("preference"), + Type.Literal("environment"), + Type.Literal("pattern"), + ], + { description: "Restrict results to a single category" }, + ), + ), + reinforce_hits: Type.Optional( + Type.Boolean({ description: "Increment hit_count on returned memories (default false)" }), + ), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const ok = await ensureDbOpen(); + if (!ok) { + return { + content: [{ type: "text" as const, text: "Error: SF database is not available. Cannot query memory." }], + details: { operation: "memory_query", error: "db_unavailable" }, + isError: true, + }; + } + return executeMemoryQuery(params as Parameters[0]); + }, + }); + + // ─── sf_graph ────────────────────────────────────────────────────────── + + pi.registerTool({ + name: "sf_graph", + label: "SF Knowledge Graph", + description: + "Inspect the relationship graph between memories. 
mode=query walks supersedes edges from a " + + "given memoryId; mode=build is a placeholder for future graph edge rebuilds.", + promptSnippet: "Query the memory relationship graph or trigger a rebuild", + promptGuidelines: [ + "Use mode=query with a memoryId when you want to see how a memory relates to others.", + "Phase 1 only exposes supersedes edges; additional relation types arrive in later phases.", + ], + parameters: Type.Object({ + mode: Type.Union([Type.Literal("build"), Type.Literal("query")], { + description: "build = recompute graph (placeholder), query = inspect edges", + }), + memoryId: Type.Optional(Type.String({ description: "Memory ID (required when mode=query)" })), + depth: Type.Optional(Type.Number({ description: "Hops to traverse (0–5, default 1)", minimum: 0, maximum: 5 })), + rel: Type.Optional(Type.Union([ + Type.Literal("related_to"), + Type.Literal("depends_on"), + Type.Literal("contradicts"), + Type.Literal("elaborates"), + Type.Literal("supersedes"), + ], { description: "Only include edges with this relation type" })), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const ok = await ensureDbOpen(); + if (!ok) { + return { + content: [{ type: "text" as const, text: "Error: SF database is not available." 
}], + details: { operation: "sf_graph", error: "db_unavailable" }, + isError: true, + }; + } + return executeSfGraph(params as Parameters[0]); + }, + }); +} diff --git a/src/resources/extensions/sf/bootstrap/register-extension.ts b/src/resources/extensions/sf/bootstrap/register-extension.ts index 075cd96f8..79da1ae3e 100644 --- a/src/resources/extensions/sf/bootstrap/register-extension.ts +++ b/src/resources/extensions/sf/bootstrap/register-extension.ts @@ -8,7 +8,9 @@ import { registerExitCommand } from "../exit-command.js"; import { registerWorktreeCommand } from "../worktree-command.js"; import { registerDbTools } from "./db-tools.js"; import { registerDynamicTools } from "./dynamic-tools.js"; +import { registerExecTools } from "./exec-tools.js"; import { registerJournalTools } from "./journal-tools.js"; +import { registerMemoryTools } from "./memory-tools.js"; import { registerQueryTools } from "./query-tools.js"; import { registerHooks } from "./register-hooks.js"; import { registerShortcuts } from "./register-shortcuts.js"; @@ -81,6 +83,8 @@ export function registerSfExtension(pi: ExtensionAPI): void { const nonCriticalRegistrations: Array<[string, () => void]> = [ ["dynamic-tools", () => registerDynamicTools(pi)], ["db-tools", () => registerDbTools(pi)], + ["exec-tools", () => registerExecTools(pi)], + ["memory-tools", () => registerMemoryTools(pi)], ["journal-tools", () => registerJournalTools(pi)], ["query-tools", () => registerQueryTools(pi)], ["shortcuts", () => registerShortcuts(pi)], diff --git a/src/resources/extensions/sf/compaction-snapshot.ts b/src/resources/extensions/sf/compaction-snapshot.ts new file mode 100644 index 000000000..e9723267e --- /dev/null +++ b/src/resources/extensions/sf/compaction-snapshot.ts @@ -0,0 +1,157 @@ +// SF Compaction Snapshot — writes a ≤2 KB markdown digest of durable +// project state before the session context is compacted. 
On resume, an +// agent can `sf_resume` (or Read .sf/last-snapshot.md) to re-orient +// without re-deriving the same memories. + +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { resolve } from "node:path"; + +import { getActiveMemoriesRanked, type Memory } from "./memory-store.js"; +import { listExecHistory, type ExecHistoryEntry } from "./exec-history.js"; + +export const DEFAULT_SNAPSHOT_BYTES = 2048; +export const SNAPSHOT_FILENAME = "last-snapshot.md"; + +export interface SnapshotSources { + memories: Memory[]; + execHistory: ExecHistoryEntry[]; + generatedAt: Date; + /** Optional free-form context string (e.g. active unit id). */ + activeContext?: string | null; +} + +export interface BuildSnapshotOptions { + /** Hard cap in bytes (UTF-8). Default 2048. */ + maxBytes?: number; + /** Memory count cap before truncation (default 6). */ + maxMemories?: number; + /** Exec history cap (default 5). */ + maxExec?: number; +} + +/** + * Build a priority-tiered markdown snapshot. Pure — no I/O. Tiers: + * 1. Active context (if any) + * 2. Top memories by rank + * 3. Recent exec runs (failures highlighted) + */ +export function buildSnapshot(sources: SnapshotSources, opts: BuildSnapshotOptions = {}): string { + const maxBytes = opts.maxBytes ?? DEFAULT_SNAPSHOT_BYTES; + const maxMemories = opts.maxMemories ?? 6; + const maxExec = opts.maxExec ?? 
5; + + const lines: string[] = []; + lines.push(`# SF context snapshot (${sources.generatedAt.toISOString()})`); + lines.push(""); + + if (sources.activeContext && sources.activeContext.trim().length > 0) { + lines.push("## Active context"); + lines.push(sources.activeContext.trim()); + lines.push(""); + } + + const memories = sources.memories.slice(0, maxMemories); + if (memories.length > 0) { + lines.push("## Top project memories"); + for (const memory of memories) { + lines.push(`- [${memory.id}] (${memory.category}) ${memory.content.trim()}`); + } + lines.push(""); + } + + const exec = sources.execHistory.slice(0, maxExec); + if (exec.length > 0) { + lines.push("## Recent sf_exec runs"); + for (const entry of exec) { + const status = entry.timed_out + ? "timeout" + : entry.exit_code === null + ? "exit:null" + : `exit:${entry.exit_code}`; + const purpose = entry.purpose ? ` — ${entry.purpose}` : ""; + lines.push(`- [${entry.id}] ${entry.runtime} ${status}${purpose}`); + } + lines.push(""); + } + + if (memories.length === 0 && exec.length === 0 && !sources.activeContext) { + lines.push("_No durable memories, active context, or exec history to surface._"); + } + + return enforceByteCap(lines.join("\n").trimEnd(), maxBytes); +} + +function enforceByteCap(input: string, maxBytes: number): string { + if (Buffer.byteLength(input, "utf-8") <= maxBytes) return input; + const marker = "\n…[truncated]"; + const markerBytes = Buffer.byteLength(marker, "utf-8"); + const budget = Math.max(0, maxBytes - markerBytes); + const buf = Buffer.from(input, "utf-8").subarray(0, budget); + return `${buf.toString("utf-8")}${marker}`; +} + +export interface WriteSnapshotOptions extends BuildSnapshotOptions { + activeContext?: string | null; + now?: () => Date; +} + +export interface WriteSnapshotResult { + path: string; + bytes: number; + memories: number; + execRuns: number; +} + +export function writeCompactionSnapshot( + baseDir: string, + opts: WriteSnapshotOptions = {}, +): 
WriteSnapshotResult { + const memories = safeGetMemories(); + const execHistory = safeListExec(baseDir); + const content = buildSnapshot( + { + memories, + execHistory, + generatedAt: (opts.now ?? (() => new Date()))(), + activeContext: opts.activeContext ?? null, + }, + opts, + ); + const sfDir = resolve(baseDir, ".sf"); + if (!existsSync(sfDir)) mkdirSync(sfDir, { recursive: true }); + const path = resolve(sfDir, SNAPSHOT_FILENAME); + const finalContent = `${content}\n`; + writeFileSync(path, finalContent, "utf-8"); + return { + path, + bytes: Buffer.byteLength(finalContent, "utf-8"), + memories: memories.length, + execRuns: execHistory.length, + }; +} + +export function readCompactionSnapshot(baseDir: string): string | null { + const path = resolve(baseDir, ".sf", SNAPSHOT_FILENAME); + if (!existsSync(path)) return null; + try { + return readFileSync(path, "utf-8"); + } catch { + return null; + } +} + +function safeGetMemories(): Memory[] { + try { + return getActiveMemoriesRanked(12); + } catch { + return []; + } +} + +function safeListExec(baseDir: string): ExecHistoryEntry[] { + try { + return listExecHistory(baseDir); + } catch { + return []; + } +} diff --git a/src/resources/extensions/sf/exec-history.ts b/src/resources/extensions/sf/exec-history.ts new file mode 100644 index 000000000..bfc5440d9 --- /dev/null +++ b/src/resources/extensions/sf/exec-history.ts @@ -0,0 +1,149 @@ +// SF Exec History — read-side helpers for the exec sandbox. +// +// Pure I/O: scans `.sf/exec/*.meta.json` under a base directory and +// returns lightweight records. Used by the sf_exec_search tool and +// any future compaction-snapshot enrichment. 
+
+import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
+import { join, resolve } from "node:path";
+
+export interface ExecHistoryEntry {
+  id: string;
+  runtime: "bash" | "node" | "python" | string;
+  purpose: string | null;
+  started_at: string;
+  finished_at: string;
+  duration_ms: number;
+  exit_code: number | null;
+  signal: string | null;
+  timed_out: boolean;
+  stdout_bytes: number;
+  stderr_bytes: number;
+  stdout_truncated: boolean;
+  stderr_truncated: boolean;
+  stdout_path: string;
+  stderr_path: string;
+  meta_path: string;
+}
+
+export interface ExecSearchOptions {
+  /** Case-insensitive needle matched against purpose. Empty string matches all. */
+  query?: string;
+  /** Restrict to this runtime. */
+  runtime?: ExecHistoryEntry["runtime"];
+  /** Include only entries with exit_code !== 0 || timed_out. */
+  failing_only?: boolean;
+  /** Return at most N entries, most recent first. Default 20, cap 200. */
+  limit?: number;
+}
+
+export interface ExecSearchHit {
+  entry: ExecHistoryEntry;
+  /** Tail of stdout (last 300 chars) — cheap to read, useful for disambiguation. */
+  digest_preview?: string;
+}
+
+function listMetaFiles(baseDir: string): string[] {
+  const dir = resolve(baseDir, ".sf", "exec");
+  try {
+    return readdirSync(dir)
+      .filter((name) => name.endsWith(".meta.json"))
+      .map((name) => join(dir, name));
+  } catch {
+    return [];
+  }
+}
+
+function safeReadMeta(path: string): ExecHistoryEntry | null {
+  try {
+    const raw = readFileSync(path, "utf-8");
+    const parsed = JSON.parse(raw) as Partial<ExecHistoryEntry>;
+    if (typeof parsed.id !== "string" || typeof parsed.runtime !== "string") return null;
+    return {
+      id: parsed.id,
+      runtime: parsed.runtime,
+      purpose: typeof parsed.purpose === "string" ? parsed.purpose : null,
+      started_at: typeof parsed.started_at === "string" ? parsed.started_at : "",
+      finished_at: typeof parsed.finished_at === "string" ? 
parsed.finished_at : "", + duration_ms: typeof parsed.duration_ms === "number" ? parsed.duration_ms : 0, + exit_code: typeof parsed.exit_code === "number" ? parsed.exit_code : null, + signal: typeof parsed.signal === "string" ? parsed.signal : null, + timed_out: parsed.timed_out === true, + stdout_bytes: typeof parsed.stdout_bytes === "number" ? parsed.stdout_bytes : 0, + stderr_bytes: typeof parsed.stderr_bytes === "number" ? parsed.stderr_bytes : 0, + stdout_truncated: parsed.stdout_truncated === true, + stderr_truncated: parsed.stderr_truncated === true, + stdout_path: path.replace(/\.meta\.json$/, ".stdout"), + stderr_path: path.replace(/\.meta\.json$/, ".stderr"), + meta_path: path, + }; + } catch { + return null; + } +} + +export function listExecHistory(baseDir: string): ExecHistoryEntry[] { + const metas = listMetaFiles(baseDir) + .map((path) => { + let mtime = 0; + try { mtime = statSync(path).mtimeMs; } catch { /* ignore */ } + const entry = safeReadMeta(path); + return entry ? { entry, mtime } : null; + }) + .filter((value): value is { entry: ExecHistoryEntry; mtime: number } => value !== null); + metas.sort((a, b) => b.mtime - a.mtime); + return metas.map((m) => m.entry); +} + +function matchesFilters(entry: ExecHistoryEntry, opts: ExecSearchOptions): boolean { + if (opts.runtime && entry.runtime !== opts.runtime) return false; + if (opts.failing_only) { + const failed = entry.timed_out || (entry.exit_code !== 0 && entry.exit_code !== null); + if (!failed) return false; + } + const query = (opts.query ?? "").trim().toLowerCase(); + if (!query) return true; + const haystack = `${entry.id} ${entry.purpose ?? 
""}`.toLowerCase(); + return haystack.includes(query); +} + +function readDigestPreview(entry: ExecHistoryEntry, maxChars: number): string | undefined { + if (!entry.stdout_path || maxChars <= 0) return undefined; + try { + const size = statSync(entry.stdout_path).size; + if (size === 0) return undefined; + const readBytes = Math.min(size, maxChars * 4); + const buf = Buffer.allocUnsafe(readBytes); + const fd = openSync(entry.stdout_path, "r"); + try { + const bytesRead = readSync(fd, buf, 0, readBytes, Math.max(0, size - readBytes)); + const text = buf.subarray(0, bytesRead).toString("utf-8"); + const trimmed = text.trimEnd(); + return trimmed.length <= maxChars ? trimmed : trimmed.slice(trimmed.length - maxChars); + } finally { + closeSync(fd); + } + } catch { + return undefined; + } +} + +export function searchExecHistory( + baseDir: string, + opts: ExecSearchOptions = {}, +): ExecSearchHit[] { + const limit = clampLimit(opts.limit, 20, 200); + const entries = listExecHistory(baseDir); + const filtered = entries.filter((entry) => matchesFilters(entry, opts)); + return filtered.slice(0, limit).map((entry) => ({ + entry, + digest_preview: readDigestPreview(entry, 300), + })); +} + +function clampLimit(value: unknown, fallback: number, max: number): number { + if (typeof value !== "number" || !Number.isFinite(value)) return fallback; + if (value < 1) return 1; + if (value > max) return max; + return Math.floor(value); +} diff --git a/src/resources/extensions/sf/exec-sandbox.ts b/src/resources/extensions/sf/exec-sandbox.ts new file mode 100644 index 000000000..f12842f01 --- /dev/null +++ b/src/resources/extensions/sf/exec-sandbox.ts @@ -0,0 +1,310 @@ +// SF Exec Sandbox — tool-output sandboxing for sub-sessions. +// +// Runs a script in a subprocess and persists stdout/stderr to +// `.sf/exec/.{stdout,stderr,meta.json}`. Only a short digest is +// returned to the calling agent's context, keeping large outputs +// (e.g. 
Playwright snapshots, issue dumps) out of the window. + +import { spawn } from "node:child_process"; +import { existsSync, mkdirSync, writeFileSync } from "node:fs"; +import { randomUUID } from "node:crypto"; +import { resolve } from "node:path"; + +export interface ExecSandboxRequest { + /** Interpreter to use. */ + runtime: "bash" | "node" | "python"; + /** Script body. Executed via the runtime's -c equivalent. */ + script: string; + /** Optional purpose/label recorded in meta.json. */ + purpose?: string; + /** Per-invocation timeout in ms. Clamped to `clamp_timeout_ms`. */ + timeout_ms?: number; +} + +export interface ExecSandboxOptions { + /** Project root. stdout/stderr persist under `/.sf/exec/`. */ + baseDir: string; + /** Absolute upper bound for the timeout. */ + clamp_timeout_ms: number; + /** Default timeout if request omits one. */ + default_timeout_ms: number; + /** Cap on persisted stdout bytes. Further output is truncated with a marker. */ + stdout_cap_bytes: number; + /** Cap on persisted stderr bytes. */ + stderr_cap_bytes: number; + /** Number of trailing stdout chars returned as the digest. */ + digest_chars: number; + /** Env var allowlist (case-sensitive). PATH/HOME always forwarded. */ + env_allowlist: readonly string[]; + /** Optional override of process.env for tests. */ + env?: NodeJS.ProcessEnv; + /** Optional override for the current time (tests). */ + now?: () => Date; + /** Optional override for id generation (tests). 
*/ + generateId?: () => string; +} + +export interface ExecSandboxResult { + id: string; + runtime: ExecSandboxRequest["runtime"]; + exit_code: number | null; + signal: NodeJS.Signals | null; + timed_out: boolean; + duration_ms: number; + stdout_bytes: number; + stderr_bytes: number; + stdout_truncated: boolean; + stderr_truncated: boolean; + stdout_path: string; + stderr_path: string; + meta_path: string; + digest: string; +} + +const ALWAYS_FORWARD_ENV = ["PATH", "HOME"] as const; + +export const EXEC_DEFAULTS = { + clampTimeoutMs: 600_000, + defaultTimeoutMs: 30_000, + stdoutCapBytes: 1_048_576, + stderrCapBytes: 262_144, + digestChars: 300, + envAllowlist: [ + "LANG", + "LC_ALL", + "TERM", + "TZ", + "SHELL", + "USER", + "LOGNAME", + "TMPDIR", + "NODE_OPTIONS", + "PYTHONPATH", + "PYTHONIOENCODING", + ] as const, +} as const; + +function buildChildEnv(opts: ExecSandboxOptions): NodeJS.ProcessEnv { + const source = opts.env ?? process.env; + const out: NodeJS.ProcessEnv = {}; + const allowed = new Set([...ALWAYS_FORWARD_ENV, ...opts.env_allowlist]); + for (const key of allowed) { + const value = source[key]; + if (typeof value === "string") out[key] = value; + } + return out; +} + +function clampTimeout(request: ExecSandboxRequest, opts: ExecSandboxOptions): number { + const requested = typeof request.timeout_ms === "number" && Number.isFinite(request.timeout_ms) + ? 
Math.floor(request.timeout_ms) + : opts.default_timeout_ms; + if (requested < 1) return 1; + if (requested > opts.clamp_timeout_ms) return opts.clamp_timeout_ms; + return requested; +} + +function resolveCommand(runtime: ExecSandboxRequest["runtime"]): { cmd: string; args: string[] } { + switch (runtime) { + case "bash": + return { cmd: "bash", args: ["-c"] }; + case "node": + return { cmd: process.execPath, args: ["-e"] }; + case "python": + return { cmd: "python3", args: ["-c"] }; + } +} + +function tail(buf: Buffer, chars: number): string { + if (chars <= 0) return ""; + const text = buf.toString("utf-8"); + return text.length <= chars ? text : text.slice(text.length - chars); +} + +/** + * Run a script in a subprocess, capture stdout/stderr to files under + * `.sf/exec/.{stdout,stderr,meta.json}`, and return an `ExecSandboxResult`. + * + * Errors from spawn failures resolve (not reject) with `exit_code=null`. + */ +export function runExecSandbox( + request: ExecSandboxRequest, + opts: ExecSandboxOptions, +): Promise { + return new Promise((resolveP) => { + const id = (opts.generateId ?? defaultGenerateId)(); + const now = (opts.now ?? (() => new Date()))(); + const execDir = resolve(opts.baseDir, ".sf", "exec"); + if (!existsSync(execDir)) mkdirSync(execDir, { recursive: true }); + const stdoutPath = resolve(execDir, `${id}.stdout`); + const stderrPath = resolve(execDir, `${id}.stderr`); + const metaPath = resolve(execDir, `${id}.meta.json`); + + const timeoutMs = clampTimeout(request, opts); + const { cmd, args } = resolveCommand(request.runtime); + const env = buildChildEnv(opts); + const useProcessGroup = process.platform !== "win32"; + + const started = Date.now(); + let child; + try { + child = spawn(cmd, [...args, request.script], { + cwd: opts.baseDir, + env, + stdio: ["ignore", "pipe", "pipe"], + ...(useProcessGroup ? { detached: true } : {}), + }); + } catch (err) { + const duration = Date.now() - started; + const message = err instanceof Error ? 
err.message : String(err); + writeFileSync(stdoutPath, ""); + writeFileSync(stderrPath, `spawn error: ${message}\n`); + const result: ExecSandboxResult = { + id, + runtime: request.runtime, + exit_code: null, + signal: null, + timed_out: false, + duration_ms: duration, + stdout_bytes: 0, + stderr_bytes: Buffer.byteLength(`spawn error: ${message}\n`), + stdout_truncated: false, + stderr_truncated: false, + stdout_path: stdoutPath, + stderr_path: stderrPath, + meta_path: metaPath, + digest: `[spawn error: ${message}]`, + }; + writeMeta(metaPath, result, request, now); + resolveP(result); + return; + } + + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + let stdoutBytes = 0; + let stderrBytes = 0; + let stdoutTruncated = false; + let stderrTruncated = false; + + child.stdout?.on("data", (chunk: Buffer) => { + const remaining = opts.stdout_cap_bytes - stdoutBytes; + if (remaining <= 0) { stdoutTruncated = true; return; } + if (chunk.length <= remaining) { + stdoutChunks.push(chunk); + stdoutBytes += chunk.length; + } else { + stdoutChunks.push(chunk.subarray(0, remaining)); + stdoutBytes += remaining; + stdoutTruncated = true; + } + }); + child.stderr?.on("data", (chunk: Buffer) => { + const remaining = opts.stderr_cap_bytes - stderrBytes; + if (remaining <= 0) { stderrTruncated = true; return; } + if (chunk.length <= remaining) { + stderrChunks.push(chunk); + stderrBytes += chunk.length; + } else { + stderrChunks.push(chunk.subarray(0, remaining)); + stderrBytes += remaining; + stderrTruncated = true; + } + }); + + let timedOut = false; + const timer = setTimeout(() => { + timedOut = true; + if (useProcessGroup && child.pid != null) { + try { process.kill(-child.pid, "SIGKILL"); } catch { child.kill("SIGKILL"); } + } else { + child.kill("SIGKILL"); + } + }, timeoutMs); + timer.unref?.(); + + const finalize = (exitCode: number | null, signal: NodeJS.Signals | null) => { + clearTimeout(timer); + const duration = Date.now() - started; + const 
stdoutBuf = Buffer.concat(stdoutChunks); + const stderrBuf = Buffer.concat(stderrChunks); + const stdoutSuffix = stdoutTruncated ? "\n[truncated: stdout cap reached]\n" : ""; + const stderrSuffix = stderrTruncated ? "\n[truncated: stderr cap reached]\n" : ""; + writeFileSync(stdoutPath, Buffer.concat([stdoutBuf, Buffer.from(stdoutSuffix, "utf-8")])); + writeFileSync(stderrPath, Buffer.concat([stderrBuf, Buffer.from(stderrSuffix, "utf-8")])); + + const digestBody = tail(stdoutBuf, opts.digest_chars); + const digest = + digestBody.length > 0 + ? digestBody + : timedOut + ? "[no stdout — timed out]" + : stderrBuf.length > 0 + ? `[no stdout — tail of stderr]\n${tail(stderrBuf, opts.digest_chars)}` + : "[no output]"; + + const result: ExecSandboxResult = { + id, + runtime: request.runtime, + exit_code: exitCode, + signal, + timed_out: timedOut, + duration_ms: duration, + stdout_bytes: stdoutBytes, + stderr_bytes: stderrBytes, + stdout_truncated: stdoutTruncated, + stderr_truncated: stderrTruncated, + stdout_path: stdoutPath, + stderr_path: stderrPath, + meta_path: metaPath, + digest, + }; + writeMeta(metaPath, result, request, now); + resolveP(result); + }; + + child.on("error", (err) => { + const message = err instanceof Error ? err.message : String(err); + const line = `child error: ${message}\n`; + const remaining = opts.stderr_cap_bytes - stderrBytes; + if (remaining > 0) { + const chunk = Buffer.from(line, "utf-8").subarray(0, remaining); + stderrChunks.push(chunk); + stderrBytes += chunk.length; + if (chunk.length < Buffer.byteLength(line, "utf-8")) stderrTruncated = true; + } + }); + child.on("close", (code, signal) => finalize(code, signal)); + }); +} + +function defaultGenerateId(): string { + return randomUUID(); +} + +function writeMeta( + path: string, + result: ExecSandboxResult, + request: ExecSandboxRequest, + now: Date, +): void { + const meta = { + id: result.id, + runtime: result.runtime, + purpose: request.purpose ?? 
null, + script_chars: request.script.length, + started_at: now.toISOString(), + finished_at: new Date(now.getTime() + result.duration_ms).toISOString(), + exit_code: result.exit_code, + signal: result.signal, + timed_out: result.timed_out, + duration_ms: result.duration_ms, + stdout_bytes: result.stdout_bytes, + stderr_bytes: result.stderr_bytes, + stdout_truncated: result.stdout_truncated, + stderr_truncated: result.stderr_truncated, + stdout_path: result.stdout_path, + stderr_path: result.stderr_path, + }; + writeFileSync(path, `${JSON.stringify(meta, null, 2)}\n`); +} diff --git a/src/resources/extensions/sf/journal.ts b/src/resources/extensions/sf/journal.ts index 35b8f0c16..9e24e7b8c 100644 --- a/src/resources/extensions/sf/journal.ts +++ b/src/resources/extensions/sf/journal.ts @@ -15,6 +15,7 @@ import { appendFileSync, mkdirSync, readdirSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { sfRoot } from "./paths.js"; +import { isStaleWrite } from "./auto/turn-epoch.js"; import { buildAuditEnvelope, emitUokAuditEvent } from "./uok/audit.js"; import { isAuditEnvelopeEnabled } from "./uok/audit-toggle.js"; @@ -93,6 +94,9 @@ export interface JournalQueryFilters { * Never throws — all errors are silently caught. */ export function emitJournalEvent(basePath: string, entry: JournalEntry): void { + // Drop writes from a turn superseded by timeout recovery / cancellation. + // See auto/turn-epoch.ts for the full rationale. + if (isStaleWrite("journal")) return; try { const journalDir = join(sfRoot(basePath), "journal"); mkdirSync(journalDir, { recursive: true }); diff --git a/src/resources/extensions/sf/memory-relations.ts b/src/resources/extensions/sf/memory-relations.ts new file mode 100644 index 000000000..964d65bac --- /dev/null +++ b/src/resources/extensions/sf/memory-relations.ts @@ -0,0 +1,235 @@ +// SF Memory Relations — knowledge-graph edges between memories +// +// Phase 4 companion to memory-store.ts. 
Edges live in the `memory_relations` +// table and are created by (a) explicit LINK actions emitted by the memory +// extractor, or (b) future `/sf memory link` CLI commands. All writes go +// through the single-writer gate in `sf-db.ts`. + +import { + _getAdapter, + isDbAvailable, +} from "./sf-db.js"; + +// ─── Types ────────────────────────────────────────────────────────────────── + +export type RelationType = + | "related_to" + | "depends_on" + | "contradicts" + | "elaborates" + | "supersedes"; + +export const VALID_RELATIONS: readonly RelationType[] = [ + "related_to", + "depends_on", + "contradicts", + "elaborates", + "supersedes", +]; + +export interface MemoryRelation { + from: string; + to: string; + rel: RelationType; + confidence: number; + createdAt: string; +} + +export interface MemoryGraphNode { + id: string; + category: string; + content: string; + confidence: number; +} + +export interface MemoryGraph { + nodes: MemoryGraphNode[]; + edges: MemoryRelation[]; +} + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +export function isValidRelation(value: unknown): value is RelationType { + return typeof value === "string" && (VALID_RELATIONS as readonly string[]).includes(value); +} + +function clampConfidence(value: unknown): number { + if (typeof value !== "number" || !Number.isFinite(value)) return 0.8; + if (value < 0.1) return 0.1; + if (value > 0.99) return 0.99; + return value; +} + +// ─── Mutations ────────────────────────────────────────────────────────────── + +export function createMemoryRelation( + from: string, + to: string, + rel: RelationType, + confidence?: number, +): boolean { + if (!isDbAvailable()) return false; + if (!from || !to || from === to || !isValidRelation(rel)) return false; + + const adapter = _getAdapter(); + if (!adapter) return false; + + try { + const fromRow = adapter.prepare("SELECT 1 FROM memories WHERE id = :id").get({ ":id": from }); + const toRow = adapter.prepare("SELECT 1 
FROM memories WHERE id = :id").get({ ":id": to }); + if (!fromRow || !toRow) return false; + + adapter.prepare( + "INSERT OR REPLACE INTO memory_relations (from_id, to_id, rel, confidence, created_at) VALUES (:from_id, :to_id, :rel, :confidence, :created_at)", + ).run({ + ":from_id": from, + ":to_id": to, + ":rel": rel, + ":confidence": clampConfidence(confidence), + ":created_at": new Date().toISOString(), + }); + return true; + } catch { + return false; + } +} + +export function removeMemoryRelationsFor(memoryId: string): void { + if (!isDbAvailable() || !memoryId) return; + const adapter = _getAdapter(); + if (!adapter) return; + try { + adapter.prepare("DELETE FROM memory_relations WHERE from_id = :id OR to_id = :id").run({ ":id": memoryId }); + } catch { + // non-fatal + } +} + +// ─── Queries ──────────────────────────────────────────────────────────────── + +export function listRelationsFor(memoryId: string): MemoryRelation[] { + if (!isDbAvailable()) return []; + const adapter = _getAdapter(); + if (!adapter) return []; + try { + const rows = adapter + .prepare( + "SELECT from_id, to_id, rel, confidence, created_at FROM memory_relations WHERE from_id = :id OR to_id = :id", + ) + .all({ ":id": memoryId }); + return rows.map(rowToRelation); + } catch { + return []; + } +} + +export function traverseGraph(startId: string, depth: number): MemoryGraph { + const emptyResult: MemoryGraph = { nodes: [], edges: [] }; + if (!isDbAvailable() || !startId) return emptyResult; + const adapter = _getAdapter(); + if (!adapter) return emptyResult; + + const hop = Math.max(0, Math.min(5, Math.floor(depth || 0))); + + try { + const visited = new Set(); + const queue: Array<{ id: string; hop: number }> = [{ id: startId, hop: 0 }]; + const nodes = new Map(); + const edges: MemoryRelation[] = []; + + while (queue.length > 0) { + const { id, hop: level } = queue.shift()!; + if (visited.has(id)) continue; + visited.add(id); + + const nodeRow = adapter + .prepare( + "SELECT id, 
category, content, confidence, superseded_by FROM memories WHERE id = :id", + ) + .get({ ":id": id }); + if (!nodeRow) continue; + + nodes.set(id, { + id: nodeRow["id"] as string, + category: nodeRow["category"] as string, + content: nodeRow["content"] as string, + confidence: nodeRow["confidence"] as number, + }); + + // Include supersedes edges from the base table so old graphs remain + // connected even before the extractor starts emitting LINK actions. + const successor = nodeRow["superseded_by"] as string | null; + if (successor && successor !== "CAP_EXCEEDED") { + edges.push({ from: id, to: successor, rel: "supersedes", confidence: 1, createdAt: "" }); + if (!visited.has(successor) && level < hop) { + queue.push({ id: successor, hop: level + 1 }); + } + } + const predecessors = adapter + .prepare("SELECT id FROM memories WHERE superseded_by = :id") + .all({ ":id": id }); + for (const pred of predecessors) { + const predId = pred["id"] as string; + edges.push({ from: predId, to: id, rel: "supersedes", confidence: 1, createdAt: "" }); + if (!visited.has(predId) && level < hop) { + queue.push({ id: predId, hop: level + 1 }); + } + } + + if (level >= hop) continue; + + const outgoing = adapter + .prepare( + "SELECT from_id, to_id, rel, confidence, created_at FROM memory_relations WHERE from_id = :id", + ) + .all({ ":id": id }); + for (const row of outgoing) { + const edge = rowToRelation(row); + edges.push(edge); + if (!visited.has(edge.to)) queue.push({ id: edge.to, hop: level + 1 }); + } + + const incoming = adapter + .prepare( + "SELECT from_id, to_id, rel, confidence, created_at FROM memory_relations WHERE to_id = :id", + ) + .all({ ":id": id }); + for (const row of incoming) { + const edge = rowToRelation(row); + edges.push(edge); + if (!visited.has(edge.from)) queue.push({ id: edge.from, hop: level + 1 }); + } + } + + return { + nodes: [...nodes.values()], + edges: dedupeEdges(edges), + }; + } catch { + return emptyResult; + } +} + +function 
rowToRelation(row: Record): MemoryRelation { + const relRaw = row["rel"] as string; + const rel = isValidRelation(relRaw) ? relRaw : ("related_to" as RelationType); + return { + from: row["from_id"] as string, + to: row["to_id"] as string, + rel, + confidence: (row["confidence"] as number) ?? 0.8, + createdAt: (row["created_at"] as string) ?? "", + }; +} + +function dedupeEdges(edges: MemoryRelation[]): MemoryRelation[] { + const seen = new Set(); + const out: MemoryRelation[] = []; + for (const e of edges) { + const key = `${e.from}|${e.to}|${e.rel}`; + if (seen.has(key)) continue; + seen.add(key); + out.push(e); + } + return out; +} diff --git a/src/resources/extensions/sf/onboarding-state.ts b/src/resources/extensions/sf/onboarding-state.ts new file mode 100644 index 000000000..8268f0283 --- /dev/null +++ b/src/resources/extensions/sf/onboarding-state.ts @@ -0,0 +1,139 @@ +// SF — Onboarding completion record (~/.sf/agent/onboarding.json) +// +// First-class state for the onboarding wizard so re-entry, resume, and the +// web boot probe all read the same source of truth. Replaces the implicit +// "settings.defaultProvider exists" heuristic. + +import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs" +import { homedir } from "node:os" +import { dirname, join } from "node:path" +import { logWarning } from "./workflow-logger.js" + +/** + * Bump `FLOW_VERSION` whenever a new required step is added to ONBOARDING_STEPS. + * Records with an older flowVersion are treated as "needs partial re-onboarding" + * by isOnboardingComplete(). + */ +export const FLOW_VERSION = 1 + +const RECORD_VERSION = 1 +// Inline agentDir computation — keep this module rootDir-clean for the +// resources tsconfig; importing from src/ pulls files outside src/resources +// and breaks the build. 
+const AGENT_DIR = + process.env.SF_CODING_AGENT_DIR || + join(process.env.SF_HOME || join(homedir(), ".sf"), "agent") +const FILE = join(AGENT_DIR, "onboarding.json") + +export interface OnboardingRecord { + version: number + flowVersion: number + completedAt: string | null + completedSteps: string[] + skippedSteps: string[] + lastResumePoint: string | null +} + +const DEFAULT: OnboardingRecord = { + version: RECORD_VERSION, + flowVersion: FLOW_VERSION, + completedAt: null, + completedSteps: [], + skippedSteps: [], + lastResumePoint: null, +} + +export function readOnboardingRecord(): OnboardingRecord { + if (!existsSync(FILE)) return { ...DEFAULT } + try { + const raw = JSON.parse(readFileSync(FILE, "utf-8")) as Partial + return { + version: typeof raw.version === "number" ? raw.version : RECORD_VERSION, + flowVersion: typeof raw.flowVersion === "number" ? raw.flowVersion : 0, + completedAt: typeof raw.completedAt === "string" ? raw.completedAt : null, + completedSteps: Array.isArray(raw.completedSteps) ? raw.completedSteps.filter(s => typeof s === "string") : [], + skippedSteps: Array.isArray(raw.skippedSteps) ? raw.skippedSteps.filter(s => typeof s === "string") : [], + lastResumePoint: typeof raw.lastResumePoint === "string" ? raw.lastResumePoint : null, + } + } catch { + return { ...DEFAULT } + } +} + +function atomicWrite(record: OnboardingRecord): void { + mkdirSync(dirname(FILE), { recursive: true }) + const tmp = `${FILE}.tmp.${process.pid}.${Date.now()}` + try { + writeFileSync(tmp, JSON.stringify(record, null, 2), "utf-8") + renameSync(tmp, FILE) + } catch (err) { + try { if (existsSync(tmp)) unlinkSync(tmp) } catch { /* swallow secondary error */ } + throw err + } +} + +export function writeOnboardingRecord(patch: Partial): OnboardingRecord { + const current = readOnboardingRecord() + const next: OnboardingRecord = { + ...current, + ...patch, + version: RECORD_VERSION, + flowVersion: typeof patch.flowVersion === "number" ? 
patch.flowVersion : current.flowVersion, + } + try { + atomicWrite(next) + } catch (err) { + logWarning("state", `Failed to persist onboarding record: ${err instanceof Error ? err.message : String(err)}`, { + file: FILE, + }) + } + return next +} + +/** + * Onboarding is "complete" when there's a completedAt timestamp AND the + * flowVersion matches the current FLOW_VERSION. + */ +export function isOnboardingComplete(): boolean { + const r = readOnboardingRecord() + return r.completedAt !== null && r.flowVersion === FLOW_VERSION +} + +export function markStepCompleted(stepId: string): void { + const r = readOnboardingRecord() + if (r.completedSteps.includes(stepId)) { + writeOnboardingRecord({ lastResumePoint: stepId }) + return + } + writeOnboardingRecord({ + completedSteps: [...r.completedSteps, stepId], + skippedSteps: r.skippedSteps.filter(s => s !== stepId), + lastResumePoint: stepId, + }) +} + +export function markStepSkipped(stepId: string): void { + const r = readOnboardingRecord() + if (r.skippedSteps.includes(stepId) || r.completedSteps.includes(stepId)) return + writeOnboardingRecord({ + skippedSteps: [...r.skippedSteps, stepId], + lastResumePoint: stepId, + }) +} + +export function markOnboardingComplete(completedSteps: string[]): void { + writeOnboardingRecord({ + completedAt: new Date().toISOString(), + flowVersion: FLOW_VERSION, + completedSteps, + }) +} + +export function resetOnboarding(): void { + writeOnboardingRecord({ + completedAt: null, + completedSteps: [], + skippedSteps: [], + lastResumePoint: null, + }) +} diff --git a/src/resources/extensions/sf/preferences-types.ts b/src/resources/extensions/sf/preferences-types.ts index 6c3e44220..59d43023e 100644 --- a/src/resources/extensions/sf/preferences-types.ts +++ b/src/resources/extensions/sf/preferences-types.ts @@ -28,6 +28,28 @@ export interface ContextManagementConfig { compaction_threshold_percent?: number; // default: 0.70, range: 0.5-0.95 tool_result_max_chars?: number; // default: 800, 
range: 200-10000 } + +export interface ContextModeConfig { + /** Master switch. Default: true (opt-out via `enabled: false`). */ + enabled?: boolean; + /** Per-invocation timeout in milliseconds. Default: 30_000. Range: 1_000–600_000. */ + exec_timeout_ms?: number; + /** Cap on persisted stdout bytes per invocation. Default: 1_048_576 (1 MiB). Range: 4_096–16_777_216. */ + exec_stdout_cap_bytes?: number; + /** Number of trailing stdout characters returned in the digest. Default: 300. Range: 0–4_000. */ + exec_digest_chars?: number; + /** Environment variables forwarded to sandboxed processes. PATH and HOME are always forwarded. */ + exec_env_allowlist?: string[]; +} + +/** + * Resolve whether context-mode features (sf_exec sandbox + compaction snapshot) + * should be active. Default is ON: missing config or missing `enabled` is true. + */ +export function isContextModeEnabled(prefs: { context_mode?: ContextModeConfig } | null | undefined): boolean { + return prefs?.context_mode?.enabled !== false; +} + import type { GitHubSyncConfig } from "../github-sync/types.js"; // ─── Workflow Modes ────────────────────────────────────────────────────────── @@ -331,6 +353,7 @@ export interface SFPreferences { /** Per-model capability overrides. Deep-merged with built-in profiles for capability-aware routing (ADR-004). */ modelOverrides?: Record }>; context_management?: ContextManagementConfig; + context_mode?: ContextModeConfig; token_profile?: TokenProfile; phases?: PhaseSkipPreferences; auto_visualize?: boolean; diff --git a/src/resources/extensions/sf/tools/exec-search-tool.ts b/src/resources/extensions/sf/tools/exec-search-tool.ts new file mode 100644 index 000000000..daf5be70b --- /dev/null +++ b/src/resources/extensions/sf/tools/exec-search-tool.ts @@ -0,0 +1,81 @@ +// SF Exec Search Tool — lists and filters prior sf_exec runs. +// +// Scans .sf/exec/*.meta.json and returns a ranked summary so agents can +// re-discover past runs without re-executing. 
// Read-only; no DB writes.

import { searchExecHistory, type ExecSearchOptions } from "../exec-history.js";

/** Filter parameters accepted by the sf_exec_search tool. */
export interface ExecSearchToolParams {
  query?: string;
  runtime?: "bash" | "node" | "python";
  failing_only?: boolean;
  limit?: number;
}

/** Standard MCP-style tool result: display text plus structured details. */
export interface ToolExecutionResult {
  content: Array<{ type: "text"; text: string }>;
  details: Record;
  isError?: boolean;
}

/**
 * List prior sf_exec runs that match the given filters.
 *
 * Sanitizes loosely-typed tool params (wrong types fall back to
 * undefined/false), delegates the scan to searchExecHistory, then renders one
 * human-readable summary line per hit plus a machine-readable
 * `details.results` array. Pure read — performs no writes.
 */
export function executeExecSearch(
  params: ExecSearchToolParams,
  opts: { baseDir: string },
): ToolExecutionResult {
  // Coerce tool params into a well-typed search request.
  const searchOpts: ExecSearchOptions = {
    query: typeof params.query === "string" ? params.query : undefined,
    runtime: params.runtime,
    failing_only: params.failing_only === true,
    limit: typeof params.limit === "number" ? params.limit : undefined,
  };
  const hits = searchExecHistory(opts.baseDir, searchOpts);

  if (hits.length === 0) {
    return {
      content: [{ type: "text", text: "No prior sf_exec runs match those filters." }],
      details: { operation: "sf_exec_search", matches: 0 },
    };
  }

  const lines: string[] = [`Found ${hits.length} exec run(s), most recent first:`];
  for (const hit of hits) {
    const e = hit.entry;
    const status = formatStatus(e);
    const purpose = e.purpose ? ` — ${e.purpose}` : "";
    const truncated = e.stdout_truncated ? " (stdout truncated)" : "";
    lines.push(
      `- [${e.id}] ${e.runtime} ${status} ${e.duration_ms}ms${truncated}${purpose}`,
      ` stdout: ${e.stdout_path}`,
    );
    if (hit.digest_preview) {
      // Re-indent continuation lines so multi-line previews stay nested.
      const preview = hit.digest_preview.replace(/\n/g, "\n ");
      lines.push(` preview:\n ${preview}`);
    }
  }

  return {
    content: [{ type: "text", text: lines.join("\n") }],
    details: {
      operation: "sf_exec_search",
      matches: hits.length,
      results: hits.map((hit) => ({
        id: hit.entry.id,
        runtime: hit.entry.runtime,
        exit_code: hit.entry.exit_code,
        timed_out: hit.entry.timed_out,
        duration_ms: hit.entry.duration_ms,
        purpose: hit.entry.purpose,
        stdout_path: hit.entry.stdout_path,
        stderr_path: hit.entry.stderr_path,
        meta_path: hit.entry.meta_path,
      })),
    },
  };
}

/**
 * Render a one-token status for an exec entry. Precedence: timeout beats
 * signal beats exit code; a null exit code renders "exit:null".
 */
function formatStatus(entry: { exit_code: number | null; timed_out: boolean; signal: string | null }): string {
  if (entry.timed_out) return "timeout";
  if (entry.signal) return `signal:${entry.signal}`;
  if (entry.exit_code === null) return "exit:null";
  return `exit:${entry.exit_code}`;
}
diff --git a/src/resources/extensions/sf/tools/exec-tool.ts b/src/resources/extensions/sf/tools/exec-tool.ts
new file mode 100644
index 000000000..ca73ba7ec
--- /dev/null
+++ b/src/resources/extensions/sf/tools/exec-tool.ts
@@ -0,0 +1,179 @@
// SF Exec Tool — executor for the sf_exec MCP tool.
//
// Thin wrapper around exec-sandbox.ts that reads effective options from
// the project preferences (context_mode block) and formats the result
// for MCP return.
import {
  EXEC_DEFAULTS,
  runExecSandbox,
  type ExecSandboxOptions,
  type ExecSandboxRequest,
  type ExecSandboxResult,
} from "../exec-sandbox.js";
import { isContextModeEnabled, type ContextModeConfig } from "../preferences-types.js";

/** Parameters accepted by the sf_exec tool. */
export interface ExecToolParams {
  runtime: ExecSandboxRequest["runtime"];
  script: string;
  purpose?: string;
  timeout_ms?: number;
}

export interface ToolExecutionResult {
  content: Array<{ type: "text"; text: string }>;
  details: Record;
  isError?: boolean;
}

/** Injected dependencies; `run`/`now`/`generateId` exist as test seams. */
export interface ExecToolDeps {
  baseDir: string;
  preferences: { context_mode?: ContextModeConfig } | null;
  /** Optional override for testing. */
  run?: (req: ExecSandboxRequest, opts: ExecSandboxOptions) => Promise;
  now?: () => Date;
  generateId?: () => string;
}

/**
 * Build effective sandbox options: user config (context_mode) clamped into
 * safe ranges, falling back to EXEC_DEFAULTS for anything missing or invalid.
 * Clamp bounds mirror the documented ranges on ContextModeConfig.
 */
export function buildExecOptions(
  baseDir: string,
  cfg: ContextModeConfig | undefined,
  extras?: Pick,
): ExecSandboxOptions {
  const allowlist = Array.isArray(cfg?.exec_env_allowlist) ? cfg!.exec_env_allowlist! : EXEC_DEFAULTS.envAllowlist;
  const stdoutCap = clampNumber(
    cfg?.exec_stdout_cap_bytes,
    EXEC_DEFAULTS.stdoutCapBytes,
    4_096,
    16_777_216,
  );
  const defaultTimeout = clampNumber(
    cfg?.exec_timeout_ms,
    EXEC_DEFAULTS.defaultTimeoutMs,
    1_000,
    EXEC_DEFAULTS.clampTimeoutMs,
  );
  const digestChars = clampNumber(cfg?.exec_digest_chars, EXEC_DEFAULTS.digestChars, 0, 4_000);
  return {
    baseDir,
    clamp_timeout_ms: EXEC_DEFAULTS.clampTimeoutMs,
    default_timeout_ms: defaultTimeout,
    stdout_cap_bytes: stdoutCap,
    stderr_cap_bytes: EXEC_DEFAULTS.stderrCapBytes,
    digest_chars: digestChars,
    env_allowlist: allowlist,
    ...extras,
  };
}

/**
 * Clamp a candidate number into [min, max]; non-number or non-finite input
 * yields the fallback. In-range values are floored to an integer.
 */
function clampNumber(value: unknown, fallback: number, min: number, max: number): number {
  if (typeof value !== "number" || !Number.isFinite(value)) return fallback;
  if (value < min) return min;
  if (value > max) return max;
  return Math.floor(value);
}

/** Error result returned when `context_mode.enabled: false` disables the tool. */
function disabledResult(): ToolExecutionResult {
  return {
    content: [
      {
        type: "text",
        text:
          "sf_exec is disabled by `context_mode.enabled: false` in preferences. Remove that " +
          "override (or set it to true) to re-enable sandboxed tool-output execution.",
      },
    ],
    details: { operation: "sf_exec", error: "context_mode_disabled" },
    isError: true,
  };
}

/** Uniform invalid-parameter error result. */
function paramError(message: string): ToolExecutionResult {
  return {
    content: [{ type: "text", text: `Error: ${message}` }],
    details: { operation: "sf_exec", error: "invalid_params", detail: message },
    isError: true,
  };
}

/**
 * Execute the sf_exec tool: check the context-mode master switch, validate
 * params, resolve sandbox options from preferences, run the sandbox, and
 * format the result (or error) for MCP return.
 */
export async function executeSfExec(
  params: ExecToolParams,
  deps: ExecToolDeps,
): Promise {
  if (!isContextModeEnabled(deps.preferences)) return disabledResult();

  const runtime = params.runtime;
  if (runtime !== "bash" && runtime !== "node" && runtime !== "python") {
    return paramError(`invalid runtime "${String(runtime)}" — must be bash | node | python`);
  }
  const script = typeof params.script === "string" ?
params.script : "";
  if (script.trim().length === 0) {
    return paramError("script is required and must be a non-empty string");
  }
  // Byte-based cap (UTF-8), so multibyte scripts are bounded on disk too.
  if (Buffer.byteLength(script, "utf8") > 200_000) {
    return paramError("script exceeds the 200 KB length limit");
  }

  const opts = buildExecOptions(
    deps.baseDir,
    deps.preferences?.context_mode,
    { now: deps.now, generateId: deps.generateId },
  );
  // Test seam: deps.run lets tests stub out the real sandbox runner.
  const run = deps.run ?? runExecSandbox;

  try {
    const result = await run(
      {
        runtime,
        script,
        // Spread-only-when-present keeps optional fields absent, not undefined.
        ...(typeof params.purpose === "string" ? { purpose: params.purpose } : {}),
        ...(typeof params.timeout_ms === "number" ? { timeout_ms: params.timeout_ms } : {}),
      },
      opts,
    );
    return formatResult(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return {
      content: [{ type: "text", text: `Error: sf_exec failed — ${message}` }],
      details: { operation: "sf_exec", error: message },
      isError: true,
    };
  }
}

/**
 * Format a sandbox result for MCP: human-readable header + digest in
 * `content`, full structured metadata in `details`. isError reflects
 * timeout, fatal signal, or non-zero exit.
 */
function formatResult(result: ExecSandboxResult): ToolExecutionResult {
  const headerLines = [
    `sf_exec[${result.id}] runtime=${result.runtime} exit=${formatExit(result)} duration=${result.duration_ms}ms`,
    ` stdout: ${result.stdout_bytes}B${result.stdout_truncated ? " (truncated)" : ""} → ${result.stdout_path}`,
    ` stderr: ${result.stderr_bytes}B${result.stderr_truncated ? " (truncated)" : ""} → ${result.stderr_path}`,
  ];
  const summary = `${headerLines.join("\n")}\n--- digest ---\n${result.digest}`.trimEnd();
  return {
    content: [{ type: "text", text: summary }],
    details: {
      operation: "sf_exec",
      id: result.id,
      runtime: result.runtime,
      exit_code: result.exit_code,
      signal: result.signal,
      timed_out: result.timed_out,
      duration_ms: result.duration_ms,
      stdout_bytes: result.stdout_bytes,
      stderr_bytes: result.stderr_bytes,
      stdout_truncated: result.stdout_truncated,
      stderr_truncated: result.stderr_truncated,
      stdout_path: result.stdout_path,
      stderr_path: result.stderr_path,
      meta_path: result.meta_path,
    },
    isError: result.timed_out || result.signal !== null || result.exit_code !== 0,
  };
}

/** One-token exit descriptor: timeout | signal:<sig> | null | <code>. */
function formatExit(result: ExecSandboxResult): string {
  if (result.timed_out) return "timeout";
  if (result.signal) return `signal:${result.signal}`;
  if (result.exit_code === null) return "null";
  return String(result.exit_code);
}
diff --git a/src/resources/extensions/sf/tools/memory-tools.ts b/src/resources/extensions/sf/tools/memory-tools.ts
new file mode 100644
index 000000000..41d67a7ac
--- /dev/null
+++ b/src/resources/extensions/sf/tools/memory-tools.ts
@@ -0,0 +1,278 @@
// SF Memory Tools — Phase 1 executors for capture_thought, memory_query, sf_graph
//
// These executors back the three memory-layer tools the LLM can call at any
// point in a session. They build on the existing `memory-store.ts` layer
// (SQLite memories table) and degrade gracefully when the DB is unavailable.
import { _getAdapter, isDbAvailable } from "../sf-db.js";
import {
  createMemory,
  getActiveMemoriesRanked,
  reinforceMemory,
} from "../memory-store.js";
import type { Memory } from "../memory-store.js";
import { traverseGraph } from "../memory-relations.js";

// ─── Shared result shape ─────────────────────────────────────────────────────

export interface ToolExecutionResult {
  content: Array<{ type: "text"; text: string }>;
  details: Record;
  isError?: boolean;
}

/** Uniform error result when the SQLite-backed memory store is unreachable. */
function dbUnavailable(operation: string): ToolExecutionResult {
  return {
    content: [
      {
        type: "text",
        text: "Error: SF database is not available. Memory tools require an initialized .sf/ project.",
      },
    ],
    details: { operation, error: "db_unavailable" },
    isError: true,
  };
}

// ─── capture_thought ────────────────────────────────────────────────────────

export interface MemoryCaptureParams {
  category: string;
  content: string;
  confidence?: number;
}

// Closed category vocabulary — keeps downstream filtering/ranking predictable.
const VALID_CATEGORIES = new Set([
  "architecture",
  "convention",
  "gotcha",
  "preference",
  "environment",
  "pattern",
]);

/**
 * Persist a single memory row. Validates category against the closed set,
 * clamps confidence into [0.1, 0.99] (default 0.8), and reports the new id.
 */
export function executeMemoryCapture(params: MemoryCaptureParams): ToolExecutionResult {
  if (!isDbAvailable()) return dbUnavailable("memory_capture");

  // Normalize early so validation and storage see canonical forms.
  const category = (params.category ?? "").trim().toLowerCase();
  const content = (params.content ?? "").trim();
  if (!category || !content) {
    return {
      content: [{ type: "text", text: "Error: category and content are required." }],
      details: { operation: "memory_capture", error: "missing_fields" },
      isError: true,
    };
  }
  if (!VALID_CATEGORIES.has(category)) {
    return {
      content: [
        {
          type: "text",
          text: `Error: invalid category "${category}". Must be one of: ${[...VALID_CATEGORIES].join(", ")}.`,
        },
      ],
      details: { operation: "memory_capture", error: "invalid_category" },
      isError: true,
    };
  }
  const confidence = clampConfidence(params.confidence);
  const id = createMemory({ category, content, confidence });
  if (!id) {
    return {
      content: [{ type: "text", text: "Error: failed to create memory." }],
      details: { operation: "memory_capture", error: "create_failed" },
      isError: true,
    };
  }

  return {
    content: [{ type: "text", text: `Captured ${id} (${category}): ${content}` }],
    details: { operation: "memory_capture", id, category, confidence },
  };
}

/** Clamp confidence to [0.1, 0.99]; non-numeric input defaults to 0.8. */
function clampConfidence(value: unknown): number {
  if (typeof value !== "number" || !Number.isFinite(value)) return 0.8;
  if (value < 0.1) return 0.1;
  if (value > 0.99) return 0.99;
  return value;
}

// ─── memory_query ───────────────────────────────────────────────────────────

export interface MemoryQueryParams {
  query: string;
  k?: number;
  category?: string;
  reinforce_hits?: boolean;
}

export interface MemoryQueryHit {
  id: string;
  category: string;
  content: string;
  confidence: number;
  hit_count: number;
  score: number;
}

/**
 * Case-insensitive substring search over active memories. Filters the
 * top-200 store-ranked rows by optional category and query match, then
 * returns up to k hits.
 *
 * NOTE(review): hits are returned in the store's ranking order — the
 * computed `score` is informational only and does not reorder results
 * (the slice happens before scores exist). Confirm that is intentional.
 */
export function executeMemoryQuery(params: MemoryQueryParams): ToolExecutionResult {
  if (!isDbAvailable()) return dbUnavailable("memory_query");

  const query = (params.query ??
"").trim().toLowerCase();
  const k = clampTopK(params.k, 10);
  const category = params.category?.trim().toLowerCase() || undefined;

  try {
    // Candidate pool: top 200 active memories, in store ranking order.
    const candidates = getActiveMemoriesRanked(200);
    const filtered = candidates.filter((m) => {
      if (category && m.category.toLowerCase() !== category) return false;
      if (query && !m.content.toLowerCase().includes(query) && !m.category.toLowerCase().includes(query)) return false;
      return true;
    });

    // First k in store order; score is derived afterwards and not used to sort.
    const ranked = filtered
      .slice(0, k)
      .map((memory) => ({
        memory,
        score: memory.confidence * (1 + memory.hit_count * 0.1),
      }));

    const hits: MemoryQueryHit[] = ranked.map((r) => ({
      id: r.memory.id,
      category: r.memory.category,
      content: r.memory.content,
      confidence: r.memory.confidence,
      hit_count: r.memory.hit_count,
      score: r.score,
    }));

    // Optional usage reinforcement: bump every returned memory in the store.
    if (params.reinforce_hits) {
      for (const h of hits) reinforceMemory(h.id);
    }

    const summary = hits.length === 0
      ? "No matching memories."
      : hits.map((h) => `- [${h.id}] (${h.category}) ${h.content}`).join("\n");

    return {
      content: [{ type: "text", text: summary }],
      details: {
        operation: "memory_query",
        query,
        k,
        returned: hits.length,
        hits,
      },
    };
  } catch (err) {
    return {
      content: [{ type: "text", text: `Error: memory query failed: ${(err as Error).message}` }],
      details: { operation: "memory_query", error: (err as Error).message },
      isError: true,
    };
  }
}

/** Clamp requested result count to [1, 50]; non-numeric input uses fallback. */
function clampTopK(value: unknown, fallback: number): number {
  if (typeof value !== "number" || !Number.isFinite(value)) return fallback;
  if (value < 1) return 1;
  if (value > 50) return 50;
  return Math.floor(value);
}

// ─── sf_graph ──────────────────────────────────────────────────────────────

export interface SfGraphParams {
  mode: "build" | "query";
  memoryId?: string;
  depth?: number;
  rel?: string;
}

/**
 * Graph tool over memory relations. mode="build" is a no-op acknowledgement
 * (edges are populated incrementally by extraction); mode="query" traverses
 * outward from memoryId up to a clamped depth, optionally filtered to one
 * relation type.
 */
export function executeSfGraph(params: SfGraphParams): ToolExecutionResult {
  if (!isDbAvailable()) return dbUnavailable("sf_graph");

  if (params.mode === "build") {
    return {
      content: [
        {
          type: "text",
          text:
            "sf_graph build acknowledged. Graph edges are populated incrementally by memory " +
            "extraction (including LINK actions). Use `/sf memory extract ` to trigger " +
            "extraction against a specific ingested source.",
        },
      ],
      details: { operation: "sf_graph", mode: "build", built: 0 },
    };
  }

  if (params.mode !== "query") {
    return {
      content: [{ type: "text", text: `Error: unknown mode "${params.mode}". Must be "build" or "query".` }],
      details: { operation: "sf_graph", error: "invalid_mode" },
      isError: true,
    };
  }

  const memoryId = params.memoryId?.trim();
  if (!memoryId) {
    return {
      content: [{ type: "text", text: "Error: memoryId is required for mode=query." }],
      details: { operation: "sf_graph", error: "missing_memory_id" },
      isError: true,
    };
  }

  try {
    const graph = traverseGraph(memoryId, clampDepth(params.depth));
    // Optional relation filter; null means keep every edge.
    const rel = params.rel?.trim().toLowerCase() || null;
    const edges = rel ? graph.edges.filter((e) => e.rel === rel) : graph.edges;
    // Keep only nodes that participate in a surviving edge (plus the root).
    const relevantIds = new Set([memoryId]);
    for (const e of edges) {
      relevantIds.add(e.from);
      relevantIds.add(e.to);
    }
    const nodes = graph.nodes.filter((n) => relevantIds.has(n.id));

    if (nodes.length === 0) {
      return {
        content: [{ type: "text", text: `No memory found with id ${memoryId}.` }],
        details: { operation: "sf_graph", mode: "query", memoryId, nodes: [], edges: [] },
      };
    }

    const summary = [
      `Memory ${memoryId} — ${nodes.length} node(s), ${edges.length} edge(s).`,
      ...nodes.map((n) => ` [${n.id}] (${n.category}) ${n.content}`),
      ...edges.map((e) => ` ${e.from} --${e.rel}-> ${e.to}`),
    ].join("\n");
    return {
      content: [{ type: "text", text: summary }],
      details: {
        operation: "sf_graph",
        mode: "query",
        memoryId,
        nodes: nodes.map((n) => ({ id: n.id, category: n.category, content: n.content })),
        edges: edges.map((e) => ({ from: e.from, to: e.to, rel: e.rel })),
      },
    };
  } catch (err) {
    return {
      content: [{ type: "text", text: `Error: graph query failed: ${(err as Error).message}` }],
      details: { operation: "sf_graph", error: (err as Error).message },
      isError: true,
    };
  }
}

/** Clamp traversal depth to [0, 5]; non-numeric input defaults to 1. */
function clampDepth(value: unknown): number {
  if (typeof value !== "number" || !Number.isFinite(value)) return 1;
  if (value < 0) return 0;
  if (value > 5) return 5;
  return Math.floor(value);
}

// Suppress unused import warning — Memory is used implicitly via getActiveMemoriesRanked return type.
type _Memory = Memory;
diff --git a/src/resources/extensions/sf/tools/resume-tool.ts b/src/resources/extensions/sf/tools/resume-tool.ts
new file mode 100644
index 000000000..430b53606
--- /dev/null
+++ b/src/resources/extensions/sf/tools/resume-tool.ts
@@ -0,0 +1,40 @@
// SF Resume Tool — returns the contents of .sf/last-snapshot.md so
// agents can re-orient after compaction or session resume without
// re-deriving project memory state.
+ +import { readCompactionSnapshot } from "../compaction-snapshot.js"; + +export interface ResumeToolParams { + /** Ignored — reserved for future variant (e.g. dated snapshots). */ + _variant?: string; +} + +export interface ToolExecutionResult { + content: Array<{ type: "text"; text: string }>; + details: Record; + isError?: boolean; +} + +export function executeResume( + _params: ResumeToolParams, + opts: { baseDir: string }, +): ToolExecutionResult { + const snapshot = readCompactionSnapshot(opts.baseDir); + if (snapshot == null) { + return { + content: [ + { + type: "text", + text: + "No snapshot found at .sf/last-snapshot.md. The snapshot is written automatically " + + "on session_before_compact (enabled by default; set context_mode.enabled=false to opt out).", + }, + ], + details: { operation: "sf_resume", found: false }, + }; + } + return { + content: [{ type: "text", text: snapshot }], + details: { operation: "sf_resume", found: true, bytes: Buffer.byteLength(snapshot, "utf-8") }, + }; +}