feat: Created draft mapping of SF patterns to ACE reference draft

SF-Task: S05/T01
This commit is contained in:
Mikael Hugo 2026-05-13 02:01:41 +02:00
parent 1ed505669b
commit 65e195a9fd
49 changed files with 2263 additions and 272 deletions

13
.gitignore vendored
View file

@ -106,4 +106,17 @@ repowise.db
.sf/scaffold-manifest.json .sf/scaffold-manifest.json
.sf/interactive.lock .sf/interactive.lock
.sf/interactive.lock.d/ .sf/interactive.lock.d/
# SQLite WAL/SHM are ephemeral checkpoint files — only the .db is durable.
.sf/metrics.db-wal
.sf/metrics.db-shm
.sf/sf.db-wal
.sf/sf.db-shm
# Per-dispatch trace files accumulate one-per-request and are runtime-only.
# Consumers (sf-db-gates, adaptive verification policy) read by mtime window
# (24h30d) — on-disk retention is needed, but git tracking is not.
.sf/traces/pre-dispatch:*.jsonl
.sf/traces/finalize:*.jsonl
.sf/traces/guard:*.jsonl
# `latest` is a symlink retargeted on every dispatch — pure git noise.
.sf/traces/latest
test_output.log test_output.log

View file

@ -1,3 +1,3 @@
{ {
"lastFullVacuumAt": "2026-05-12T13:59:07.765Z" "lastFullVacuumAt": "2026-05-12T20:58:28.744Z"
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -60,5 +60,5 @@
"confidence": "EXTRACTED" "confidence": "EXTRACTED"
} }
], ],
"builtAt": "2026-05-12T15:26:43.252Z" "builtAt": "2026-05-12T23:53:23.408Z"
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1 +1 @@
{"fetchedAt":"2026-05-12T14:54:31.656Z","modelIds":["mistral-medium-2505","mistral-medium-2508","mistral-medium-latest","mistral-medium","mistral-vibe-cli-with-tools","open-mistral-nemo","open-mistral-nemo-2407","mistral-tiny-2407","mistral-tiny-latest","codestral-2508","codestral-latest","devstral-2512","devstral-medium-latest","devstral-latest","mistral-small-2603","mistral-small-latest","mistral-vibe-cli-fast","magistral-small-latest","magistral-medium-2509","magistral-medium-latest","labs-leanstral-2603","mistral-large-2512","mistral-large-latest","mistral-large-2512","mistral-large-latest","ministral-3b-2512","ministral-3b-latest","ministral-8b-2512","ministral-8b-latest","ministral-14b-2512","ministral-14b-latest","mistral-medium-3-5","mistral-medium-3.5","mistral-medium-3","mistral-medium-2604","mistral-medium-c21211-r0-75","mistral-vibe-cli-latest","mistral-large-2411","pixtral-large-2411","pixtral-large-latest","mistral-large-pixtral-2411","devstral-small-2507","devstral-medium-2507","magistral-small-2509","mistral-small-2506"]} 
{"fetchedAt":"2026-05-12T21:25:20.919Z","modelIds":["mistral-medium-2505","mistral-medium-2508","mistral-medium-latest","mistral-medium","mistral-vibe-cli-with-tools","open-mistral-nemo","open-mistral-nemo-2407","mistral-tiny-2407","mistral-tiny-latest","codestral-2508","codestral-latest","devstral-2512","devstral-medium-latest","devstral-latest","mistral-small-2603","mistral-small-latest","mistral-vibe-cli-fast","magistral-small-latest","magistral-medium-2509","magistral-medium-latest","labs-leanstral-2603","mistral-large-2512","mistral-large-latest","mistral-large-2512","mistral-large-latest","ministral-3b-2512","ministral-3b-latest","ministral-8b-2512","ministral-8b-latest","ministral-14b-2512","ministral-14b-latest","mistral-medium-3-5","mistral-medium-3.5","mistral-medium-3","mistral-medium-2604","mistral-medium-c21211-r0-75","mistral-vibe-cli-latest","mistral-large-2411","pixtral-large-2411","pixtral-large-latest","mistral-large-pixtral-2411","devstral-small-2507","devstral-medium-2507","magistral-small-2509","mistral-small-2506"]}

File diff suppressed because one or more lines are too long

View file

@ -109,26 +109,26 @@
"total": 1 "total": 1
}, },
"kimi-coding/kimi-k2.6": { "kimi-coding/kimi-k2.6": {
"successes": 1, "successes": 2,
"failures": 0, "failures": 0,
"timeouts": 0, "timeouts": 0,
"totalTokens": 1821480, "totalTokens": 1892068,
"totalCost": 0, "totalCost": 0.030715552,
"lastUsed": "2026-05-12T20:57:45.179Z", "lastUsed": "2026-05-12T23:58:57.132Z",
"successRate": 1, "successRate": 1,
"total": 1 "total": 2
} }
}, },
"complete-slice": { "complete-slice": {
"kimi-coding/kimi-k2.6": { "kimi-coding/kimi-k2.6": {
"successes": 1, "successes": 2,
"failures": 0, "failures": 0,
"timeouts": 0, "timeouts": 0,
"totalTokens": 719526, "totalTokens": 814376,
"totalCost": 0.026709, "totalCost": 0.053080319800000005,
"lastUsed": "2026-05-12T15:26:57.708Z", "lastUsed": "2026-05-12T23:54:01.143Z",
"successRate": 1, "successRate": 1,
"total": 1 "total": 2
} }
} }
} }

View file

@ -1,16 +0,0 @@
[
{
"kind": "write",
"toolCallId": "write_1778619443353_32",
"path": ".sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md",
"timestamp": 1778619443535
},
{
"kind": "bash",
"toolCallId": "bash_1778619447339_33",
"command": "test -f .sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md && grep -q \"status\" .sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md && echo \"Matrix exists and contains status command info.\"",
"exitCode": 0,
"outputSnippet": "Matrix exists and contains status command info.\n",
"timestamp": 1778619447544
}
]

View file

@ -0,0 +1 @@
[]

View file

@ -0,0 +1,16 @@
[
{
"kind": "write",
"toolCallId": "DgPnxQEen",
"path": "docs/dev/sf-ace-patterns.md.draft",
"timestamp": 1778630297060
},
{
"kind": "bash",
"toolCallId": "8FjDDZSlA",
"command": "test -f docs/dev/sf-ace-patterns.md.draft && grep -c \"SF Implementation\" docs/dev/sf-ace-patterns.md.draft | grep -q \"6\"",
"exitCode": 0,
"outputSnippet": "(no output)",
"timestamp": 1778630298077
}
]

16
.sf/slice-routing.json Normal file
View file

@ -0,0 +1,16 @@
{
"M001-6377a4/S04": {
"provider": "minimax",
"id": "MiniMax-M2.1",
"ts": "2026-05-12T23:54:01.079Z",
"lastUnitType": "complete-slice",
"lastUnitId": "M001-6377a4/S04"
},
"M001-6377a4/S05": {
"provider": "mistral",
"id": "codestral-latest",
"ts": "2026-05-12T23:58:57.088Z",
"lastUnitType": "execute-task",
"lastUnitId": "M001-6377a4/S05/T01"
}
}

View file

@ -1 +1 @@
guard:76c7c307-91b4-426e-8fad-4ff951d5a52e.jsonl guard:b8cbf9df-9fe8-4203-9c63-79fc7264d74e.jsonl

36
TODO.md
View file

@ -3,3 +3,39 @@
Dump anything here. Dump anything here.
--- ---
## Self-Feedback Inbox
### [prompt-modularization] Phase 3 — migrate remaining builders to `composeUnitContext` v2
**Context:** Phase 1 (fragment infrastructure, 17-prompt Working Directory deduplication) and
Phase 2 (5 stub manifests for deploy/smoke-production/release/rollback/challenge) shipped in
commit `ca5d869e3`. 9 of 26 unit types are now fully manifest-driven via `composeInlinedContext`.
**What's blocked and why:**
Migrating the remaining 17 builders to `composeInlinedContext` (v1) is the wrong path because:
1. `inlineKnowledgeScoped` and `inlineGraphSubgraph` are NOT in `ARTIFACT_KEYS` — these
artifacts would remain imperative and undeclared in every manifest, making manifests
structurally unreliable descriptions of actual builder behavior.
2. Injecting knowledge/graph at the right position in the composed string requires fragile
sentinel-string searches (e.g., `body.lastIndexOf("### Task Summary:")`). This pattern
is already untested in the 2 migrated complex builders (`research-milestone`, `complete-slice`).
3. `composeUnitContext` (v2) in `unit-context-composer.js` already has `computed`, `prepend`,
and `excerpt` support — knowledge and graph inlining maps cleanly to `computed` entries.
Migrating to v1 now creates a half-migration state that must be undone when v2 lands.
**Recommended next slice:**
1. Add `"knowledge"` and `"graph"` to `ARTIFACT_KEYS` in `unit-context-manifest.js`.
2. Register them as `computed` entries in relevant `UNIT_MANIFESTS` entries.
3. Wire one builder (e.g., `buildResearchSlicePrompt`) through `composeUnitContext` v2 as pilot.
4. Add position-assertion tests to already-migrated complex builders (`research-milestone`,
`complete-slice`) to guard against silent ordering degradation.
5. Then migrate remaining builders in batches: slice builders → milestone builders → execute-task.
**Note on `prompt-cache-optimizer.js`:** Entirely dead code — `optimizeForCaching()`,
`estimateCacheSavings()`, `computeCacheHitRate()` have zero importers. `reorderForCaching()`
is wired at `phases-unit.js:519` but no `cache_control` markers are written to outgoing
requests. Remove the file or wire it in the same slice that adds `cache_control` breakpoints.
---

View file

@ -0,0 +1,29 @@
# SF Patterns to ACE Reference Draft Mapping
## Preferences
**SF Implementation:** `src/resources/extensions/sf/preferences.js`
## PDD
**SF Implementation:** `src/resources/extensions/sf/uok/unit-runtime.js`
## UOK Gates
**SF Implementation:** `src/resources/extensions/sf/uok/gate-runner.js`
## Notifications
**SF Implementation:** `src/resources/extensions/sf/skills/frontmatter.js`
## Skills-as-Contracts
**SF Implementation:** `src/resources/extensions/sf/steerable-autonomous-panel.js`
## Idempotency
**SF Implementation:** `src/resources/extensions/sf/uok/unit-runtime.js`
## Verification
- All 6 patterns have verified file paths in this document.

View file

@ -0,0 +1,85 @@
# SF Product Surface Capabilities
This document defines the command and feature availability across SF's three product surfaces: **CLI / Headless**, **TUI**, and **Web**. It records intentional gaps so they are not mistaken for bugs.
## Surface Definitions
| Surface | Description | Primary Consumer |
| :--- | :--- | :--- |
| **CLI / Headless** | Non-interactive command-line interface and machine-surface protocol (`sf headless`). | Scripts, CI/CD, editor integrations, autonomous dispatch. |
| **TUI** | Interactive Terminal User Interface with dashboards, visualizers, and live overlays. | Developers working locally who prefer keyboard-driven interaction. |
| **Web** | Browser-based interface (Next.js) with panels, command surfaces, and visual tools. | Developers who prefer a GUI, remote access, or power-mode workflows. |
## Feature Matrix
| Command / Feature | CLI / Headless | TUI | Web | Notes |
| :--- | :--- | :--- | :--- | :--- |
| `/status` | ✅ | ✅ | ✅ | Text in CLI/Headless; dashboard overlay in TUI; terminal or `sf-status` panel in Web. |
| `/plan` | ✅ | ✅ | ❌ **Intentional Gap** | See [Intentional Gaps](#intentional-gaps) below. |
| `/run` (`/next`, `/autonomous`) | ✅ | ✅ | ❌ **Intentional Gap** | See [Intentional Gaps](#intentional-gaps) below. |
| `/steer` | ✅ | ✅ | ✅ | Web exposes via `sf-steer` panel. |
| `/undo` | ✅ | ✅ | ✅ | Web exposes via `sf-undo` panel. |
| `/history` | ✅ | ✅ | ✅ | Web exposes via `sf-history` panel. |
| `/doctor` | ✅ | ✅ | ✅ | Web exposes via `sf-doctor` panel. |
| `/forensics` | ✅ | ✅ | ✅ | Web exposes via `sf-forensics` panel. |
| `/skills` | ✅ | ✅ | ✅ | Web exposes via `sf-skill-health` panel. |
| `/capture` | ✅ | ✅ | ✅ | Web exposes via `sf-capture` panel. |
| `/triage` | ✅ | ✅ | ✅ | Web exposes via `sf-triage` panel. |
| `/inspect` | ✅ | ✅ | ✅ | Web exposes via `sf-inspect` panel. |
| `/hooks` | ✅ | ✅ | ✅ | Web exposes via `sf-hooks` panel. |
| `/cleanup` | ✅ | ✅ | ✅ | Web exposes via `sf-cleanup` panel. |
| `/export` | ✅ | ✅ | ✅ | Web exposes via `sf-export` panel. |
| `/queue` | ✅ | ✅ | ✅ | Web exposes via `sf-queue` panel. |
| `/visualize` | ✅ | ✅ | ✅ | Web exposes via `sf-visualize` panel. |
| `/prefs` | ✅ | ✅ | ✅ | Web exposes via `sf-prefs` panel. |
| `/config` | ✅ | ✅ | ✅ | Web exposes via `sf-config` panel. |
| `/mode` | ✅ | ✅ | ✅ | Web exposes via `sf-mode` panel. |
| `/model` | ✅ | ✅ | ✅ | Web exposes via dedicated **Model** command surface. |
| `/thinking` | ✅ | ✅ | ✅ | Web exposes via dedicated **Thinking** command surface. |
| `/git` | ✅ | ✅ | ✅ | Web exposes via dedicated **Git** command surface. |
| `/settings` | ✅ | ✅ | ✅ | Web exposes via dedicated **Settings** command surface (general, recovery, auth, admin, experimental). |
| `/resume` | ✅ | ✅ | ✅ | Web exposes via dedicated **Resume** command surface. |
| `/name` | ✅ | ✅ | ✅ | Web exposes via dedicated **Name** command surface. |
| `/fork` | ✅ | ✅ | ✅ | Web exposes via dedicated **Fork** command surface. |
| `/session` | ✅ | ✅ | ✅ | Web exposes via dedicated **Session** command surface. |
| `/compact` | ✅ | ✅ | ✅ | Web exposes via dedicated **Compact** command surface. |
| `/tasks` | ✅ | ✅ | ✅ | Web exposes via Dashboard and Activity views. |
| `/research` | ✅ | ✅ | ✅ | Web terminal supports typing the command. |
| `/implement` | ✅ | ✅ | ✅ | Web terminal supports typing the command. |
## Intentional Gaps
### `/plan` is not available as a first-class Web UI workflow
**Why:** The web UI uses a different, browser-native planning and execution model. Planning artifacts are promoted through CLI-first workflows (`sf plan promote`) that require filesystem access, Git operations, and markdown rendering pipelines that are optimized for terminal and editor surfaces. The web surface focuses on higher-level UI interactions (roadmap views, milestone explorers, visual planning tools) rather than raw slash-command promotion.
**What web users do instead:**
- Use the **Roadmap** and **Milestone Explorer** views to inspect and navigate planning state.
- Type `/plan` in the embedded terminal if needed; the command executes but the full promotion workflow is CLI-first.
### `/run` (`/next`, `/autonomous`) is not available as a first-class Web UI workflow
**Why:** The web UI uses a different, browser-native execution model. Backend execution is managed via specific API routes and WebSocket/bridge communication rather than a `/run` command dispatch. The web surface prioritizes supervised, click-driven execution (e.g., **Power Mode**, action buttons, workflow steppers) over autonomous terminal-style dispatch.
**What web users do instead:**
- Use **Power Mode** for guided, step-by-step unit execution.
- Use **Chat Mode** for conversational task dispatch.
- Type `/autonomous` or `/next` in the embedded terminal if needed; execution proceeds via the PTY bridge.
## Design Principle
> **Behavioral coherence, not visual parity.**
>
> Every surface must expose the *same underlying state* (via `deriveState()`, UOK diagnostics, and bridge data) but may present it through different interaction models. A gap is intentional only when the surface provides an equivalent or superior alternative workflow for the same user goal.
## Verification
This matrix is verified against:
- `src/resources/extensions/sf/commands/handlers/core.js` — CLI/TUI `status` handler.
- `src/resources/extensions/sf/commands/handlers/ops.js` — CLI/TUI `plan` and `run` handlers.
- `src/headless.ts` — Headless status and execution entrypoints.
- `web/components/sf/command-surface.tsx` — Web command surface registry.
- `web/lib/command-surface-contract.ts` — Web command surface type definitions.
- `web/components/sf/sidebar.tsx` — Web navigation and exposed commands.
For the full behavioral audit, see `.sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md`.

View file

@ -0,0 +1,75 @@
import assert from "node:assert/strict";
import { describe, it } from "vitest";
import type { Context, Model, OpenAICompletionsCompat } from "../types.js";
import { convertMessages } from "./openai-completions.js";
const compat = {
supportsDeveloperRole: false,
requiresAssistantAfterToolResult: false,
requiresThinkingAsText: false,
} as Required<OpenAICompletionsCompat>;
function model(provider: string, id: string): Model<"openai-completions"> {
return {
id,
name: id,
api: "openai-completions",
provider,
baseUrl:
provider === "openrouter"
? "https://openrouter.ai/api/v1"
: "https://api.openai.com/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128_000,
maxTokens: 4096,
};
}
function contextWithCacheControl(): Context {
return {
messages: [
{
role: "user",
content: [
{
type: "text",
text: "stable prefix",
cache_control: { type: "ephemeral" },
} as any,
{ type: "text", text: "dynamic suffix" },
],
timestamp: Date.now(),
},
],
};
}
describe("convertMessages cache_control", () => {
it("preserves_cache_control_when_openrouter_anthropic_model", () => {
const messages = convertMessages(
model("openrouter", "anthropic/claude-sonnet-4.5"),
contextWithCacheControl(),
compat,
);
const content = messages[0].content;
assert.ok(Array.isArray(content));
assert.deepEqual((content[0] as any).cache_control, {
type: "ephemeral",
});
});
it("strips_cache_control_when_openai_compatible_model_does_not_support_it", () => {
const messages = convertMessages(
model("openai", "gpt-5.3-chat-latest"),
contextWithCacheControl(),
compat,
);
const content = messages[0].content;
assert.ok(Array.isArray(content));
assert.equal((content[0] as any).cache_control, undefined);
});
});

View file

@ -493,6 +493,12 @@ function maybeAddOpenRouterAnthropicToolCacheControl(
} }
} }
function supportsOpenRouterAnthropicCacheControl(
model: Model<"openai-completions">,
): boolean {
return model.provider === "openrouter" && model.id.startsWith("anthropic/");
}
function mapReasoningEffort( function mapReasoningEffort(
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
reasoningEffortMap: Partial< reasoningEffortMap: Partial<
@ -506,8 +512,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
model: Model<"openai-completions">, model: Model<"openai-completions">,
messages: ChatCompletionMessageParam[], messages: ChatCompletionMessageParam[],
): void { ): void {
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) if (!supportsOpenRouterAnthropicCacheControl(model)) return;
return;
// Anthropic-style caching requires cache_control on a text part. Add a breakpoint // Anthropic-style caching requires cache_control on a text part. Add a breakpoint
// on the last user/assistant message (walking backwards until we find text content). // on the last user/assistant message (walking backwards until we find text content).
@ -622,9 +627,11 @@ export function convertMessages(
// Preserve cache_control if present (set upstream for Anthropic prompt caching). // Preserve cache_control if present (set upstream for Anthropic prompt caching).
// The property is not in the OpenAI SDK type but is accepted by providers // The property is not in the OpenAI SDK type but is accepted by providers
// that support Anthropic-style caching (openrouter/anthropic/*). // that support Anthropic-style caching (openrouter/anthropic/*).
const cacheControl = ( const cacheControl = supportsOpenRouterAnthropicCacheControl(
item as unknown as Record<string, unknown> model,
).cache_control; )
? (item as unknown as Record<string, unknown>).cache_control
: undefined;
if (cacheControl) { if (cacheControl) {
(part as unknown as Record<string, unknown>).cache_control = (part as unknown as Record<string, unknown>).cache_control =
cacheControl; cacheControl;

View file

@ -0,0 +1,64 @@
---
name: rubber-duck
description: Constructive pre-implementation critic — catches design flaws, missing edge cases, and gaps before code is written
model: sonnet
tools: read, grep, find, ls, bash
---
You are a constructive critic. Your job is to identify real problems in a plan, design, or code change **before** implementation is committed to — when course corrections are still cheap.
You are **read-only**. Do not edit files. Do not run commands that change the environment.
## What you review
You receive a plan, a design proposal, a code diff, or a task description. You review it for:
- **Logic errors** — incorrect assumptions, wrong control flow, missing invariants
- **Missing edge cases** — inputs/states the plan doesn't account for
- **Design flaws** — abstractions that won't hold, coupling that will hurt, missing separation of concerns
- **Security issues** — unvalidated inputs, exposed secrets, auth gaps
- **Test gaps** — behavior that will be untested or untestable with the proposed approach
- **Spec contradictions** — where the plan conflicts with stated requirements or existing behavior
## What you do NOT comment on
- Code style, formatting, naming conventions
- Grammar or wording in comments/docs
- Best practices that don't cause an actual problem
- Refactoring that doesn't change correctness
- Minor improvements that don't affect the task outcome
If something is fine, say so. Do not manufacture findings to seem thorough. A short report with two real findings beats a long report with ten nitpicks.
## Output format
For each finding:
```
## [Blocking|Non-blocking|Suggestion] — <title>
**What:** <the specific problem, stated precisely>
**Why it matters:** <the actual impact what breaks, under what condition>
**Fix:** <concrete change to address it>
```
Then a final verdict:
```
## Verdict
READY / NEEDS-REVISION
One sentence: overall assessment.
```
- `READY` — no blocking findings; the plan/code can proceed as-is
- `NEEDS-REVISION` — at least one blocking finding must be addressed first
## Severity guide
- **Blocking** — will cause a bug, data loss, security issue, or test failure if not fixed
- **Non-blocking** — should be fixed for quality but won't break the task
- **Suggestion** — worth considering; low priority
Lead with blocking findings. If there are none, say so explicitly before the non-blocking ones.

View file

@ -18,6 +18,7 @@ import {
loadCapabilityOverrides, loadCapabilityOverrides,
resolveModelForComplexity, resolveModelForComplexity,
} from "./model-router.js"; } from "./model-router.js";
import { readStickyModelForUnit } from "./slice-routing-cache.js";
import { import {
filterModelsByProviderModelAllow, filterModelsByProviderModelAllow,
isProviderAllowedByLists, isProviderAllowedByLists,
@ -543,6 +544,15 @@ export async function selectAndApplyModel(
selectionMethod: "tier-only", selectionMethod: "tier-only",
}; };
} else { } else {
// Slice-sticky hint: prefer the model that previously succeeded
// on a sibling unit in this slice when its capability score is
// within window of the winner. Cleared on executor refusal so a
// failing model does not re-attach to the slice.
const stickyHint = readStickyModelForUnit(
basePath,
unitType,
unitId,
);
routingResult = resolveModelForComplexity( routingResult = resolveModelForComplexity(
classification, classification,
modelConfig, modelConfig,
@ -551,6 +561,7 @@ export async function selectAndApplyModel(
unitType, unitType,
classification.taskMetadata, classification.taskMetadata,
capabilityOverrides, capabilityOverrides,
stickyHint,
); );
} }
if (routingResult.wasDowngraded) { if (routingResult.wasDowngraded) {

View file

@ -82,7 +82,9 @@ import {
import { initRoutingHistory } from "./routing-history.js"; import { initRoutingHistory } from "./routing-history.js";
import { import {
acquireSessionLock, acquireSessionLock,
isSessionPidAlive,
releaseSessionLock, releaseSessionLock,
terminateExistingSession,
updateSessionLock, updateSessionLock,
} from "./session-lock.js"; } from "./session-lock.js";
import { getSessionModelOverride } from "./session-model-override.js"; import { getSessionModelOverride } from "./session-model-override.js";
@ -342,15 +344,91 @@ export async function bootstrapAutoSession(
lockBase, lockBase,
buildResolver, buildResolver,
} = deps; } = deps;
const lockResult = acquireSessionLock(base, { let lockResult = acquireSessionLock(base, {
sessionId: ctx.sessionManager?.getSessionId?.(), sessionId: ctx.sessionManager?.getSessionId?.(),
sessionFile: ctx.sessionManager?.getSessionFile?.(), sessionFile: ctx.sessionManager?.getSessionFile?.(),
}); });
// Lock busy on a *live* peer: instead of just refusing to start, ask the
// operator whether to terminate the existing session and take over. Two
// non-interactive escape hatches keep CI/headless usage predictable:
// - SF_KILL_EXISTING=1 (or =true / =yes) — auto-confirm the kill
// - SF_KILL_EXISTING=0 (or =false / =no) — auto-decline (current behavior)
// - SF_HEADLESS=1 with no SF_KILL_EXISTING — auto-decline (safe default
// for batch contexts where a hung interactive prompt would deadlock)
if (!lockResult.acquired && lockResult.existingPid) {
const existingPid = Number(lockResult.existingPid);
if (isSessionPidAlive(existingPid)) {
const envKill = String(process.env.SF_KILL_EXISTING ?? "")
.trim()
.toLowerCase();
const headless =
process.env.SF_HEADLESS === "1" ||
String(process.env.SF_HEADLESS ?? "").toLowerCase() === "true";
let confirmed;
if (envKill === "1" || envKill === "true" || envKill === "yes") {
confirmed = true;
} else if (envKill === "0" || envKill === "false" || envKill === "no") {
confirmed = false;
} else if (headless) {
// Headless without an explicit opt-in: refuse to kill silently.
confirmed = false;
} else if (typeof ctx.ui?.confirm === "function") {
confirmed = await ctx.ui.confirm(
"Stop running SF session?",
`Another SF autonomous session (PID ${existingPid}) is already running on this project. Stop it and start a fresh session?`,
);
} else {
confirmed = false;
}
if (confirmed) {
ctx.ui.notify(
`Stopping existing SF session (PID ${existingPid})…`,
"info",
);
let result;
try {
result = await terminateExistingSession(existingPid);
} catch (err) {
ctx.ui.notify(
`Failed to stop existing SF session (PID ${existingPid}): ${err?.message ?? err}. Stop it manually with \`kill ${existingPid}\`.`,
"error",
);
return false;
}
if (!result.terminated) {
ctx.ui.notify(
`Unable to stop existing SF session (PID ${existingPid}). It may belong to another user or be unresponsive. Stop it manually with \`kill -9 ${existingPid}\`.`,
"error",
);
return false;
}
ctx.ui.notify(
result.escalated
? `Existing SF session (PID ${existingPid}) did not exit on SIGTERM; SIGKILL applied.`
: `Existing SF session (PID ${existingPid}) stopped.`,
result.escalated ? "warning" : "info",
);
lockResult = acquireSessionLock(base, {
sessionId: ctx.sessionManager?.getSessionId?.(),
sessionFile: ctx.sessionManager?.getSessionFile?.(),
});
}
}
}
if (!lockResult.acquired) { if (!lockResult.acquired) {
const reason = lockResult.reason; const reason = lockResult.reason;
ctx.ui.notify(reason, "error"); ctx.ui.notify(reason, "error");
return false; return false;
} }
// Session-start janitor: prune per-flow trace files older than the longest
// analyzer window (30d). Best-effort, never blocks startup, errors swallowed
// in pruneStaleTraces. Keeps `.sf/traces/` from growing without bound.
try {
const { pruneStaleTraces } = await import("./uok/trace-writer.js");
pruneStaleTraces(base);
} catch {
// trace janitor must never break autonomous startup
}
function releaseLockAndReturn() { function releaseLockAndReturn() {
releaseSessionLock(base); releaseSessionLock(base);
clearLock(base); clearLock(base);

View file

@ -6,6 +6,7 @@
import { scopeActiveToolsForUnitType } from "../constants.js"; import { scopeActiveToolsForUnitType } from "../constants.js";
import { debugLog } from "../debug-logger.js"; import { debugLog } from "../debug-logger.js";
import { getErrorMessage } from "../error-utils.js";
import { import {
resolveAutoSupervisorConfig, resolveAutoSupervisorConfig,
resolvePersistModelChanges, resolvePersistModelChanges,
@ -27,11 +28,29 @@ import {
getCurrentTurnGeneration, getCurrentTurnGeneration,
runWithTurnGeneration, runWithTurnGeneration,
} from "./turn-epoch.js"; } from "./turn-epoch.js";
import { getErrorMessage } from "../error-utils.js";
// Tracks the latest session-switch attempt so a late timeout settlement from an // Tracks the latest session-switch attempt so a late timeout settlement from an
// older runUnit() call cannot clear the guard for a newer one. // older runUnit() call cannot clear the guard for a newer one.
let sessionSwitchGeneration = 0; let sessionSwitchGeneration = 0;
/**
* Build the custom-message content for a unit prompt.
*
* Purpose: preserve the exact prompt text while allowing the provider layer to
* cache the stable prefix separately from the dynamic suffix.
*
* Consumer: runUnit before pi.sendMessage dispatches the autonomous unit turn.
*/
export function buildUnitPromptMessageContent(prompt, promptParts) {
if (!promptParts) return prompt;
return [
{
type: "text",
text: `${promptParts.before}\n`,
cache_control: { type: "ephemeral" },
},
{ type: "text", text: promptParts.after },
];
}
/** /**
* Execute a single unit: create a new session, send the prompt, and await * Execute a single unit: create a new session, send the prompt, and await
* the agent_end promise. Returns a UnitResult describing what happened. * the agent_end promise. Returns a UnitResult describing what happened.
@ -122,8 +141,7 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
sessionResult = await Promise.race([sessionPromise, timeoutPromise]); sessionResult = await Promise.race([sessionPromise, timeoutPromise]);
} catch (sessionErr) { } catch (sessionErr) {
if (sessionTimeoutHandle) clearTimeout(sessionTimeoutHandle); if (sessionTimeoutHandle) clearTimeout(sessionTimeoutHandle);
const msg = const msg = getErrorMessage(sessionErr);
getErrorMessage(sessionErr);
debugLog("runUnit", { debugLog("runUnit", {
phase: "session-error", phase: "session-error",
unitType, unitType,
@ -264,16 +282,7 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
// When promptParts is available, send structured content so the provider can // When promptParts is available, send structured content so the provider can
// apply cache_control:ephemeral to the stable prefix (before) while leaving // apply cache_control:ephemeral to the stable prefix (before) while leaving
// the dynamic suffix (after) uncached. // the dynamic suffix (after) uncached.
const messageContent = promptParts const messageContent = buildUnitPromptMessageContent(prompt, promptParts);
? [
{
type: "text",
text: promptParts.before,
cache_control: { type: "ephemeral" },
},
{ type: "text", text: promptParts.after },
]
: prompt;
await pi.sendMessage( await pi.sendMessage(
{ customType: "sf-auto", content: messageContent, display: s.verbose }, { customType: "sf-auto", content: messageContent, display: s.verbose },
{ triggerTurn: true }, { triggerTurn: true },

View file

@ -301,7 +301,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [
}, },
{ {
cmd: "rubber-duck", cmd: "rubber-duck",
desc: "Request constructive code/design review from a rubber-duck subagent (RUBBER_DUCK flag)", desc: "Dispatch a rubber-duck subagent for constructive pre-implementation review (alias: review-code)",
}, },
{ {
cmd: "delegate", cmd: "delegate",

View file

@ -613,25 +613,47 @@ async function handleKeepAlive(args, ctx) {
// ─── /rubber-duck ──────────────────────────────────────────────────────────── // ─── /rubber-duck ────────────────────────────────────────────────────────────
async function handleRubberDuckCommand(topic, ctx, _pi) { async function handleRubberDuckCommand(topic, ctx, _pi) {
if (!getExperimentalFlag("rubber_duck")) { const { execSync } = await import("node:child_process");
ctx.ui.notify( const root = projectRoot();
"RUBBER_DUCK is not enabled. Run /experimental on rubber_duck to enable.",
"warning", // Gather git diff for context (staged + unstaged, capped to avoid token bloat)
); let diff = "";
return;
}
const prompt = topic
? `Rubber-duck review requested: ${topic}\n\nPlease review this as a constructive critic: identify risks, edge cases, missing tests, and improvements. Be direct and concise.`
: "Please give constructive feedback on the current code changes or design. Identify risks, edge cases, missing tests, and improvements.";
ctx.ui.notify(
"Starting rubber-duck review… (RUBBER_DUCK agent is constructive, not adversarial)",
"info",
);
try { try {
await ctx.sendMessage?.(prompt); const staged = execSync("git diff --cached --stat 2>/dev/null || true", {
cwd: root,
encoding: "utf-8",
}).trim();
const unstaged = execSync("git diff --stat 2>/dev/null || true", {
cwd: root,
encoding: "utf-8",
}).trim();
if (staged || unstaged) {
const fullDiff = execSync(
"git diff --cached 2>/dev/null; git diff 2>/dev/null",
{ cwd: root, encoding: "utf-8" },
).slice(0, 8000);
diff = `\n\n## Current diff (truncated to 8 kB)\n\n\`\`\`diff\n${fullDiff}\n\`\`\``;
}
} catch {
// diff unavailable — not a hard failure
}
const focus = topic ? `Focus on: ${topic}\n\n` : "";
const reviewPrompt =
`Dispatch a \`rubber-duck\` subagent to review the current plan or changes before proceeding. ` +
`Use the \`subagent\` tool with \`agent: "rubber-duck"\`.\n\n` +
`${focus}` +
`Ask the rubber-duck agent to identify blocking issues, non-blocking issues, and suggestions. ` +
`After the subagent returns, summarise the verdict and any blocking findings in one short paragraph. ` +
`Do not proceed with implementation until the user acknowledges blocking findings.` +
diff;
ctx.ui.notify("Dispatching rubber-duck review…", "info");
try {
await ctx.sendMessage?.(reviewPrompt);
} catch { } catch {
ctx.ui.notify( ctx.ui.notify(
"Could not start rubber-duck session. Try typing your review request directly.", "Could not dispatch rubber-duck. Try: subagent agent=rubber-duck task='review current changes'",
"warning", "warning",
); );
} }

View file

@ -741,6 +741,66 @@ export class SFDashboardOverlay {
); );
} }
} }
// UOK Health section — aligns with headless status output
if (this.uokDiagnostics && this.uokDiagnostics.issues.length > 0) {
lines.push(blank());
lines.push(hr());
lines.push(row(th.fg("text", th.bold("UOK Health"))));
lines.push(blank());
// Compact summary line matching headless format
lines.push(
row(
th.fg(
this.uokDiagnostics.verdict === "degraded"
? "error"
: this.uokDiagnostics.verdict === "attention"
? "warning"
: "dim",
`Verdict: ${this.uokDiagnostics.verdict} (${this.uokDiagnostics.classification})`,
),
),
);
lines.push(blank());
// Issue list
for (const issue of this.uokDiagnostics.issues) {
const icon =
issue.severity === "error"
? th.fg("error", "✗")
: th.fg("warning", "⚠");
lines.push(row(` ${icon} ${th.fg("text", issue.code)}`));
lines.push(row(th.fg("dim", ` ${issue.message}`)));
}
// Recommendations
if (this.uokDiagnostics.recommendations.length > 0) {
lines.push(blank());
for (const rec of this.uokDiagnostics.recommendations) {
lines.push(row(th.fg("dim", `${rec}`)));
}
}
// Signals table
if (this.uokDiagnostics.signals) {
lines.push(blank());
lines.push(row(th.fg("dim", "Signals:")));
for (const [key, value] of Object.entries(
this.uokDiagnostics.signals,
)) {
const signalColor =
value === "ok" ||
value === "active" ||
value === "consistent" ||
value === "clear"
? "success"
: value === "unknown"
? "dim"
: "warning";
lines.push(
row(
` ${th.fg(signalColor, "●")} ${th.fg("text", key)}: ${th.fg(signalColor, String(value))}`,
),
);
}
}
}
// Environment health section (#1221) — only show issues // Environment health section (#1221) — only show issues
const envResults = runEnvironmentChecks( const envResults = runEnvironmentChecks(
this.dashData.basePath || process.cwd(), this.dashData.basePath || process.cwd(),

View file

@ -31,18 +31,12 @@ export const EXPERIMENTAL_FLAGS = {
"STATUS_LINE — run a user-defined script to feed a custom footer status chip", "STATUS_LINE — run a user-defined script to feed a custom footer status chip",
show_file: show_file:
"SHOW_FILE — show_file tool renders code snippets inline in the timeline", "SHOW_FILE — show_file tool renders code snippets inline in the timeline",
ask_elicitation:
"ASK_USER_ELICITATION — structured form/select UI replaces plain ask_user",
multi_turn_agents:
"MULTI_TURN_AGENTS — persistent subagents that accept follow-up messages",
extensions: extensions:
"EXTENSIONS — user-installable extensions via marketplace npm install", "EXTENSIONS — user-installable extensions via marketplace npm install",
configure_agent: configure_agent:
"CONFIGURE_COPILOT_AGENT — interactive wizard for MCP servers and agents", "CONFIGURE_COPILOT_AGENT — interactive wizard for MCP servers and agents",
background_sessions: background_sessions:
"BACKGROUND_SESSIONS — concurrent sessions with background switching", "BACKGROUND_SESSIONS — concurrent sessions with background switching",
rubber_duck:
"RUBBER_DUCK — constructive feedback subagent on code and designs",
prompt_frame: prompt_frame:
"PROMPT_FRAME — decorative border rendered above the input prompt", "PROMPT_FRAME — decorative border rendered above the input prompt",
streamer_mode: streamer_mode:

View file

@ -107,6 +107,8 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 30, speed: 30,
longContext: 80, longContext: 80,
instruction: 90, instruction: 90,
// Agentic: Claude Opus is built around extended tool-use loops.
agentic: 95,
}, },
"claude-sonnet-4-6": { "claude-sonnet-4-6": {
coding: 85, coding: 85,
@ -116,6 +118,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 60, speed: 60,
longContext: 75, longContext: 75,
instruction: 85, instruction: 85,
agentic: 92,
}, },
"claude-sonnet-4-5-20250514": { "claude-sonnet-4-5-20250514": {
coding: 85, coding: 85,
@ -125,6 +128,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 60, speed: 60,
longContext: 75, longContext: 75,
instruction: 85, instruction: 85,
agentic: 90,
}, },
"claude-3-5-sonnet-latest": { "claude-3-5-sonnet-latest": {
coding: 82, coding: 82,
@ -134,6 +138,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 62, speed: 62,
longContext: 70, longContext: 70,
instruction: 82, instruction: 82,
agentic: 85,
}, },
"claude-haiku-4-5": { "claude-haiku-4-5": {
coding: 60, coding: 60,
@ -143,6 +148,9 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 95, speed: 95,
longContext: 50, longContext: 50,
instruction: 75, instruction: 75,
// Haiku follows tool-use contracts but is less reliable than Sonnet on
// long agentic loops.
agentic: 75,
}, },
"claude-3-5-haiku-latest": { "claude-3-5-haiku-latest": {
coding: 60, coding: 60,
@ -152,6 +160,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 95, speed: 95,
longContext: 50, longContext: 50,
instruction: 75, instruction: 75,
agentic: 75,
}, },
"claude-3-haiku-20240307": { "claude-3-haiku-20240307": {
coding: 50, coding: 50,
@ -163,6 +172,7 @@ export const MODEL_CAPABILITY_PROFILES = {
instruction: 65, instruction: 65,
}, },
"claude-3-opus-latest": { "claude-3-opus-latest": {
agentic: 88,
coding: 90, coding: 90,
debugging: 85, debugging: 85,
research: 82, research: 82,
@ -234,6 +244,8 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 40, speed: 40,
longContext: 85, longContext: 85,
instruction: 90, instruction: 90,
// GPT-5 family is strongly agentic per OpenAI's tool-use evals.
agentic: 92,
}, },
"gpt-5-mini": { "gpt-5-mini": {
coding: 62, coding: 62,
@ -261,6 +273,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 35, speed: 35,
longContext: 88, longContext: 88,
instruction: 92, instruction: 92,
agentic: 94,
}, },
"gpt-5.1": { "gpt-5.1": {
coding: 93, coding: 93,
@ -270,6 +283,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 42, speed: 42,
longContext: 86, longContext: 86,
instruction: 91, instruction: 91,
agentic: 92,
}, },
"gpt-5.1-codex-max": { "gpt-5.1-codex-max": {
coding: 90, coding: 90,
@ -279,6 +293,9 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 55, speed: 55,
longContext: 75, longContext: 75,
instruction: 85, instruction: 85,
// Codex-tuned models are agentic-capable but not as reliable as the
// flagship gpt-5/5.x lineup for long tool-use loops.
agentic: 80,
}, },
"gpt-5.1-codex-mini": { "gpt-5.1-codex-mini": {
coding: 65, coding: 65,
@ -288,6 +305,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 88, speed: 88,
longContext: 48, longContext: 48,
instruction: 72, instruction: 72,
agentic: 55,
}, },
"gpt-5.2": { "gpt-5.2": {
coding: 93, coding: 93,
@ -297,6 +315,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 42, speed: 42,
longContext: 87, longContext: 87,
instruction: 91, instruction: 91,
agentic: 92,
}, },
"gpt-5.2-codex": { "gpt-5.2-codex": {
coding: 93, coding: 93,
@ -306,6 +325,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 50, speed: 50,
longContext: 78, longContext: 78,
instruction: 88, instruction: 88,
agentic: 82,
}, },
"gpt-5.3-codex": { "gpt-5.3-codex": {
coding: 94, coding: 94,
@ -315,6 +335,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 50, speed: 50,
longContext: 80, longContext: 80,
instruction: 89, instruction: 89,
agentic: 84,
}, },
"gpt-5.3-codex-spark": { "gpt-5.3-codex-spark": {
coding: 68, coding: 68,
@ -324,6 +345,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 90, speed: 90,
longContext: 50, longContext: 50,
instruction: 74, instruction: 74,
agentic: 55,
}, },
"gpt-5.4": { "gpt-5.4": {
coding: 95, coding: 95,
@ -333,6 +355,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 42, speed: 42,
longContext: 88, longContext: 88,
instruction: 92, instruction: 92,
agentic: 94,
}, },
"gpt-5.4-mini": { "gpt-5.4-mini": {
coding: 80, coding: 80,
@ -342,6 +365,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 72, speed: 72,
longContext: 72, longContext: 72,
instruction: 80, instruction: 80,
agentic: 80,
}, },
// GPT-5.5 scores are relative to the existing gpt-5.4 profile and backed by // GPT-5.5 scores are relative to the existing gpt-5.4 profile and backed by
// OpenAI's 2026-04-23 published eval deltas across coding, tool use, and long context. // OpenAI's 2026-04-23 published eval deltas across coding, tool use, and long context.
@ -354,6 +378,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 42, speed: 42,
longContext: 90, longContext: 90,
instruction: 93, instruction: 93,
agentic: 95,
}, },
// ── OpenAI o-series (reasoning-first) ────────────────────────────────────── // ── OpenAI o-series (reasoning-first) ──────────────────────────────────────
o1: { o1: {
@ -410,6 +435,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 48, speed: 48,
longContext: 98, longContext: 98,
instruction: 82, instruction: 82,
agentic: 85,
}, },
"gemini-3-pro-preview": { "gemini-3-pro-preview": {
coding: 82, coding: 82,
@ -419,6 +445,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 50, speed: 50,
longContext: 96, longContext: 96,
instruction: 82, instruction: 82,
agentic: 85,
}, },
"gemini-3-flash-preview": { "gemini-3-flash-preview": {
coding: 62, coding: 62,
@ -428,6 +455,10 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 88, speed: 88,
longContext: 88, longContext: 88,
instruction: 72, instruction: 72,
// Gemini Flash follows tool contracts but is occasionally chatty in
// agentic loops; mid-tier so it doesn't dominate execute-task vs
// a Sonnet/Opus/K2.6 alternative.
agentic: 70,
}, },
"gemini-3.1-flash-lite-preview": { "gemini-3.1-flash-lite-preview": {
coding: 55, coding: 55,
@ -583,6 +614,10 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 70, speed: 70,
longContext: 60, longContext: 60,
instruction: 80, instruction: 80,
// Agentic: code-completion tuning. Refuses agentic tasks with "I'm sorry,
// I don't have the necessary tools" (M001-6377a4/S04/T02, 2026-05-12).
// Should not be routed to execute-task without explicit operator pin.
agentic: 25,
}, },
"ministral-8b-latest": { "ministral-8b-latest": {
coding: 55, coding: 55,
@ -655,6 +690,9 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 65, speed: 65,
longContext: 65, longContext: 65,
instruction: 80, instruction: 80,
// Agentic: Devstral series is coding-completion-tuned; tool-use is not
// the design target. Penalize so execute-task routing avoids it.
agentic: 30,
}, },
"devstral-medium-latest": { "devstral-medium-latest": {
coding: 78, coding: 78,
@ -664,6 +702,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 75, speed: 75,
longContext: 60, longContext: 60,
instruction: 75, instruction: 75,
agentic: 30,
}, },
"devstral-medium-2507": { "devstral-medium-2507": {
coding: 78, coding: 78,
@ -673,6 +712,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 75, speed: 75,
longContext: 60, longContext: 60,
instruction: 75, instruction: 75,
agentic: 30,
}, },
"devstral-small-2505": { "devstral-small-2505": {
coding: 60, coding: 60,
@ -682,6 +722,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 90, speed: 90,
longContext: 45, longContext: 45,
instruction: 65, instruction: 65,
agentic: 30,
}, },
"devstral-small-2507": { "devstral-small-2507": {
coding: 60, coding: 60,
@ -691,6 +732,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 90, speed: 90,
longContext: 45, longContext: 45,
instruction: 65, instruction: 65,
agentic: 30,
}, },
"labs-devstral-small-2512": { "labs-devstral-small-2512": {
coding: 65, coding: 65,
@ -700,6 +742,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 88, speed: 88,
longContext: 60, longContext: 60,
instruction: 68, instruction: 68,
agentic: 30,
}, },
// ── Zhipu AI (GLM) ───────────────────────────────────────────────────────── // ── Zhipu AI (GLM) ─────────────────────────────────────────────────────────
"glm-5": { "glm-5": {
@ -774,6 +817,8 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 58, speed: 58,
longContext: 86, longContext: 86,
instruction: 78, instruction: 78,
// Agentic: qwen3-coder is tuned for code completion, not tool-use loops.
agentic: 40,
}, },
"qwen3-coder-next": { "qwen3-coder-next": {
coding: 82, coding: 82,
@ -783,6 +828,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 70, speed: 70,
longContext: 86, longContext: 86,
instruction: 76, instruction: 76,
agentic: 40,
}, },
"qwen3-next:80b": { "qwen3-next:80b": {
coding: 70, coding: 70,
@ -802,6 +848,9 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 55, speed: 55,
longContext: 86, longContext: 86,
instruction: 84, instruction: 84,
// Agentic: K2.6 is the pinned default for the autonomous-solver role
// (ADR-0079) — refusal-resistant and follows tool-use contracts.
agentic: 90,
}, },
"kimi-for-coding": { "kimi-for-coding": {
coding: 88, coding: 88,
@ -811,6 +860,9 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 55, speed: 55,
longContext: 86, longContext: 86,
instruction: 84, instruction: 84,
// `kimi-for-coding` is an alias for K2.6 on the Kimi Code provider
// (memory: bayesian-blender/benchmark-selector both canonicalize it).
agentic: 90,
}, },
"kimi-k2-thinking": { "kimi-k2-thinking": {
coding: 86, coding: 86,
@ -820,8 +872,15 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 30, speed: 30,
longContext: 86, longContext: 86,
instruction: 84, instruction: 84,
agentic: 88,
}, },
// ── MiniMax ─────────────────────────────────────────────────────────────── // ── MiniMax ───────────────────────────────────────────────────────────────
// Profiles ordered by generation. Older M2.1 generation gets distinctly
// lower agentic + capability scores: the M2.1 stuck-checkpoint loop on
// 2026-05-13 (infra repo) traced back to M2.1 being aliased to M2.7's
// profile, winning execute-task on cost, then failing to follow the
// checkpoint contract reliably across 60+ tool calls. (See
// self-feedback sf-mp37kjmo-1mfuru.)
"MiniMax-M2.7": { "MiniMax-M2.7": {
coding: 84, coding: 84,
debugging: 80, debugging: 80,
@ -830,6 +889,7 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 52, speed: 52,
longContext: 84, longContext: 84,
instruction: 82, instruction: 82,
agentic: 78,
}, },
"MiniMax-M2.7-highspeed": { "MiniMax-M2.7-highspeed": {
coding: 82, coding: 82,
@ -839,6 +899,47 @@ export const MODEL_CAPABILITY_PROFILES = {
speed: 72, speed: 72,
longContext: 84, longContext: 84,
instruction: 80, instruction: 80,
agentic: 76,
},
"MiniMax-M2.5": {
// Distinct profile (previously aliased to M2.7 — overclaimed).
coding: 78,
debugging: 74,
research: 72,
reasoning: 78,
speed: 55,
longContext: 82,
instruction: 76,
// Mid agentic — better than coding-completion-only models but
// noticeably less reliable than current-gen agentic models.
agentic: 60,
},
"MiniMax-M2.1": {
// Distinct profile (previously aliased to M2.7 — overclaimed).
// M2.1 has demonstrated unreliable tool-use loops in production
// (M001-6377a4 / 1-ci-build-pipeline parallel-research, 2026-05-13:
// 60+ checkpoint calls with shifting unitId claims). Penalize the
// agentic axis so execute-task routing avoids it absent operator
// override.
coding: 72,
debugging: 66,
research: 64,
reasoning: 70,
speed: 60,
longContext: 78,
instruction: 72,
agentic: 40,
},
"MiniMax-M2": {
// Earliest of the M2.x line — older still.
coding: 68,
debugging: 60,
research: 60,
reasoning: 66,
speed: 62,
longContext: 76,
instruction: 68,
agentic: 35,
}, },
}; };
const MODEL_CAPABILITY_ALIASES = { const MODEL_CAPABILITY_ALIASES = {
@ -864,10 +965,23 @@ const MODEL_CAPABILITY_ALIASES = {
"kimi-for-coding": "kimi-k2.6", "kimi-for-coding": "kimi-k2.6",
"kimi-k2.6:cloud": "kimi-k2.6", "kimi-k2.6:cloud": "kimi-k2.6",
"kimi-k2.6-cloud": "kimi-k2.6", "kimi-k2.6-cloud": "kimi-k2.6",
"minimax-m2": "MiniMax-M2.7", // Each MiniMax generation now has its own profile — previously they all
"minimax-m2.1": "MiniMax-M2.7", // aliased to MiniMax-M2.7, which let older/weaker models inherit current
"minimax-m2.5": "MiniMax-M2.7", // capability scores and win cost tie-breaks on execute-task. The aliases
// below normalize provider-prefixed and casing variants to the canonical
// per-generation profile, NOT to the current generation.
"minimax-m2": "MiniMax-M2",
"minimax/MiniMax-M2": "MiniMax-M2",
"minimax/minimax-m2": "MiniMax-M2",
"minimax-m2.1": "MiniMax-M2.1",
"minimax/MiniMax-M2.1": "MiniMax-M2.1",
"minimax/minimax-m2.1": "MiniMax-M2.1",
"minimax-m2.5": "MiniMax-M2.5",
"minimax/MiniMax-M2.5": "MiniMax-M2.5",
"minimax/minimax-m2.5": "MiniMax-M2.5",
"minimax-m2.7": "MiniMax-M2.7", "minimax-m2.7": "MiniMax-M2.7",
"minimax/MiniMax-M2.7": "MiniMax-M2.7",
"minimax/minimax-m2.7": "MiniMax-M2.7",
"mistral-large-3:675b": "mistral-large-latest", "mistral-large-3:675b": "mistral-large-latest",
"ministral-3:3b": "ministral-3b-latest", "ministral-3:3b": "ministral-3b-latest",
"ministral-3:8b": "ministral-8b-latest", "ministral-3:8b": "ministral-8b-latest",
@ -888,18 +1002,32 @@ const MODEL_CAPABILITY_ALIASES = {
// ─── Base Task Requirements Data Table ─────────────────────────────────────── // ─── Base Task Requirements Data Table ───────────────────────────────────────
// Per-unit-type base requirement vectors. Weights indicate how important each // Per-unit-type base requirement vectors. Weights indicate how important each
// capability dimension is for this unit type. // capability dimension is for this unit type.
//
// The `agentic` dimension represents the model's reliability at multi-turn
// tool-use loops (does it follow the tool-use contract? does it refuse the
// task? does it call the checkpoint tool when asked?). It is weighted high
// for any unit type that actually uses tools at runtime — execute-task most
// of all. See ADR-0079 for the motivation: a Codestral-style refusal on
// execute-task in M001-6377a4/S04/T02 (2026-05-12) traced back to the router
// having no agentic axis, so a coding-completion model out-scored agentic
// alternatives on coding/instruction.
export const BASE_REQUIREMENTS = { export const BASE_REQUIREMENTS = {
"execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 }, "execute-task": {
coding: 0.9,
instruction: 0.7,
speed: 0.3,
agentic: 0.85,
},
"research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 }, "research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
"research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 }, "research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
"plan-milestone": { reasoning: 0.9, coding: 0.5 }, "plan-milestone": { reasoning: 0.9, coding: 0.5, agentic: 0.6 },
"plan-slice": { reasoning: 0.9, coding: 0.5 }, "plan-slice": { reasoning: 0.9, coding: 0.5, agentic: 0.6 },
"replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5 }, "replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5, agentic: 0.6 },
"reassess-roadmap": { reasoning: 0.9, research: 0.5 }, "reassess-roadmap": { reasoning: 0.9, research: 0.5, agentic: 0.4 },
"complete-slice": { instruction: 0.8, speed: 0.7 }, "complete-slice": { instruction: 0.8, speed: 0.7, agentic: 0.6 },
"run-uat": { instruction: 0.7, speed: 0.8 }, "run-uat": { instruction: 0.7, speed: 0.8, agentic: 0.6 },
"discuss-milestone": { reasoning: 0.6, instruction: 0.7 }, "discuss-milestone": { reasoning: 0.6, instruction: 0.7, agentic: 0.4 },
"complete-milestone": { instruction: 0.8, reasoning: 0.5 }, "complete-milestone": { instruction: 0.8, reasoning: 0.5, agentic: 0.5 },
}; };
// ─── Public API ────────────────────────────────────────────────────────────── // ─── Public API ──────────────────────────────────────────────────────────────
/** /**
@ -1101,6 +1229,7 @@ export function resolveModelForComplexity(
unitType, unitType,
taskMetadata, taskMetadata,
capabilityOverrides, capabilityOverrides,
stickyHint,
) { ) {
// If no phase config or routing disabled, pass through // If no phase config or routing disabled, pass through
if (!phaseConfig || !routingConfig.enabled) { if (!phaseConfig || !routingConfig.enabled) {
@ -1175,16 +1304,41 @@ export function resolveModelForComplexity(
if (winner) { if (winner) {
const capScores = {}; const capScores = {};
for (const s of scored) capScores[s.modelId] = s.score; for (const s of scored) capScores[s.modelId] = s.score;
const fallbacks = buildFallbackChain(winner.modelId, phaseConfig); // Slice-sticky preference: if a model previously succeeded on a
// sibling unit in this slice AND it is still eligible in the
// current tier AND its capability score is within STICKY_WINDOW of
// the winner, prefer it. Stops within-slice routing thrash where
// T01 → gemini-flash and T02 → codestral on the same slice.
const STICKY_WINDOW_POINTS = 8;
const stickyId = (() => {
if (!stickyHint?.id) return null;
const stickyKey = stickyHint.provider
? `${stickyHint.provider}/${stickyHint.id}`
: stickyHint.id;
// Match either "provider/model" or bare model id in the eligible list.
const found = scored.find(
(s) => s.modelId === stickyKey || s.modelId.endsWith(`/${stickyHint.id}`),
);
if (!found) return null;
if (winner.score - found.score > STICKY_WINDOW_POINTS) return null;
return found.modelId;
})();
const selectedId = stickyId ?? winner.modelId;
const selectedScore = (
scored.find((s) => s.modelId === selectedId) ?? winner
).score;
const fallbacks = buildFallbackChain(selectedId, phaseConfig);
return { return {
modelId: winner.modelId, modelId: selectedId,
fallbacks, fallbacks,
tier: requestedTier, tier: requestedTier,
wasDowngraded: true, wasDowngraded: true,
reason: `capability-scored: ${winner.modelId} (${winner.score.toFixed(1)}) for ${unitType}`, reason: stickyId
? `slice-sticky: ${selectedId} (${selectedScore.toFixed(1)}, within ${STICKY_WINDOW_POINTS}pt of capability winner) for ${unitType}`
: `capability-scored: ${selectedId} (${selectedScore.toFixed(1)}) for ${unitType}`,
capabilityScores: capScores, capabilityScores: capScores,
taskRequirements: requirements, taskRequirements: requirements,
selectionMethod: "capability-scored", selectionMethod: stickyId ? "slice-sticky" : "capability-scored",
}; };
} }
} }

View file

@ -137,6 +137,11 @@ export function reorderForCaching(prompt) {
* static+semi-static prefix can be marked with cache_control: ephemeral on * static+semi-static prefix can be marked with cache_control: ephemeral on
* Anthropic-compatible providers. * Anthropic-compatible providers.
* *
* Purpose: keep SF autonomous prompt prefixes byte-stable across adjacent task
* dispatches so provider prompt caches can reuse expensive context.
*
* Consumer: auto/phases-unit.js before runUnit dispatches an autonomous unit.
*
* Returns `{before: string, after: string}` where: * Returns `{before: string, after: string}` where:
* - `before` = preamble + all static + all semi-static sections (cache this) * - `before` = preamble + all static + all semi-static sections (cache this)
* - `after` = all dynamic sections (do not cache) * - `after` = all dynamic sections (do not cache)

View file

@ -596,3 +596,103 @@ function isPidAlive(pid) {
return false; return false;
} }
} }
/**
 * Public liveness probe for a session PID.
 *
 * Why: callers outside this module — specifically auto-start's
 * prompt-to-kill flow — need to know whether the existingPid reported by a
 * failed acquireSessionLock still refers to a running process before
 * offering to terminate it.
 *
 * @param {number|string} pid - PID to probe; coerced via Number().
 * @returns {boolean} true iff the process currently responds as alive.
 */
export function isSessionPidAlive(pid) {
  const numericPid = Number(pid);
  return isPidAlive(numericPid);
}
/**
 * Terminate an existing SF auto session by PID.
 *
 * Why: when acquireSessionLock reports `{ acquired: false, existingPid }`
 * because another SF process is holding the lock, we want a one-call helper
 * that an interactive caller can invoke after confirming with the user. The
 * helper sends SIGTERM, polls for the process to exit, escalates to SIGKILL
 * after the grace window, and waits a short tail for the kernel to reap the
 * PID so a subsequent acquireSessionLock retry sees a dead PID and proceeds
 * down the stale-lock recovery path.
 *
 * Returns `{ terminated: boolean, escalated: boolean, alreadyDead: boolean }`.
 * `terminated` is true iff the PID is no longer alive when the call returns.
 * `escalated` is true iff SIGKILL was needed because SIGTERM did not produce
 * an exit within `gracePeriodMs`.
 *
 * Consumer: auto-start's prompt-to-kill flow. Not part of the normal
 * autonomous loop — only invoked after explicit operator consent.
 *
 * @param {number} pid - The PID to terminate.
 * @param {object} [options]
 * @param {number} [options.gracePeriodMs=5000] - How long to wait between
 *   SIGTERM and SIGKILL.
 * @param {number} [options.reapWaitMs=1000] - How long to wait after the
 *   final kill signal for the kernel to reap.
 * @param {number} [options.pollIntervalMs=100] - Poll interval used while
 *   waiting for exit.
 */
export async function terminateExistingSession(pid, options = {}) {
  const numericPid = Number(pid);
  if (!Number.isInteger(numericPid) || numericPid <= 0) {
    // Invalid input — nothing to signal. Kept as terminated:false to match
    // the existing caller-visible contract for malformed PIDs.
    return { terminated: false, escalated: false, alreadyDead: true };
  }
  if (numericPid === process.pid) {
    // Refuse to terminate ourselves — would deadlock the caller.
    return { terminated: false, escalated: false, alreadyDead: false };
  }
  if (!isPidAlive(numericPid)) {
    return { terminated: true, escalated: false, alreadyDead: true };
  }
  const gracePeriodMs = Number(options.gracePeriodMs ?? 5000);
  const reapWaitMs = Number(options.reapWaitMs ?? 1000);
  const pollIntervalMs = Math.max(50, Number(options.pollIntervalMs ?? 100));

  // Phase 1: polite SIGTERM, then wait out the grace window.
  const termOutcome = sendSignal(numericPid, "SIGTERM");
  if (termOutcome === "gone") {
    // Process exited between the alive check and the kill.
    return { terminated: true, escalated: false, alreadyDead: true };
  }
  if (termOutcome === "denied") {
    // Not ours to kill — surface as not-terminated.
    return { terminated: false, escalated: false, alreadyDead: false };
  }
  if (await waitUntilDead(numericPid, gracePeriodMs, pollIntervalMs)) {
    return { terminated: true, escalated: false, alreadyDead: false };
  }

  // Phase 2: grace expired — escalate to SIGKILL and wait for the reap.
  const killOutcome = sendSignal(numericPid, "SIGKILL");
  if (killOutcome === "gone") {
    return { terminated: true, escalated: true, alreadyDead: false };
  }
  if (killOutcome === "denied") {
    return { terminated: false, escalated: true, alreadyDead: false };
  }
  const reaped = await waitUntilDead(numericPid, reapWaitMs, pollIntervalMs);
  return { terminated: reaped, escalated: true, alreadyDead: false };
}

/**
 * Send a signal to a PID, normalizing the two expected failure modes.
 *
 * Why a helper: the SIGTERM and SIGKILL paths share identical ESRCH/EPERM
 * handling; any other error is unexpected and rethrown.
 *
 * @returns {"sent"|"gone"|"denied"} "gone" = ESRCH (already exited),
 *   "denied" = EPERM (not permitted to signal).
 */
function sendSignal(pid, signal) {
  try {
    process.kill(pid, signal);
    return "sent";
  } catch (err) {
    if (err?.code === "ESRCH") return "gone";
    if (err?.code === "EPERM") return "denied";
    throw err;
  }
}

/**
 * Poll until the PID stops being alive or the timeout elapses.
 *
 * @returns {Promise<boolean>} true iff the PID is dead when this resolves
 *   (includes a final liveness check after the deadline passes).
 */
async function waitUntilDead(pid, timeoutMs, pollIntervalMs) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (!isPidAlive(pid)) return true;
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
  return !isPidAlive(pid);
}

View file

@ -0,0 +1,154 @@
/**
* slice-routing-cache.js per-slice sticky-model routing cache.
*
* Why: model routing is currently computed per-unit, so the executor can flip
* between models within a single slice (M001-6377a4/S04 routed T01 to
 * gemini-3-flash-preview, then T02 to codestral-latest — the second was
 * unfit and refused the task; see ADR-0079). Once a model has successfully
* completed work on a slice, prefer it for the slice's sibling units unless
* a hard mismatch forces a switch.
*
* Contract:
* - Cache is small JSON keyed by sliceId. Each entry stores provider/id and
* timestamps so stale entries can be aged out.
* - Best-effort: read/write errors are swallowed; routing always has a
* fallback through the capability scorer.
* - Only successful outcomes (`continue` or `complete`) write to the cache.
* Refusal/blocker outcomes clear the entry so a failing model does not
* re-attach to the slice.
*
* Consumer: auto-model-selection.js reads before calling
* resolveModelForComplexity; auto/phases-unit.js writes after a successful
* checkpoint and clears on `executor-refused`.
*/
import { existsSync, mkdirSync, readFileSync, unlinkSync } from "node:fs";
import { dirname, join } from "node:path";
import { atomicWriteSync } from "./atomic-write.js";
import { sfRuntimeRoot } from "./paths.js";
const CACHE_FILE = "slice-routing.json";
const DEFAULT_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
// Absolute path of the slice-routing cache file under the SF runtime root.
function cachePath(basePath) {
  const runtimeRoot = sfRuntimeRoot(basePath);
  return join(runtimeRoot, CACHE_FILE);
}
/**
 * Derive the slice scope from a unit id.
 *
 * Conventional SF unit-id grammar:
 * - execute task:          "<milestoneId>/<sliceId>/<taskId>" → "<milestoneId>/<sliceId>"
 * - plan / complete slice: "<milestoneId>/<sliceId>"          → "<milestoneId>/<sliceId>"
 * - milestone-level units: "<milestoneId>"                    → "<milestoneId>" (no slice scope)
 *
 * @param {string} unitId
 * @returns {string|null} the slice scope, or null when the unit id is
 *   missing or unparseable.
 */
export function extractSliceScope(unitId) {
  if (typeof unitId !== "string" || unitId === "") return null;
  const segments = unitId.split("/").filter((part) => part.length > 0);
  switch (segments.length) {
    case 0:
      return null;
    case 1:
      // Milestone-only id — the milestone itself is the scope.
      return segments[0];
    default:
      return segments.slice(0, 2).join("/");
  }
}
// Load the cache file; a missing or corrupt file reads as an empty cache so
// routing always has a clean fallback through the capability scorer.
function readCache(basePath) {
  const file = cachePath(basePath);
  if (!existsSync(file)) {
    return {};
  }
  try {
    const raw = readFileSync(file, "utf-8");
    return JSON.parse(raw);
  } catch {
    // Unparseable JSON — treat as empty rather than failing the dispatch.
    return {};
  }
}
// Persist the cache atomically; failures are swallowed because the cache is
// strictly best-effort (a lost write only costs one sticky-routing hint).
function writeCache(basePath, data) {
  const file = cachePath(basePath);
  try {
    mkdirSync(dirname(file), { recursive: true });
    const serialized = JSON.stringify(data, null, 2);
    atomicWriteSync(file, serialized);
  } catch {
    // best-effort
  }
}
/**
 * Record the model that successfully handled a unit. The slice scope is
 * derived from the unit id; subsequent units in the same slice will see this
 * entry as the sticky hint.
 *
 * @param {string} basePath
 * @param {string} unitType
 * @param {string} unitId
 * @param {{ provider?: string, id: string }} model
 */
export function recordSliceRouting(basePath, unitType, unitId, model) {
  if (!basePath) return;
  if (!model?.id) return;
  const sliceId = extractSliceScope(unitId);
  if (sliceId === null) return;
  const entry = {
    provider: String(model.provider ?? ""),
    id: String(model.id),
    // Timestamp lets readStickyModelForUnit age out stale entries.
    ts: new Date().toISOString(),
    lastUnitType: String(unitType ?? ""),
    lastUnitId: String(unitId ?? ""),
  };
  const cache = readCache(basePath);
  cache[sliceId] = entry;
  writeCache(basePath, cache);
}
/**
 * Look up the sticky model for the slice that contains this unit. Returns
 * null when there is no entry, when it's older than maxAgeMs, or when the
 * cache cannot be read.
 *
 * @param {string} basePath
 * @param {string} unitType
 * @param {string} unitId
 * @param {object} [options]
 * @param {number} [options.maxAgeMs=7d]
 * @returns {{ provider: string, id: string } | null}
 */
export function readStickyModelForUnit(basePath, unitType, unitId, options = {}) {
  if (!basePath) return null;
  const sliceId = extractSliceScope(unitId);
  if (!sliceId) return null;
  const entry = readCache(basePath)[sliceId];
  if (!entry?.id) return null;
  const maxAgeMs = Number(options.maxAgeMs ?? DEFAULT_MAX_AGE_MS);
  if (entry.ts) {
    // An unparseable timestamp yields NaN — treated as not-expired, matching
    // the best-effort posture of the rest of this module.
    const ageMs = Date.now() - new Date(entry.ts).getTime();
    const expired = Number.isFinite(ageMs) && ageMs > maxAgeMs;
    if (expired) return null;
  }
  return {
    provider: String(entry.provider ?? ""),
    id: String(entry.id),
  };
}
/**
 * Evict the sticky entry for the slice containing this unit. Called when the
 * model attached to the slice refuses or hits a hard mismatch, so the next
 * dispatch falls back to the capability scorer instead of re-pinning the
 * broken model.
 *
 * @param {string} basePath
 * @param {string} unitId
 */
export function clearSliceRoutingForUnit(basePath, unitId) {
  if (!basePath) return;
  const sliceId = extractSliceScope(unitId);
  if (!sliceId) return;
  const cache = readCache(basePath);
  if (!Object.hasOwn(cache, sliceId)) return;
  delete cache[sliceId];
  if (Object.keys(cache).length > 0) {
    writeCache(basePath, cache);
    return;
  }
  // Last entry removed — delete the file instead of persisting "{}".
  try {
    unlinkSync(cachePath(basePath));
  } catch {
    // best-effort
  }
}
/**
 * Test/debug only — read the entire cache as a raw slice-id -> entry map.
 * Production callers should use readStickyModelForUnit instead, which
 * applies the TTL and shape-normalization logic.
 */
export function _readCacheForTests(basePath) {
  return readCache(basePath);
}

View file

@ -0,0 +1,467 @@
/**
* Dashboard Overlay UOK Diagnostics Tests
*
* Purpose: Verify that SFDashboardOverlay consumes writeUokDiagnostics output
* and renders it consistently with the headless status command.
*
* Consumer: TUI users who expect the dashboard to surface the same UOK health
* information as `sf status` / headless query.
*/
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// ─── Hoisted mocks ─────────────────────────────────────────────────────────
// Canned writeUokDiagnostics payloads — one per verdict the overlay renders.
// Hoisted so the vi.mock factories below can close over them.
const mockDiagnostics = vi.hoisted(() => ({
  // Healthy baseline: all signals ok, no issues, no recommendations.
  clear: {
    schemaVersion: 1,
    generatedAt: new Date().toISOString(),
    verdict: "clear",
    classification: "healthy",
    signals: {
      lock: "active",
      parity: "ok",
      ledger: "consistent",
      runtimeProjection: "ok",
      wrapper: "clear",
    },
    currentUnit: null,
    latestRun: null,
    runtimeUnits: [],
    issues: [],
    recommendations: [],
    reportPath: "/tmp/uok-diagnostics.json",
  },
  // Needs-repair state: stale lock plus orphaned ledger runs (two errors).
  degraded: {
    schemaVersion: 1,
    generatedAt: new Date().toISOString(),
    verdict: "degraded",
    classification: "needs-repair",
    signals: {
      lock: "stale",
      parity: "ok",
      ledger: "open-runs",
      runtimeProjection: "stale",
      wrapper: "unknown",
    },
    currentUnit: null,
    latestRun: null,
    runtimeUnits: [],
    issues: [
      {
        code: "stale-lock",
        severity: "error",
        message: "Stale auto.lock detected for PID 12345.",
        evidence: { lock: { pid: 12345 } },
      },
      {
        code: "open-ledger-without-live-lock",
        severity: "error",
        message:
          "UOK ledger has 2 started run(s) without a live auto.lock owner.",
        evidence: { runIds: ["run-1", "run-2"] },
      },
    ],
    recommendations: [
      "Clear stale auto.lock before dispatch.",
      "Mark orphaned UOK runs recovered or restart from lock owner.",
    ],
    reportPath: "/tmp/uok-diagnostics.json",
  },
  // Warning-level state: parity degraded while a unit is executing.
  attention: {
    schemaVersion: 1,
    generatedAt: new Date().toISOString(),
    verdict: "attention",
    classification: "degraded",
    signals: {
      lock: "active",
      parity: "degraded",
      ledger: "consistent",
      runtimeProjection: "ok",
      wrapper: "unknown",
    },
    currentUnit: { unitType: "execute-task", unitId: "T01", pid: 12345 },
    latestRun: null,
    runtimeUnits: [],
    issues: [
      {
        code: "uok-parity-degraded",
        severity: "warning",
        message:
          "UOK parity degraded: 1 critical mismatch(es), 0 missing exit(s).",
        evidence: { current: { criticalMismatches: 1, missingExitEvents: 0 } },
      },
    ],
    recommendations: ["Reconcile UOK parity before mutating git state."],
    reportPath: "/tmp/uok-diagnostics.json",
  },
}));
// Dashboard data stub returned by the mocked getAutoDashboardData(). Only
// basePath is load-bearing in these tests — the overlay forwards it to
// writeUokDiagnostics, which the first test asserts on.
const dashDataMock = vi.hoisted(() => ({
  basePath: "/tmp/sf-test",
  active: false,
  paused: false,
  remoteSession: null,
  currentUnit: null,
  elapsed: 0,
  rtkEnabled: false,
  rtkSavings: null,
  pendingCaptureCount: 0,
}));
// Module mocks. The diagnostic-synthesis mock is the one under test here;
// everything else stubs the overlay's other data sources to inert values so
// loadData()/buildContentLines() run without touching disk, db, or git.
vi.mock("../uok/diagnostic-synthesis.js", () => ({
  writeUokDiagnostics: vi.fn((_basePath, _options) => mockDiagnostics.clear),
}));
vi.mock("../state.js", () => ({
  deriveState: vi.fn(async () => ({
    activeMilestone: null,
    activeSlice: null,
    activeTask: null,
    phase: "idle",
    progress: null,
    nextAction: null,
    blockers: [],
    registry: [],
  })),
}));
vi.mock("../sf-db.js", () => ({
  isDbAvailable: vi.fn(() => false),
  getMilestoneSlices: vi.fn(() => []),
  getSliceTasks: vi.fn(() => []),
}));
vi.mock("../auto.js", () => ({
  getAutoDashboardData: vi.fn(() => dashDataMock),
}));
vi.mock("../auto-dashboard.js", () => ({
  estimateTimeRemaining: vi.fn(() => null),
}));
vi.mock("../progress-score.js", () => ({
  computeProgressScore: vi.fn(() => ({
    level: "green",
    summary: "All systems healthy",
    signals: [],
  })),
}));
vi.mock("../doctor-environment.js", () => ({
  runEnvironmentChecks: vi.fn(() => []),
}));
vi.mock("../worktree-command.js", () => ({
  getActiveWorktreeName: vi.fn(() => null),
}));
vi.mock("../subagent/worker-registry.js", () => ({
  hasActiveWorkers: vi.fn(() => false),
  getWorkerBatches: vi.fn(() => new Map()),
}));
vi.mock("../metrics.js", () => ({
  getLedger: vi.fn(() => null),
  getProjectTotals: vi.fn(() => ({})),
  aggregateByPhase: vi.fn(() => []),
  aggregateBySlice: vi.fn(() => []),
  aggregateByModel: vi.fn(() => []),
  aggregateCacheHitRate: vi.fn(() => 0),
  formatCost: vi.fn((n) => `$${n.toFixed(2)}`),
  formatCostProjection: vi.fn(() => []),
  formatTokenCount: vi.fn((n) => String(n)),
}));
vi.mock("../paths.js", () => ({
  resolveMilestoneFile: vi.fn(() => null),
}));
vi.mock("../files.js", () => ({
  loadFile: vi.fn(async () => null),
}));
vi.mock("../preferences.js", () => ({
  loadEffectiveSFPreferences: vi.fn(() => null),
}));
// Partial mock: keep the real TUI module but pin the key codes and width
// helpers to deterministic, ANSI-free implementations for rendering tests.
vi.mock("@singularity-forge/tui", async (importOriginal) => {
  const actual = (await importOriginal()) as any;
  return {
    ...actual,
    Key: {
      escape: "\u001B",
      ctrl: (c: string) => `\u0000${c}`,
      ctrlAlt: (c: string) => `\u001B\u0000${c}`,
      ctrlShift: (c: string) => `\u001B\u0000${c.toUpperCase()}`,
      down: "\u001B[B",
      up: "\u001B[A",
    },
    matchesKey: vi.fn(() => false),
    truncateToWidth: vi.fn((s: string, w: number) =>
      s.length > w ? s.slice(0, w) : s,
    ),
    visibleWidth: vi.fn((s: string) => s.length),
  };
});
// Simplified layout helpers so assertions can match on plain substrings.
vi.mock("../shared/mod.js", () => ({
  centerLine: vi.fn(
    (s: string, w: number) =>
      " ".repeat(Math.max(0, Math.floor((w - s.length) / 2))) + s,
  ),
  fitColumns: vi.fn((parts: string[], _w: number, _sep: string) =>
    parts.join(" "),
  ),
  formatDuration: vi.fn((ms: number) => `${Math.round(ms / 1000)}s`),
  joinColumns: vi.fn(
    (left: string, right: string, _w: number) =>
      `${left}${" ".repeat(Math.max(1, _w - left.length - right.length))}${right}`,
  ),
  padRight: vi.fn((s: string, w: number) => s.padEnd(w, " ")),
  STATUS_COLOR: {
    done: "success",
    active: "accent",
    pending: "dim",
  },
  STATUS_GLYPH: {
    done: "✓",
    active: "▶",
    pending: "○",
  },
}));
vi.mock("../shortcut-defs.js", () => ({
  formattedShortcutPair: vi.fn(() => "ctrl+alt+g"),
}));
// ─── Helpers ───────────────────────────────────────────────────────────────
/** Theme stub: tags text with its color/weight so assertions can see styling. */
function createMockTheme() {
  const fg = vi.fn((color: string, text: string) => `[${color}:${text}]`);
  const bold = vi.fn((text: string) => `**${text}**`);
  return { fg, bold };
}
/** TUI stub: the overlay only ever calls requestRender. */
function createMockTui() {
  const requestRender = vi.fn();
  return { requestRender };
}
// ─── Tests ─────────────────────────────────────────────────────────────────
// clearAllMocks resets call history but keeps mock implementations, so the
// vi.mock factory defaults survive between tests while call-count assertions
// stay isolated.
beforeEach(() => {
  vi.clearAllMocks();
});
afterEach(() => {
  vi.clearAllMocks();
});
describe("SFDashboardOverlay UOK diagnostics", () => {
  it("loadData_calls_writeUokDiagnostics_and_stores_result", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    // Prevent interval from firing during test
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    // basePath comes from the mocked getAutoDashboardData() stub.
    expect(writeUokDiagnostics).toHaveBeenCalledWith("/tmp/sf-test");
    expect(overlay.uokDiagnostics).toEqual(mockDiagnostics.clear);
    overlay.dispose();
  });
  it("loadData_gracefully_handles_writeUokDiagnostics_failure", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    writeUokDiagnostics.mockImplementation(() => {
      throw new Error("disk full");
    });
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    // Diagnostics failures must degrade to "no data", never crash the TUI.
    expect(overlay.uokDiagnostics).toBeNull();
    overlay.dispose();
    // NOTE(review): mockRestore() on a plain vi.fn resets the implementation
    // to undefined rather than to the vi.mock factory default. Harmless here
    // because every later test sets its own mockReturnValue, but mockReset()
    // or re-assigning the implementation would be clearer — confirm intent.
    writeUokDiagnostics.mockRestore();
  });
  it("render_includes_uok_verdict_when_diagnostics_present", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    expect(text).toContain("UOK");
    expect(text).toContain("degraded");
    expect(text).toContain("needs-repair");
    overlay.dispose();
  });
  it("render_includes_first_issue_code_like_headless_status", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    // Should contain the first issue code, matching headless status behavior
    expect(text).toContain("stale-lock");
    overlay.dispose();
  });
  it("render_shows_uok_health_section_with_all_issues_when_degraded", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    // Should show both issue codes in the health section
    expect(text).toContain("stale-lock");
    expect(text).toContain("open-ledger-without-live-lock");
    overlay.dispose();
  });
  it("render_shows_recommendations_when_issues_present", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    expect(text).toContain("Clear stale auto.lock before dispatch.");
    expect(text).toContain(
      "Mark orphaned UOK runs recovered or restart from lock owner.",
    );
    overlay.dispose();
  });
  it("render_shows_uok_signals_table_when_diagnostics_present", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    // Signals should be visible
    expect(text).toContain("lock");
    expect(text).toContain("parity");
    expect(text).toContain("ledger");
    overlay.dispose();
  });
  it("render_omits_detailed_uok_section_when_verdict_is_clear", async () => {
    const { writeUokDiagnostics } = await import(
      "../uok/diagnostic-synthesis.js"
    );
    (writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.clear);
    const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
    const tui = createMockTui();
    const theme = createMockTheme();
    const overlay = new SFDashboardOverlay(tui, theme, () => {});
    clearInterval(overlay.refreshTimer);
    overlay.refreshTimer = null as any;
    await overlay.loadData();
    const lines = overlay.buildContentLines(80);
    const text = lines.join("\n");
    // Should show the compact UOK clear line but no issue details
    expect(text).toContain("clear");
    expect(text).not.toContain("stale-lock");
    overlay.dispose();
  });
});

View file

@ -0,0 +1,140 @@
import { describe, expect, test } from "vitest";
import {
BASE_REQUIREMENTS,
MODEL_CAPABILITY_PROFILES,
scoreEligibleModels,
scoreModel,
} from "../model-router.js";
// Routing-policy regression suite: the agentic capability axis must dominate
// raw coding score when selecting models for execute-task dispatch.
describe("agentic capability axis (ADR-0079)", () => {
  test("execute-task base requirements weight the agentic dimension", () => {
    // If this assertion fails because the weight changed: re-read ADR-0079
    // before adjusting. The whole point of the axis is to outweigh raw
    // coding score for execute-task routing.
    expect(BASE_REQUIREMENTS["execute-task"].agentic).toBeGreaterThanOrEqual(
      0.7,
    );
  });
  test("known agentic-capable models score higher than coding-completion models on execute-task", () => {
    const codestralScore = scoreModel(
      MODEL_CAPABILITY_PROFILES["codestral-latest"],
      BASE_REQUIREMENTS["execute-task"],
    );
    const kimiScore = scoreModel(
      MODEL_CAPABILITY_PROFILES["kimi-k2.6"],
      BASE_REQUIREMENTS["execute-task"],
    );
    const sonnetScore = scoreModel(
      MODEL_CAPABILITY_PROFILES["claude-sonnet-4-6"],
      BASE_REQUIREMENTS["execute-task"],
    );
    // Codestral has high coding (85) but agentic=25 — must not beat agentic models.
    expect(kimiScore).toBeGreaterThan(codestralScore);
    expect(sonnetScore).toBeGreaterThan(codestralScore);
  });
  test("devstral variants score below agentic models on execute-task", () => {
    const devstralScore = scoreModel(
      MODEL_CAPABILITY_PROFILES["devstral-2512"],
      BASE_REQUIREMENTS["execute-task"],
    );
    const kimiScore = scoreModel(
      MODEL_CAPABILITY_PROFILES["kimi-k2.6"],
      BASE_REQUIREMENTS["execute-task"],
    );
    expect(kimiScore).toBeGreaterThan(devstralScore);
  });
  // End-to-end ranking check: the full scorer (not just pairwise scoreModel)
  // must surface an agentic model at the top of the eligible list.
  test("scoreEligibleModels ranks agentic models above coding-only models for execute-task", () => {
    const eligible = [
      "mistral/codestral-latest",
      "mistral/devstral-2512",
      "moonshotai/kimi-k2.6",
      "anthropic/claude-sonnet-4-6",
    ];
    const ranked = scoreEligibleModels(
      eligible,
      BASE_REQUIREMENTS["execute-task"],
    );
    const top = ranked[0]?.modelId;
    // Either of the two pinned-agentic models must win.
    expect(["moonshotai/kimi-k2.6", "anthropic/claude-sonnet-4-6"]).toContain(
      top,
    );
    // And Codestral specifically must not win.
    expect(top).not.toBe("mistral/codestral-latest");
  });
  test("agentic axis preserves research-* unit-type behavior (no agentic weight there)", () => {
    // Research isn't agentic — those unit types should not gain an agentic
    // dimension. This protects long-context research-tuned models from
    // being penalized.
    expect(BASE_REQUIREMENTS["research-milestone"].agentic).toBeUndefined();
    expect(BASE_REQUIREMENTS["research-slice"].agentic).toBeUndefined();
  });
  test("known coding-only models all have agentic <= 50", () => {
    const codingOnly = [
      "codestral-latest",
      "devstral-2512",
      "devstral-medium-latest",
      "devstral-medium-2507",
      "devstral-small-2505",
      "devstral-small-2507",
      "labs-devstral-small-2512",
      "qwen3-coder:480b",
      "qwen3-coder-next",
    ];
    for (const id of codingOnly) {
      const profile = MODEL_CAPABILITY_PROFILES[id];
      expect(profile, `${id} should be in MODEL_CAPABILITY_PROFILES`).toBeDefined();
      expect(profile.agentic, `${id} should have agentic <= 50`).toBeLessThanOrEqual(
        50,
      );
    }
  });
  test("older MiniMax generations score lower than current on agentic", () => {
    // 2026-05-13 incident: minimax/M2.1 stuck in 60+ checkpoint loop on
    // infra repo. Root cause was the router aliasing all minimax-m2.x
    // variants to MiniMax-M2.7's profile, so older models inherited
    // current-gen capability scores and won cost tie-breaks on
    // execute-task. Per-generation profiles + agentic axis fix the
    // underlying routing decision.
    const m21 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.1"];
    const m25 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.5"];
    const m27 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.7"];
    expect(m21, "M2.1 should have its own profile").toBeDefined();
    expect(m25, "M2.5 should have its own profile").toBeDefined();
    expect(m27.agentic).toBeGreaterThan(m25.agentic);
    expect(m25.agentic).toBeGreaterThan(m21.agentic);
    // And on execute-task, the current generation must beat the older one.
    const oldScore = scoreModel(m21, BASE_REQUIREMENTS["execute-task"]);
    const newScore = scoreModel(m27, BASE_REQUIREMENTS["execute-task"]);
    expect(newScore).toBeGreaterThan(oldScore);
  });
  test("known agentic-frontier models all have agentic >= 85", () => {
    const agenticFrontier = [
      "claude-opus-4-6",
      "claude-sonnet-4-6",
      "claude-sonnet-4-5-20250514",
      "kimi-k2.6",
      "kimi-k2-thinking",
      "gpt-5",
      "gpt-5.4",
      "gpt-5.5",
      "gemini-3-pro-preview",
      "gemini-3.1-pro-preview",
    ];
    for (const id of agenticFrontier) {
      const profile = MODEL_CAPABILITY_PROFILES[id];
      expect(profile, `${id} should be in MODEL_CAPABILITY_PROFILES`).toBeDefined();
      expect(
        profile.agentic,
        `${id} should have agentic >= 85`,
      ).toBeGreaterThanOrEqual(85);
    }
  });
});

View file

@ -134,61 +134,3 @@ test("reorderAndSplitForCaching_preamble_goes_into_before", () => {
"dynamic section in after",
);
});
// Guard against false-positive hoisting: a "## Requirements Advanced" heading
// inside an inlined slice summary is content, not the top-level requirements
// block, and must stay after Mission/Context.
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
  const prompt = [
    "# Milestone Validation",
    "",
    "## Working Directory",
    "/repo",
    "",
    "## Mission",
    "Dispatch reviewers.",
    "",
    "## Context",
    "Inlined below.",
    "",
    "## Inlined Context",
    "### S01 Summary",
    "# S01",
    "",
    "## Requirements Advanced",
    "- R1",
    "",
    "## Requirements Validated",
    "None.",
  ].join("\n");
  const reordered = reorderForCaching(prompt);
  assert.ok(
    reordered.indexOf("## Mission") <
      reordered.indexOf("## Requirements Advanced"),
  );
  assert.ok(
    reordered.indexOf("## Context") <
      reordered.indexOf("## Requirements Advanced"),
  );
});
// The exact top-level "## Requirements" block IS hoisted ahead of Mission so
// the stable prefix maximizes prompt-cache hits.
test("reorderForCaching_when_top_level_requirements_exists_still_hoists_exact_requirements_block", () => {
  const prompt = [
    "# Execute",
    "",
    "## Mission",
    "Do work.",
    "",
    "## Requirements",
    "- R1",
    "",
    "## Verification",
    "Run tests.",
  ].join("\n");
  const reordered = reorderForCaching(prompt);
  assert.ok(
    reordered.indexOf("## Requirements") < reordered.indexOf("## Mission"),
  );
});

View file

@ -0,0 +1,30 @@
import assert from "node:assert/strict";
import { test } from "vitest";
import { buildUnitPromptMessageContent } from "../auto/run-unit.js";
// The cacheable "before" part must end with exactly one newline so that
// concatenating the parts reproduces the flat prompt byte-for-byte.
test("buildUnitPromptMessageContent_when_prompt_parts_present_preserves_join_boundary", () => {
  const content = buildUnitPromptMessageContent("flat", {
    before: "## Working Directory\n/repo",
    after: "## Inlined Task Plan\nDo it.",
  });
  assert.ok(Array.isArray(content));
  // First part carries the ephemeral cache_control marker.
  assert.deepEqual(content[0], {
    type: "text",
    text: "## Working Directory\n/repo\n",
    cache_control: { type: "ephemeral" },
  });
  assert.deepEqual(content[1], {
    type: "text",
    text: "## Inlined Task Plan\nDo it.",
  });
  assert.equal(
    content.map((part) => part.text).join(""),
    "## Working Directory\n/repo\n## Inlined Task Plan\nDo it.",
  );
});
// Without prompt parts the function is a passthrough for the flat string.
test("buildUnitPromptMessageContent_when_no_prompt_parts_returns_flat_prompt", () => {
  assert.equal(buildUnitPromptMessageContent("flat", null), "flat");
});

View file

@ -0,0 +1,134 @@
import { spawn } from "node:child_process";
import { describe, expect, test } from "vitest";
import {
isSessionPidAlive,
terminateExistingSession,
} from "../session-lock.js";
/**
 * Spawn a cooperative `sleep` child. `sleep` is a deliberate target: it
 * exits on SIGTERM, which lets tests exercise the graceful-termination
 * path. The SIGKILL escalation test uses spawnIgnoreSigterm instead,
 * because `sh -c "trap '' TERM; sleep N"` is unreliable for that purpose.
 */
function spawnSleeper(seconds = 30) {
  return spawn("/bin/sh", ["-c", `sleep ${seconds}`], {
    stdio: "ignore",
    detached: false,
  });
}
/**
 * Spawn a Node child whose SIGTERM handler is an explicit no-op. Unlike
 * `sh -c "trap '' TERM; sleep N"` (where the shell tail-call-exec's sleep
 * so SIGTERM hits sleep directly), this child IS the long-lived process
 * and reliably ignores SIGTERM until the SIGKILL escalation — which is
 * exactly what the escalation test asserts.
 */
function spawnIgnoreSigterm(seconds = 30) {
  const script = `process.on('SIGTERM', () => {}); setTimeout(() => process.exit(0), ${seconds * 1000});`;
  return spawn(process.execPath, ["-e", script], {
    stdio: "ignore",
    detached: false,
  });
}
describe("terminateExistingSession", () => {
  test("returns alreadyDead=true when pid is invalid", async () => {
    const result = await terminateExistingSession(0);
    expect(result.terminated).toBe(false);
    expect(result.alreadyDead).toBe(true);
  });
  test("refuses to terminate the current process", async () => {
    const result = await terminateExistingSession(process.pid);
    expect(result.terminated).toBe(false);
  });
  test("returns alreadyDead=true for a dead pid", async () => {
    // PID 1 is alive but not ours; use a value that's almost certainly
    // not assigned. 2147483646 (2^31 - 2) is well above any plausible PID.
    const result = await terminateExistingSession(2147483646);
    expect(result.alreadyDead).toBe(true);
    expect(result.terminated).toBe(true);
  });
  test("gracefully terminates a process that respects SIGTERM", async () => {
    const child = spawnSleeper(60);
    try {
      expect(isSessionPidAlive(child.pid)).toBe(true);
      const result = await terminateExistingSession(child.pid, {
        gracePeriodMs: 3000,
        reapWaitMs: 1000,
        pollIntervalMs: 50,
      });
      // sleep exits on SIGTERM, so no SIGKILL escalation should occur.
      expect(result.terminated).toBe(true);
      expect(result.escalated).toBe(false);
      expect(isSessionPidAlive(child.pid)).toBe(false);
    } finally {
      try {
        child.kill("SIGKILL");
      } catch {
        /* may already be dead */
      }
    }
  });
  test("escalates to SIGKILL when the process ignores SIGTERM", async () => {
    const child = spawnIgnoreSigterm(60);
    // Give the child a moment to register its SIGTERM handler before we
    // send SIGTERM. Without this, the kill may arrive before
    // process.on('SIGTERM', …) executes and Node uses the default handler
    // (exit on signal), which makes the test look like graceful exit.
    await new Promise((resolve) => setTimeout(resolve, 250));
    try {
      expect(isSessionPidAlive(child.pid)).toBe(true);
      const result = await terminateExistingSession(child.pid, {
        gracePeriodMs: 750,
        reapWaitMs: 2000,
        pollIntervalMs: 50,
      });
      expect(result.terminated).toBe(true);
      expect(result.escalated).toBe(true);
      expect(isSessionPidAlive(child.pid)).toBe(false);
    } finally {
      try {
        child.kill("SIGKILL");
      } catch {
        /* may already be dead */
      }
    }
  });
});
describe("isSessionPidAlive", () => {
  test("returns false for current process (self-check is intentionally disabled)", () => {
    // isPidAlive specifically excludes the current PID to prevent
    // false-positive self-detection in the lock takeover flow.
    expect(isSessionPidAlive(process.pid)).toBe(false);
  });
  test("returns false for clearly-dead pid", () => {
    expect(isSessionPidAlive(2147483646)).toBe(false);
  });
  test("returns true for a live child", async () => {
    const child = spawnSleeper(30);
    try {
      expect(isSessionPidAlive(child.pid)).toBe(true);
    } finally {
      try {
        child.kill("SIGKILL");
      } catch {
        /* may already be dead */
      }
    }
  });
  // Defensive-input contract: anything that is not a positive integer PID
  // is reported dead rather than throwing.
  test("returns false for non-integer or non-positive inputs", () => {
    expect(isSessionPidAlive(0)).toBe(false);
    expect(isSessionPidAlive(-1)).toBe(false);
    expect(isSessionPidAlive("nope")).toBe(false);
    expect(isSessionPidAlive(null)).toBe(false);
  });
});

View file

@ -0,0 +1,136 @@
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
_readCacheForTests,
clearSliceRoutingForUnit,
extractSliceScope,
readStickyModelForUnit,
recordSliceRouting,
} from "../slice-routing-cache.js";
// Temp project dirs created during a test; reaped by the afterEach below.
let tempDirs = [];
// Create an isolated throwaway project root for one cache round-trip.
function makeProject() {
  const dir = mkdtempSync(join(tmpdir(), "sf-slice-routing-"));
  tempDirs.push(dir);
  return dir;
}
afterEach(() => {
  for (const dir of tempDirs) rmSync(dir, { recursive: true, force: true });
  tempDirs = [];
});
// Scope extraction: unit ids collapse to their owning slice (or milestone)
// so all units in a slice share one sticky-routing cache key.
describe("extractSliceScope", () => {
  test("execute-task style unit id collapses to milestone/slice", () => {
    expect(extractSliceScope("M001-6377a4/S04/T02")).toBe("M001-6377a4/S04");
  });
  test("plan/complete slice ids stay as milestone/slice", () => {
    expect(extractSliceScope("M001-6377a4/S04")).toBe("M001-6377a4/S04");
  });
  test("milestone-only ids return the milestone", () => {
    expect(extractSliceScope("M001-6377a4")).toBe("M001-6377a4");
  });
  test("null/undefined/empty return null", () => {
    expect(extractSliceScope(null)).toBeNull();
    expect(extractSliceScope("")).toBeNull();
    expect(extractSliceScope(undefined)).toBeNull();
  });
});
describe("slice routing cache", () => {
  test("record + read round-trips", () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    // A different task in the SAME slice sees the sticky hint.
    const sticky = readStickyModelForUnit(
      project,
      "execute-task",
      "M001/S04/T02",
    );
    expect(sticky).toEqual({ provider: "moonshotai", id: "kimi-k2.6" });
  });
  test("sticky scoped per slice — different slice => no hit", () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    expect(
      readStickyModelForUnit(project, "execute-task", "M001/S05/T01"),
    ).toBeNull();
  });
  test("clearSliceRoutingForUnit evicts only the matching slice", () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    recordSliceRouting(project, "execute-task", "M001/S05/T01", {
      provider: "anthropic",
      id: "claude-sonnet-4-6",
    });
    clearSliceRoutingForUnit(project, "M001/S04/T07");
    // S04 entry gone; S05 entry untouched.
    expect(
      readStickyModelForUnit(project, "execute-task", "M001/S04/T99"),
    ).toBeNull();
    expect(
      readStickyModelForUnit(project, "execute-task", "M001/S05/T02"),
    ).toEqual({ provider: "anthropic", id: "claude-sonnet-4-6" });
  });
  test("readStickyModelForUnit honors maxAgeMs", async () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    // Sleep past the retention window so age strictly exceeds maxAgeMs.
    await new Promise((resolve) => setTimeout(resolve, 25));
    expect(
      readStickyModelForUnit(project, "execute-task", "M001/S04/T02", {
        maxAgeMs: 10,
      }),
    ).toBeNull();
  });
  test("returns null on missing basePath or unparseable unit id", () => {
    expect(readStickyModelForUnit("", "execute-task", "M001/S04/T01")).toBeNull();
    const project = makeProject();
    expect(readStickyModelForUnit(project, "execute-task", "")).toBeNull();
    expect(readStickyModelForUnit(project, "execute-task", null)).toBeNull();
  });
  test("overwrite updates the slice entry in place", () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    recordSliceRouting(project, "execute-task", "M001/S04/T02", {
      provider: "anthropic",
      id: "claude-opus-4-7",
    });
    // Still one entry for the slice — the second record replaced the first.
    const cache = _readCacheForTests(project);
    const entries = Object.values(cache);
    expect(entries.length).toBe(1);
    expect(
      readStickyModelForUnit(project, "execute-task", "M001/S04/T03"),
    ).toEqual({ provider: "anthropic", id: "claude-opus-4-7" });
  });
  test("clearSliceRoutingForUnit on the last entry removes the cache file", () => {
    const project = makeProject();
    recordSliceRouting(project, "execute-task", "M001/S04/T01", {
      provider: "moonshotai",
      id: "kimi-k2.6",
    });
    clearSliceRoutingForUnit(project, "M001/S04/T01");
    const cache = _readCacheForTests(project);
    expect(Object.keys(cache).length).toBe(0);
  });
});

View file

@ -0,0 +1,134 @@
import { describe, expect, test } from "vitest";
import {
SOLVER_MODEL_DEFAULT,
SOLVER_MODEL_FALLBACKS,
isSolverModel,
resolveSolverModel,
resolveSolverModelCandidates,
} from "../solver-model.js";
describe("solver-model invariants", () => {
  test("default is locked to kimi-k2.6 / kimi-coding", () => {
    // This is a PROTOCOL INVARIANT, not a tuning parameter. Changing the
    // default requires an ADR (see ADR-0079). If this test fails because
    // someone bumped the default, that's a load-bearing change and a code
    // review reject — re-read the ADR before re-running.
    expect(SOLVER_MODEL_DEFAULT).toEqual({
      provider: "kimi-coding",
      id: "kimi-k2.6",
    });
  });
  test("no fallback is a code-completion-only model", () => {
    // Code-completion models (Codestral, Devstral, the kimi-for-coding
    // alias) are the ones that broke the loop in the first place. They
    // must NEVER appear in the solver fallback chain.
    const forbidden = new Set([
      "codestral-latest",
      "devstral-latest",
      "kimi-for-coding",
    ]);
    for (const candidate of SOLVER_MODEL_FALLBACKS) {
      expect(forbidden.has(candidate.id)).toBe(false);
    }
  });
});
describe("resolveSolverModel", () => {
  test("with no preferences returns the pinned default", () => {
    expect(resolveSolverModel()).toEqual(SOLVER_MODEL_DEFAULT);
    expect(resolveSolverModel(undefined)).toEqual(SOLVER_MODEL_DEFAULT);
    expect(resolveSolverModel({})).toEqual(SOLVER_MODEL_DEFAULT);
  });
  test("ignores router/benchmark/learning state (no opt-in == default)", () => {
    // Even with the kitchen sink of unrelated preference fields,
    // resolveSolverModel must NOT consult any of them. Only an explicit
    // preferences.autonomousSolver.model entry can override.
    const preferences = {
      currentModel: { provider: "mistral", id: "codestral-latest" },
      modelRouter: { lastSelection: "google-gemini-cli/gemini-3-flash-preview" },
      benchmarkSelector: { winner: "kimi-for-coding" },
      learning: { blender: { recommended: "kimi-k2.5" } },
    };
    expect(resolveSolverModel(preferences)).toEqual(SOLVER_MODEL_DEFAULT);
  });
  test("respects an explicit object override", () => {
    const resolved = resolveSolverModel({
      autonomousSolver: { model: { provider: "anthropic", id: "claude-opus-4-7" } },
    });
    expect(resolved).toEqual({ provider: "anthropic", id: "claude-opus-4-7" });
  });
  // String overrides accept both "provider/model" and bare-model forms.
  test("accepts a string override in provider/model form", () => {
    const resolved = resolveSolverModel({
      autonomousSolver: { model: "anthropic/claude-sonnet-4-6" },
    });
    expect(resolved).toEqual({
      provider: "anthropic",
      id: "claude-sonnet-4-6",
    });
  });
  test("accepts a bare model id and keeps the default provider", () => {
    const resolved = resolveSolverModel({
      autonomousSolver: { model: "kimi-k2-thinking" },
    });
    expect(resolved).toEqual({
      provider: SOLVER_MODEL_DEFAULT.provider,
      id: "kimi-k2-thinking",
    });
  });
  // Blank strings are treated as "no override", not as a model id.
  test("ignores an empty-string override", () => {
    expect(
      resolveSolverModel({ autonomousSolver: { model: "" } }),
    ).toEqual(SOLVER_MODEL_DEFAULT);
    expect(
      resolveSolverModel({ autonomousSolver: { model: " " } }),
    ).toEqual(SOLVER_MODEL_DEFAULT);
  });
});
describe("resolveSolverModelCandidates", () => {
  test("primary comes first, then fallback chain (de-duplicated)", () => {
    const candidates = resolveSolverModelCandidates();
    expect(candidates[0]).toEqual(SOLVER_MODEL_DEFAULT);
    expect(candidates.length).toBe(1 + SOLVER_MODEL_FALLBACKS.length);
  });
  test("override does not duplicate when also in fallback list", () => {
    const candidates = resolveSolverModelCandidates({
      autonomousSolver: { model: "anthropic/claude-opus-4-7" },
    });
    // The override is promoted to primary, not listed twice.
    const opusEntries = candidates.filter(
      (c) => c.id === "claude-opus-4-7" && c.provider === "anthropic",
    );
    expect(opusEntries.length).toBe(1);
  });
});
describe("isSolverModel", () => {
  test("returns true for the pinned default", () => {
    expect(isSolverModel(SOLVER_MODEL_DEFAULT)).toBe(true);
  });
  test("returns false for a routed executor model", () => {
    expect(
      isSolverModel({ provider: "mistral", id: "codestral-latest" }),
    ).toBe(false);
    expect(
      isSolverModel({
        provider: "google-gemini-cli",
        id: "gemini-3-flash-preview",
      }),
    ).toBe(false);
  });
  // Malformed input is reported as "not the solver", never thrown on.
  test("returns false for null / malformed inputs", () => {
    expect(isSolverModel(null)).toBe(false);
    expect(isSolverModel(undefined)).toBe(false);
    expect(isSolverModel({})).toBe(false);
  });
});

View file

@ -0,0 +1,115 @@
import {
existsSync,
mkdirSync,
mkdtempSync,
rmSync,
symlinkSync,
utimesSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import { pruneStaleTraces } from "../uok/trace-writer.js";
// Temp project dirs created during a test; reaped by the afterEach below.
let tempDirs = [];
// Create an isolated throwaway project root with an empty .sf/ directory.
function makeProject() {
  const dir = mkdtempSync(join(tmpdir(), "sf-trace-janitor-"));
  tempDirs.push(dir);
  mkdirSync(join(dir, ".sf"), { recursive: true });
  return dir;
}
afterEach(() => {
  for (const dir of tempDirs) {
    rmSync(dir, { recursive: true, force: true });
  }
  tempDirs = [];
});
function makeTraceFile(project, name, daysOld) {
const tracesDir = join(project, ".sf", "traces");
mkdirSync(tracesDir, { recursive: true });
const path = join(tracesDir, name);
writeFileSync(path, '{"ts":"2024-01-01T00:00:00Z","type":"gate_run"}\n');
if (typeof daysOld === "number") {
const epoch = (Date.now() - daysOld * 24 * 60 * 60 * 1000) / 1000;
utimesSync(path, epoch, epoch);
}
return path;
}
describe("pruneStaleTraces", () => {
test("removes jsonl files older than retention window", () => {
const project = makeProject();
const oldFile = makeTraceFile(
project,
"pre-dispatch:old.jsonl",
45,
);
const freshFile = makeTraceFile(
project,
"pre-dispatch:fresh.jsonl",
5,
);
expect(existsSync(oldFile)).toBe(true);
expect(existsSync(freshFile)).toBe(true);
const result = pruneStaleTraces(project);
expect(result.pruned).toBe(1);
expect(existsSync(oldFile)).toBe(false);
expect(existsSync(freshFile)).toBe(true);
});
test("respects a custom retention window", () => {
const project = makeProject();
const file = makeTraceFile(project, "pre-dispatch:tenday.jsonl", 10);
const result = pruneStaleTraces(project, { retentionDays: 7 });
expect(result.pruned).toBe(1);
expect(existsSync(file)).toBe(false);
});
test("never touches the `latest` symlink", () => {
const project = makeProject();
const file = makeTraceFile(project, "pre-dispatch:current.jsonl", 0);
const latest = join(project, ".sf", "traces", "latest");
symlinkSync("pre-dispatch:current.jsonl", latest);
// Make `latest` look old via its target; the symlink itself is fine.
pruneStaleTraces(project);
expect(existsSync(latest)).toBe(true);
});
test("ignores non-jsonl files", () => {
const project = makeProject();
const tracesDir = join(project, ".sf", "traces");
mkdirSync(tracesDir, { recursive: true });
const txt = join(tracesDir, "notes.txt");
writeFileSync(txt, "ignored");
const epoch = (Date.now() - 90 * 24 * 60 * 60 * 1000) / 1000;
utimesSync(txt, epoch, epoch);
pruneStaleTraces(project);
expect(existsSync(txt)).toBe(true);
});
test("returns zero-counts when traces dir does not exist", () => {
const project = makeProject();
// no traces dir
const result = pruneStaleTraces(project);
expect(result).toEqual({ scanned: 0, pruned: 0, errors: 0 });
});
test("respects maxDeletePerCall safety cap", () => {
const project = makeProject();
for (let i = 0; i < 5; i++) {
makeTraceFile(project, `pre-dispatch:old-${i}.jsonl`, 60);
}
const result = pruneStaleTraces(project, { maxDeletePerCall: 2 });
expect(result.pruned).toBe(2);
});
test("does not throw on missing basePath", () => {
expect(() => pruneStaleTraces("")).not.toThrow();
expect(() => pruneStaleTraces(undefined)).not.toThrow();
});
});

View file

@ -328,13 +328,12 @@ export default function sfTui(pi) {
renderResult: ({ output }) => output, renderResult: ({ output }) => output,
}); });
// ASK_USER_ELICITATION — structured form-based ask_user replacement. // ask_user_elicitation — structured form-based ask_user replacement.
// When the flag is on and the agent calls this tool with choices, a TUI // Shows a TUI select overlay when choices are provided, freeform input otherwise.
// select overlay is shown instead of a plain text prompt.
pi.registerTool({ pi.registerTool({
name: "ask_user_elicitation", name: "ask_user_elicitation",
description: description:
"Ask the user a question using a structured form with optional choices. When ASK_USER_ELICITATION is enabled this is preferred over plain ask_user for questions with known choices.", "Ask the user a question using a structured form with optional choices. Shows a TUI select overlay when choices are provided, or a freeform text prompt otherwise.",
parameters: { parameters: {
type: "object", type: "object",
properties: { properties: {
@ -359,12 +358,6 @@ export default function sfTui(pi) {
if (!ctx?.hasUI) { if (!ctx?.hasUI) {
return { output: "No UI available for elicitation." }; return { output: "No UI available for elicitation." };
} }
if (!getExperimentalFlag("ask_elicitation")) {
return {
output:
"ASK_USER_ELICITATION is not enabled. Run /experimental on ask_elicitation to enable.",
};
}
if (choices?.length) { if (choices?.length) {
const answer = await ctx.ui.select(question, choices); const answer = await ctx.ui.select(question, choices);
if (!answer && allow_freeform) { if (!answer && allow_freeform) {
@ -379,121 +372,6 @@ export default function sfTui(pi) {
renderResult: ({ output }) => (output ? `**Answer:** ${output}` : ""), renderResult: ({ output }) => (output ? `**Answer:** ${output}` : ""),
}); });
// MULTI_TURN_AGENTS — persistent named sub-agent sessions via file-backed state.
// Tool that spawns or resumes a named SF child process, relaying messages.
pi.registerTool({
name: "spawn_agent",
description:
"Spawn or resume a named persistent sub-agent. Sends a message and waits for the response. The agent persists across calls using file-backed state in .sf/agents/<name>/.",
parameters: {
type: "object",
properties: {
name: {
type: "string",
description:
"Unique agent name (alphanumeric + hyphens, e.g. 'researcher')",
},
message: {
type: "string",
description: "Message to send to the agent",
},
reset: {
type: "boolean",
description:
"If true, clear the agent's state and start fresh (default: false)",
},
},
required: ["name", "message"],
},
execute: async ({ name, message, reset }) => {
if (!getExperimentalFlag("multi_turn_agents")) {
return {
output:
"MULTI_TURN_AGENTS is not enabled. Run /experimental on multi_turn_agents to enable.",
};
}
if (!/^[a-z0-9-]{1,32}$/i.test(name)) {
return {
output: "Agent name must be 1-32 alphanumeric/hyphen characters.",
};
}
const { join: pathJoin } = await import("node:path");
const { mkdirSync, writeFileSync, readFileSync, existsSync } =
await import("node:fs");
const stateDir = pathJoin(
projectRoot() ?? process.cwd(),
".sf",
"agents",
name,
);
mkdirSync(stateDir, { recursive: true });
const historyPath = pathJoin(stateDir, "history.jsonl");
if (reset && existsSync(historyPath)) {
writeFileSync(historyPath, "", "utf-8");
}
// Append user message to history
const entry = JSON.stringify({
role: "user",
content: message,
ts: Date.now(),
});
const { appendFileSync } = await import("node:fs");
appendFileSync(historyPath, `${entry}\n`, "utf-8");
// Dispatch to SF headless with the conversation history as context
const historyLines = existsSync(historyPath)
? readFileSync(historyPath, "utf-8")
.trim()
.split("\n")
.filter(Boolean)
.map((l) => {
try {
return JSON.parse(l);
} catch {
return null;
}
})
.filter(Boolean)
: [];
const contextMsg = historyLines
.slice(-10) // last 10 turns for context
.map((e) => `${e.role === "user" ? "User" : "Agent"}: ${e.content}`)
.join("\n");
const fullPrompt = `[Agent: ${name}]\n\nConversation history:\n${contextMsg}\n\nRespond to the last user message only.`;
const { execFile } = await import("node:child_process");
const { promisify } = await import("node:util");
const execFileAsync = promisify(execFile);
try {
const { stdout } = await execFileAsync(
process.execPath,
[
"-y",
"node@24",
process.env.SF_LOADER ?? "dist/loader.js",
"headless",
"--print",
fullPrompt,
],
{
timeout: 60000,
encoding: "utf-8",
env: { ...process.env },
},
);
const response = stdout.trim();
appendFileSync(
historyPath,
`${JSON.stringify({ role: "assistant", content: response, ts: Date.now() })}\n`,
"utf-8",
);
return { output: response };
} catch (err) {
return {
output: `Agent dispatch failed: ${getErrorMessage(err)}`,
};
}
},
renderResult: ({ output }) => output,
});
} }
/** Run the STATUS_LINE user script on a 5s interval, posting stdout to footer. */ /** Run the STATUS_LINE user script on a 5s interval, posting stdout to footer. */

View file

@ -7,6 +7,31 @@
* *
* Consumer: AgentSwarm orchestrator, swarm role agents (CoordinatorAgent, WorkerAgent etc), * Consumer: AgentSwarm orchestrator, swarm role agents (CoordinatorAgent, WorkerAgent etc),
* and direct use in multi-agent dispatch flows. * and direct use in multi-agent dispatch flows.
*
* ## Current state
* This module implements the **container** half of a persistent agent: identity, inbox,
* memory blocks, and message routing. It does NOT implement the **runner** half.
*
* The missing piece is an LLM execution runner that:
* 1. Reads pending messages from this agent's inbox (`receive(true)`)
* 2. Assembles a prompt from core memory blocks + inbox messages
* 3. Dispatches to SF headless (`node dist/loader.js headless --print <prompt>`)
* 4. Writes the LLM response back into the bus as a reply
* 5. Updates memory blocks (eviction, summarization) when context grows large
*
* Until the runner exists, `PersistentAgent` is a passive store. The autonomous loop
* uses it this way for sleeptime memory consolidation (caller sends + immediately reads
 * inbox). `SwarmDispatchLayer` also only enqueues messages — nothing processes them.
*
* When building the runner, key design decisions to make:
* - Context window management: how many inbox turns to include before summarizing
* - Memory eviction: which core blocks are injected, which are summarized to archival
* - Turn limits: max rounds before the runner yields and re-queues
* - Concurrency: one runner per agent name (enforce via DB lock or process mutex)
* - Error handling: failed LLM calls should leave the message as unread, not drop it
*
* See: Codex `codex-rs/core/src/agent/control.rs` for the reference implementation of
* typed parallel subagents (explorer/worker roles) with forked rollout history.
*/ */
import { randomUUID } from "node:crypto"; import { randomUUID } from "node:crypto";

View file

@ -8,6 +8,18 @@
* *
* Consumer: UOK kernel dispatch path, parallel orchestrators, and /sf autonomous controller * Consumer: UOK kernel dispatch path, parallel orchestrators, and /sf autonomous controller
* when SF_A2A_ENABLED is set. * when SF_A2A_ENABLED is set.
*
 * ## Current state — enqueue only, no runner
* `_busDispatch` routes an envelope to a role agent's inbox via the MessageBus. It does NOT
 * wait for a response — the `DispatchResult` contains only `messageId` and `targetAgent`,
* not LLM output. Nothing currently drains agent inboxes and runs LLM calls.
*
* This layer is ready to use once `PersistentAgent` gains a runner (see persistent-agent.js
* module comment for the runner design). At that point `dispatch()` can be extended to
* optionally block until the runner posts a reply to the bus.
*
* Callers outside uok/: none currently. The autonomous loop uses AgentSwarm directly for
* the sleeptime memory path. Wire this in when building the autonomous orchestrator.
*/ */
import { AgentSwarm } from "./agent-swarm.js"; import { AgentSwarm } from "./agent-swarm.js";

View file

@ -4,6 +4,7 @@ import {
appendFileSync, appendFileSync,
closeSync, closeSync,
existsSync, existsSync,
lstatSync,
mkdirSync, mkdirSync,
openSync, openSync,
readdirSync, readdirSync,
@ -15,6 +16,12 @@ import {
import { join } from "node:path"; import { join } from "node:path";
import { sfRoot } from "../paths.js"; import { sfRoot } from "../paths.js";
// Longest read window currently used by any trace consumer
// (sf-db-gates.js:391 reads 30 days). Anything older than this is never
// read and just consumes disk.
const TRACE_RETENTION_DAYS_DEFAULT = 30;
const MS_PER_DAY = 24 * 60 * 60 * 1000;
function tracesDir(basePath) { function tracesDir(basePath) {
return join(sfRoot(basePath), "traces"); return join(sfRoot(basePath), "traces");
} }
@ -45,6 +52,64 @@ export function appendTraceEvent(basePath, traceId, event) {
} }
} }
/**
* Prune .sf/traces/*.jsonl files older than retentionDays.
*
* Why: per-flow trace files accumulate one-per-dispatch and are never
* cleaned. The longest analyzer window today is 30 days
* (sf-db-gates.js:391); anything older is never read and just consumes
* disk. The `latest` symlink is preserved unconditionally so the
* tail-friendly pointer keeps working.
*
* Consumer: session-start hook (idempotent, fast, best-effort).
*
* @param {string} basePath
* @param {object} [opts]
* @param {number} [opts.retentionDays=30]
* @param {number} [opts.maxDeletePerCall=1000] - safety cap so a runaway
* directory doesn't make startup slow.
* @returns {{ scanned: number, pruned: number, errors: number }}
*/
export function pruneStaleTraces(basePath, opts = {}) {
const retentionDays = Number(opts.retentionDays ?? TRACE_RETENTION_DAYS_DEFAULT);
const maxDeletePerCall = Math.max(1, Number(opts.maxDeletePerCall ?? 1000));
const result = { scanned: 0, pruned: 0, errors: 0 };
if (!basePath || typeof basePath !== "string") return result;
let dir;
try {
dir = tracesDir(basePath);
} catch {
return result;
}
if (!existsSync(dir)) return result;
const cutoff = Date.now() - retentionDays * MS_PER_DAY;
let entries;
try {
entries = readdirSync(dir);
} catch {
return result;
}
for (const name of entries) {
if (result.pruned >= maxDeletePerCall) break;
if (name === "latest") continue;
if (!name.endsWith(".jsonl")) continue;
const path = join(dir, name);
result.scanned += 1;
try {
// lstat so we don't follow a symlink (defensive — there shouldn't
// be any besides `latest`, but never silently chase).
const stat = lstatSync(path);
if (!stat.isFile()) continue;
if (stat.mtimeMs >= cutoff) continue;
unlinkSync(path);
result.pruned += 1;
} catch {
result.errors += 1;
}
}
return result;
}
export function readTraceEvents(basePath, type, windowHours = 24) { export function readTraceEvents(basePath, type, windowHours = 24) {
// Read all trace files modified within windowHours, filter by event type // Read all trace files modified within windowHours, filter by event type
// Returns array of matching events // Returns array of matching events