From 12e7333f1cb38fc7d6956858042354e3880f3362 Mon Sep 17 00:00:00 2001
From: Mikael Hugo <mikkihugo@users.noreply.github.com>
Date: Fri, 1 May 2026 20:18:50 +0200
Subject: [PATCH] feat: stabilize autonomous workflow system

---
 .agents/skills/nix-build/SKILL.md             |  26 +++
 .agents/skills/smoke-test/SKILL.md            |  17 ++
 README.md                                     |  47 ++--
 docs/README.md                                |   4 +-
 docs/dev/json-contracts.md                    |  13 ++
 docs/user-docs/auto-mode.md                   |  20 +-
 docs/user-docs/commands.md                    |   9 +-
 docs/user-docs/getting-started.md             |  16 +-
 docs/user-docs/troubleshooting.md             |  10 +-
 packages/daemon/src/orchestrator.ts           |   4 +-
 packages/daemon/src/session-manager.test.ts   |   6 +-
 packages/daemon/src/session-manager.ts        |   6 +-
 packages/daemon/src/types.ts                  |   2 +-
 packages/mcp-server/README.md                 |   2 +-
 packages/mcp-server/src/mcp-server.test.ts    |   8 +-
 packages/mcp-server/src/server.ts             |   6 +-
 packages/mcp-server/src/session-manager.ts    |   6 +-
 packages/mcp-server/src/types.ts              |   2 +-
 packages/pi-agent-core/src/agent-loop.test.ts | 103 +++++++++
 packages/pi-agent-core/src/agent-loop.ts      |   8 +-
 packages/pi-ai/src/utils/event-stream.test.ts | 138 +++++++++++
 ...agent-session-custom-message-queue.test.ts | 110 +++++++++
 .../pi-coding-agent/src/core/agent-session.ts | 121 ++++++++--
 .../src/core/keybindings-followup.test.ts     |  12 +
 .../pi-coding-agent/src/core/keybindings.ts   |   2 +-
 .../src/core/slash-commands.ts                |   1 +
 .../__tests__/tool-execution.test.ts          |  23 +-
 .../interactive/components/tool-execution.ts  |  16 +-
 .../controllers/input-controller.test.ts      |  20 ++
 .../interactive/slash-command-handlers.ts     |   5 +
 scripts/check-versioned-json.mjs              | 115 ++++++----
 scripts/check-versioned-json.test.mjs         |  54 +++++
 src/cli-web-branch.ts                         |   4 +
 src/cli.ts                                    |  52 +++--
 src/headless-ui.ts                            |  12 +
 src/headless.ts                               |  32 ++-
 src/help-text.ts                              |  39 ++--
 src/resources/agents/scout.md                 |   8 +-
 src/resources/agents/worker.md                |   2 +-
 .../claude-code-cli/stream-adapter.ts         |  14 ++
 .../search-the-web/native-search.ts           |  20 +-
 src/resources/extensions/sf-tui/footer.ts     |  29 ++-
 .../extensions/sf/auto-direct-dispatch.ts     |   2 +-
 src/resources/extensions/sf/auto-post-unit.ts |   8 +-
 src/resources/extensions/sf/auto-start.ts     |   4 +-
 .../extensions/sf/auto-timeout-recovery.ts    |   2 +-
 src/resources/extensions/sf/auto.ts           |   4 +-
 src/resources/extensions/sf/auto/loop.ts      | 138 ++++++++++-
 src/resources/extensions/sf/auto/phases.ts    |  87 +++++--
 src/resources/extensions/sf/auto/session.ts   |   6 +
 .../extensions/sf/bootstrap/db-tools.ts       |  55 +++++
 .../extensions/sf/bootstrap/register-hooks.ts |  26 +++
 .../extensions/sf/code-intelligence.ts        |  18 +-
 .../extensions/sf/commands-bootstrap.ts       |  27 +--
 .../extensions/sf/commands-handlers.ts        |  35 ++-
 .../extensions/sf/commands-inspect.ts         |   4 +-
 src/resources/extensions/sf/commands-logs.ts  |   4 +-
 .../sf/commands-workflow-templates.ts         | 132 +++++++----
 .../extensions/sf/commands/catalog.ts         |  33 +--
 .../extensions/sf/commands/handlers/auto.ts   |  18 +-
 .../extensions/sf/commands/handlers/core.ts   |  14 +-
 .../sf/commands/handlers/workflow.ts          |   5 +-
 src/resources/extensions/sf/crash-recovery.ts |   8 +-
 .../extensions/sf/custom-workflow-engine.ts   | 158 ++++++++-----
 .../extensions/sf/dashboard-overlay.ts        |   4 +-
 src/resources/extensions/sf/forensics.ts      |   4 +-
 src/resources/extensions/sf/graph.ts          |  39 ++++
 src/resources/extensions/sf/guided-flow.ts    |   2 +-
 .../sf/learning/data/model-benchmarks.json    |   2 +-
 .../sf/learning/data/unit-weights.json        |   2 +-
 src/resources/extensions/sf/memory-sleeper.ts | 139 +++++++++++
 .../extensions/sf/prompts/discuss-headless.md |   4 +-
 .../extensions/sf/prompts/discuss.md          |   2 +-
 .../sf/prompts/guided-discuss-milestone.md    |   2 +-
 .../sf/prompts/guided-discuss-slice.md        |   2 +-
 src/resources/extensions/sf/prompts/queue.md  |   2 +-
 src/resources/extensions/sf/prompts/system.md |   5 +-
 src/resources/extensions/sf/run-manager.ts    | 126 ++++++++--
 .../sf/safety/evidence-collector.ts           |  63 ++++-
 .../extensions/sf/skills/researcher/SKILL.md  |   2 +-
 .../sf/slice-parallel-orchestrator.ts         |   6 +-
 .../sf/tests/bundled-workflow-defs.test.ts    |  62 +++++
 .../sf/tests/commands-workflow-custom.test.ts | 179 ++++++++++++++-
 .../sf/tests/complete-slice.test.ts           |   2 +-
 .../custom-engine-loop-integration.test.ts    |  98 ++++++++
 .../sf/tests/custom-workflow-engine.test.ts   |  58 ++++-
 .../sf/tests/graph-operations.test.ts         |  44 ++++
 .../sf/tests/integration/git-service.test.ts  |  65 +-----
 .../extensions/sf/tests/md-importer.test.ts   |   4 +-
 .../sf/tests/memory-sleeper.test.ts           | 113 +++++++++
 .../extensions/sf/tests/memory-store.test.ts  |   4 +-
 .../phases-merge-error-stops-auto.test.ts     |   2 +-
 .../extensions/sf/tests/run-manager.test.ts   |  60 ++++-
 .../extensions/sf/tests/sf-db.test.ts         |   4 +-
 .../sf/tests/start-auto-detached.test.ts      |   4 +
 .../extensions/sf/tests/tool-naming.test.ts   |  44 ++++
 .../sf/tests/update-command.test.ts           |  30 +++
 .../tests/workflow-template-compiler.test.ts  |  72 ++++++
 .../sf/tests/workflow-templates.test.ts       | 117 +++++++++-
 .../sf/tests/workspace-index.test.ts          |   4 +-
 .../extensions/sf/tools/plan-milestone.ts     |   8 +
 .../sf/tools/workflow-tool-executors.ts       |  11 +-
 .../sf/workflow-template-compiler.ts          | 101 ++++++++
 .../extensions/sf/workflow-templates.ts       | 146 +++++++++++-
 .../sf/workflow-templates/full-project.md     |   4 +-
 .../sf/workflow-templates/product-plan.md     |  83 +++++++
 .../sf/workflow-templates/product-tracking.md | 116 ++++++++++
 .../sf/workflow-templates/registry.json       |  53 ++++-
 .../extensions/sf/workspace-index.ts          |   2 +-
 .../extensions/sf/worktree-command.ts         |   4 +-
 src/resources/extensions/subagent/index.ts    | 215 ++++++++++++++----
 .../subagent/tests/node-launch.test.ts        |  39 +++-
 src/resources/skills/create-workflow/SKILL.md |   1 +
 .../templates/product-tracking-lifecycle.yaml | 144 ++++++++++++
 .../workflows/create-from-template.md         |   1 +
 src/tests/auto-mode-piped.test.ts             |  28 ++-
 src/tests/auto-piped-io.test.ts               |   8 +-
 src/tests/headless-cli-surface.test.ts        |  37 ++-
 src/tests/headless-events.test.ts             |   2 +-
 .../web-workflow-controls-contract.test.ts    |  20 +-
 src/tests/native-search.test.ts               | 210 +++++++++++++++++
 src/tests/parse-cli-args.test.ts              |   8 +
 src/web/recovery-diagnostics-service.ts       |   2 +-
 tsconfig.extensions.json                      |   4 +-
 web/components/sf/chat-mode.tsx               |  12 +-
 web/lib/workflow-actions.ts                   |   8 +-
 126 files changed, 3954 insertions(+), 617 deletions(-)
 create mode 100644 .agents/skills/nix-build/SKILL.md
 create mode 100644 .agents/skills/smoke-test/SKILL.md
 create mode 100644 docs/dev/json-contracts.md
 create mode 100644 packages/pi-ai/src/utils/event-stream.test.ts
 create mode 100644 packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts
 create mode 100644 packages/pi-coding-agent/src/core/keybindings-followup.test.ts
 create mode 100644 scripts/check-versioned-json.test.mjs
 create mode 100644 src/resources/extensions/sf/memory-sleeper.ts
 create mode 100644 src/resources/extensions/sf/tests/memory-sleeper.test.ts
 create mode 100644 src/resources/extensions/sf/tests/workflow-template-compiler.test.ts
 create mode 100644 src/resources/extensions/sf/workflow-template-compiler.ts
 create mode 100644 src/resources/extensions/sf/workflow-templates/product-plan.md
 create mode 100644 src/resources/extensions/sf/workflow-templates/product-tracking.md
 create mode 100644 src/resources/skills/create-workflow/templates/product-tracking-lifecycle.yaml

diff --git a/.agents/skills/nix-build/SKILL.md b/.agents/skills/nix-build/SKILL.md
new file mode 100644
index 000000000..0646c0474
--- /dev/null
+++ b/.agents/skills/nix-build/SKILL.md
@@ -0,0 +1,26 @@
+---
+name: nix-build
+description: Build any @singularity-forge/* package (or the full stack) via nix develop. Pass a package name like "pi-coding-agent", "native", "mcp-server", or "all" for a full core build.
+---
+
+All build commands in this repo must run inside `nix develop`. Never use bare cargo/bun/rustc.
+
+For a single package:
+```bash
+nix develop --command bash -c "bun run --filter @singularity-forge/<package> build"
+```
+
+For the full core build (native + all TS packages):
+```bash
+nix develop --command bash -c "npm run build:core"
+```
+
+For typecheck only:
+```bash
+nix develop --command bash -c "tsc --noEmit -p tsconfig.json"
+```
+
+For extensions typecheck:
+```bash
+nix develop --command bash -c "tsc --noEmit -p tsconfig.extensions.json"
+```
diff --git a/.agents/skills/smoke-test/SKILL.md b/.agents/skills/smoke-test/SKILL.md
new file mode 100644
index 000000000..dc2c016cf
--- /dev/null
+++ b/.agents/skills/smoke-test/SKILL.md
@@ -0,0 +1,19 @@
+---
+name: smoke-test
+description: Run the standard sf-run smoke tests (--version, --help, --print). All three must pass before shipping a build.
+---
+
+```bash
+#!/bin/bash
+set -e
+echo "=== --version ==="
+node dist/loader.js --version
+
+echo "=== --help (first 5 lines) ==="
+node dist/loader.js --help 2>&1 | head -5
+
+echo "=== --print (graceful degradation) ==="
+node dist/loader.js --print 2>&1 | head -5
+
+echo "All smoke tests passed."
+```
diff --git a/README.md b/README.md
index 6b04bba3a..c65d68b90 100644
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ Full documentation is in the [`docs/`](./docs/) directory:
 ### User Guides
 
 - **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage
-- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive
+- **[Autonomous Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive
 - **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks
 - **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies)
 - **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing
@@ -139,7 +139,7 @@ Full documentation is in the [`docs/`](./docs/) directory:
 - **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure
 - **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress
 - **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.sf` migration
-- **[Docker Sandbox](./docker/README.md)** — run SF auto mode in an isolated Docker container
+- **[Docker Sandbox](./docker/README.md)** — run SF autonomous mode in an isolated Docker container
 
 ### Developer Docs
 
@@ -229,15 +229,15 @@ Plan (with integrated research) → Execute (per task) → Complete → Reassess
 
 **Plan** scouts the codebase, researches relevant docs, and decomposes the slice into tasks with must-haves (mechanically verifiable outcomes). **Execute** runs each task in a fresh context window with only the relevant files pre-loaded — then runs configured verification commands (lint, test, etc.) with auto-fix retries. **Complete** writes the summary, UAT script, marks the roadmap, and commits with meaningful messages derived from task summaries. **Reassess** checks if the roadmap still makes sense given what was learned. **Validate Milestone** runs a reconciliation gate after all slices complete — comparing roadmap success criteria against actual results before sealing the milestone.
 
-### `/sf auto` — The Main Event
+### `/sf autonomous` — The Main Event
 
 This is what makes SF different. Run it, walk away, come back to built software.
 
 ```
-/sf auto
+/sf autonomous
 ```
 
-Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit.
+Autonomous mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, autonomous mode reads disk state again and dispatches the next unit. `/sf auto` remains supported as a short alias.
 
 **What happens under the hood:**
 
@@ -247,15 +247,15 @@ Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, d
 
 3. **Git isolation** — When `git.isolation` is set to `worktree` or `branch`, each milestone runs on its own `milestone/<MID>` branch (in a worktree or in-place). All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. The default is `none` (work on the current branch), configurable via preferences.
 
-4. **Crash recovery** — A lock file tracks the current unit. If the session dies, the next `/sf auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts).
+4. **Crash recovery** — A lock file tracks the current unit. If the session dies, the next `/sf autonomous` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts).
 
 5. **Provider error recovery** — Transient provider errors (rate limits, 500/503 server errors, overloaded) auto-resume after a delay. Permanent errors (auth, billing) pause for manual review. The model fallback chain retries transient network errors before switching models.
 
 6. **Stuck detection** — A sliding-window detector identifies repeated dispatch patterns (including multi-unit cycles). On detection, it retries once with a deep diagnostic. If it fails again, auto mode stops with the exact file it expected.
 
-7. **Timeout supervision** — Soft timeout warns the LLM to wrap up. Idle watchdog detects stalls. Hard timeout pauses auto mode. Recovery steering nudges the LLM to finish durable output before giving up.
+7. **Timeout supervision** — Soft timeout warns the LLM to wrap up. Idle watchdog detects stalls. Hard timeout pauses autonomous mode. Recovery steering nudges the LLM to finish durable output before giving up.
 
-8. **Cost tracking** — Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending.
+8. **Cost tracking** — Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause autonomous mode before overspending.
 
 9. **Adaptive replanning** — After each slice completes, the roadmap is reassessed. If the work revealed new information that changes the plan, slices are reordered, added, or removed before continuing.
 
@@ -263,11 +263,11 @@ Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, d
 
 11. **Milestone validation** — After all slices complete, a `validate-milestone` gate compares roadmap success criteria against actual results before sealing the milestone.
 
-12. **Escape hatch** — Press Escape to pause. The conversation is preserved. Interact with the agent, inspect what happened, or just `/sf auto` to resume from disk state.
+12. **Escape hatch** — Press Escape to pause. The conversation is preserved. Interact with the agent, inspect what happened, or just `/sf autonomous` to resume from disk state.
 
 ### `/sf` and `/sf next` — Step Mode
 
-By default, `/sf` runs in **step mode**: the same state machine as auto mode, but it pauses between units with a wizard showing what completed and what's next. You advance one step at a time, review the output, and continue when ready.
+By default, `/sf` runs in **step mode**: the same state machine as autonomous mode, but it pauses between units with a wizard showing what completed and what's next. You advance one step at a time, review the output, and continue when ready.
 
 - **No `.sf/` directory** → Start a new project. Discussion flow captures your vision, constraints, and preferences.
 - **Milestone exists, no roadmap** → Discuss or research the milestone.
@@ -315,19 +315,19 @@ sf
 
 SF opens an interactive agent session. From there, you have two ways to work:
 
-**`/sf` — step mode.** Type `/sf` and SF executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. Same state machine as auto mode, but you stay in the loop. No project yet? It starts the discussion flow. Roadmap exists? It plans or executes the next step.
+**`/sf` — step mode.** Type `/sf` and SF executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. Same state machine as autonomous mode, but you stay in the loop. No project yet? It starts the discussion flow. Roadmap exists? It plans or executes the next step.
 
-**`/sf auto` — autonomous mode.** Type `/sf auto` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. Fresh context window per task. No babysitting.
+**`/sf autonomous` — autonomous mode.** Type `/sf autonomous` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. Fresh context window per task. No babysitting. `/sf auto` is an alias.
 
 ### Two terminals, one project
 
-The real workflow: run auto mode in one terminal, steer from another.
+The real workflow: run autonomous mode in one terminal, steer from another.
 
 **Terminal 1 — let it build**
 
 ```bash
 sf
-/sf auto
+/sf autonomous
 ```
 
 **Terminal 2 — steer while it works**
@@ -339,14 +339,14 @@ sf
 /sf queue      # queue the next milestone
 ```
 
-Both terminals read and write the same `.sf/` files on disk. Your decisions in terminal 2 are picked up automatically at the next phase boundary — no need to stop auto mode.
+Both terminals read and write the same `.sf/` files on disk. Your decisions in terminal 2 are picked up automatically at the next phase boundary — no need to stop autonomous mode.
 
 ### Headless mode — CI and scripts
 
 `sf headless` runs any `/sf` command without a TUI. Designed for CI pipelines, cron jobs, and scripted automation.
 
 ```bash
-# Run auto mode in CI
+# Run autonomous mode in CI
 sf headless --timeout 600000
 
 # Create and execute a milestone end-to-end
@@ -376,15 +376,16 @@ On first run, SF launches a branded setup wizard that walks you through LLM prov
 | ----------------------- | --------------------------------------------------------------- |
 | `/sf`                  | Step mode — executes one unit at a time, pauses between each    |
 | `/sf next`             | Explicit step mode (same as bare `/sf`)                        |
-| `/sf auto`             | Autonomous mode — researches, plans, executes, commits, repeats |
+| `/sf autonomous`       | Autonomous mode — researches, plans, executes, commits, repeats |
+| `/sf auto`             | Alias for `/sf autonomous`                                      |
 | `/sf quick`            | Execute a quick task with SF guarantees, skip planning overhead |
-| `/sf stop`             | Stop auto mode gracefully                                       |
+| `/sf stop`             | Stop autonomous mode gracefully                                 |
 | `/sf steer`            | Hard-steer plan documents during execution                      |
-| `/sf discuss`          | Discuss architecture and decisions (works alongside auto mode)  |
+| `/sf discuss`          | Discuss architecture and decisions (works alongside autonomous mode) |
 | `/sf rethink`          | Conversational project reorganization                           |
 | `/sf mcp`              | MCP server status and connectivity                              |
 | `/sf status`           | Progress dashboard                                              |
-| `/sf queue`            | Queue future milestones (safe during auto mode)                 |
+| `/sf queue`            | Queue future milestones (safe during autonomous mode)           |
 | `/sf prefs`            | Model selection, timeouts, budget ceiling                       |
 | `/sf migrate`          | Migrate a v1 `.planning` directory to `.sf` format             |
 | `/sf help`             | Categorized command reference for all SF subcommands           |
@@ -523,8 +524,8 @@ auto_report: true
 | ---------------------- | ----------------------------------------------------------------------------------------------------- |
 | `models.*`             | Per-phase model selection — string for a single model, or `{model, fallbacks}` for automatic failover |
 | `skill_discovery`      | `auto` / `suggest` / `off` — how SF finds and applies skills                                         |
-| `auto_supervisor.*`    | Timeout thresholds for auto mode supervision                                                          |
-| `budget_ceiling`       | USD ceiling — auto mode pauses when reached                                                           |
+| `auto_supervisor.*`    | Timeout thresholds for autonomous mode supervision                                                    |
+| `budget_ceiling`       | USD ceiling — autonomous mode pauses when reached                                                     |
 | `uat_dispatch`         | Enable automatic UAT runs after slice completion                                                      |
 | `always_use_skills`    | Skills to always load when relevant                                                                   |
 | `skill_rules`          | Situational rules for skill routing                                                                   |
@@ -574,7 +575,7 @@ SF ships with 24 extensions, all loaded automatically:
 
 | Extension              | What it provides                                                                                                       |
 | ---------------------- | ---------------------------------------------------------------------------------------------------------------------- |
-| **SF**                | Core workflow engine, auto mode, commands, dashboard                                                                   |
+| **SF**                | Core workflow engine, autonomous mode, commands, dashboard                                                             |
 | **Browser Tools**      | Playwright-based browser with form intelligence, intent-ranked element finding, semantic actions, PDF export, session state persistence, network mocking, device emulation, structured extraction, visual diffing, region zoom, test code generation, and prompt injection detection |
 | **Search the Web**     | Brave Search, Tavily, or Jina page extraction                                                                          |
 | **Google Search**      | Gemini-powered web search with AI-synthesized answers                                                                  |
diff --git a/docs/README.md b/docs/README.md
index 61f54849e..7cf5ebd67 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
 # SF Documentation
 
-Welcome to the SF documentation. This covers everything from getting started to advanced configuration, auto-mode internals, and extending SF with the Pi SDK.
+Welcome to the SF documentation. This covers everything from getting started to advanced configuration, autonomous-mode internals, and extending SF with the Pi SDK.
 
 ## User Documentation
 
@@ -11,7 +11,7 @@ Simplified Chinese translation: [`zh-CN/`](./zh-CN/).
 | Guide | Description |
 |-------|-------------|
 | [Getting Started](./user-docs/getting-started.md) | Installation, first run, and basic usage |
-| [Auto Mode](./user-docs/auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering |
+| [Autonomous Mode](./user-docs/auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering |
 | [Commands Reference](./user-docs/commands.md) | All commands, keyboard shortcuts, and CLI flags |
 | [Remote Questions](./user-docs/remote-questions.md) | Discord and Slack integration for headless auto-mode |
 | [Configuration](./user-docs/configuration.md) | Preferences, model selection, git settings, and token profiles |
diff --git a/docs/dev/json-contracts.md b/docs/dev/json-contracts.md
new file mode 100644
index 000000000..bde4c9c41
--- /dev/null
+++ b/docs/dev/json-contracts.md
@@ -0,0 +1,13 @@
+# JSON Contracts
+
+SF uses `schemaVersion` for JSON files whose shape is owned by SF runtime code.
+Use a numeric value starting at `1`.
+
+Use `version` only for package, plugin, extension, or release versions. Do not
+use `version` as a schema marker for SF-owned data.
+
+For map-like JSON where top-level keys are domain entries, put the marker under
+`_meta.schemaVersion`.
+
+The `check:versioned-json` script parses every tracked `.json` file and enforces
+`schemaVersion` on the SF-owned contract allowlist.
diff --git a/docs/user-docs/auto-mode.md b/docs/user-docs/auto-mode.md
index 1122922f6..693b06181 100644
--- a/docs/user-docs/auto-mode.md
+++ b/docs/user-docs/auto-mode.md
@@ -1,10 +1,10 @@
-# Auto Mode
+# Autonomous Mode
 
-Auto mode is SF's autonomous execution engine. Run `/sf auto`, walk away, come back to built software with clean git history.
+Autonomous mode is SF's product-development execution engine. Run `/sf autonomous`, walk away, come back to built software with clean git history. `/sf auto` remains supported as a short alias.
 
 ## How It Works
 
-Auto mode is a **state machine driven by files on disk**. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit.
+Autonomous mode is a **state machine driven by files on disk**. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, autonomous mode reads disk state again and dispatches the next unit.
 
 ### The Loop
 
@@ -59,7 +59,7 @@ When your project has independent milestones, you can run them simultaneously. E
 
 ### Crash Recovery
 
-A lock file tracks the current unit. If the session dies, the next `/sf auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context.
+A lock file tracks the current unit. If the session dies, the next `/sf autonomous` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context.
 
 **Headless auto-restart (v2.26):** When running `sf headless auto`, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). Configure with `--max-restarts N`. SIGINT/SIGTERM bypasses restart. Combined with crash recovery, this enables true overnight "run until done" execution.
 
@@ -77,7 +77,7 @@ No manual intervention needed for transient errors — the session pauses briefl
 
 ### Incremental Memory (v2.26)
 
-SF maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends to it when discovering recurring issues, non-obvious patterns, or rules that future sessions should follow. This gives auto-mode cross-session memory that survives context window boundaries.
+SF maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends to it when discovering recurring issues, non-obvious patterns, or rules that future sessions should follow. This gives autonomous mode cross-session memory that survives context window boundaries.
 
 ### Context Pressure Monitor (v2.26)
 
@@ -208,14 +208,16 @@ Configured skills are automatically resolved and injected into dispatch prompts.
 
 See [Configuration](./configuration.md) for skill routing preferences.
 
-## Controlling Auto Mode
+## Controlling Autonomous Mode
 
 ### Start
 
 ```
-/sf auto
+/sf autonomous
 ```
 
+`/sf auto` is equivalent to `/sf autonomous`.
+
 ### Pause
 
 Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume.
@@ -223,10 +225,10 @@ Press **Escape**. The conversation is preserved. You can interact with the agent
 ### Resume
 
 ```
-/sf auto
+/sf autonomous
 ```
 
-Auto mode reads disk state and picks up where it left off.
+Autonomous mode reads disk state and picks up where it left off.
 
 ### Stop
 
diff --git a/docs/user-docs/commands.md b/docs/user-docs/commands.md
index 88e7bae06..4a224843a 100644
--- a/docs/user-docs/commands.md
+++ b/docs/user-docs/commands.md
@@ -6,10 +6,11 @@
 |---------|-------------|
 | `/sf` | Step mode — execute one unit at a time, pause between each |
 | `/sf next` | Explicit step mode (same as `/sf`) |
-| `/sf auto` | Autonomous mode — research, plan, execute, commit, repeat |
+| `/sf autonomous` | Autonomous product loop — research, plan, execute, commit, repeat |
+| `/sf auto` | Alias for `/sf autonomous` |
 | `/sf quick` | Execute a quick task with SF guarantees (atomic commits, state tracking) without full planning overhead |
-| `/sf stop` | Stop auto mode gracefully |
-| `/sf pause` | Pause auto-mode (preserves state, `/sf auto` to resume) |
+| `/sf stop` | Stop autonomous mode gracefully |
+| `/sf pause` | Pause autonomous mode (preserves state, `/sf autonomous` to resume) |
 | `/sf steer` | Hard-steer plan documents during execution |
 | `/sf discuss` | Discuss architecture and decisions (works alongside auto mode) |
 | `/sf status` | Progress dashboard |
@@ -99,6 +100,8 @@ See [Parallel Orchestration](./parallel-orchestration.md) for full documentation
 | `/sf workflow pause` | Pause custom workflow auto-mode |
 | `/sf workflow resume` | Resume paused custom workflow auto-mode |
 
+`/sf autonomous` is the product-development loop that chooses the next useful unit from project state. `/sf start` is a guided workflow kickoff that may ask clarifying questions. `/sf workflow run` executes an explicit YAML workflow definition. `/sf auto` remains supported as shorthand for `/sf autonomous`.
+
 ## Extensions
 
 | Command | Description |
diff --git a/docs/user-docs/getting-started.md b/docs/user-docs/getting-started.md
index abec860be..893bbfe86 100644
--- a/docs/user-docs/getting-started.md
+++ b/docs/user-docs/getting-started.md
@@ -294,7 +294,7 @@ docker sandbox exec -it sf-sandbox bash
 
 ```bash
 export ANTHROPIC_API_KEY="sk-ant-..."
-sf auto "implement the feature described in issue #42"
+sf autonomous "implement the feature described in issue #42"
 ```
 
 See [Docker Sandbox docs](../../docker/README.md) for full configuration, resource limits, and compose files.
@@ -328,27 +328,27 @@ Type `/sf` inside a session. SF executes one unit of work at a time, pausing bet
 
 Step mode keeps you in the loop, reviewing output between each step.
 
-### Auto Mode — `/sf auto`
+### Autonomous Mode — `/sf autonomous`
 
-Type `/sf auto` and walk away. SF autonomously researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete.
+Type `/sf autonomous` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. `/sf auto` remains available as a short alias.
 
 ```
-/sf auto
+/sf autonomous
 ```
 
-See [Auto Mode](./auto-mode.md) for full details.
+See [Autonomous Mode](./auto-mode.md) for full details.
 
 ---
 
 ## Recommended Workflow: Two Terminals
 
-Run auto mode in one terminal, steer from another.
+Run autonomous mode in one terminal, steer from another.
 
 **Terminal 1 — let it build:**
 
 ```bash
 sf
-/sf auto
+/sf autonomous
 ```
 
 **Terminal 2 — steer while it works:**
@@ -466,7 +466,7 @@ For more, see [Troubleshooting](./troubleshooting.md).
 
 ## Next Steps
 
-- [Auto Mode](./auto-mode.md) — deep dive into autonomous execution
+- [Autonomous Mode](./auto-mode.md) — deep dive into autonomous execution
 - [Configuration](./configuration.md) — model selection, timeouts, budgets
 - [Commands Reference](./commands.md) — all commands and shortcuts
 - [Provider Setup](./providers.md) — detailed setup for every provider
diff --git a/docs/user-docs/troubleshooting.md b/docs/user-docs/troubleshooting.md
index a7671f818..3489bc54d 100644
--- a/docs/user-docs/troubleshooting.md
+++ b/docs/user-docs/troubleshooting.md
@@ -25,13 +25,13 @@ It checks:
 - Stale cache after a crash — the in-memory file listing doesn't reflect new artifacts
 - The LLM didn't produce the expected artifact file
 
-**Fix:** Run `/sf doctor` to repair state, then resume with `/sf auto`. If the issue persists, check that the expected artifact file exists on disk.
+**Fix:** Run `/sf doctor` to repair state, then resume with `/sf autonomous`. If the issue persists, check that the expected artifact file exists on disk.
 
 ### Auto mode stops with "Loop detected"
 
 **Cause:** A unit failed to produce its expected artifact twice in a row.
 
-**Fix:** Check the task plan for clarity. If the plan is ambiguous, refine it manually, then `/sf auto` to resume.
+**Fix:** Check the task plan for clarity. If the plan is ambiguous, refine it manually, then `/sf autonomous` to resume.
 
 ### Wrong files in worktree
 
@@ -103,13 +103,13 @@ For common provider setup issues (role errors, streaming errors, model ID mismat
 
 **Symptoms:** Auto mode pauses with "Budget ceiling reached."
 
-**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile to reduce per-unit cost, then resume with `/sf auto`.
+**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile to reduce per-unit cost, then resume with `/sf autonomous`.
 
 ### Stale lock file
 
 **Symptoms:** Auto mode won't start, says another session is running.
 
-**Fix:** SF automatically detects stale locks — if the owning PID is dead, the lock is cleaned up and re-acquired on the next `/sf auto`. This includes stranded `.sf.lock/` directories left by `proper-lockfile` after crashes. If automatic recovery fails, delete `.sf/auto.lock` and the `.sf.lock/` directory manually:
+**Fix:** SF automatically detects stale locks — if the owning PID is dead, the lock is cleaned up and re-acquired on the next `/sf autonomous`. This includes stranded `.sf.lock/` directories left by `proper-lockfile` after crashes. If automatic recovery fails, delete `.sf/auto.lock` and the `.sf.lock/` directory manually:
 
 ```bash
 rm -f .sf/auto.lock
@@ -287,7 +287,7 @@ rm .sf/auto.lock
 rm .sf/completed-units.json
 ```
 
-Then `/sf auto` to restart from current disk state.
+Then `/sf autonomous` to restart from current disk state.
 
 ### Reset routing history
 
diff --git a/packages/daemon/src/orchestrator.ts b/packages/daemon/src/orchestrator.ts
index d80e2b243..d46cb1f71 100644
--- a/packages/daemon/src/orchestrator.ts
+++ b/packages/daemon/src/orchestrator.ts
@@ -93,12 +93,12 @@ const TOOLS: Tool[] = [
   },
   {
     name: 'start_session',
-    description: 'Start a new SF auto-mode session for a project. Provide the absolute project path. Optionally provide a command to run instead of the default "/sf auto".',
+    description: 'Start a new SF autonomous-mode session for a project. Provide the absolute project path. Optionally provide a command to run instead of the default "/sf autonomous".',
     input_schema: {
       type: 'object' as const,
       properties: {
         projectPath: { type: 'string', description: 'Absolute path to the project directory' },
-        command: { type: 'string', description: 'Optional command to send instead of "/sf auto"' },
+        command: { type: 'string', description: 'Optional command to send instead of "/sf autonomous"' },
       },
       required: ['projectPath'],
     },
diff --git a/packages/daemon/src/session-manager.test.ts b/packages/daemon/src/session-manager.test.ts
index 5e9748239..63175f555 100644
--- a/packages/daemon/src/session-manager.test.ts
+++ b/packages/daemon/src/session-manager.test.ts
@@ -159,8 +159,8 @@ class TestableSessionManager extends SessionManager {
         (this as any).handleEvent(session, event);
       });
 
-      // Kick off auto-mode
-      const command = options.command ?? '/sf auto';
+      // Kick off autonomous mode
+      const command = options.command ?? '/sf autonomous';
       await client.prompt(command);
 
       // Emit lifecycle events (matching parent behavior)
@@ -805,7 +805,7 @@ describe('SessionManager', () => {
     const client = manager.lastClient!;
 
     assert.ok(client.prompted.includes('/sf quick fix-typo'));
-    assert.ok(!client.prompted.includes('/sf auto'));
+    assert.ok(!client.prompted.includes('/sf autonomous'));
   });
 
   // ---- getSessionByDir returns session by directory lookup ----
diff --git a/packages/daemon/src/session-manager.ts b/packages/daemon/src/session-manager.ts
index cc1c1be2a..215db18e2 100644
--- a/packages/daemon/src/session-manager.ts
+++ b/packages/daemon/src/session-manager.ts
@@ -71,7 +71,7 @@ export class SessionManager extends EventEmitter {
    *
    * Rejects if a session already exists for this projectDir.
    * Creates an RpcClient, starts the process, performs the v2 init handshake,
-   * wires event tracking, and sends '/sf auto' to begin execution.
+   * wires event tracking, and sends '/sf autonomous' to begin execution.
    */
   async startSession(options: StartSessionOptions): Promise<string> {
     const { projectDir } = options;
@@ -139,8 +139,8 @@ export class SessionManager extends EventEmitter {
         this.handleEvent(session, event);
       });
 
-      // Kick off auto-mode
-      const command = options.command ?? '/sf auto';
+      // Kick off autonomous mode
+      const command = options.command ?? '/sf autonomous';
       await client.prompt(command);
 
       this.logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir });
diff --git a/packages/daemon/src/types.ts b/packages/daemon/src/types.ts
index 9db856878..f47925a4a 100644
--- a/packages/daemon/src/types.ts
+++ b/packages/daemon/src/types.ts
@@ -161,7 +161,7 @@ export interface StartSessionOptions {
   /** Absolute path to the project directory */
   projectDir: string;
 
-  /** Command to send after '/sf auto' (default: none) */
+  /** Optional command to send instead of the default '/sf autonomous' */
   command?: string;
 
   /** Model ID override */
diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md
index d00171c0d..b304fb0c3 100644
--- a/packages/mcp-server/README.md
+++ b/packages/mcp-server/README.md
@@ -133,7 +133,7 @@ Start a SF auto-mode session for a project directory.
 | Parameter | Type | Required | Description |
 |-----------|------|----------|-------------|
 | `projectDir` | `string` | ✅ | Absolute path to the project directory |
-| `command` | `string` | | Command to send (default: `"/sf auto"`) |
+| `command` | `string` | | Command to send (default: `"/sf autonomous"`) |
 | `model` | `string` | | Model ID override |
 | `bare` | `boolean` | | Run in bare mode (skip user config) |
 
diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts
index f6f99df9c..e61a70018 100644
--- a/packages/mcp-server/src/mcp-server.test.ts
+++ b/packages/mcp-server/src/mcp-server.test.ts
@@ -171,8 +171,8 @@ class TestableSessionManager extends SessionManager {
         this._handleEvent(session, event);
       });
 
-      // Kick off auto-mode
-      const command = options.command ?? '/sf auto';
+      // Kick off autonomous mode
+      const command = options.command ?? '/sf autonomous';
       await client.prompt(command);
 
       return session.sessionId;
@@ -236,10 +236,10 @@ describe('SessionManager', () => {
     assert.equal(session.projectDir, resolve('/tmp/test-project'));
   });
 
-  it('startSession sends /sf auto by default', async () => {
+  it('startSession sends /sf autonomous by default', async () => {
     await sm.startSession('/tmp/test-prompt', { cliPath: '/usr/bin/sf' });
     assert.ok(sm.lastClient);
-    assert.deepEqual(sm.lastClient.prompted, ['/sf auto']);
+    assert.deepEqual(sm.lastClient.prompted, ['/sf autonomous']);
   });
 
   it('startSession sends custom command when provided', async () => {
diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts
index e7ba62abc..ed7bb4eca 100644
--- a/packages/mcp-server/src/server.ts
+++ b/packages/mcp-server/src/server.ts
@@ -24,7 +24,7 @@ import { buildGraph, writeGraph, writeSnapshot, graphStatus, graphQuery, graphDi
 import { resolveSFRoot } from './readers/paths.js';
 import { runDoctorLite } from './readers/doctor-lite.js';
 import { registerWorkflowTools } from './workflow-tools.js';
-import { applySecrets, checkExistingEnvKeys, detectDestination } from './env-writer.js';
+import { applySecrets, checkExistingEnvKeys, detectDestination, resolveProjectEnvFilePath } from './env-writer.js';
 
 // ---------------------------------------------------------------------------
 // Constants
@@ -367,7 +367,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
     'Start a SF auto-mode session for a project directory. Returns a sessionId for tracking.',
     {
       projectDir: z.string().describe('Absolute path to the project directory'),
-      command: z.string().optional().describe('Command to send (default: "/sf auto")'),
+      command: z.string().optional().describe('Command to send (default: "/sf autonomous")'),
       model: z.string().optional().describe('Model ID override'),
       bare: z.boolean().optional().describe('Run in bare mode (skip user config)'),
     },
@@ -589,7 +589,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
       };
 
       try {
-        const resolvedProjectDir = resolve(projectDir);
+        const resolvedProjectDir = resolveProjectEnvFilePath(projectDir);
         const resolvedEnvPath = resolve(resolvedProjectDir, envFilePath ?? '.env');
 
         // (1) Check which keys already exist
diff --git a/packages/mcp-server/src/session-manager.ts b/packages/mcp-server/src/session-manager.ts
index 029a9d635..9c8b1fe2a 100644
--- a/packages/mcp-server/src/session-manager.ts
+++ b/packages/mcp-server/src/session-manager.ts
@@ -60,7 +60,7 @@ export class SessionManager {
    *
    * Rejects if a session already exists for this projectDir.
    * Creates an RpcClient, starts the process, performs the v2 init handshake,
-   * wires event tracking, and sends '/sf auto' to begin execution.
+   * wires event tracking, and sends '/sf autonomous' to begin execution.
    */
   async startSession(projectDir: string, options: ExecuteOptions = {}): Promise<string> {
     if (!projectDir || projectDir.trim() === '') {
@@ -124,8 +124,8 @@ export class SessionManager {
         this.handleEvent(session, event);
       });
 
-      // Kick off auto-mode
-      const command = options.command ?? '/sf auto';
+      // Kick off autonomous mode
+      const command = options.command ?? '/sf autonomous';
       await client.prompt(command);
 
       return session.sessionId;
diff --git a/packages/mcp-server/src/types.ts b/packages/mcp-server/src/types.ts
index dc5588259..fbe93d87e 100644
--- a/packages/mcp-server/src/types.ts
+++ b/packages/mcp-server/src/types.ts
@@ -83,7 +83,7 @@ export interface CostAccumulator {
 // ---------------------------------------------------------------------------
 
 export interface ExecuteOptions {
-  /** Command to send after '/sf auto' (default: none) */
+  /** Optional command to send instead of the default '/sf autonomous' */
   command?: string;
 
   /** Model ID override */
diff --git a/packages/pi-agent-core/src/agent-loop.test.ts b/packages/pi-agent-core/src/agent-loop.test.ts
index a6e463bb2..5ddb1d637 100644
--- a/packages/pi-agent-core/src/agent-loop.test.ts
+++ b/packages/pi-agent-core/src/agent-loop.test.ts
@@ -101,6 +101,109 @@ describe("agent-loop — pauseTurn handling (#2869)", () => {
 });
 
 describe("agent-loop — steering during tool batches", () => {
+	it("does not interrupt the current tool batch for custom system steering", async () => {
+		const calls: string[] = [];
+		const tool = {
+			name: "record",
+			label: "Record",
+			description: "Record a value",
+			parameters: Type.Object({ value: Type.String() }),
+			execute: async (_id: string, args: { value: string }) => {
+				calls.push(args.value);
+				return {
+					content: [{ type: "text" as const, text: `recorded ${args.value}` }],
+					details: {},
+				};
+			},
+		} satisfies AgentTool<{ value: string }>;
+
+		const first = makeAssistantMessage({
+			content: [
+				{
+					type: "toolCall",
+					id: "tc-1",
+					name: "record",
+					arguments: { value: "one" },
+				},
+				{
+					type: "toolCall",
+					id: "tc-2",
+					name: "record",
+					arguments: { value: "two" },
+				},
+			],
+			stopReason: "toolUse",
+		});
+		const second = makeAssistantMessage({
+			content: [{ type: "text", text: "saw system steering" }],
+			stopReason: "stop",
+		});
+		const mockStream = createMockStreamFn([first, second]);
+		let steeringPolls = 0;
+		const steering: AgentMessage = {
+			role: "custom",
+			customType: "sf-memory-sleeper",
+			content: "system notice",
+			display: false,
+			timestamp: Date.now(),
+		} as AgentMessage;
+
+		const context: AgentContext = {
+			systemPrompt: "You are a test agent.",
+			messages: [
+				{
+					role: "user",
+					content: [{ type: "text", text: "record values" }],
+					timestamp: Date.now(),
+				},
+			],
+			tools: [tool],
+		};
+
+		const config: AgentLoopConfig = {
+			model: TEST_MODEL,
+			convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
+			toolExecution: "sequential",
+			getSteeringMessages: async () => {
+				steeringPolls += 1;
+				return steeringPolls === 1 ? [steering] : [];
+			},
+		};
+
+		const stream = agentLoop(
+			[
+				{
+					role: "user",
+					content: [{ type: "text", text: "record values" }],
+					timestamp: Date.now(),
+				},
+			],
+			context,
+			config,
+			undefined,
+			mockStream as any,
+		);
+
+		const events = await collectEvents(stream);
+		const skipped = events.filter(
+			(event) =>
+				event.type === "tool_execution_end" &&
+				JSON.stringify(event.result.content).includes(
+					"Skipped due to queued user message",
+				),
+		);
+
+		assert.deepEqual(calls, ["one", "two"]);
+		assert.equal(skipped.length, 0);
+		assert.ok(
+			events.some(
+				(event) =>
+					event.type === "message_start" && event.message === steering,
+			),
+			"system steering should still be delivered after the tool batch",
+		);
+	});
+
 	it("defers queued steering until after the current tool batch when configured", async () => {
 		const calls: string[] = [];
 		const tool = {
diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts
index 23fbfdad2..9d909c211 100644
--- a/packages/pi-agent-core/src/agent-loop.ts
+++ b/packages/pi-agent-core/src/agent-loop.ts
@@ -489,6 +489,10 @@ interface ToolExecutionResult {
 	preparationErrorCount: number;
 }
 
+function hasUserSteeringMessage(messages: readonly AgentMessage[]): boolean {
+	return messages.some((message) => message.role === "user");
+}
+
 /**
  * Execute tool calls from an assistant message.
  */
@@ -553,7 +557,7 @@ async function executeToolCallsSequential(
 			const steering = await config.getSteeringMessages();
 			if (steering.length > 0) {
 				steeringMessages = [...(steeringMessages ?? []), ...steering];
-				if (interruptOnSteering) {
+				if (interruptOnSteering && hasUserSteeringMessage(steering)) {
 					const remainingCalls = toolCalls.slice(index + 1);
 					for (const skipped of remainingCalls) {
 						results.push(skipToolCall(skipped, stream));
@@ -604,7 +608,7 @@ async function executeToolCallsParallel(
 			const steering = await config.getSteeringMessages();
 			if (steering.length > 0) {
 				steeringMessages = [...(steeringMessages ?? []), ...steering];
-				if (interruptOnSteering) {
+				if (interruptOnSteering && hasUserSteeringMessage(steering)) {
 					for (const runnable of runnableCalls) {
 						results.push(skipToolCall(runnable.toolCall, stream, { emitStart: false }));
 					}
diff --git a/packages/pi-ai/src/utils/event-stream.test.ts b/packages/pi-ai/src/utils/event-stream.test.ts
new file mode 100644
index 000000000..d5ae103d7
--- /dev/null
+++ b/packages/pi-ai/src/utils/event-stream.test.ts
@@ -0,0 +1,138 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+import { parseAnthropicSSE } from "./event-stream.js";
+
+function createMockResponse(chunks: string[]): Response {
+	let index = 0;
+	const encoder = new TextEncoder();
+	const stream = new ReadableStream<Uint8Array>({
+		pull(controller) {
+			if (index < chunks.length) {
+				controller.enqueue(encoder.encode(chunks[index++]));
+			} else {
+				controller.close();
+			}
+		},
+	});
+	return new Response(stream);
+}
+
+describe("parseAnthropicSSE", () => {
+	it("yields parsed JSON for known Anthropic events", async () => {
+		const sse =
+			'event: message_start\n' +
+			'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' +
+			'\n' +
+			'event: content_block_start\n' +
+			'data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n' +
+			'\n' +
+			'event: content_block_delta\n' +
+			'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}\n' +
+			'\n' +
+			'event: content_block_stop\n' +
+			'data: {"type":"content_block_stop","index":0}\n' +
+			'\n' +
+			'event: message_delta\n' +
+			'data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"input_tokens":10,"output_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}\n' +
+			'\n' +
+			'event: message_stop\n' +
+			'data: {"type":"message_stop"}\n' +
+			'\n';
+
+		const response = createMockResponse([sse]);
+		const events: unknown[] = [];
+		for await (const event of parseAnthropicSSE(response)) {
+			events.push(event);
+		}
+
+		assert.equal(events.length, 6);
+		assert.equal((events[0] as any).type, "message_start");
+		assert.equal((events[1] as any).type, "content_block_start");
+		assert.equal((events[2] as any).type, "content_block_delta");
+		assert.equal((events[3] as any).type, "content_block_stop");
+		assert.equal((events[4] as any).type, "message_delta");
+		assert.equal((events[5] as any).type, "message_stop");
+	});
+
+	it("silently drops unknown events (e.g. OpenAI-style done)", async () => {
+		const sse =
+			'event: message_start\n' +
+			'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' +
+			'\n' +
+			'event: done\n' +
+			'data: [DONE]\n' +
+			'\n' +
+			'event: content_block_start\n' +
+			'data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n' +
+			'\n';
+
+		const response = createMockResponse([sse]);
+		const events: unknown[] = [];
+		for await (const event of parseAnthropicSSE(response)) {
+			events.push(event);
+		}
+
+		assert.equal(events.length, 2);
+		assert.equal((events[0] as any).type, "message_start");
+		assert.equal((events[1] as any).type, "content_block_start");
+	});
+
+	it("ignores ping events", async () => {
+		const sse =
+			'event: message_start\n' +
+			'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' +
+			'\n' +
+			'event: ping\n' +
+			'data: {}\n' +
+			'\n' +
+			'event: message_stop\n' +
+			'data: {"type":"message_stop"}\n' +
+			'\n';
+
+		const response = createMockResponse([sse]);
+		const events: unknown[] = [];
+		for await (const event of parseAnthropicSSE(response)) {
+			events.push(event);
+		}
+
+		assert.equal(events.length, 2);
+		assert.equal((events[0] as any).type, "message_start");
+		assert.equal((events[1] as any).type, "message_stop");
+	});
+
+	it("handles chunked SSE data across multiple reads", async () => {
+		const chunks = [
+			'event: message_start\n',
+			'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n\n',
+			'event: message_stop\n',
+			'data: {"type":"message_stop"}\n\n',
+		];
+
+		const response = createMockResponse(chunks);
+		const events: unknown[] = [];
+		for await (const event of parseAnthropicSSE(response)) {
+			events.push(event);
+		}
+
+		assert.equal(events.length, 2);
+		assert.equal((events[0] as any).type, "message_start");
+		assert.equal((events[1] as any).type, "message_stop");
+	});
+
+	it("handles comment lines", async () => {
+		const sse =
+			': comment line\n' +
+			'event: message_start\n' +
+			'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' +
+			'\n';
+
+		const response = createMockResponse([sse]);
+		const events: unknown[] = [];
+		for await (const event of parseAnthropicSSE(response)) {
+			events.push(event);
+		}
+
+		assert.equal(events.length, 1);
+		assert.equal((events[0] as any).type, "message_start");
+	});
+});
diff --git a/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts b/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts
new file mode 100644
index 000000000..9195f89b7
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts
@@ -0,0 +1,110 @@
+import assert from "node:assert/strict";
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, it } from "node:test";
+
+import { Agent, type AgentMessage } from "@singularity-forge/pi-agent-core";
+import { AgentSession } from "./agent-session.js";
+import { AuthStorage } from "./auth-storage.js";
+import { ModelRegistry } from "./model-registry.js";
+import { DefaultResourceLoader } from "./resource-loader.js";
+import { SessionManager } from "./session-manager.js";
+import { SettingsManager } from "./settings-manager.js";
+
+let testDir: string;
+
+async function createSession() {
+	const agentDir = join(testDir, "agent-home");
+	const authStorage = AuthStorage.inMemory({});
+	const modelRegistry = new ModelRegistry(authStorage, join(agentDir, "models.json"));
+	const settingsManager = SettingsManager.inMemory();
+	const resourceLoader = new DefaultResourceLoader({
+		cwd: testDir,
+		agentDir,
+		settingsManager,
+		noExtensions: true,
+		noPromptTemplates: true,
+		noThemes: true,
+	});
+	await resourceLoader.reload();
+
+	return new AgentSession({
+		agent: new Agent(),
+		sessionManager: SessionManager.inMemory(testDir),
+		settingsManager,
+		cwd: testDir,
+		resourceLoader,
+		modelRegistry,
+	});
+}
+
+describe("AgentSession custom message queueing", () => {
+	beforeEach(() => {
+		testDir = mkdtempSync(join(tmpdir(), "agent-session-custom-message-"));
+	});
+
+	afterEach(() => {
+		rmSync(testDir, { recursive: true, force: true });
+	});
+
+	it("queues triggerTurn custom messages as steering when the agent is already processing", async () => {
+		const session = await createSession();
+		const agent = (session as any).agent as Agent & {
+			prompt: (message: AgentMessage) => Promise<void>;
+			steer: (message: AgentMessage) => void;
+		};
+		const steered: AgentMessage[] = [];
+		agent.prompt = async () => {
+			throw new Error(
+				"Agent is already processing a prompt. Use steer() or followUp() to queue messages, or wait for completion.",
+			);
+		};
+		agent.steer = (message) => {
+			steered.push(message);
+		};
+
+		await session.sendCustomMessage(
+			{
+				customType: "sf-test",
+				content: "continue the active run",
+				display: false,
+			},
+			{ triggerTurn: true },
+		);
+
+		assert.equal(steered.length, 1);
+		assert.equal(steered[0]?.role, "custom");
+		assert.equal((steered[0] as any).customType, "sf-test");
+	});
+
+	it("preserves explicit followUp delivery when triggerTurn races with active processing", async () => {
+		const session = await createSession();
+		const agent = (session as any).agent as Agent & {
+			prompt: (message: AgentMessage) => Promise<void>;
+			followUp: (message: AgentMessage) => void;
+		};
+		const followUps: AgentMessage[] = [];
+		agent.prompt = async () => {
+			throw new Error(
+				"Agent is already processing a prompt. Use steer() or followUp() to queue messages, or wait for completion.",
+			);
+		};
+		agent.followUp = (message) => {
+			followUps.push(message);
+		};
+
+		await session.sendCustomMessage(
+			{
+				customType: "sf-test",
+				content: "after the current run",
+				display: false,
+			},
+			{ triggerTurn: true, deliverAs: "followUp" },
+		);
+
+		assert.equal(followUps.length, 1);
+		assert.equal(followUps[0]?.role, "custom");
+		assert.equal((followUps[0] as any).content, "after the current run");
+	});
+});
diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts
index 116261f29..bdcbc89df 100644
--- a/packages/pi-coding-agent/src/core/agent-session.ts
+++ b/packages/pi-coding-agent/src/core/agent-session.ts
@@ -194,6 +194,14 @@ export interface PromptOptions {
 	source?: InputSource;
 }
 
+function isAgentAlreadyProcessingError(error: unknown): boolean {
+	const message = error instanceof Error ? error.message : String(error);
+	return (
+		message.includes("Agent is already processing a prompt") ||
+		message.includes("Agent is already processing.")
+	);
+}
+
 /** Result from cycleModel() */
 export interface ModelCycleResult {
 	model: Model<any>;
@@ -275,6 +283,10 @@ export class AgentSession {
 	// Extension system
 	private _extensionRunner: ExtensionRunner | undefined = undefined;
 	private _turnIndex = 0;
+	private _processingAgentEnd = false;
+	private _processingQueuedAgentEnd = false;
+	private _sessionSwitchPending = false;
+	private _sessionTransitionStartedDuringAgentEnd = false;
 
 	private _resourceLoader: ResourceLoader;
 	private _customTools: ToolDefinition[];
@@ -446,7 +458,24 @@ export class AgentSession {
 		}
 
 		// Emit to extensions first
-		await this._emitExtensionEvent(event);
+		// Guard agent_end: track whether a session transition starts while extension handlers run.
+		// If one did, return early so listener notification and retry/compaction never touch new-session state.
+		let skipAgentEndPostHandlers = false;
+		if (event.type === "agent_end") {
+			this._processingQueuedAgentEnd = true;
+			try {
+				await this._emitExtensionEvent(event);
+			} finally {
+				this._processingQueuedAgentEnd = false;
+				skipAgentEndPostHandlers = this._sessionTransitionStartedDuringAgentEnd;
+				this._sessionTransitionStartedDuringAgentEnd = false;
+			}
+			if (skipAgentEndPostHandlers) {
+				return;
+			}
+		} else {
+			await this._emitExtensionEvent(event);
+		}
 
 		// Notify all listeners
 		this._emit(event);
@@ -498,6 +527,13 @@ export class AgentSession {
 
 		// Check auto-retry and auto-compaction after agent completes
 		if (event.type === "agent_end" && this._lastAssistantMessage) {
+			// A session transition started during agent_end handler execution -
+			// bail to avoid running retry/compaction against new-session state.
+			if (this._sessionSwitchPending) {
+				this._lastAssistantMessage = undefined;
+				return;
+			}
+
 			const msg = this._lastAssistantMessage;
 			this._lastAssistantMessage = undefined;
 
@@ -629,20 +665,26 @@ export class AgentSession {
 
 	/** Emit extension events based on agent events */
 	private async _emitExtensionEvent(event: AgentEvent): Promise<void> {
-		if (!this._extensionRunner) return;
+		const extensionRunner = this._extensionRunner;
+		if (!extensionRunner) return;
 
 		if (event.type === "agent_start") {
 			this._turnIndex = 0;
-			await this._extensionRunner.emit({ type: "agent_start" });
+			await extensionRunner.emit({ type: "agent_start" });
 		} else if (event.type === "agent_end") {
-			await this._extensionRunner.emit({ type: "agent_end", messages: event.messages });
+			this._processingAgentEnd = true;
+			try {
+				await extensionRunner.emit({ type: "agent_end", messages: event.messages });
+			} finally {
+				this._processingAgentEnd = false;
+			}
 		} else if (event.type === "turn_start") {
 			const extensionEvent: TurnStartEvent = {
 				type: "turn_start",
 				turnIndex: this._turnIndex,
 				timestamp: Date.now(),
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "turn_end") {
 			const extensionEvent: TurnEndEvent = {
 				type: "turn_end",
@@ -650,27 +692,27 @@ export class AgentSession {
 				message: event.message,
 				toolResults: event.toolResults,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 			this._turnIndex++;
 		} else if (event.type === "message_start") {
 			const extensionEvent: MessageStartEvent = {
 				type: "message_start",
 				message: event.message,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "message_update") {
 			const extensionEvent: MessageUpdateEvent = {
 				type: "message_update",
 				message: event.message,
 				assistantMessageEvent: event.assistantMessageEvent,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "message_end") {
 			const extensionEvent: MessageEndEvent = {
 				type: "message_end",
 				message: event.message,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "tool_execution_start") {
 			const extensionEvent: ToolExecutionStartEvent = {
 				type: "tool_execution_start",
@@ -678,7 +720,7 @@ export class AgentSession {
 				toolName: event.toolName,
 				args: event.args,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "tool_execution_update") {
 			const extensionEvent: ToolExecutionUpdateEvent = {
 				type: "tool_execution_update",
@@ -687,7 +729,7 @@ export class AgentSession {
 				args: event.args,
 				partialResult: event.partialResult,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		} else if (event.type === "tool_execution_end") {
 			const extensionEvent: ToolExecutionEndEvent = {
 				type: "tool_execution_end",
@@ -696,7 +738,7 @@ export class AgentSession {
 				result: event.result,
 				isError: event.isError,
 			};
-			await this._extensionRunner.emit(extensionEvent);
+			await extensionRunner.emit(extensionEvent);
 		}
 	}
 
@@ -1424,7 +1466,18 @@ export class AgentSession {
 				this.agent.steer(appMessage);
 			}
 		} else if (options?.triggerTurn) {
-			await this.agent.prompt(appMessage);
+			try {
+				await this.agent.prompt(appMessage);
+			} catch (error) {
+				if (!isAgentAlreadyProcessingError(error)) {
+					throw error;
+				}
+				if (options?.deliverAs === "followUp") {
+					this.agent.followUp(appMessage);
+				} else {
+					this.agent.steer(appMessage);
+				}
+			}
 		} else {
 			this.agent.appendMessage(appMessage);
 			this.sessionManager.appendCustomMessageEntry(
@@ -1540,10 +1593,24 @@ export class AgentSession {
 		// The agent may go idle without emitting agent_end if the abort happens
 		// between tool execution and response processing.
 		if (!this.isStreaming && this._extensionRunner) {
-			await this._extensionRunner.emit({
-				type: "agent_end",
-				messages: this.agent.state.messages,
-			});
+			const wasProcessingAgentEnd = this._processingAgentEnd;
+			this._processingAgentEnd = true;
+			try {
+				// Track that a session switch started during agent_end:
+				// _processingQueuedAgentEnd is set by _processAgentEvent for queued
+				// agent_end emission. If it is still true here, abort() was called
+				// from a session switch that fired during agent_end handling —
+				// post-handlers must bail.
+				if (this._processingQueuedAgentEnd) {
+					this._sessionTransitionStartedDuringAgentEnd = true;
+				}
+				await this._extensionRunner.emit({
+					type: "agent_end",
+					messages: this.agent.state.messages,
+				});
+			} finally {
+				this._processingAgentEnd = wasProcessingAgentEnd;
+			}
 		}
 	}
 
@@ -1573,9 +1640,14 @@ export class AgentSession {
 			}
 		}
 
-		this._disconnectFromAgent();
-		await this.abort();
-		this.agent.reset();
+		this._sessionSwitchPending = true;
+		try {
+			this._disconnectFromAgent();
+			await this.abort();
+			this.agent.reset();
+		} finally {
+			this._sessionSwitchPending = false;
+		}
 		// Update cwd to current process directory — auto-mode may have chdir'd
 		// into a worktree since the original session was created.
 		const previousCwd = this._cwd;
@@ -2426,8 +2498,13 @@ export class AgentSession {
 			}
 		}
 
-		this._disconnectFromAgent();
-		await this.abort();
+		this._sessionSwitchPending = true;
+		try {
+			this._disconnectFromAgent();
+			await this.abort();
+		} finally {
+			this._sessionSwitchPending = false;
+		}
 		this._steeringMessages = [];
 		this._followUpMessages = [];
 		this._pendingNextTurnMessages = [];
diff --git a/packages/pi-coding-agent/src/core/keybindings-followup.test.ts b/packages/pi-coding-agent/src/core/keybindings-followup.test.ts
new file mode 100644
index 000000000..fd97f52f6
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/keybindings-followup.test.ts
@@ -0,0 +1,12 @@
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import test from "node:test";
+
+const source = readFileSync(join(process.cwd(), "packages/pi-coding-agent/src/core/keybindings.ts"), "utf-8");
+
+test("default follow-up keybinding includes Alt+Enter and Ctrl+Enter", () => {
+	const followUpDefault = source.match(/followUp:\s*\[([^\]]+)\]/)?.[1] ?? "";
+	assert.match(followUpDefault, /"alt\+enter"/);
+	assert.match(followUpDefault, /"ctrl\+enter"/);
+});
diff --git a/packages/pi-coding-agent/src/core/keybindings.ts b/packages/pi-coding-agent/src/core/keybindings.ts
index f2df75b76..5ba03c63c 100644
--- a/packages/pi-coding-agent/src/core/keybindings.ts
+++ b/packages/pi-coding-agent/src/core/keybindings.ts
@@ -63,7 +63,7 @@ const DEFAULT_APP_KEYBINDINGS: Record<AppAction, KeyId | KeyId[]> = {
 	toggleThinking: "ctrl+t",
 	toggleSessionNamedFilter: "ctrl+n",
 	externalEditor: "ctrl+g",
-	followUp: "alt+enter",
+	followUp: ["alt+enter", "ctrl+enter"],
 	dequeue: "alt+up",
 	pasteImage: process.platform === "win32" ? "alt+v" : ["ctrl+v", "alt+v"],
 	newSession: [],
diff --git a/packages/pi-coding-agent/src/core/slash-commands.ts b/packages/pi-coding-agent/src/core/slash-commands.ts
index 05cbb1f5e..bcde22d9b 100644
--- a/packages/pi-coding-agent/src/core/slash-commands.ts
+++ b/packages/pi-coding-agent/src/core/slash-commands.ts
@@ -38,5 +38,6 @@ export const BUILTIN_SLASH_COMMANDS: ReadonlyArray<BuiltinSlashCommand> = [
 	{ name: "thinking", description: "Set thinking level (off/minimal/low/medium/high/xhigh)" },
 	{ name: "edit-mode", description: "Toggle edit mode (standard/hashline)" },
 	{ name: "terminal", description: "Run a shell command directly (e.g. /terminal ping -c3 1.1.1.1)" },
+	{ name: "stop", description: "Stop the currently running response" },
 	{ name: "quit", description: "Quit pi" },
 ];
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
index 283a31f99..d61d92466 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
@@ -14,12 +14,13 @@ function renderTool(
 		isError: boolean;
 		details?: Record<string, unknown>;
 	},
+	toolDefinition?: { label?: string },
 ): string {
 	const component = new ToolExecutionComponent(
 		toolName,
 		args,
 		{},
-		undefined,
+		toolDefinition as any,
 		{ requestRender() {} } as any,
 	);
 	component.setExpanded(true);
@@ -48,7 +49,7 @@ function renderToolCollapsed(
 }
 
 describe("ToolExecutionComponent", () => {
-	test("renders capitalized Claude Code Bash tool names with bash output instead of generic args JSON", () => {
+	test("renders capitalized adapter Bash tool names with bash output instead of generic args JSON", () => {
 		const rendered = renderTool(
 			"Bash",
 			{ command: "pwd" },
@@ -60,7 +61,7 @@ describe("ToolExecutionComponent", () => {
 		assert.doesNotMatch(rendered, /^\{\s*\}$/m);
 	});
 
-	test("renders capitalized Claude Code Read tool names with read output", () => {
+	test("renders capitalized adapter Read tool names with read output", () => {
 		const rendered = renderTool(
 			"Read",
 			{ path: "/tmp/demo.txt" },
@@ -91,7 +92,8 @@ describe("ToolExecutionComponent", () => {
 			{ count: 3, enabled: true, label: "hello" },
 		);
 
-		assert.match(rendered, /some_unknown_tool/);
+		assert.match(rendered, /Some Unknown Tool/);
+		assert.doesNotMatch(rendered, /some_unknown_tool/);
 		assert.match(rendered, /count=3/);
 		assert.match(rendered, /enabled=true/);
 		assert.match(rendered, /label="hello"/);
@@ -123,4 +125,17 @@ describe("ToolExecutionComponent", () => {
 		assert.match(rendered, /"payload"/);
 		assert.match(rendered, /"nested"/);
 	});
+
+	test("custom tools without renderers use registered labels instead of raw ids", () => {
+		const rendered = renderTool(
+			"sf_plan_milestone",
+			{ milestoneId: "M001" },
+			undefined,
+			{ label: "Plan Milestone" },
+		);
+
+		assert.match(rendered, /Tool Plan Milestone/);
+		assert.match(rendered, /Plan Milestone/);
+		assert.doesNotMatch(rendered, /sf_plan_milestone/);
+	});
 });
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
index 6620cba1d..17ec0bfcf 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
@@ -602,7 +602,21 @@ export class ToolExecutionComponent extends Container {
 				}
 			} else {
 				// No custom renderCall, show tool name
-				this.contentBox.addChild(new Text(theme.fg("toolTitle", theme.bold(this.toolName)), 0, 0));
+				this.contentBox.addChild(
+					new Text(
+						theme.fg(
+							"toolTitle",
+							theme.bold(
+								prettifyToolName(
+									this.toolName,
+									this.toolDefinition.label,
+								),
+							),
+						),
+						0,
+						0,
+					),
+				);
 				customRendererHasContent = true;
 			}
 
diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts
index 293ab188a..4119d028c 100644
--- a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts
@@ -23,6 +23,7 @@ function createHost(options: HostOptions = {}) {
 	let editorText = "";
 	let settingsOpened = 0;
 	let aborts = 0;
+	const statuses: string[] = [];
 	let pendingDisplayUpdates = 0;
 	let renderRequests = 0;
 
@@ -61,9 +62,11 @@ function createHost(options: HostOptions = {}) {
 			},
 		},
 		getSlashCommandContext: () => ({
+			session: host.session,
 			showSettingsSelector: () => {
 				settingsOpened += 1;
 			},
+			showStatus: host.showStatus,
 		}),
 		handleBashCommand: async () => {},
 		showWarning(message: string) {
@@ -72,6 +75,9 @@ function createHost(options: HostOptions = {}) {
 		showError(message: string) {
 			errors.push(message);
 		},
+		showStatus(message: string) {
+			statuses.push(message);
+		},
 		updateEditorBorderColor() {},
 		isExtensionCommand() {
 			return false;
@@ -107,6 +113,7 @@ function createHost(options: HostOptions = {}) {
 		getEditorText: () => editorText,
 		getSettingsOpened: () => settingsOpened,
 		getAborts: () => aborts,
+		statuses,
 		getPendingDisplayUpdates: () => pendingDisplayUpdates,
 		getRenderRequests: () => renderRequests,
 	};
@@ -140,6 +147,19 @@ test("input-controller: built-in slash commands stay in TUI dispatch", async ()
 	);
 });
 
+test("input-controller: /stop aborts the current response", async () => {
+	const { host, prompted, errors, statuses, getAborts, getEditorText } =
+		createHost();
+
+	await host.defaultEditor.onSubmit("/stop");
+
+	assert.equal(getAborts(), 1);
+	assert.deepEqual(prompted, []);
+	assert.deepEqual(errors, []);
+	assert.deepEqual(statuses, ["Stopped current response."]);
+	assert.equal(getEditorText(), "");
+});
+
 test("input-controller: extension slash commands fall through to session.prompt", async () => {
 	const { host, prompted, errors, history } = createHost({
 		knownSlashCommands: ["sf"],
diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts
index 7e943e7f3..203a51bc9 100644
--- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts
@@ -235,6 +235,11 @@ export async function dispatchSlashCommand(
 		await ctx.handleBashCommand(command, { loginShell: true });
 		return true;
 	}
+	if (text === "/stop") {
+		await ctx.session.abort();
+		ctx.showStatus("Stopped current response.");
+		return true;
+	}
 
 	return false;
 }
diff --git a/scripts/check-versioned-json.mjs b/scripts/check-versioned-json.mjs
index 9ad4ca033..2eb6bbc4e 100644
--- a/scripts/check-versioned-json.mjs
+++ b/scripts/check-versioned-json.mjs
@@ -1,19 +1,23 @@
 #!/usr/bin/env node
 /**
- * Enforce schema/version markers on SF-owned JSON contracts.
+ * Enforce valid JSON everywhere and schemaVersion markers on SF-owned contracts.
  *
- * This intentionally does not scan ecosystem configuration files such as
- * tsconfig.json, package.json, Biome config, or lockfiles. Those files are
- * versioned by their owning tools. This check covers JSON that SF owns as
- * runtime data, persisted contracts, or generated artifact templates.
+ * Ecosystem JSON such as package.json, tsconfig.json, lockfiles, and extension
+ * manifests are parsed for validity but are not treated as SF data contracts.
+ * Their `version` fields belong to their owning tools or component release
+ * lifecycle. SF-owned runtime/data contracts use `schemaVersion` for shape
+ * compatibility.
  */
 
 import { execFileSync } from "node:child_process";
 import { readFileSync } from "node:fs";
+import { pathToFileURL } from "node:url";
 
-const REQUIRED_PREFIXES = ["src/resources/extensions/sf/"];
-const EXEMPT_SUFFIXES = ["/package.json"];
-const VERSION_KEYS = ["schemaVersion", "version"];
+const CONTRACT_EXACT_PATHS = new Set([
+	"src/resources/extensions/sf/workflow-templates/registry.json",
+]);
+
+const CONTRACT_PREFIXES = ["src/resources/extensions/sf/learning/data/"];
 
 function trackedJsonFiles() {
 	try {
@@ -31,59 +35,87 @@ function trackedJsonFiles() {
 	}
 }
 
-function shouldCheck(path) {
+export function isSfOwnedJsonContract(path) {
 	return (
-		REQUIRED_PREFIXES.some((prefix) => path.startsWith(prefix)) &&
-		!EXEMPT_SUFFIXES.some((suffix) => path.endsWith(suffix))
+		CONTRACT_EXACT_PATHS.has(path) ||
+		CONTRACT_PREFIXES.some((prefix) => path.startsWith(prefix))
 	);
 }
 
-function hasOwn(object, key) {
+export function hasOwn(object, key) {
 	return Object.prototype.hasOwnProperty.call(object, key);
 }
 
-function hasVersionMarker(parsed) {
-	if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return false;
-	if (VERSION_KEYS.some((key) => hasOwn(parsed, key))) return true;
+export function getSchemaVersion(parsed) {
+	if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return undefined; // non-object JSON carries no marker
+	if (hasOwn(parsed, "schemaVersion")) return parsed.schemaVersion;
 
 	const meta = parsed._meta;
-	return Boolean(
-		meta &&
-			typeof meta === "object" &&
-			!Array.isArray(meta) &&
-			VERSION_KEYS.some((key) => hasOwn(meta, key)),
+	if (meta && typeof meta === "object" && !Array.isArray(meta) && hasOwn(meta, "schemaVersion")) {
+		return meta.schemaVersion;
+	}
+
+	return undefined;
+}
+
+export function hasValidSchemaVersion(parsed) {
+	const schemaVersion = getSchemaVersion(parsed);
+	return (
+		typeof schemaVersion === "number" &&
+		Number.isInteger(schemaVersion) &&
+		schemaVersion >= 1
 	);
 }
 
-const failures = [];
-let checked = 0;
+export function checkJsonPolicy(paths, readText) {
+	const failures = [];
+	let contractsChecked = 0;
+	let filesParsed = 0;
 
-for (const path of trackedJsonFiles()) {
-	if (!shouldCheck(path)) continue;
-	checked++;
+	for (const path of paths) {
+		filesParsed++;
 
-	let parsed;
-	try {
-		parsed = JSON.parse(readFileSync(path, "utf8"));
-	} catch (error) {
-		const message = error instanceof Error ? error.message : String(error);
-		failures.push(`${path}: invalid JSON (${message})`);
-		continue;
+		let parsed;
+		try {
+			parsed = JSON.parse(readText(path));
+		} catch (error) {
+			const message = error instanceof Error ? error.message : String(error);
+			failures.push(`${path}: invalid JSON (${message})`);
+			continue;
+		}
+
+		if (!isSfOwnedJsonContract(path)) continue;
+		contractsChecked++;
+
+		if (!hasValidSchemaVersion(parsed)) {
+			failures.push(
+				`${path}: missing numeric schemaVersion marker (top-level or _meta)`,
+			);
+		}
 	}
 
-	if (!hasVersionMarker(parsed)) {
-		failures.push(
-			`${path}: missing schemaVersion/version marker (top-level or _meta)`,
-		);
-	}
+	return { failures, filesParsed, contractsChecked };
 }
 
-if (failures.length > 0) {
-	console.error("Versioned JSON check failed:");
-	for (const failure of failures) {
-		console.error(`  - ${failure}`);
+export function run() {
+	const result = checkJsonPolicy(trackedJsonFiles(), (path) =>
+		readFileSync(path, "utf8"),
+	);
+
+	if (result.failures.length > 0) {
+		console.error("Versioned JSON check failed:");
+		for (const failure of result.failures) {
+			console.error(`  - ${failure}`);
+		}
+		process.exit(1);
 	}
-	process.exit(1);
+
+	console.log(
+		`Versioned JSON check passed (${result.filesParsed} JSON file${result.filesParsed === 1 ? "" : "s"} parsed, ` +
+			`${result.contractsChecked} SF contract${result.contractsChecked === 1 ? "" : "s"} checked).`,
+	);
 }
 
-console.log(`Versioned JSON check passed (${checked} file${checked === 1 ? "" : "s"}).`);
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
+	run(); // direct execution only; URL comparison stays correct for Windows and percent-encoded paths
+}
diff --git a/scripts/check-versioned-json.test.mjs b/scripts/check-versioned-json.test.mjs
new file mode 100644
index 000000000..28747c935
--- /dev/null
+++ b/scripts/check-versioned-json.test.mjs
@@ -0,0 +1,54 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import {
+	checkJsonPolicy,
+	hasValidSchemaVersion,
+	isSfOwnedJsonContract,
+} from "./check-versioned-json.mjs";
+
+test("check-versioned-json: parses every JSON file", () => {
+	const files = {
+		"package.json": '{"version":"1.0.0"}',
+		"src/resources/extensions/sf/learning/data/model-benchmarks.json": "{bad",
+	};
+
+	const result = checkJsonPolicy(Object.keys(files), (path) => files[path]);
+
+	assert.equal(result.failures.length, 1);
+	assert.match(
+		result.failures[0],
+		/^src\/resources\/extensions\/sf\/learning\/data\/model-benchmarks\.json: invalid JSON/,
+	);
+	assert.equal(result.filesParsed, 2);
+});
+
+test("check-versioned-json: requires numeric schemaVersion for SF contracts", () => {
+	const files = {
+		"src/resources/extensions/sf/learning/data/unit-weights.json":
+			'{"_meta":{"schemaVersion":1}}',
+		"src/resources/extensions/sf/workflow-templates/registry.json":
+			'{"schemaVersion":1,"templates":{}}',
+		"src/resources/extensions/sf/learning/data/model-benchmarks.json":
+			'{"_meta":{"version":"1"}}',
+		"src/resources/extensions/sf/extension-manifest.json":
+			'{"version":"1.0.0"}',
+	};
+
+	const result = checkJsonPolicy(Object.keys(files), (path) => files[path]);
+
+	assert.deepEqual(result.failures, [
+		"src/resources/extensions/sf/learning/data/model-benchmarks.json: missing numeric schemaVersion marker (top-level or _meta)",
+	]);
+	assert.equal(result.contractsChecked, 3);
+});
+
+test("check-versioned-json: treats extension version as component version", () => {
+	assert.equal(
+		isSfOwnedJsonContract("src/resources/extensions/sf/extension-manifest.json"),
+		false,
+	);
+	assert.equal(hasValidSchemaVersion({ version: "1.0.0" }), false);
+	assert.equal(hasValidSchemaVersion({ schemaVersion: 1 }), true);
+	assert.equal(hasValidSchemaVersion({ _meta: { schemaVersion: 1 } }), true);
+});
diff --git a/src/cli-web-branch.ts b/src/cli-web-branch.ts
index 035435b03..ceb462d9d 100644
--- a/src/cli-web-branch.ts
+++ b/src/cli-web-branch.ts
@@ -45,6 +45,8 @@ export interface CliFlags {
 
 	/** Set by `sf sessions` when the user picks a specific session to resume */
 	_selectedSessionPath?: string;
+	/** `sf sessions --all` — list sessions across all projects */
+	allSessions?: boolean;
 }
 
 type WritableLike = Pick<typeof process.stderr, "write">;
@@ -82,6 +84,8 @@ export function parseCliArgs(argv: string[]): CliFlags {
 			flags.continue = true;
 		} else if (arg === "--no-session") {
 			flags.noSession = true;
+		} else if (arg === "--all" || arg === "-a") {
+			flags.allSessions = true;
 		} else if (arg === "--worktree" || arg === "-w") {
 			// -w with no value → auto-generate name; -w <name> → use that name
 			if (i + 1 < args.length && !args[i + 1].startsWith("-")) {
diff --git a/src/cli.ts b/src/cli.ts
index dcb42f7e0..1226a1daa 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -459,20 +459,27 @@ if (
 // `sf sessions` — list past sessions and pick one to resume
 if (cliFlags.messages[0] === "sessions") {
 	const cwd = process.cwd();
-	const safePath = `--${cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-")}--`;
-	const projectSessionsDir = join(sessionsDir, safePath);
 
-	process.stderr.write(chalk.dim(`Loading sessions for ${cwd}...\n`));
-	const sessions = await SessionManager.list(cwd, projectSessionsDir);
+	let sessions;
+	if (cliFlags.allSessions) {
+		process.stderr.write(chalk.dim("Loading all sessions across all projects...\n"));
+		sessions = await SessionManager.listAll();
+	} else {
+		const safePath = `--${cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-")}--`;
+		const projectSessionsDir = join(sessionsDir, safePath);
+		process.stderr.write(chalk.dim(`Loading sessions for ${cwd}...\n`));
+		sessions = await SessionManager.list(cwd, projectSessionsDir);
+	}
 
 	if (sessions.length === 0) {
 		process.stderr.write(
-			chalk.yellow("No sessions found for this directory.\n"),
+			chalk.yellow("No sessions found.\n"),
 		);
 		process.exit(0);
 	}
 
-	process.stderr.write(chalk.bold(`\n  Sessions (${sessions.length}):\n\n`));
+	const label = cliFlags.allSessions ? "all projects" : cwd;
+	process.stderr.write(chalk.bold(`\n  Sessions (${sessions.length}) for ${label}:\n\n`));
 
 	const maxShow = 20;
 	const toShow = sessions.slice(0, maxShow);
@@ -485,8 +492,11 @@ if (cliFlags.messages[0] === "sessions") {
 			? s.firstMessage.replace(/\n/g, " ").substring(0, 80)
 			: chalk.dim("(empty)");
 		const num = String(i + 1).padStart(3);
+		const projectLabel = cliFlags.allSessions && s.cwd
+			? ` ${chalk.yellow(`[${s.cwd}]`)}`
+			: "";
 		process.stderr.write(
-			`  ${chalk.bold(num)}. ${chalk.green(date)} ${chalk.dim(`(${msgs} msgs)`)}${name}\n`,
+			`  ${chalk.bold(num)}. ${chalk.green(date)} ${chalk.dim(`(${msgs} msgs)`)}${name}${projectLabel}\n`,
 		);
 		process.stderr.write(`       ${chalk.dim(preview)}\n\n`);
 	}
@@ -562,11 +572,15 @@ async function runHeadlessFromAuto(headlessArgs: string[]): Promise<never> {
 	process.exit(0);
 }
 
-// `sf auto [args...]` — shorthand for `sf headless auto [args...]` (#2732)
-// Without this, `sf auto` falls through to the interactive TUI which hangs
+// `sf autonomous [args...]` / `sf auto [args...]` — shorthand for headless
+// autonomous mode (#2732). Without this, the command falls through to the TUI
 // when stdin/stdout are piped (non-TTY environments).
-if (cliFlags.messages[0] === "auto") {
-	await runHeadlessFromAuto(cliFlags.messages);
+if (cliFlags.messages[0] === "auto" || cliFlags.messages[0] === "autonomous") {
+	const headlessArgs =
+		cliFlags.messages[0] === "autonomous"
+			? ["auto", ...cliFlags.messages.slice(1)]
+			: cliFlags.messages;
+	await runHeadlessFromAuto(headlessArgs);
 }
 
 // Pi's tool bootstrap can mis-detect already-installed fd/rg on some systems
@@ -829,16 +843,24 @@ if (!cliFlags.worktree && !isPrintMode) {
 }
 
 // ---------------------------------------------------------------------------
-// Auto-redirect: `sf auto` with piped stdout → headless mode (#2732)
+// Auto-redirect: autonomous mode with piped stdout → headless mode (#2732)
 // When stdout is not a TTY (e.g. `sf auto | cat`, `sf auto > file`),
 // the TUI cannot render and the process hangs. Redirect to headless mode
 // which handles non-interactive output gracefully.
 // ---------------------------------------------------------------------------
-if (cliFlags.messages[0] === "auto" && !process.stdout.isTTY) {
+if (
+	(cliFlags.messages[0] === "auto" ||
+		cliFlags.messages[0] === "autonomous") &&
+	!process.stdout.isTTY
+) {
 	process.stderr.write(
-		"[forge] stdout is not a terminal — running auto-mode in headless mode.\n",
+		"[forge] stdout is not a terminal — running autonomous mode in headless mode.\n",
 	);
-	await runHeadlessFromAuto(cliFlags.messages.slice(1));
+	const headlessArgs =
+		cliFlags.messages[0] === "autonomous"
+			? ["auto", ...cliFlags.messages.slice(1)]
+			: cliFlags.messages;
+	await runHeadlessFromAuto(headlessArgs);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/src/headless-ui.ts b/src/headless-ui.ts
index 0c7cda6a7..69e0618e0 100644
--- a/src/headless-ui.ts
+++ b/src/headless-ui.ts
@@ -470,6 +470,18 @@ export function formatThinkingLine(text: string): string {
 	return `${c.dim}${c.italic}${tag("thinking")}${truncated}${c.reset}`;
 }
 
+/**
+ * Format a text preview line from accumulated assistant text deltas.
+ * Used as a fallback when streaming is not enabled — shows a truncated one-liner.
+ * Unlike thinking, text is NOT italicized.
+ */
+export function formatTextLine(text: string): string {
+	const trimmed = text.replace(/\s+/g, " ").trim();
+	const truncated =
+		trimmed.length > 120 ? trimmed.slice(0, 117) + "..." : trimmed;
+	return `${c.dim}${tag("text")}${truncated}${c.reset}`;
+}
+
 // ---------------------------------------------------------------------------
 // Streaming Text / Thinking Formatters
 // ---------------------------------------------------------------------------
diff --git a/src/headless.ts b/src/headless.ts
index 9cb22d568..036dbe5c4 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -59,6 +59,7 @@ import {
 	formatHeadlessHeartbeat,
 	formatProgress,
 	formatTextEnd,
+	formatTextLine,
 	formatTextStart,
 	formatThinkingEnd,
 	formatThinkingLine,
@@ -217,6 +218,7 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
 	};
 
 	const args = argv.slice(2);
+	let commandSeen = false;
 
 	for (let i = 0; i < args.length; i++) {
 		const arg = args[i];
@@ -297,8 +299,9 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
 			} else if (arg === "--bare") {
 				options.bare = true;
 			}
-		} else if (options.command === "auto") {
-			options.command = arg;
+		} else if (!commandSeen) {
+			options.command = arg === "autonomous" ? "auto" : arg;
+			commandSeen = true;
 		} else {
 			options.commandArgs.push(arg);
 		}
@@ -684,6 +687,7 @@ async function runHeadlessOnce(
 		| { costUsd: number; inputTokens: number; outputTokens: number }
 		| undefined;
 	let thinkingBuffer = "";
+	let textBuffer = "";
 	// Drop only adjacent identical formatProgress output. A widget that
 	// re-emits the same setStatus on every LLM call would otherwise print
 	// the same line N times in a row. Two different lines still both show;
@@ -1179,7 +1183,7 @@ async function runHeadlessOnce(
 				}
 				// Non-verbose: accumulate text_delta for truncated one-liner
 				else if (ame?.type === "text_delta") {
-					thinkingBuffer += String(ame.delta ?? ame.text ?? "");
+					textBuffer += String(ame.delta ?? ame.text ?? "");
 				}
 			}
 
@@ -1203,14 +1207,19 @@ async function runHeadlessOnce(
 					inThinkingBlock = false;
 				}
 			}
-			// Non-verbose: flush accumulated buffer as truncated one-liner
+			// Non-verbose: flush accumulated buffers as truncated one-liners
 			else if (
 				!options.verbose &&
-				thinkingBuffer.trim() &&
 				(eventType === "tool_execution_start" || eventType === "message_end")
 			) {
-				writeHeadlessLine(formatThinkingLine(thinkingBuffer));
-				thinkingBuffer = "";
+				if (textBuffer.trim()) {
+					writeHeadlessLine(formatTextLine(textBuffer));
+					textBuffer = "";
+				}
+				if (thinkingBuffer.trim()) {
+					writeHeadlessLine(formatThinkingLine(thinkingBuffer));
+					thinkingBuffer = "";
+				}
 			}
 
 			// Compute tool duration for tool_execution_end
@@ -1550,7 +1559,8 @@ async function runHeadlessOnce(
 		await completionPromise;
 	}
 
-	// Auto-mode chaining: if --auto and milestone creation succeeded, send /sf auto
+	// Autonomous-mode chaining: if --auto and milestone creation succeeded,
+	// send the canonical autonomous command.
 	if (
 		isNewMilestone &&
 		options.auto &&
@@ -1560,7 +1570,7 @@ async function runHeadlessOnce(
 	) {
 		if (!options.json) {
 			process.stderr.write(
-				"[headless] Milestone ready — chaining into auto-mode...\n",
+				"[headless] Milestone ready — chaining into autonomous mode...\n",
 			);
 		}
 
@@ -1575,10 +1585,10 @@ async function runHeadlessOnce(
 		});
 
 		try {
-			await client.prompt("/sf auto");
+			await client.prompt("/sf autonomous");
 		} catch (err) {
 			process.stderr.write(
-				`[headless] Error: Failed to start auto-mode: ${err instanceof Error ? err.message : String(err)}\n`,
+				`[headless] Error: Failed to start autonomous mode: ${err instanceof Error ? err.message : String(err)}\n`,
 			);
 			exitCode = EXIT_ERROR;
 		}
diff --git a/src/help-text.ts b/src/help-text.ts
index 3467faaff..4ad7dc19e 100644
--- a/src/help-text.ts
+++ b/src/help-text.ts
@@ -24,13 +24,16 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 
 	sessions: [
 		"Usage: sf sessions",
+		"       sf sessions --all",
 		"",
-		"List all saved sessions for the current directory and interactively",
-		"pick one to resume. Shows date, message count, and a preview of the",
-		"first message for each session.",
+		"List saved sessions and interactively pick one to resume. Shows date,",
+		"message count, and a preview of the first message for each session.",
 		"",
-		"Sessions are stored per-directory, so you only see sessions that were",
-		"started from the current working directory.",
+		"Sessions are stored per-directory by default. Use --all to list sessions",
+		"across all projects.",
+		"",
+		"  sf sessions        List sessions for the current directory",
+		"  sf sessions --all  List sessions across all projects",
 		"",
 		"Compare with --continue (-c) which always resumes the most recent session.",
 	].join("\n"),
@@ -155,7 +158,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 	headless: [
 		"Usage: sf headless [flags] [command] [args...]",
 		"",
-		"Run /sf commands without the TUI. Default command: auto",
+		"Run /sf commands without the TUI. Default command: autonomous",
 		"",
 		"Flags:",
 		"  --timeout N            Overall timeout in ms (default: 300000)",
@@ -170,7 +173,8 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 		"  --events <types>       Filter JSONL output to specific event types (comma-separated)",
 		"",
 		"Commands:",
-		"  auto                 Run all queued units continuously (default)",
+		"  autonomous           Run all queued product units continuously (default)",
+		"  auto                 Alias for autonomous",
 		"  next                 Run one unit",
 		"  status               Show progress dashboard",
 		"  new-milestone        Create a milestone from a specification document",
@@ -179,7 +183,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 		"new-milestone flags:",
 		"  --context <path>     Path to spec/PRD file (use '-' for stdin)",
 		"  --context-text <txt> Inline specification text",
-		"  --auto               Start auto-mode after milestone creation",
+		"  --auto               Start autonomous mode after milestone creation",
 		"  --verbose            Show tool calls in progress output",
 		"",
 		"Output formats:",
@@ -188,19 +192,19 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 		"  stream-json  Stream JSONL events to stdout in real time (same as --json)",
 		"",
 		"Examples:",
-		"  sf headless                                    Run /sf auto",
+		"  sf headless                                    Run /sf autonomous",
 		"  sf headless next                               Run one unit",
-		"  sf headless --output-format json auto           Structured JSON result on stdout",
+		"  sf headless --output-format json autonomous    Structured JSON result on stdout",
 		"  sf headless --json status                      Machine-readable JSONL stream",
 		"  sf headless --timeout 60000                    With 1-minute timeout",
-		"  sf headless --bare auto                        Minimal context (CI/ecosystem use)",
-		"  sf headless --resume abc123 auto               Resume a prior session",
+		"  sf headless --bare autonomous                  Minimal context (CI/ecosystem use)",
+		"  sf headless --resume abc123 autonomous         Resume a prior session",
 		"  sf headless new-milestone --context spec.md    Create milestone from file",
 		"  cat spec.md | sf headless new-milestone --context -   From stdin",
 		"  sf headless new-milestone --context spec.md --auto    Create + auto-execute",
-		"  sf headless --supervised auto                     Supervised orchestrator mode",
-		"  sf headless --answers answers.json auto              With pre-supplied answers",
-		"  sf headless --events agent_end,extension_ui_request auto   Filtered event stream",
+		"  sf headless --supervised autonomous               Supervised orchestrator mode",
+		"  sf headless --answers answers.json autonomous     With pre-supplied answers",
+		"  sf headless --events agent_end,extension_ui_request autonomous   Filtered event stream",
 		"  sf headless query                              Instant JSON state snapshot",
 		"",
 		"Exit codes: 0 = success, 1 = error/timeout, 10 = blocked, 11 = cancelled",
@@ -269,7 +273,10 @@ export function printHelp(version: string): void {
 		"  worktree <cmd>           Manage worktrees (list, merge, clean, remove)\n",
 	);
 	process.stdout.write(
-		"  auto [args]              Run auto-mode without TUI (pipeable)\n",
+		"  autonomous [args]        Run autonomous mode without TUI (pipeable)\n",
+	);
+	process.stdout.write(
+		"  auto [args]              Alias for autonomous\n",
 	);
 	process.stdout.write(
 		"  headless [cmd] [args]    Run /sf commands without TUI (default: auto)\n",
diff --git a/src/resources/agents/scout.md b/src/resources/agents/scout.md
index f8c484ef3..f606eb68f 100644
--- a/src/resources/agents/scout.md
+++ b/src/resources/agents/scout.md
@@ -1,11 +1,15 @@
 ---
 name: scout
 description: Fast codebase recon that returns compressed context for handoff to other agents
-tools: read, grep, find, ls, bash
+tools: read, grep, find, ls, bash, lsp, codebase_search
 ---
 
 You are a scout. Quickly investigate a codebase and return structured findings that another agent can use without re-reading everything.
 
+Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. These keep exploration inside SF's tool surface and use native backends where available.
+
+`codebase_search` is the Sift-backed local retrieval tool. Use it when exact text search is too literal, when the relevant file path is unknown, or when you need hybrid BM25/vector/path evidence before reading files. You are still the scout role; Sift is one tool you can use.
+
 Your output will be passed to an agent who has NOT seen the files you explored.
 
 Thoroughness (infer from task, default medium):
@@ -16,7 +20,7 @@ Thoroughness (infer from task, default medium):
 
 Strategy:
 
-1. grep/find to locate relevant code
+1. Use `grep`, `find`, `ls`, and `lsp` to locate relevant code
 2. Read key sections (not entire files)
 3. Identify types, interfaces, key functions
 4. Note dependencies between files
diff --git a/src/resources/agents/worker.md b/src/resources/agents/worker.md
index e319213df..00bf5f5d1 100644
--- a/src/resources/agents/worker.md
+++ b/src/resources/agents/worker.md
@@ -9,7 +9,7 @@ Work autonomously to complete the assigned task. Use all available tools as need
 
 - Do **not** spawn subagents or act as an orchestrator unless the parent task explicitly instructs you to do so.
 - If the task looks like SF orchestration, planning, scouting, parallel dispatch, or review routing, stop and report that the caller should use the appropriate specialist agent instead (for example: `sf-worker`, `sf-scout`, `sf-reviewer`, or the top-level orchestrator).
-- In particular, do **not** call `sf_scout`, `subagent`, `launch_parallel_view`, or `sf_execute_parallel` on your own initiative.
+- In particular, do **not** call `subagent`, `launch_parallel_view`, or `sf_execute_parallel` on your own initiative.
 
 Output format when finished:
 
diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts
index 103fbe43d..0052d545f 100644
--- a/src/resources/extensions/claude-code-cli/stream-adapter.ts
+++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts
@@ -1155,6 +1155,20 @@ export function createClaudeCodeCanUseToolHandler(
 							},
 						];
 					}
+				} else if (!perms || (Array.isArray(perms) && perms.length === 0)) {
+					// Non-Bash tool with no SDK-supplied suggestions. Without a
+					// fallback rule the SDK would return `behavior: "allow"`
+					// with no `updatedPermissions`, so "Always Allow" silently
+					// fails to persist for tools whose input varies per call
+					// (e.g. AskUserQuestion with different `questions` payloads).
+					// A bare `{ toolName }` rule matches any input.
+					perms = [{
+						type: "addRules",
+						rules: [{ toolName }],
+						behavior: "allow",
+						destination: "localSettings",
+					}];
+					notifyLabel = toolName;
 				}
 				// Notify with the resolved pattern (label already previewed it)
 				if (notifyLabel) {
diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts
index 1f47d1ecb..1b88e9a40 100644
--- a/src/resources/extensions/search-the-web/native-search.ts
+++ b/src/resources/extensions/search-the-web/native-search.ts
@@ -30,6 +30,18 @@ const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
  */
 export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
 
+/**
+ * Decides whether native Anthropic web_search injection is safe for a provider.
+ *
+ * Purpose: github-copilot, minimax, and kimi speak a Claude-compatible wire
+ * format (and reuse claude-* model names) but reject the web_search tool with
+ * a 400 error, so a `claude-` model-name prefix is not sufficient evidence.
+ */
+export function supportsNativeWebSearch(provider: string): boolean {
+	const trustedProvider = "anthropic"; // only the explicit provider tag counts
+	return provider === trustedProvider;
+}
+
 /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
 export function preferBraveSearch(): boolean {
 	// PREFERENCES.md takes priority over env var
@@ -183,8 +195,12 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): {
 		} else if (modelSelectFired) {
 			isAnthropic = isAnthropicProvider;
 		} else {
-			const modelName = typeof payload.model === "string" ? payload.model : "";
-			isAnthropic = modelName.startsWith("claude-");
+			// No provider info available and no model_select event fired.
+			// Without a confirmed provider, skip native web_search injection entirely
+			// rather than relying on the too-broad `claude-` prefix heuristic
+			// (github-copilot/minimax/kimi also use claude-* names but do not
+			// support web_search, causing 400 errors on injection).
+			isAnthropic = false;
 		}
 		if (!isAnthropic) return;
 
diff --git a/src/resources/extensions/sf-tui/footer.ts b/src/resources/extensions/sf-tui/footer.ts
index 0371114bf..daecbeba7 100644
--- a/src/resources/extensions/sf-tui/footer.ts
+++ b/src/resources/extensions/sf-tui/footer.ts
@@ -30,8 +30,35 @@ function hexToRgb(hex: string): { r: number; g: number; b: number } {
 }
 
 function ansiFg(hex: string, text: string, bold = false): string {
+	// Emit 16-color SGR codes (not 24-bit) for Termius compatibility.
+	// The hex color is bucketed into the nearest standard ANSI hue below.
 	const { r, g, b } = hexToRgb(hex);
-	return `\x1b[${bold ? "1;" : ""}38;2;${r};${g};${b}m${text}${RESET}`;
+	const brightness = (r + g + b) / 3;
+	const boldPrefix = bold ? "1;" : "";
+
+	// Wraps `text` in the chosen SGR color code plus the shared RESET suffix.
+	const paint = (code: number): string =>
+		`\x1b[${boldPrefix}${code}m${text}${RESET}`;
+
+	// Very dark inputs map to black / bright black. The color code itself
+	// does not vary with `bold` here; only the "1;" prefix is added.
+	if (brightness < 50) return paint(30);
+	if (brightness < 100) return paint(90);
+
+	// Choose the normal-intensity hue (31-37). Order matters: a single
+	// dominant channel is tested first, then two-channel mixes.
+	// Anything unmatched, bright or not, falls back to white.
+	let hue = 37;
+	if (r > g + b) hue = 31; // red
+	else if (g > r + b) hue = 32; // green
+	else if (b > r + g) hue = 34; // blue
+	else if (r > 200 && g > 150) hue = 33; // yellow/orange
+	else if (r > 200 && g < 100 && b > 150) hue = 35; // magenta
+	else if (g > 200 && b > 150) hue = 36; // cyan
+
+	// Bold text additionally switches to the bright variant (91-97), which
+	// sits exactly 60 above the normal code.
+	return paint(bold ? hue + 60 : hue);
 }
 
 function toneHex(tone: Tone): string {
diff --git a/src/resources/extensions/sf/auto-direct-dispatch.ts b/src/resources/extensions/sf/auto-direct-dispatch.ts
index 99746c13f..b800b1922 100644
--- a/src/resources/extensions/sf/auto-direct-dispatch.ts
+++ b/src/resources/extensions/sf/auto-direct-dispatch.ts
@@ -81,7 +81,7 @@ export async function dispatchDirectPhase(
 						?.require_slice_discussion;
 				if (requireDiscussion && !sliceContextFile) {
 					ctx.ui.notify(
-						`Slice ${sid} requires discussion before planning. Run /sf discuss to discuss this slice, then /sf auto to resume.`,
+						`Slice ${sid} requires discussion before planning. Run /sf discuss to discuss this slice, then /sf autonomous to resume.`,
 						"info",
 					);
 					await pauseAuto(ctx, pi);
diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts
index 16b1e1fd3..63413cd7e 100644
--- a/src/resources/extensions/sf/auto-post-unit.ts
+++ b/src/resources/extensions/sf/auto-post-unit.ts
@@ -308,7 +308,7 @@ export function detectRogueFileWrites(
 }
 
 export const STEP_COMPLETE_FALLBACK_MESSAGE =
-	"Step complete. Run /clear, then /sf to continue (or /sf auto to run continuously).";
+	"Step complete. Run /clear, then /sf to continue (or /sf autonomous to run continuously).";
 
 export function buildStepCompleteMessage(
 	nextState: import("./types.js").SFState,
@@ -319,7 +319,7 @@ export function buildStepCompleteMessage(
 	const next = describeNextUnit(nextState);
 	return (
 		`Step complete. Next: ${next.label}\n` +
-		`Run /clear, then /sf to continue (or /sf auto to run continuously).`
+		`Run /clear, then /sf to continue (or /sf autonomous to run continuously).`
 	);
 }
 
@@ -913,7 +913,7 @@ export async function postUnitPreVerification(
 				if (err instanceof MergeConflictError) {
 					ctx.ui.notify(
 						`slice-cadence merge conflict in ${sid}: ${err.conflictedFiles.join(", ")}. ` +
-							`Resolve manually on main and run \`/sf auto\` to resume.`,
+							`Resolve manually on main and run \`/sf autonomous\` to resume.`,
 						"error",
 					);
 					// Stop auto AND signal the outer postUnit flow to exit early.
@@ -1289,7 +1289,7 @@ export async function postUnitPreVerification(
 							s.verificationRetryCount.delete(retryKey);
 							s.pendingVerificationRetry = null;
 							ctx.ui.notify(
-								`Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
+								`Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /sf autonomous once blockers are resolved.`,
 								"error",
 							);
 							await pauseAuto(ctx, pi);
diff --git a/src/resources/extensions/sf/auto-start.ts b/src/resources/extensions/sf/auto-start.ts
index 77844b32f..e4faeae46 100644
--- a/src/resources/extensions/sf/auto-start.ts
+++ b/src/resources/extensions/sf/auto-start.ts
@@ -255,9 +255,9 @@ export function auditOrphanedMilestoneBranches(
 				? ` Worktree directory at .sf/worktrees/${milestoneId}/ holds the live work.`
 				: "";
 			warnings.push(
-				`Branch ${branch} has ${commitsAhead} commit(s) ahead of ${mainBranch} for in-progress milestone ${milestoneId}.` +
+					`Branch ${branch} has ${commitsAhead} commit(s) ahead of ${mainBranch} for in-progress milestone ${milestoneId}.` +
 					wtSuffix +
-					` Run \`/sf auto\` to resume, or merge manually if abandoning.`,
+					` Run \`/sf autonomous\` to resume, or merge manually if abandoning.`,
 			);
 
 			// #4764 telemetry
diff --git a/src/resources/extensions/sf/auto-timeout-recovery.ts b/src/resources/extensions/sf/auto-timeout-recovery.ts
index 64d744a02..afa0c3d72 100644
--- a/src/resources/extensions/sf/auto-timeout-recovery.ts
+++ b/src/resources/extensions/sf/auto-timeout-recovery.ts
@@ -347,7 +347,7 @@ export async function recoverTimedOutUnit(
 			lastRecoveryReason: reason,
 		});
 		ctx.ui.notify(
-			`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
+			`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf autonomous once blockers are resolved.`,
 			"error",
 		);
 		return "paused";
diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts
index 480119de7..5998f1d80 100644
--- a/src/resources/extensions/sf/auto.ts
+++ b/src/resources/extensions/sf/auto.ts
@@ -1255,9 +1255,9 @@ export async function pauseAuto(
 	ctx?.ui.setWidget("sf-progress", undefined);
 	ctx?.ui.setFooter(undefined);
 	if (ctx) initHealthWidget(ctx);
-	const resumeCmd = s.stepMode ? "/sf next" : "/sf auto";
+	const resumeCmd = s.stepMode ? "/sf next" : "/sf autonomous";
 	ctx?.ui.notify(
-		`${s.stepMode ? "Step" : "Auto"}-mode paused (Escape). Type to interact, or ${resumeCmd} to resume.`,
+		`${s.stepMode ? "Step" : "Autonomous"} mode paused (Escape). Type to interact, or ${resumeCmd} to resume.`,
 		"info",
 	);
 }
diff --git a/src/resources/extensions/sf/auto/loop.ts b/src/resources/extensions/sf/auto/loop.ts
index dfc4f1af4..0491899aa 100644
--- a/src/resources/extensions/sf/auto/loop.ts
+++ b/src/resources/extensions/sf/auto/loop.ts
@@ -8,7 +8,7 @@
  */
 
 import { randomUUID } from "node:crypto";
-import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import type {
 	ExtensionAPI,
@@ -95,6 +95,85 @@ function saveStuckState(basePath: string, state: LoopState): void {
 	}
 }
 
+// ── Custom workflow verification retry persistence ───────────────────────
+// Custom workflow verifiers can request a retry after a step runs. Persisting
+// retry counts under the run directory prevents restart loops from resetting the
+// retry budget and repeatedly dispatching the same failing step.
+const MAX_CUSTOM_ENGINE_VERIFY_RETRIES = 3; // retries tolerated per unit before the recovery policy is consulted
+
+function customVerifyRetryStateDir(s: {
+	activeRunDir?: string | null;
+	basePath: string;
+}): string {
+	const root = s.activeRunDir ? s.activeRunDir : sfRoot(s.basePath); // run dir wins
+	return join(root, "runtime");
+}
+
+function customVerifyRetryStatePath(s: {
+	activeRunDir?: string | null;
+	basePath: string;
+}): string {
+	const stateDir = customVerifyRetryStateDir(s); // <run dir or SF root>/runtime
+	return join(stateDir, "custom-verify-retries.json");
+}
+
+/** Restores persisted retry counts into an empty session map and returns it. */
+function hydrateCustomVerifyRetryCounts(s: AutoSession): Map<string, number> {
+	const retryCounts = s.verificationRetryCount;
+	if (retryCounts.size > 0) return retryCounts; // live counts win over disk
+
+	try {
+		const raw = JSON.parse(readFileSync(customVerifyRetryStatePath(s), "utf-8"));
+		const counts: unknown = raw && typeof raw === "object" ? raw.counts : null;
+		const entries = counts && typeof counts === "object" ? Object.entries(counts) : [];
+		for (const [key, value] of entries) {
+			if (typeof value === "number" && Number.isFinite(value) && value > 0) {
+				retryCounts.set(key, Math.floor(value));
+			}
+		}
+	} catch (err) {
+		// ENOENT only means nothing was persisted yet, so it is not logged.
+		if ((err as { code?: string } | null)?.code !== "ENOENT") {
+			debugLog("autoLoop", {
+				phase: "load-custom-verify-retries-failed",
+				error: err instanceof Error ? err.message : String(err),
+			});
+		}
+	}
+	return retryCounts;
+}
+
+/** Persists the session's retry counts, deleting the file when none remain. */
+function saveCustomVerifyRetryCounts(s: AutoSession): void {
+	const target = customVerifyRetryStatePath(s);
+	const pending = s.verificationRetryCount;
+
+	try {
+		if (pending.size > 0) {
+			mkdirSync(customVerifyRetryStateDir(s), { recursive: true });
+			const payload = {
+				counts: Object.fromEntries(pending),
+				updatedAt: new Date().toISOString(),
+			};
+			writeFileSync(target, JSON.stringify(payload) + "\n");
+		} else {
+			// Nothing left to track: remove the stale state file.
+			unlinkSync(target);
+		}
+	} catch (err) {
+		// Best-effort persistence: failures are logged, except ENOENT.
+		const code =
+			err && typeof err === "object" && "code" in err
+				? (err as { code?: string }).code
+				: undefined;
+		if (code === "ENOENT") return;
+		debugLog("autoLoop", {
+			phase: "save-custom-verify-retries-failed",
+			error: err instanceof Error ? err.message : String(err),
+		});
+	}
+}
+
 // ── Memory pressure monitoring (#3331) ──────────────────────────────────
 // Check heap usage every N iterations and trigger graceful shutdown before
 // the OS OOM killer sends SIGKILL. The threshold is 90% of the V8 heap
@@ -281,7 +360,7 @@ export async function autoLoop(
 					pi,
 					`Memory pressure: heap at ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%). ` +
 						`Stopping gracefully to prevent OOM kill after ${iteration} iterations. ` +
-						`Resume with /sf auto to continue from where you left off.`,
+						`Resume with /sf autonomous to continue from where you left off.`,
 				);
 				finishTurn("stopped", "timeout", "memory-pressure");
 				break;
@@ -508,20 +587,75 @@ export async function autoLoop(
 					break;
 				}
 				if (verifyResult === "retry") {
+					const recoveryKey = `${iterData.unitType}/${iterData.unitId}`;
+					const retryCounts = hydrateCustomVerifyRetryCounts(s);
+					const attempts = (retryCounts.get(recoveryKey) ?? 0) + 1;
+					retryCounts.set(recoveryKey, attempts);
+					saveCustomVerifyRetryCounts(s);
 					debugLog("autoLoop", {
 						phase: "custom-engine-verify-retry",
 						iteration,
 						unitId: iterData.unitId,
+						attempts,
 					});
 					deps.uokObserver?.onPhaseResult("custom-engine", "retry", {
 						unitType: iterData.unitType,
 						unitId: iterData.unitId,
+						attempts,
 					});
+					if (attempts > MAX_CUSTOM_ENGINE_VERIFY_RETRIES) {
+						const recovery = await policy.recover(
+							iterData.unitType,
+							iterData.unitId,
+							{ basePath: s.basePath },
+						);
+						if (recovery.outcome === "pause") {
+							await deps.pauseAuto(ctx, pi);
+							finishTurn(
+								"paused",
+								"manual-attention",
+								recovery.reason ?? "custom-engine-verify-retry-exhausted",
+							);
+							break;
+						}
+						if (recovery.outcome === "skip") {
+							await deps.stopAuto(
+								ctx,
+								pi,
+								recovery.reason ??
+									`Custom workflow verification for ${iterData.unitId} requested skip after retry exhaustion, but the custom engine cannot reconcile skipped steps.`,
+							);
+							finishTurn(
+								"stopped",
+								"manual-attention",
+								"custom-engine-verify-retry-exhausted",
+							);
+							break;
+						}
+						const exhaustedReason = `Custom workflow verification for ${iterData.unitId} requested retry ${attempts} times without passing.`;
+						await deps.stopAuto(
+							ctx,
+							pi,
+							recovery.outcome === "stop" && recovery.reason
+								? recovery.reason
+								: exhaustedReason,
+						);
+						finishTurn(
+							"stopped",
+							"manual-attention",
+							"custom-engine-verify-retry-exhausted",
+						);
+						break;
+					}
 					finishTurn("retry");
 					continue;
 				}
 
 				// Verification passed — mark step complete
+				s.verificationRetryCount.delete(
+					`${iterData.unitType}/${iterData.unitId}`,
+				);
+				saveCustomVerifyRetryCounts(s);
 				debugLog("autoLoop", {
 					phase: "custom-engine-reconcile",
 					iteration,
diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts
index efc92cb4a..b54f8597d 100644
--- a/src/resources/extensions/sf/auto/phases.ts
+++ b/src/resources/extensions/sf/auto/phases.ts
@@ -78,6 +78,10 @@ import {
 	logError,
 	logWarning,
 } from "../workflow-logger.js";
+import {
+	handleProductAudit,
+	type ProductAuditParams,
+} from "../tools/product-audit-tool.js";
 import {
 	getRequiredWorkflowToolsForAutoUnit,
 	getWorkflowTransportSupportError,
@@ -115,6 +119,53 @@ export function _resolveReportBasePath(
 	return s.originalBasePath || s.basePath;
 }
 
+/**
+ * Fire the product-audit for a milestone after successful merge.
+ * s.productAuditMilestoneId guards against repeats, because mergeAndExit can be
+ * called multiple times for the same milestone at different transition points.
+ *
+ * Writes a "no-gaps" placeholder verdict; re-run `/sf product-audit` manually
+ * for full LLM-powered gap analysis. Never throws: returned and thrown audit
+ * errors become a warning, so callers never mistake them for merge failures.
+ */
+async function maybeFireProductAudit(
+	s: AutoSession,
+	ctx: ExtensionContext,
+): Promise<void> {
+	const mid = s.currentMilestoneId;
+	// Guard: skip when no milestone is active or it was already audited.
+	if (!mid || s.productAuditMilestoneId === mid) return;
+	s.productAuditMilestoneId = mid;
+
+	const params: ProductAuditParams = {
+		milestoneId: mid,
+		verdict: "no-gaps",
+		summary:
+			"Auto-fired placeholder audit at milestone merge. Re-run `/sf product-audit` for full LLM-powered gap analysis.",
+		gaps: [],
+	};
+
+	try {
+		const result = await handleProductAudit(params, s.basePath);
+		if ("error" in result) throw new Error(String(result.error)); // handled below
+		debugLog("autoLoop", {
+			phase: "product-audit-fired",
+			milestone: mid,
+			jsonPath: result.jsonPath,
+		});
+	} catch (err) {
+		const error = err instanceof Error ? err.message : String(err);
+		logWarning("engine", "Product audit auto-fire failed", {
+			milestone: mid,
+			error,
+		});
+		ctx.ui.notify(
+			`Product audit for ${mid} auto-fired but may need manual refresh: ${error}`,
+			"warning",
+		);
+	}
+}
+
 function clearDeferredCommitAfterCancelledUnit(
 	s: AutoSession,
 	ctx: ExtensionContext,
@@ -696,7 +747,7 @@ export async function runPreDispatch(
 			if (mergeErr instanceof MergeConflictError) {
 				// Real code conflicts — stop the loop instead of retrying forever (#2330)
 				ctx.ui.notify(
-					`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`,
+					`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`,
 					"error",
 				);
 				await deps.stopAuto(
@@ -712,7 +763,7 @@ export async function runPreDispatch(
 				error: String(mergeErr),
 			});
 			ctx.ui.notify(
-				`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`,
+				`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`,
 				"error",
 			);
 			await deps.stopAuto(
@@ -723,6 +774,9 @@ export async function runPreDispatch(
 			return { action: "break", reason: "merge-failed" };
 		}
 
+		// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
+		await maybeFireProductAudit(s, ctx);
+
 		// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
 
 		deps.invalidateAllCaches();
@@ -815,10 +869,12 @@ export async function runPreDispatch(
 					deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
 					// Prevent stopAuto from attempting the same merge (#2645)
 					s.milestoneMergedInPhases = true;
+					// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
+					await maybeFireProductAudit(s, ctx);
 				} catch (mergeErr) {
 					if (mergeErr instanceof MergeConflictError) {
 						ctx.ui.notify(
-							`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`,
+							`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`,
 							"error",
 						);
 						await deps.stopAuto(
@@ -833,7 +889,7 @@ export async function runPreDispatch(
 						error: String(mergeErr),
 					});
 					ctx.ui.notify(
-						`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`,
+						`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`,
 						"error",
 					);
 					await deps.stopAuto(
@@ -869,12 +925,12 @@ export async function runPreDispatch(
 			);
 		} else if (state.phase === "blocked") {
 			const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
-			// Pause instead of hard-stop so the session is resumable with `/sf auto`.
+			// Pause instead of hard-stop so the session is resumable with `/sf autonomous`.
 			// Hard-stop here was causing premature termination when slice dependencies
 			// were temporarily unresolvable (e.g. after reassessment added new slices).
 			await deps.pauseAuto(ctx, pi);
 			ctx.ui.notify(
-				`${blockerMsg}. Fix and run /sf auto to resume.`,
+				`${blockerMsg}. Fix and run /sf autonomous to resume.`,
 				"warning",
 			);
 			deps.sendDesktopNotification(
@@ -954,10 +1010,12 @@ export async function runPreDispatch(
 				deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
 				// Prevent stopAuto from attempting the same merge (#2645)
 				s.milestoneMergedInPhases = true;
+				// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
+				await maybeFireProductAudit(s, ctx);
 			} catch (mergeErr) {
 				if (mergeErr instanceof MergeConflictError) {
 					ctx.ui.notify(
-						`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`,
+						`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`,
 						"error",
 					);
 					await deps.stopAuto(
@@ -972,7 +1030,7 @@ export async function runPreDispatch(
 					error: String(mergeErr),
 				});
 				ctx.ui.notify(
-					`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`,
+					`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`,
 					"error",
 				);
 				await deps.stopAuto(
@@ -1019,7 +1077,10 @@ export async function runPreDispatch(
 			);
 		}
 		await deps.pauseAuto(ctx, pi);
-		ctx.ui.notify(`${blockerMsg}. Fix and run /sf auto to resume.`, "warning");
+		ctx.ui.notify(
+			`${blockerMsg}. Fix and run /sf autonomous to resume.`,
+			"warning",
+		);
 		deps.sendDesktopNotification(
 			"SF",
 			blockerMsg,
@@ -1078,7 +1139,7 @@ export async function runDispatch(
 		});
 		// Warning-level stops are recoverable human checkpoints (e.g. UAT verdict
 		// gate) — pause instead of hard-stopping so the session is resumable with
-		// `/sf auto`. Error/info-level stops remain hard stops for infrastructure
+		// `/sf autonomous`. Error/info-level stops remain hard stops for infrastructure
 		// failures and terminal conditions respectively.
 		// See: https://github.com/singularity-forge/sf-run/issues/2474
 		if (dispatchResult.level === "warning") {
@@ -1530,7 +1591,7 @@ export async function runGuards(
 				}
 				if (budgetEnforcementAction === "pause") {
 					ctx.ui.notify(
-						`${msg} Pausing auto-mode — /sf auto to override and continue.`,
+						`${msg} Pausing autonomous mode — /sf autonomous to override and continue.`,
 						"warning",
 					);
 					deps.sendDesktopNotification(
@@ -1585,7 +1646,7 @@ export async function runGuards(
 		) {
 			const msg = `Context window at ${contextUsage.percent}% (threshold: ${contextThreshold}%). Pausing to prevent truncated output.`;
 			ctx.ui.notify(
-				`${msg} Run /sf auto to continue (will start fresh session).`,
+				`${msg} Run /sf autonomous to continue (will start fresh session).`,
 				"warning",
 			);
 			deps.sendDesktopNotification(
@@ -1817,7 +1878,7 @@ export async function runUnitPhase(
 		prefs?.safety_harness as Record<string, unknown> | undefined,
 	);
 	if (safetyConfig.enabled && safetyConfig.evidence_collection) {
-		resetEvidence();
+		resetEvidence(unitId, s.basePath);
 	}
 	if (
 		safetyConfig.enabled &&
diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts
index 0e6fb6ad3..c4907fe94 100644
--- a/src/resources/extensions/sf/auto/session.ts
+++ b/src/resources/extensions/sf/auto/session.ts
@@ -173,6 +173,11 @@ export class AutoSession {
 	 *  stopAuto does not attempt the same merge a second time (#2645). */
 	milestoneMergedInPhases = false;
 
+	/** Set to the milestoneId after product audit fires at merge, so the audit
+	 *  fires exactly once per milestone (not twice when mergeAndExit is called
+	 *  at both the transition point and the terminal complete point). */
+	productAuditMilestoneId: string | null = null;
+
 	// ── Dispatch circuit breakers ──────────────────────────────────────
 	rewriteAttemptCount = 0;
 	/** Tracks consecutive bootstrap attempts that found phase === "complete".
@@ -330,6 +335,7 @@ export class AutoSession {
 		this.pendingTaskCompleteFailures.clear();
 		this.isolationDegraded = false;
 		this.milestoneMergedInPhases = false;
+		this.productAuditMilestoneId = null;
 		this.checkpointSha = null;
 		this.preUnitDirtyFiles = [];
 		this.stagedPendingCommit = false;
diff --git a/src/resources/extensions/sf/bootstrap/db-tools.ts b/src/resources/extensions/sf/bootstrap/db-tools.ts
index 4a425f104..7c255ee32 100644
--- a/src/resources/extensions/sf/bootstrap/db-tools.ts
+++ b/src/resources/extensions/sf/bootstrap/db-tools.ts
@@ -1017,6 +1017,61 @@ export function registerDbTools(pi: ExtensionAPI): void {
 			),
 		}),
 		execute: planMilestoneExecute,
+		renderCall(args: any, theme: any) { // Renders the one-line call header: bold "Plan Milestone" plus optional id/title and slice count.
+			const milestoneId = args?.milestoneId ? String(args.milestoneId) : ""; // missing or falsy values collapse to "" so they are filtered out below
+			const title = args?.title ? String(args.title) : "";
+			const slices = Array.isArray(args?.slices) ? args.slices : []; // anything that is not an array is treated as "no slices"
+			let text = theme.fg("toolTitle", theme.bold("Plan Milestone"));
+			if (milestoneId || title) {
+				text += theme.fg(
+					"muted",
+					` ${[milestoneId, title].filter(Boolean).join(": ")}`, // "id: title", or only whichever of the two is present
+				);
+			}
+			if (slices.length > 0) {
+				text += theme.fg(
+					"dim",
+					` — ${slices.length} slice${slices.length === 1 ? "" : "s"}`, // pluralize unless there is exactly one slice
+				);
+			}
+			return new Text(text, 0, 0);
+		},
+		renderResult(result: any, _options: any, theme: any) { // Renders the one-line result summary: an error line, or "<id> planned" plus optional title, slice count and next slice.
+			const d = result.details;
+			if (result.isError || d?.error) { // either an error flag on the result or an error field in details selects the error rendering
+				const textContent = result.content?.find?.(
+					(item: any) => item?.type === "text",
+				)?.text; // first text content item, used as the fallback error message
+				return new Text(
+					theme.fg("error", `Error: ${d?.error ?? textContent ?? "unknown"}`), // precedence: details.error, then text content, then "unknown"
+					0,
+					0,
+				);
+			}
+			const milestoneId = d?.milestoneId ? String(d.milestoneId) : "milestone"; // generic label when details carry no id
+			const title = d?.title ? String(d.title) : "";
+			const sliceCount =
+				typeof d?.sliceCount === "number" ? d.sliceCount : undefined; // only a numeric count is shown; a count of 0 is still shown
+			let text = theme.fg(
+				"success",
+				`${milestoneId} planned${title ? `: ${title}` : ""}`,
+			);
+			if (sliceCount !== undefined) {
+				text += theme.fg(
+					"dim",
+					` · ${sliceCount} slice${sliceCount === 1 ? "" : "s"}`, // pluralize unless the count is exactly one
+				);
+			}
+			if (d?.firstSliceId || d?.firstSliceTitle) { // a truthy value here implies d is defined, so d is dereferenced directly below
+				text += theme.fg(
+					"dim",
+					` · next ${[d.firstSliceId, d.firstSliceTitle]
+						.filter(Boolean)
+						.join(": ")}`, // "id: title", or only whichever of the two is present
+				);
+			}
+			return new Text(text, 0, 0);
+		},
 	};
 
 	pi.registerTool(planMilestoneTool);
diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.ts b/src/resources/extensions/sf/bootstrap/register-hooks.ts
index a0a364d05..63597d109 100644
--- a/src/resources/extensions/sf/bootstrap/register-hooks.ts
+++ b/src/resources/extensions/sf/bootstrap/register-hooks.ts
@@ -34,6 +34,10 @@ import {
 	resetLearningRuntime,
 	selectLearnedModel,
 } from "../learning/runtime.js";
+import {
+	observeMemorySleeperToolResult,
+	resetMemorySleeper,
+} from "../memory-sleeper.js";
 import { initNotificationStore } from "../notification-store.js";
 import { initNotificationWidget } from "../notification-widget.js";
 import {
@@ -113,6 +117,7 @@ export function registerHooks(
 	pi.on("session_start", async (_event, ctx) => {
 		lastGeminiPreflightWarning = undefined;
 		resetLearningRuntime();
+		resetMemorySleeper();
 		try {
 			const sid = ctx.sessionManager?.getSessionId?.() ?? "";
 			const sfile = ctx.sessionManager?.getSessionFile?.() ?? "";
@@ -233,6 +238,7 @@ export function registerHooks(
 	pi.on("session_switch", async (_event, ctx) => {
 		lastGeminiPreflightWarning = undefined;
 		resetLearningRuntime();
+		resetMemorySleeper();
 		initNotificationStore(process.cwd());
 		installNotifyInterceptor(ctx);
 		resetWriteGateState();
@@ -520,6 +526,26 @@ export function registerHooks(
 	});
 
 	pi.on("tool_result", async (event) => {
+		if (isAutoActive()) {
+			const steer = observeMemorySleeperToolResult(event);
+			if (steer) {
+				pi.sendMessage(
+					{
+						customType: "sf-memory-sleeper",
+						content: steer.content,
+						display: false,
+						details: {
+							key: steer.key,
+							severity: steer.severity,
+							toolName: event.toolName,
+							toolCallId: event.toolCallId,
+						},
+					},
+					{ deliverAs: "steer" },
+				);
+			}
+		}
+
 		if (event.toolName !== "ask_user_questions") return;
 		const milestoneId = getDiscussionMilestoneId(process.cwd());
 		const queueActive = isQueuePhaseActive();
diff --git a/src/resources/extensions/sf/code-intelligence.ts b/src/resources/extensions/sf/code-intelligence.ts
index db5b08674..0b7db4947 100644
--- a/src/resources/extensions/sf/code-intelligence.ts
+++ b/src/resources/extensions/sf/code-intelligence.ts
@@ -679,13 +679,13 @@ function buildProjectRagContextLines(
 		lines.push(
 			prefs?.project_rag_auto_index === false
 				? "- Do not auto-index unless explicitly needed; query existing indexes first. " +
-						"If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout."
+						"If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout."
 				: "- Index first if the backend is stale or empty; use incremental indexing when available. " +
-						"If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.",
+						"If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout.",
 		);
 	} else {
 		lines.push(
-			"- Project RAG: not configured. This is optional; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.",
+			"- Project RAG: not configured. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout.",
 		);
 		lines.push(
 			"- To enable later: build/install Brainwires/project-rag, then run `/sf codebase rag init` or set `SF_PROJECT_RAG_BIN` before initializing MCP config.",
@@ -720,11 +720,11 @@ function buildSiftContextLines(
 		);
 		lines.push(
 			"- Sift uses a sector-aware cache in the platform cache directory, typically `~/.cache/sift`; " +
-				"if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.",
+				"if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.",
 		);
 	} else {
 		lines.push(
-			"- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.",
+			"- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.",
 		);
 		lines.push(
 			"- To enable later: install `rupurt/sift` on PATH or set `SIFT_PATH` to the sift binary.",
@@ -736,7 +736,7 @@ function buildSiftContextLines(
 
 function buildNoCodebaseIndexerContextLines(): string[] {
 	return [
-		"- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.",
+		"- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.",
 	];
 }
 
@@ -852,7 +852,7 @@ export function formatProjectRagStatus(
 	}
 	lines.push("");
 	lines.push(
-		"Project RAG is optional. SF falls back to CODEBASE.md, rg, lsp, and scout when it is unavailable.",
+		"Project RAG is optional. SF falls back to CODEBASE.md, native grep/find/ls, lsp, codebase_search, and scout when it is unavailable.",
 	);
 	lines.push(
 		"When configured, agents should use index_codebase, query_codebase, search_by_filters, find_definition, find_references, and get_call_graph before manual file-by-file reading.",
@@ -882,7 +882,7 @@ export function formatSiftStatus(
 	}
 	lines.push("");
 	lines.push(
-		"Sift is optional. SF falls back to CODEBASE.md, rg, lsp, and scout when it is unavailable.",
+		"Sift is optional. SF falls back to CODEBASE.md, native grep/find/ls, lsp, and scout when it is unavailable.",
 	);
 	lines.push(
 		'When configured, agents should use `sift search --json <path> "<query>"`; `page-index-hybrid` is the strongest direct-search preset and `path-hybrid` is best for path-heavy queries.',
@@ -901,7 +901,7 @@ function formatNoCodebaseIndexerStatus(): string {
 		"Reason: codebase.indexer_backend is none",
 		"Operational: no - optional codebase indexer disabled.",
 		"",
-		"SF will use CODEBASE.md, rg, lsp, and scout for codebase orientation.",
+		"SF will use CODEBASE.md, native grep/find/ls, lsp, and scout for codebase orientation.",
 	].join("\n");
 }
 
diff --git a/src/resources/extensions/sf/commands-bootstrap.ts b/src/resources/extensions/sf/commands-bootstrap.ts
index 3f8203e8b..a38f3364c 100644
--- a/src/resources/extensions/sf/commands-bootstrap.ts
+++ b/src/resources/extensions/sf/commands-bootstrap.ts
@@ -3,18 +3,20 @@ import {
 	type ExtensionCommandContext,
 	importExtensionModule,
 } from "@singularity-forge/pi-coding-agent";
+import { workflowTemplateCommandDefinitions } from "./workflow-templates.js";
 
 const TOP_LEVEL_SUBCOMMANDS = [
 	{ cmd: "help", desc: "Categorized command reference with descriptions" },
 	{ cmd: "next", desc: "Explicit step mode (same as /sf)" },
 	{
-		cmd: "auto",
+		cmd: "autonomous",
 		desc: "Autonomous mode — research, plan, execute, commit, repeat",
 	},
-	{ cmd: "stop", desc: "Stop auto mode gracefully" },
+	{ cmd: "auto", desc: "Alias for /sf autonomous" },
+	{ cmd: "stop", desc: "Stop autonomous mode gracefully" },
 	{
 		cmd: "pause",
-		desc: "Pause auto-mode (preserves state, /sf auto to resume)",
+		desc: "Pause autonomous mode (preserves state, /sf autonomous to resume)",
 	},
 	{ cmd: "status", desc: "Progress dashboard" },
 	{ cmd: "visualize", desc: "Open workflow visualizer" },
@@ -88,14 +90,14 @@ function getSfArgumentCompletions(prefix: string) {
 
 	const partial = parts[1] ?? "";
 
-	if (parts[0] === "auto" && parts.length <= 2) {
+	if ((parts[0] === "auto" || parts[0] === "autonomous") && parts.length <= 2) {
 		return filterStartsWith(
 			partial,
 			[
 				{ cmd: "--verbose", desc: "Show detailed execution output" },
 				{ cmd: "--debug", desc: "Enable debug logging" },
 			],
-			"auto",
+			parts[0],
 		);
 	}
 
@@ -260,20 +262,7 @@ function getSfArgumentCompletions(prefix: string) {
 		return filterStartsWith(
 			partial,
 			[
-				{ cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" },
-				{
-					cmd: "small-feature",
-					desc: "Lightweight feature with optional discussion",
-				},
-				{ cmd: "spike", desc: "Research, prototype, and document findings" },
-				{ cmd: "hotfix", desc: "Minimal: fix it, test it, ship it" },
-				{ cmd: "refactor", desc: "Inventory, plan waves, migrate, verify" },
-				{ cmd: "security-audit", desc: "Scan, triage, remediate, re-scan" },
-				{ cmd: "dep-upgrade", desc: "Assess, upgrade, fix breaks, verify" },
-				{
-					cmd: "full-project",
-					desc: "Complete SF workflow with full ceremony",
-				},
+				...workflowTemplateCommandDefinitions(),
 				{ cmd: "resume", desc: "Resume an in-progress workflow" },
 				{ cmd: "--list", desc: "List all available templates" },
 				{ cmd: "--dry-run", desc: "Preview workflow without executing" },
diff --git a/src/resources/extensions/sf/commands-handlers.ts b/src/resources/extensions/sf/commands-handlers.ts
index 8d4318814..6803a20dd 100644
--- a/src/resources/extensions/sf/commands-handlers.ts
+++ b/src/resources/extensions/sf/commands-handlers.ts
@@ -596,18 +596,23 @@ function compareSemverLocal(a: string, b: string): number {
 
 export async function handleUpdate(
 	ctx: ExtensionCommandContext,
+	deps: {
+		currentVersion?: string;
+		fetchLatestVersion?: () => Promise<string | null>;
+		install?: (command: string) => void;
+	} = {},
 ): Promise<void> {
 	const { execSync } = await import("node:child_process");
 
 	const NPM_PACKAGE = "sf-run";
-	const current = process.env.SF_VERSION || "0.0.0";
+	const current = deps.currentVersion ?? process.env.SF_VERSION ?? "0.0.0";
 
 	ctx.ui.notify(
 		`Current version: v${current}\nChecking npm registry...`,
 		"info",
 	);
 
-	const latest = await fetchLatestVersionForCommand();
+	const latest = await (deps.fetchLatestVersion ?? fetchLatestVersionForCommand)();
 	if (!latest) {
 		ctx.ui.notify(
 			"Failed to reach npm registry. Check your network connection.",
@@ -625,13 +630,25 @@ export async function handleUpdate(
 
 	const installCmd = resolveInstallCommand(`${NPM_PACKAGE}@latest`);
 	try {
-		execSync(installCmd, {
-			stdio: ["ignore", "pipe", "ignore"],
-		});
-		ctx.ui.notify(
-			`Updated to v${latest}. Restart your SF session to use the new version.`,
-			"info",
-		);
+		if (deps.install) {
+			deps.install(installCmd);
+		} else {
+			execSync(installCmd, {
+				stdio: ["ignore", "pipe", "ignore"],
+			});
+		}
+		ctx.ui.notify(`Updated to v${latest}. Reloading current session...`, "info");
+		try {
+			await ctx.reload();
+			ctx.ui.notify(`Updated to v${latest}. Reloaded current session.`, "info");
+		} catch (reloadError) {
+			const message =
+				reloadError instanceof Error ? reloadError.message : String(reloadError);
+			ctx.ui.notify(
+				`Updated to v${latest}, but automatic reload failed: ${message}. Use /sf reload to resume with the new version.`,
+				"warning",
+			);
+		}
 	} catch {
 		ctx.ui.notify(`Update failed. Try manually: ${installCmd}`, "error");
 	}
diff --git a/src/resources/extensions/sf/commands-inspect.ts b/src/resources/extensions/sf/commands-inspect.ts
index 6d8adf4f4..e5488c8d0 100644
--- a/src/resources/extensions/sf/commands-inspect.ts
+++ b/src/resources/extensions/sf/commands-inspect.ts
@@ -63,7 +63,7 @@ export async function handleInspect(
 			const dbPath = join(sfDir, "sf.db");
 			if (!existsSync(sfDir) || !existsSync(dbPath) || !openDatabase(dbPath)) {
 				ctx.ui.notify(
-					"No SF database available. Run /sf auto to create one.",
+					"No SF database available. Run /sf autonomous to create one.",
 					"info",
 				);
 				return;
@@ -73,7 +73,7 @@ export async function handleInspect(
 		const adapter = _getAdapter();
 		if (!adapter) {
 			ctx.ui.notify(
-				"No SF database available. Run /sf auto to create one.",
+				"No SF database available. Run /sf autonomous to create one.",
 				"info",
 			);
 			return;
diff --git a/src/resources/extensions/sf/commands-logs.ts b/src/resources/extensions/sf/commands-logs.ts
index d2d8468a8..8e2893884 100644
--- a/src/resources/extensions/sf/commands-logs.ts
+++ b/src/resources/extensions/sf/commands-logs.ts
@@ -409,7 +409,7 @@ async function handleLogsList(
 	}
 
 	lines.push("");
-	lines.push("Tip: Enable debug logging with SF_DEBUG=1 before /sf auto");
+	lines.push("Tip: Enable debug logging with SF_DEBUG=1 before /sf autonomous");
 
 	ctx.ui.notify(lines.join("\n"), "info");
 }
@@ -495,7 +495,7 @@ async function handleLogsDebug(
 
 	if (debugLogs.length === 0) {
 		ctx.ui.notify(
-			"No debug logs found.\n\nEnable debug logging: SF_DEBUG=1 sf auto",
+			"No debug logs found.\n\nEnable debug logging: SF_DEBUG=1 sf autonomous",
 			"info",
 		);
 		return;
diff --git a/src/resources/extensions/sf/commands-workflow-templates.ts b/src/resources/extensions/sf/commands-workflow-templates.ts
index 24a565777..281b45b8d 100644
--- a/src/resources/extensions/sf/commands-workflow-templates.ts
+++ b/src/resources/extensions/sf/commands-workflow-templates.ts
@@ -17,13 +17,23 @@ import type {
 	ExtensionAPI,
 	ExtensionCommandContext,
 } from "@singularity-forge/pi-coding-agent";
-import { isAutoActive, isAutoPaused } from "./auto.js";
+import {
+	isAutoActive,
+	isAutoPaused,
+	setActiveEngineId,
+	setActiveRunDir,
+	startAutoDetached,
+} from "./auto.js";
 import { getErrorMessage } from "./error-utils.js";
 import { createGitService, runGit } from "./git-service.js";
+import { readGraph } from "./graph.js";
 import { sfRoot } from "./paths.js";
 import { loadPrompt } from "./prompt-loader.js";
+import { createRunFromDefinition } from "./run-manager.js";
+import { compileTemplateRun } from "./workflow-template-compiler.js";
 import {
 	autoDetect,
+	formatStartUsage,
 	getTemplateInfo,
 	listTemplates,
 	loadRegistry,
@@ -98,6 +108,7 @@ interface WorkflowState {
 	updatedAt: string;
 	completedAt?: string;
 	artifactDir: string;
+	runDir?: string;
 }
 
 /**
@@ -110,6 +121,7 @@ function writeWorkflowState(
 	phases: string[],
 	description: string,
 	branch: string,
+	runDir?: string,
 ): void {
 	const statePath = join(artifactDir, "STATE.json");
 	const state: WorkflowState = {
@@ -126,6 +138,7 @@ function writeWorkflowState(
 		startedAt: new Date().toISOString(),
 		updatedAt: new Date().toISOString(),
 		artifactDir,
+		runDir,
 	};
 	writeFileSync(statePath, JSON.stringify(state, null, 2) + "\n");
 }
@@ -157,6 +170,34 @@ function findInProgressWorkflows(basePath: string): WorkflowState[] {
 				try {
 					const raw = readFileSync(statePath, "utf-8");
 					const state = JSON.parse(raw) as WorkflowState;
+					if (state.runDir) {
+						try {
+							const graph = readGraph(state.runDir);
+							const allDone = graph.steps.every(
+								(step) =>
+									step.status === "complete" || step.status === "expanded",
+							);
+							if (allDone) continue;
+							const firstPendingIndex = graph.steps.findIndex(
+								(step) => step.status === "pending" || step.status === "active",
+							);
+							state.phases = state.phases.map((phase, index) => {
+								const graphStep = graph.steps[index];
+								if (
+									graphStep?.status === "complete" ||
+									graphStep?.status === "expanded"
+								) {
+									return { ...phase, status: "completed" as const };
+								}
+								if (index === firstPendingIndex) {
+									return { ...phase, status: "active" as const };
+								}
+								return { ...phase, status: "pending" as const };
+							});
+						} catch {
+							/* fall back to legacy state if graph is unreadable */
+						}
+					}
 					if (!state.completedAt) {
 						results.push(state);
 					}
@@ -204,7 +245,7 @@ export async function handleStart(
 	if (isAutoPaused()) {
 		ctx.ui.notify(
 			"Auto-mode is paused. Starting a workflow template will run independently.\n" +
-				"The paused auto-mode session can be resumed later with /sf auto.",
+				"The paused autonomous session can be resumed later with /sf autonomous.",
 			"info",
 		);
 	}
@@ -236,6 +277,13 @@ export async function handleStart(
 			"info",
 		);
 
+		if (wf.runDir) {
+			setActiveEngineId("custom");
+			setActiveRunDir(wf.runDir);
+			startAutoDetached(ctx, pi, basePath, false);
+			return;
+		}
+
 		const workflowContent = loadWorkflowTemplate(wf.template);
 		if (!workflowContent) {
 			ctx.ui.notify(
@@ -301,7 +349,7 @@ export async function handleStart(
 
 	// Check for --issue flag (bugfix shortcut)
 	const issueMatch = cleanedArgs.match(/--issue\s+(\S+)/);
-	const issueRef = issueMatch ? issueMatch[1] : null;
+	const issueRef = issueMatch?.[1] ?? null;
 
 	// Try resolving first word as a template name
 	let match: TemplateMatch | null = null;
@@ -347,27 +395,7 @@ export async function handleStart(
 	// No template resolved at all
 	if (!match) {
 		if (!trimmed) {
-			ctx.ui.notify(
-				"Usage: /sf start <template> [description]\n\n" +
-					"Templates:\n" +
-					"  bugfix          Triage → fix → verify → ship\n" +
-					"  small-feature   Scope → plan → implement → verify\n" +
-					"  spike           Scope → research → synthesize\n" +
-					"  hotfix          Fix → ship (minimal ceremony)\n" +
-					"  refactor        Inventory → plan → migrate → verify\n" +
-					"  security-audit  Scan → triage → remediate → re-scan\n" +
-					"  dep-upgrade     Assess → upgrade → fix → verify\n" +
-					"  full-project    Complete SF with full ceremony\n\n" +
-					"Examples:\n" +
-					"  /sf start bugfix fix login button not responding\n" +
-					"  /sf start spike evaluate auth libraries\n" +
-					"  /sf start hotfix critical: API returns 500\n\n" +
-					"Flags:\n" +
-					"  --dry-run       Preview what would happen without executing\n" +
-					"  --issue <ref>   Link to a GitHub issue\n\n" +
-					"Run /sf templates for detailed template info.",
-				"info",
-			);
+			ctx.ui.notify(formatStartUsage(), "info");
 		} else {
 			ctx.ui.notify(
 				`No template matched "${firstWord}". Run /sf start to see available templates.`,
@@ -432,14 +460,14 @@ export async function handleStart(
 				{
 					customType: "sf-workflow-template",
 					content:
-						"The user wants to start a full SF project. Run `/sf init` to bootstrap the project, then `/sf auto` to begin execution.",
+						"The user wants to start a full SF project. Run `/sf init` to bootstrap the project, then `/sf autonomous` to begin execution.",
 					display: false,
 				},
 				{ triggerTurn: true },
 			);
 		} else {
 			ctx.ui.notify(
-				"Project already initialized. Use `/sf auto` to continue or `/sf discuss` to start a new milestone.",
+				"Project already initialized. Use `/sf autonomous` to continue or `/sf discuss` to start a new milestone.",
 				"info",
 			);
 		}
@@ -488,7 +516,30 @@ export async function handleStart(
 
 	const actualBranch = branchCreated ? branchName : git.getCurrentBranch();
 
-	// ─── Write workflow state for resume support ────────────────────────────
+	// ─── Compile template into graph-backed workflow run ────────────────────
+
+	const definition = compileTemplateRun({
+		templateId,
+		template,
+		workflowContent,
+		description,
+		issueRef,
+		artifactDir,
+		branch: actualBranch,
+		date,
+		mode: "guided",
+	});
+	const runDir = createRunFromDefinition(basePath, templateId, definition, {
+		kind: "template",
+		mode: "guided",
+		templateId,
+		description,
+		issueRef,
+		artifactDir: artifactDir || null,
+		branch: actualBranch,
+	});
+
+	// ─── Write workflow state for legacy resume/discovery support ───────────
 
 	if (artifactDir) {
 		writeWorkflowState(
@@ -498,6 +549,7 @@ export async function handleStart(
 			template.phases,
 			description,
 			actualBranch,
+			runDir,
 		);
 	}
 
@@ -509,30 +561,12 @@ export async function handleStart(
 	];
 	if (artifactDir) infoLines.push(`Artifacts: ${artifactDir}`);
 	infoLines.push(`Branch: ${actualBranch}`);
+	infoLines.push(`Run: ${runDir}`);
 	ctx.ui.notify(infoLines.join("\n"), "info");
 
-	const prompt = loadPrompt("workflow-start", {
-		templateId,
-		templateName: template.name,
-		templateDescription: template.description,
-		phases: template.phases.join(" → "),
-		complexity: template.estimated_complexity,
-		artifactDir: artifactDir || "(none)",
-		branch: actualBranch,
-		description: description || "(none provided)",
-		issueRef: issueRef || "(none)",
-		date,
-		workflowContent,
-	});
-
-	pi.sendMessage(
-		{
-			customType: "sf-workflow-template",
-			content: prompt,
-			display: false,
-		},
-		{ triggerTurn: true },
-	);
+	setActiveEngineId("custom");
+	setActiveRunDir(runDir);
+	startAutoDetached(ctx, pi, basePath, false);
 }
 
 // ─── /sf templates ──────────────────────────────────────────────────────────
diff --git a/src/resources/extensions/sf/commands/catalog.ts b/src/resources/extensions/sf/commands/catalog.ts
index d1acb1a12..eea2ae379 100644
--- a/src/resources/extensions/sf/commands/catalog.ts
+++ b/src/resources/extensions/sf/commands/catalog.ts
@@ -2,7 +2,10 @@ import { existsSync, readdirSync, readFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 
-import { loadRegistry } from "../workflow-templates.js";
+import {
+	loadRegistry,
+	workflowTemplateCommandDefinitions,
+} from "../workflow-templates.js";
 import { resolveProjectRoot } from "../worktree.js";
 
 const sfHome = process.env.SF_HOME || join(homedir(), ".sf");
@@ -15,19 +18,23 @@ export interface SfCommandDefinition {
 type CompletionMap = Record<string, readonly SfCommandDefinition[]>;
 
 export const SF_COMMAND_DESCRIPTION =
-	"SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|todo|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
+	"SF — Singularity Forge: /sf help|start|templates|next|autonomous|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|todo|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
 
 export const TOP_LEVEL_SUBCOMMANDS: readonly SfCommandDefinition[] = [
 	{ cmd: "help", desc: "Categorized command reference with descriptions" },
 	{ cmd: "next", desc: "Explicit step mode (same as /sf)" },
 	{
-		cmd: "auto",
+		cmd: "autonomous",
 		desc: "Autonomous mode — research, plan, execute, commit, repeat",
 	},
-	{ cmd: "stop", desc: "Stop auto mode gracefully" },
+	{
+		cmd: "auto",
+		desc: "Alias for /sf autonomous",
+	},
+	{ cmd: "stop", desc: "Stop autonomous mode gracefully" },
 	{
 		cmd: "pause",
-		desc: "Pause auto-mode (preserves state, /sf auto to resume)",
+		desc: "Pause autonomous mode (preserves state, /sf autonomous to resume)",
 	},
 	{ cmd: "status", desc: "Progress dashboard" },
 	{ cmd: "widget", desc: "Cycle widget: full → small → min → off" },
@@ -151,6 +158,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly SfCommandDefinition[] = [
 ];
 
 const NESTED_COMPLETIONS: CompletionMap = {
+	autonomous: [
+		{ cmd: "--verbose", desc: "Show detailed execution output" },
+		{ cmd: "--debug", desc: "Enable debug logging" },
+	],
 	auto: [
 		{ cmd: "--verbose", desc: "Show detailed execution output" },
 		{ cmd: "--debug", desc: "Enable debug logging" },
@@ -258,17 +269,7 @@ const NESTED_COMPLETIONS: CompletionMap = {
 		{ cmd: "status", desc: "Alias for profile in the first implementation" },
 	],
 	start: [
-		{ cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" },
-		{
-			cmd: "small-feature",
-			desc: "Lightweight feature with optional discussion",
-		},
-		{ cmd: "spike", desc: "Research, prototype, and document findings" },
-		{ cmd: "hotfix", desc: "Minimal: fix it, test it, ship it" },
-		{ cmd: "refactor", desc: "Inventory, plan waves, migrate, verify" },
-		{ cmd: "security-audit", desc: "Scan, triage, remediate, re-scan" },
-		{ cmd: "dep-upgrade", desc: "Assess, upgrade, fix breaks, verify" },
-		{ cmd: "full-project", desc: "Complete SF workflow with full ceremony" },
+		...workflowTemplateCommandDefinitions(),
 		{ cmd: "resume", desc: "Resume an in-progress workflow" },
 		{ cmd: "--list", desc: "List all available templates" },
 		{ cmd: "--dry-run", desc: "Preview workflow without executing" },
diff --git a/src/resources/extensions/sf/commands/handlers/auto.ts b/src/resources/extensions/sf/commands/handlers/auto.ts
index 4683f07ff..d87cd9a51 100644
--- a/src/resources/extensions/sf/commands/handlers/auto.ts
+++ b/src/resources/extensions/sf/commands/handlers/auto.ts
@@ -19,8 +19,9 @@ import { findMilestoneIds } from "../../milestone-id-utils.js";
 import { guardRemoteSession, projectRoot } from "../context.js";
 
 /**
- * Parse --yolo flag and optional file path from the auto command string.
- * Supports: `/sf auto --yolo path/to/file.md` or `/sf auto -y path/to/file.md`
+ * Parse --yolo flag and optional file path from the autonomous command string.
+ * Supports: `/sf autonomous --yolo path/to/file.md`, `/sf auto --yolo path/to/file.md`,
+ * or `/sf auto -y path/to/file.md`.
  */
 function parseYoloFlag(trimmed: string): {
 	yoloSeedFile: string | null;
@@ -64,6 +65,12 @@ export async function handleAutoCommand(
 	ctx: ExtensionCommandContext,
 	pi: ExtensionAPI,
 ): Promise<boolean> {
+	const isAutonomousCommand =
+		trimmed === "auto" ||
+		trimmed.startsWith("auto ") ||
+		trimmed === "autonomous" ||
+		trimmed.startsWith("autonomous ");
+
 	const launchAuto = async (
 		verboseMode: boolean,
 		options?: {
@@ -109,8 +116,9 @@ export async function handleAutoCommand(
 		return true;
 	}
 
-	if (trimmed === "auto" || trimmed.startsWith("auto ")) {
-		const { yoloSeedFile, rest: afterYolo } = parseYoloFlag(trimmed);
+	if (isAutonomousCommand) {
+		const normalized = trimmed.replace(/^(?:auto|autonomous)\b/, "auto");
+		const { yoloSeedFile, rest: afterYolo } = parseYoloFlag(normalized);
 		const { milestoneId, rest: afterMilestone } =
 			parseMilestoneTarget(afterYolo);
 		const verboseMode = afterMilestone.includes("--verbose");
@@ -184,7 +192,7 @@ export async function handleAutoCommand(
 		if (!isAutoActive()) {
 			if (isAutoPaused()) {
 				ctx.ui.notify(
-					"Auto-mode is already paused. /sf auto to resume.",
+					"Autonomous mode is already paused. /sf autonomous to resume.",
 					"info",
 				);
 			} else {
diff --git a/src/resources/extensions/sf/commands/handlers/core.ts b/src/resources/extensions/sf/commands/handlers/core.ts
index 632f21d80..684b5bc0e 100644
--- a/src/resources/extensions/sf/commands/handlers/core.ts
+++ b/src/resources/extensions/sf/commands/handlers/core.ts
@@ -33,9 +33,10 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void {
 		"QUICK START",
 		"  /sf start <tpl>   Start a workflow template",
 		"  /sf               Run next unit (same as /sf next)",
-		"  /sf auto          Run all queued units continuously",
-		"  /sf pause         Pause auto-mode",
-		"  /sf stop          Stop auto-mode gracefully",
+		"  /sf autonomous    Run all queued product units continuously",
+		"  /sf auto          Alias for /sf autonomous",
+		"  /sf pause         Pause autonomous mode",
+		"  /sf stop          Stop autonomous mode gracefully",
 		"",
 		"VISIBILITY",
 		`  /sf status         Dashboard  (${formattedShortcutPair("dashboard")})`,
@@ -69,9 +70,10 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void {
 		"  /sf templates     List available workflow templates  [info <name>]",
 		"  /sf               Run next unit in step mode (same as /sf next)",
 		"  /sf next           Execute next task, then pause  [--dry-run] [--verbose]",
-		"  /sf auto           Run all queued units continuously  [--verbose]",
-		"  /sf stop           Stop auto-mode gracefully",
-		"  /sf pause          Pause auto-mode (preserves state, /sf auto to resume)",
+		"  /sf autonomous     Run all queued product units continuously  [--verbose]",
+		"  /sf auto           Alias for /sf autonomous",
+		"  /sf stop           Stop autonomous mode gracefully",
+		"  /sf pause          Pause autonomous mode (preserves state, /sf autonomous to resume)",
 		"  /sf discuss        Start guided milestone/slice discussion",
 		"  /sf new-milestone  Create milestone from headless context (used by sf headless)",
 		"",
diff --git a/src/resources/extensions/sf/commands/handlers/workflow.ts b/src/resources/extensions/sf/commands/handlers/workflow.ts
index 781b82f2d..7f79d24d7 100644
--- a/src/resources/extensions/sf/commands/handlers/workflow.ts
+++ b/src/resources/extensions/sf/commands/handlers/workflow.ts
@@ -180,7 +180,8 @@ async function handleCustomWorkflow(
 		}
 		const lines = runs.map((r) => {
 			const stepInfo = `${r.steps.completed}/${r.steps.total} steps`;
-			return `• ${r.name} [${r.timestamp}] — ${r.status} (${stepInfo})`;
+			const source = r.source?.kind ? ` [${r.source.kind}]` : "";
+			return `• ${r.name} [${r.timestamp}] — ${r.status} (${stepInfo})${source}`;
 		});
 		ctx.ui.notify(lines.join("\n"), "info");
 		return true;
@@ -242,7 +243,7 @@ async function handleCustomWorkflow(
 		const engineId = getActiveEngineId();
 		if (engineId === "dev" || engineId === null) {
 			ctx.ui.notify(
-				"No custom workflow to resume. Use /sf auto for dev workflow.",
+				"No custom workflow to resume. Use /sf autonomous for dev workflow.",
 				"warning",
 			);
 			return true;
diff --git a/src/resources/extensions/sf/crash-recovery.ts b/src/resources/extensions/sf/crash-recovery.ts
index c4d685853..d8bf40129 100644
--- a/src/resources/extensions/sf/crash-recovery.ts
+++ b/src/resources/extensions/sf/crash-recovery.ts
@@ -112,21 +112,21 @@ export function formatCrashInfo(lock: LockData): string {
 
 	// Add recovery guidance based on what was happening when it crashed
 	if (lock.unitType === "starting" && lock.unitId === "bootstrap") {
-		lines.push(`No work was lost. Run /sf auto to restart.`);
+		lines.push(`No work was lost. Run /sf autonomous to restart.`);
 	} else if (
 		lock.unitType.includes("research") ||
 		lock.unitType.includes("plan")
 	) {
 		lines.push(
-			`The ${lock.unitType} unit may be incomplete. Run /sf auto to re-run it.`,
+			`The ${lock.unitType} unit may be incomplete. Run /sf autonomous to re-run it.`,
 		);
 	} else if (lock.unitType.includes("execute")) {
 		lines.push(
-			`Task execution was interrupted. Run /sf auto to resume — completed work is preserved.`,
+			`Task execution was interrupted. Run /sf autonomous to resume — completed work is preserved.`,
 		);
 	} else if (lock.unitType.includes("complete")) {
 		lines.push(
-			`Slice/milestone completion was interrupted. Run /sf auto to finish.`,
+			`Slice/milestone completion was interrupted. Run /sf autonomous to finish.`,
 		);
 	}
 
diff --git a/src/resources/extensions/sf/custom-workflow-engine.ts b/src/resources/extensions/sf/custom-workflow-engine.ts
index bd7ffb7e7..90e27990f 100644
--- a/src/resources/extensions/sf/custom-workflow-engine.ts
+++ b/src/resources/extensions/sf/custom-workflow-engine.ts
@@ -28,6 +28,7 @@ import { withFileLock } from "./file-lock.js";
 import {
 	expandIteration,
 	getNextPendingStep,
+	markStepActive,
 	markStepComplete,
 	readGraph,
 	type WorkflowGraph,
@@ -87,82 +88,115 @@ export class CustomWorkflowEngine implements WorkflowEngine {
 	 * - Expanded GRAPH.yaml is written to disk before dispatch — inspectable on disk.
 	 */
 	async resolveDispatch(
-		state: EngineState,
+		_state: EngineState,
 		_context: { basePath: string },
 	): Promise<EngineDispatchAction> {
-		let graph = state.raw as WorkflowGraph;
-		let next = getNextPendingStep(graph);
+		const graphPath = join(this.runDir, "GRAPH.yaml");
 
-		if (!next) {
-			return {
-				action: "stop",
-				reason: "All steps complete",
-				level: "info",
-			};
-		}
-
-		// Check frozen DEFINITION.yaml for iterate config on this step
-		const def = readFrozenDefinition(this.runDir);
-		const stepDef = def.steps.find((s: StepDefinition) => s.id === next!.id);
-
-		if (stepDef?.iterate) {
-			const iterate = stepDef.iterate;
-
-			// Read source artifact
-			const sourcePath = join(this.runDir, iterate.source);
-			let sourceContent: string;
-			try {
-				sourceContent = readFileSync(sourcePath, "utf-8");
-			} catch {
-				throw new Error(
-					`Iterate source artifact not found: ${sourcePath} (step "${next.id}", source: "${iterate.source}")`,
-				);
+		return await withFileLock(graphPath, () => {
+			let graph = readGraph(this.runDir);
+			const active = graph.steps.find((step) => step.status === "active");
+			if (active) {
+				return {
+					action: "dispatch",
+					step: {
+						unitType: "custom-step",
+						unitId: `${graph.metadata.name}/${active.id}`,
+						prompt: injectContext(this.runDir, active.id, active.prompt),
+					},
+				};
 			}
 
-			// Extract items via regex with global+multiline flags.
-			// Guard against ReDoS: if matching takes too long on large inputs, bail.
-			const regex = new RegExp(iterate.pattern, "gm");
-			const items: string[] = [];
-			const matchStart = Date.now();
-			let match: RegExpExecArray | null;
-			// biome-ignore lint/suspicious/noAssignInExpressions: intentional read loop
-			while ((match = regex.exec(sourceContent)) !== null) {
-				if (match[1] !== undefined) items.push(match[1]);
-				if (Date.now() - matchStart > 5_000) {
-					throw new Error(
-						`Iterate pattern "${iterate.pattern}" exceeded 5s timeout on step "${next.id}" — possible ReDoS`,
-					);
-				}
-			}
-
-			// Expand the graph
-			const expandedGraph = expandIteration(graph, next.id, items, next.prompt);
-			writeGraph(this.runDir, expandedGraph);
-			graph = expandedGraph;
-
-			// Re-query for first instance step
-			next = getNextPendingStep(expandedGraph);
+			let next = getNextPendingStep(graph);
 
 			if (!next) {
 				return {
 					action: "stop",
-					reason: "Iterate expansion produced no instances",
+					reason: "All steps complete",
 					level: "info",
 				};
 			}
-		}
 
-		// Enrich prompt with context from prior step artifacts
-		const enrichedPrompt = injectContext(this.runDir, next.id, next.prompt);
+			// Check frozen DEFINITION.yaml for iterate config on this step
+			const def = readFrozenDefinition(this.runDir);
+			const stepDef = def.steps.find((s: StepDefinition) => s.id === next!.id);
 
-		return {
-			action: "dispatch",
-			step: {
-				unitType: "custom-step",
-				unitId: `${graph.metadata.name}/${next.id}`,
-				prompt: enrichedPrompt,
-			},
-		};
+			if (stepDef?.iterate) {
+				const iterate = stepDef.iterate;
+
+				// Read source artifact
+				const sourcePath = join(this.runDir, iterate.source);
+				let sourceContent: string;
+				try {
+					sourceContent = readFileSync(sourcePath, "utf-8");
+				} catch {
+					throw new Error(
+						`Iterate source artifact not found: ${sourcePath} (step "${next.id}", source: "${iterate.source}")`,
+					);
+				}
+
+				// Extract items via regex with global+multiline flags.
+				// Guard against ReDoS: if matching takes too long on large inputs, bail.
+				const regex = new RegExp(iterate.pattern, "gm");
+				const items: string[] = [];
+				const matchStart = Date.now();
+				let match: RegExpExecArray | null;
+				// biome-ignore lint/suspicious/noAssignInExpressions: intentional read loop
+				while ((match = regex.exec(sourceContent)) !== null) {
+					if (match[1] !== undefined) items.push(match[1]);
+					if (Date.now() - matchStart > 5_000) {
+						throw new Error(
+							`Iterate pattern "${iterate.pattern}" exceeded 5s timeout on step "${next.id}" — possible ReDoS`,
+						);
+					}
+				}
+
+				// Expand the graph
+				const expandedGraph = expandIteration(
+					graph,
+					next.id,
+					items,
+					next.prompt,
+				);
+				writeGraph(this.runDir, expandedGraph);
+				graph = expandedGraph;
+
+				// Re-query for first instance step
+				next = getNextPendingStep(expandedGraph);
+
+				if (!next) {
+					return {
+						action: "stop",
+						reason: "Iterate expansion produced no instances",
+						level: "info",
+					};
+				}
+			}
+
+			const activeGraph = markStepActive(graph, next.id);
+			writeGraph(this.runDir, activeGraph);
+
+			const activeStep = activeGraph.steps.find((s) => s.id === next.id);
+			if (!activeStep) {
+				throw new Error(`Active step not found after GRAPH.yaml update: ${next.id}`);
+			}
+
+			// Enrich prompt with context from prior step artifacts
+			const enrichedPrompt = injectContext(
+				this.runDir,
+				activeStep.id,
+				activeStep.prompt,
+			);
+
+			return {
+				action: "dispatch",
+				step: {
+					unitType: "custom-step",
+					unitId: `${activeGraph.metadata.name}/${activeStep.id}`,
+					prompt: enrichedPrompt,
+				},
+			};
+		});
 	}
 
 	/**
diff --git a/src/resources/extensions/sf/dashboard-overlay.ts b/src/resources/extensions/sf/dashboard-overlay.ts
index 15450360c..5563c1da1 100644
--- a/src/resources/extensions/sf/dashboard-overlay.ts
+++ b/src/resources/extensions/sf/dashboard-overlay.ts
@@ -443,7 +443,7 @@ export class SFDashboardOverlay {
 			);
 			lines.push(blank());
 		} else if (this.dashData.paused) {
-			lines.push(row(th.fg("dim", "/sf auto to resume")));
+			lines.push(row(th.fg("dim", "/sf autonomous to resume")));
 			lines.push(blank());
 		} else if (isRemote) {
 			const rs = this.dashData.remoteSession!;
@@ -454,7 +454,7 @@ export class SFDashboardOverlay {
 			lines.push(row(th.fg("text", `Remote session: ${unitDisplay}`)));
 			lines.push(blank());
 		} else {
-			lines.push(row(th.fg("dim", "No unit running · /sf auto to start")));
+			lines.push(row(th.fg("dim", "No unit running · /sf autonomous to start")));
 			lines.push(blank());
 		}
 
diff --git a/src/resources/extensions/sf/forensics.ts b/src/resources/extensions/sf/forensics.ts
index 5acdf9311..d80098e74 100644
--- a/src/resources/extensions/sf/forensics.ts
+++ b/src/resources/extensions/sf/forensics.ts
@@ -275,7 +275,7 @@ export async function handleForensics(
 	const basePath = process.cwd();
 	const root = sfRoot(basePath);
 	if (!existsSync(root)) {
-		ctx.ui.notify("No SF state found. Run /sf auto first.", "warning");
+		ctx.ui.notify("No SF state found. Run /sf autonomous first.", "warning");
 		return;
 	}
 
@@ -1119,7 +1119,7 @@ function detectWorktreeOrphans(
 			summary: `${count} worktree orphan(s) detected (${reason})`,
 			details:
 				reason === "in-progress-unmerged"
-					? "Auto-mode exited without completing a milestone; live work sits on an unmerged milestone branch. Run `/sf auto` to resume, or merge manually."
+					? "Autonomous mode exited without completing a milestone; live work sits on an unmerged milestone branch. Run `/sf autonomous` to resume, or merge manually."
 					: reason === "complete-unmerged"
 						? "A completed milestone's branch was never merged back to main. Run `/sf health --fix` to resolve."
 						: `Reason: ${reason}.`,
diff --git a/src/resources/extensions/sf/graph.ts b/src/resources/extensions/sf/graph.ts
index 878d47fb6..b328fa0bf 100644
--- a/src/resources/extensions/sf/graph.ts
+++ b/src/resources/extensions/sf/graph.ts
@@ -182,6 +182,45 @@ export function getNextPendingStep(graph: WorkflowGraph): GraphStep | null {
 	return null;
 }
 
+/**
+ * Produce a copy of `graph` in which the step identified by `stepId` is
+ * flagged "active". The input graph is left untouched.
+ *
+ * Purpose: persist the fact that a custom workflow step was handed out, so a
+ * restart or a concurrent dispatch attempt picks the same unit back up rather
+ * than choosing another pending step.
+ *
+ * Consumer: CustomWorkflowEngine.resolveDispatch, right before it hands a
+ * custom-step unit to the auto loop.
+ *
+ * @param graph — workflow graph to derive the new graph from
+ * @param stepId — ID of the step that becomes active
+ * @returns A fresh graph whose matching step has status "active"; an existing
+ *   `startedAt` is kept, otherwise the current ISO timestamp is recorded
+ * @throws Error if no step in the graph has the given ID
+ */
+export function markStepActive(
+	graph: WorkflowGraph,
+	stepId: string,
+): WorkflowGraph {
+	const isTarget = (step: { id: string }): boolean => step.id === stepId;
+	if (!graph.steps.some(isTarget)) {
+		throw new Error(`Step not found: ${stepId}`);
+	}
+
+	// Only the matching step is copied; every other step object is reused.
+	const steps = graph.steps.map((step) => {
+		if (!isTarget(step)) return step;
+		return {
+			...step,
+			status: "active" as const,
+			startedAt: step.startedAt ?? new Date().toISOString(),
+		};
+	});
+
+	return { ...graph, steps };
+}
+
 /**
  * Return a new graph with the specified step marked as "complete".
  * Immutable — does not mutate the input graph.
diff --git a/src/resources/extensions/sf/guided-flow.ts b/src/resources/extensions/sf/guided-flow.ts
index a9b89dce7..f6e2ed690 100644
--- a/src/resources/extensions/sf/guided-flow.ts
+++ b/src/resources/extensions/sf/guided-flow.ts
@@ -1815,7 +1815,7 @@ export async function showWorkflowEntry(
 		if (interrupted.lock) clearLock(basePath);
 		const resumeLabel = interrupted.pausedSession?.stepMode
 			? "Resume with /sf next"
-			: "Resume with /sf auto";
+			: "Resume with /sf autonomous";
 		const resume = await showNextAction(ctx, {
 			title: "SF — Interrupted Session Detected",
 			summary: formatInterruptedSessionSummary(interrupted),
diff --git a/src/resources/extensions/sf/learning/data/model-benchmarks.json b/src/resources/extensions/sf/learning/data/model-benchmarks.json
index cb4e8855d..2568a5605 100644
--- a/src/resources/extensions/sf/learning/data/model-benchmarks.json
+++ b/src/resources/extensions/sf/learning/data/model-benchmarks.json
@@ -1,6 +1,6 @@
 {
 	"_meta": {
-		"version": "1",
+		"schemaVersion": 1,
 		"generated": "2026-04-15",
 		"notes": "Real benchmark numbers from vendor model cards and public leaderboards. Null where no published value exists. Do not fabricate. Schema is the union of metrics any one model publishes; most models populate only a subset.",
 		"benchmark_scales": {
diff --git a/src/resources/extensions/sf/learning/data/unit-weights.json b/src/resources/extensions/sf/learning/data/unit-weights.json
index bfc726c25..98311851b 100644
--- a/src/resources/extensions/sf/learning/data/unit-weights.json
+++ b/src/resources/extensions/sf/learning/data/unit-weights.json
@@ -1,6 +1,6 @@
 {
 	"_meta": {
-		"version": "1",
+		"schemaVersion": 1,
 		"generated": "2026-04-15",
 		"notes": "Per-unit-type benchmark weight maps. Each block sums to ~1.0. Benchmarks referenced must be a subset of model-benchmarks.json schema. Used by computeUnitTypeScore() to rank candidates per unit type."
 	},
diff --git a/src/resources/extensions/sf/memory-sleeper.ts b/src/resources/extensions/sf/memory-sleeper.ts
new file mode 100644
index 000000000..9d29292b8
--- /dev/null
+++ b/src/resources/extensions/sf/memory-sleeper.ts
@@ -0,0 +1,168 @@
+import type { ToolResultEvent } from "@singularity-forge/pi-coding-agent";
+
+/** A one-shot corrective message produced from an observed tool result. */
+export interface MemorySleeperSteer {
+	/** De-duplication key; a given key is emitted at most once until reset. */
+	key: string;
+	/** Full steering message text. */
+	content: string;
+	severity: "info" | "warning";
+}
+
+/** Failure bookkeeping for one normalized bash command. */
+interface BashFailure {
+	/** Number of failures counted so far. */
+	count: number;
+	/** Date.now() value recorded at the most recent failure. */
+	lastAt: number;
+}
+
+// Module-level state; only resetMemorySleeper() clears it.
+const seenKeys = new Set<string>();
+const bashFailures = new Map<string, BashFailure>();
+
+/** Upper bound on how much tool-result text is inspected. */
+const MAX_RESULT_CHARS = 6000;
+/** A gap longer than this (10 minutes) restarts a command's failure count. */
+const REPEAT_FAILURE_WINDOW_MS = 10 * 60 * 1000;
+
+/** Forget every emitted steer key and every recorded bash failure. */
+export function resetMemorySleeper(): void {
+	seenKeys.clear();
+	bashFailures.clear();
+}
+
+/** Stringify a command, collapse whitespace runs to one space, and trim. */
+function normalizeCommand(command: unknown): string {
+	return String(command ?? "")
+		.replace(/\s+/g, " ")
+		.trim();
+}
+
+/** Join the text parts of a tool result, capped at MAX_RESULT_CHARS. */
+function contentText(event: ToolResultEvent): string {
+	return event.content
+		.map((part) => (part.type === "text" ? part.text : ""))
+		.join("\n")
+		.slice(0, MAX_RESULT_CHARS);
+}
+
+/** Return the steer the first time its key is seen; undefined afterwards. */
+function once(steer: MemorySleeperSteer): MemorySleeperSteer | undefined {
+	if (seenKeys.has(steer.key)) return undefined;
+	seenKeys.add(steer.key);
+	return steer;
+}
+
+/** Wrap a title and body in the standard steering message layout. */
+function buildSteer(title: string, body: string): string {
+	return [
+		`Memory sleeper steering: ${title}`,
+		"",
+		body,
+		"",
+		"Treat this as a narrow correction. Do not restart the task; adapt the next step and continue.",
+	].join("\n");
+}
+
+/** Steer away from Bun when a bash command invokes `bun` or `bunx`. */
+function maybeBunSteer(event: ToolResultEvent): MemorySleeperSteer | undefined {
+	if (event.toolName !== "bash") return undefined;
+	const command = normalizeCommand(event.input.command);
+	if (!/\b(bun|bunx)\b/.test(command)) return undefined;
+
+	return once({
+		key: "bun-command",
+		severity: "warning",
+		content: buildSteer(
+			"avoid Bun in this project",
+			"The operator explicitly requested Node/npm verification. Replace Bun commands with npm/node equivalents before continuing.",
+		),
+	});
+}
+
+/** Steer on known failure signatures found in the tool-result text. */
+function maybeKnownFailureSteer(text: string): MemorySleeperSteer | undefined {
+	if (
+		text.includes("Cannot find module") &&
+		text.includes("/extensions/sf/commands/dispatcher.js")
+	) {
+		return once({
+			key: "sf-dispatcher-import",
+			severity: "warning",
+			content: buildSteer(
+				"SF dispatcher import failed",
+				"The installed SF command extension is trying to import commands/dispatcher.js and cannot resolve it. Check the extension sync/build path before retrying /sf autonomous.",
+			),
+		});
+	}
+
+	if (
+		text.includes("failed to update rules file") &&
+		text.includes(".codex/rules/default.rules") &&
+		text.includes("Permission denied")
+	) {
+		return once({
+			key: "exec-policy-permission",
+			severity: "warning",
+			content: buildSteer(
+				"exec policy rules file is not writable",
+				"The Codex rules file cannot be updated. Inspect whether ~/.codex/rules/default.rules is a read-only symlink, replace it with a local writable copy if needed, then retry the policy update.",
+			),
+		});
+	}
+
+	return undefined;
+}
+
+/**
+ * Count failures of the same normalized bash command and steer once it has
+ * failed at least twice with no more than REPEAT_FAILURE_WINDOW_MS between
+ * consecutive failures.
+ */
+function maybeRepeatedFailureSteer(
+	event: ToolResultEvent,
+	text: string,
+): MemorySleeperSteer | undefined {
+	if (event.toolName !== "bash" || !event.isError) return undefined;
+
+	const command = normalizeCommand(event.input.command);
+	if (!command) return undefined;
+
+	const now = Date.now();
+	const existing = bashFailures.get(command);
+	// A failure outside the window starts a fresh count.
+	const count =
+		existing && now - existing.lastAt <= REPEAT_FAILURE_WINDOW_MS
+			? existing.count + 1
+			: 1;
+	bashFailures.set(command, { count, lastAt: now });
+
+	if (count < 2) return undefined;
+	return once({
+		key: `repeat-failure:${command}`,
+		severity: "warning",
+		content: buildSteer(
+			"repeated failing command",
+			`The same bash command has failed ${count} times in this auto-mode unit:\n\n${command}\n\nStop retrying it as-is. Read the error, inspect the relevant files/config, and choose a different repair or verification path.\n\nLatest result excerpt:\n${text.slice(0, 1200)}`,
+		),
+	});
+}
+
+/**
+ * Inspect one tool result and return at most one steer.
+ *
+ * Checks run in order: Bun usage, known failure signatures, repeated bash
+ * failures. The first check that yields a steer wins and later checks are
+ * skipped, so a failure is not counted when an earlier check fires.
+ */
+export function observeMemorySleeperToolResult(
+	event: ToolResultEvent,
+): MemorySleeperSteer | undefined {
+	const text = contentText(event);
+	return (
+		maybeBunSteer(event) ??
+		maybeKnownFailureSteer(text) ??
+		maybeRepeatedFailureSteer(event, text)
+	);
+}
diff --git a/src/resources/extensions/sf/prompts/discuss-headless.md b/src/resources/extensions/sf/prompts/discuss-headless.md
index f7e55ff75..3f72ca20c 100644
--- a/src/resources/extensions/sf/prompts/discuss-headless.md
+++ b/src/resources/extensions/sf/prompts/discuss-headless.md
@@ -76,8 +76,8 @@ Before anything else, form a diagnosis: What is the core challenge? What is brok
 - **Measure coverage**: find untested critical paths
 - **Scan for dead code, stubs, and commented-out features** — abandoned attempts are signals
 - **Discover needed skills**: identify repo languages, frameworks, data stores, external services, build tools, and domain-specific competencies. Check installed skills first; record installed, missing, and potentially useful skills in `.sf/CODEBASE.md` and `.sf/PM-STRATEGY.md`.
-- **Use code intelligence when available**: if the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, index/query it for broad concept, symbol, schema, and git-history searches before manually reading files. If it is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.
-- Use `rg`, `find`, `ast-grep`, `ls -la` for broad codebase mapping
+- **Use code intelligence when available**: if the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, index/query it for broad concept, symbol, schema, and git-history searches before manually reading files. If it is missing or fails, continue with `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout.
+- Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. Fall back to shell `rg`, `find`, `ast-grep`, or `ls -la` only when the native/in-process tool surface is insufficient.
 
 ### Step 2: Check library and ecosystem facts
 - **DeepWiki first:** `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library or framework — AI-indexed, no free-tier cap
diff --git a/src/resources/extensions/sf/prompts/discuss.md b/src/resources/extensions/sf/prompts/discuss.md
index 783195d8c..a85ce9b27 100644
--- a/src/resources/extensions/sf/prompts/discuss.md
+++ b/src/resources/extensions/sf/prompts/discuss.md
@@ -34,7 +34,7 @@ After reflection is confirmed, decide the approach based on the actual scope —
 
 Before asking your first question, do a mandatory investigation pass. This is not optional.
 
-1. **Scout the codebase** — `ls`, `find`, `rg`, or `scout` for broad unfamiliar areas. Understand what already exists, what patterns are established, what constraints current code imposes.
+1. **Scout the codebase** — use in-process `grep`, `find`, `ls`, and `lsp` first; use `codebase_search` for Sift-backed hybrid retrieval; use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
 2. **Check library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library or framework the user mentioned. Fall back to `resolve_library` / `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. **Context7 free tier is capped at 1000 req/month — spend those on cases DeepWiki can't cover.** Get current facts about capabilities, constraints, API shapes, version-specific behavior.
 3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the user referenced external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough.
 
diff --git a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md
index 094cdd4cf..fa7c0411f 100644
--- a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md
+++ b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md
@@ -15,7 +15,7 @@ Apply `pm-planning` skill thinking throughout: use Working Backwards to anchor o
 ### Before your first question round
 
 Do a lightweight targeted investigation so your questions are grounded in reality:
-- Scout the codebase (`rg`, `find`, or `scout`) to understand what already exists that this milestone touches or builds on
+- Scout the codebase with in-process `grep`, `find`, `ls`, and `lsp` first; use `codebase_search` for Sift-backed hybrid retrieval; use `scout` for broad unfamiliar areas that need a separate explorer
 - If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP search tools for broad concept, symbol, schema, and git-history lookup before manually reading files
 - Check the roadmap context above (if present) to understand what surrounds this milestone
 - **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
diff --git a/src/resources/extensions/sf/prompts/guided-discuss-slice.md b/src/resources/extensions/sf/prompts/guided-discuss-slice.md
index f9760ee04..40d348a41 100644
--- a/src/resources/extensions/sf/prompts/guided-discuss-slice.md
+++ b/src/resources/extensions/sf/prompts/guided-discuss-slice.md
@@ -11,7 +11,7 @@ Your goal is **not** to center the discussion on tech stack trivia, naming conve
 ### Before your first question round
 
 Do a lightweight targeted investigation so your questions are grounded in reality:
-- Scout the codebase (`rg`, `find`, or `scout` for broad unfamiliar areas) to understand what already exists that this slice touches or builds on
+- Scout the codebase with in-process `grep`, `find`, `ls`, and `lsp` first; use `codebase_search` for Sift-backed hybrid retrieval; use `scout` for broad unfamiliar areas that need a separate explorer
 - Check the roadmap context above to understand what surrounds this slice — what comes before, what depends on it
 - **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
 - Identify the 3–5 biggest behavioural unknowns: things where the user's answer will materially change what gets built
diff --git a/src/resources/extensions/sf/prompts/queue.md b/src/resources/extensions/sf/prompts/queue.md
index e1b4fb485..d49604d2a 100644
--- a/src/resources/extensions/sf/prompts/queue.md
+++ b/src/resources/extensions/sf/prompts/queue.md
@@ -26,7 +26,7 @@ Never fabricate or simulate user input during this discussion. Never generate fa
 
 - Check library docs **DeepWiki first** (`ask_question` / `read_wiki_structure` / `read_wiki_contents`) for any GitHub-hosted library or framework — AI-indexed, no free-tier cap. Fall back to Context7 (`resolve_library` / `get_library_docs`) for npm/pypi/crates packages DeepWiki doesn't cover. Context7 free tier is 1000 req/month — don't spend those on cases DeepWiki covers.
 - Do web searches (`search-the-web`) to verify the landscape — what solutions exist, what's changed recently, what's the current best practice. Use `freshness` for recency-sensitive queries, `domain` to target specific sites. Use `fetch_page` to read the full content of promising URLs when snippets aren't enough. **Budget:** You have a limited number of web searches per turn (typically 3-5). Prefer DeepWiki → Context7 → web search for docs; use `search_and_read` for one-shot topic research. Do NOT repeat the same or similar queries. Distribute searches across turns rather than clustering them.
-- Scout the codebase (`ls`, `find`, `rg`, or `scout` for broad unfamiliar areas) to understand what already exists, what patterns are established, what constraints current code imposes
+- Scout the codebase with in-process `grep`, `find`, `ls`, and `lsp` first; use `codebase_search` for Sift-backed hybrid retrieval; use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes
 
 Don't go deep — just enough that your next question reflects what's actually true rather than what you assume.
 
diff --git a/src/resources/extensions/sf/prompts/system.md b/src/resources/extensions/sf/prompts/system.md
index 9b1a0e5f7..1e6c8316f 100644
--- a/src/resources/extensions/sf/prompts/system.md
+++ b/src/resources/extensions/sf/prompts/system.md
@@ -142,7 +142,8 @@ Templates showing the expected format for each artifact type are in:
 ### Commands
 
 - `/sf` - contextual wizard
-- `/sf auto` - auto-execute (fresh context per task)
+- `/sf autonomous` - auto-execute (fresh context per task)
+- `/sf auto` - alias for `/sf autonomous`
 - `/sf stop` - stop auto-mode
 - `/sf status` - progress dashboard overlay
 - `/sf queue` - queue future milestones (safe while auto-mode is running)
@@ -161,7 +162,7 @@ Templates showing the expected format for each artifact type are in:
 
 **Code navigation:** Use `lsp` for definition, type_definition, implementation, references, incoming_calls, outgoing_calls, hover, signature, symbols, rename, code_actions, format, and diagnostics. Falls back gracefully if no server is available. Never `grep` for a symbol definition when `lsp` can resolve it semantically. Never shell out to prettier/rustfmt/gofmt when `lsp format` is available. After editing code, use `lsp diagnostics` to verify no type errors were introduced.
 
-**Codebase exploration:** Use `subagent` with `scout` for broad unfamiliar subsystem mapping. Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Use `rg` for text search across files. Use `lsp` for structural navigation. Never read files one-by-one to "explore" — search first, then read what's relevant.
+**Codebase exploration:** Prefer in-process SF tools first: `grep` for exact text search, `find`/`ls` for filesystem discovery, and `lsp` for structural navigation. These avoid shelling out and use SF's native backends where available. Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Use `codebase_search` when Sift-backed hybrid retrieval is a better fit than exact search. Use `subagent` with `scout` for broad unfamiliar subsystem mapping that needs an explorer's judgment. Never read files one-by-one to "explore" — search first, then read what's relevant.
 
 **Swarm dispatch:** Let the system decide whether swarming fits before dispatching multiple execution subagents. Use a 2-3 worker same-model swarm only when the work splits into independent shards with explicit file/directory ownership, shard-local verification, low conflict risk, and clear wall-clock savings. Do not swarm shared-interface edits, lockfiles, migrations, single-failure debugging, or sequence-dependent work. The parent agent remains coordinator: assign ownership, synthesize results, inspect dirty files, resolve conflicts, and run final verification.
 
diff --git a/src/resources/extensions/sf/run-manager.ts b/src/resources/extensions/sf/run-manager.ts
index eb7a6cd4d..73d80b106 100644
--- a/src/resources/extensions/sf/run-manager.ts
+++ b/src/resources/extensions/sf/run-manager.ts
@@ -17,13 +17,18 @@ import {
 	existsSync,
 	mkdirSync,
 	readdirSync,
+	readFileSync,
 	statSync,
 	writeFileSync,
 } from "node:fs";
 import { join } from "node:path";
 import { stringify } from "yaml";
 import type { WorkflowDefinition } from "./definition-loader.js";
-import { loadDefinition, substituteParams } from "./definition-loader.js";
+import {
+	loadDefinition,
+	substituteParams,
+	validateDefinition,
+} from "./definition-loader.js";
 import type { WorkflowGraph } from "./graph.js";
 import { initializeGraph, readGraph, writeGraph } from "./graph.js";
 
@@ -40,12 +45,26 @@ export interface RunMetadata {
 	steps: { total: number; completed: number; pending: number; active: number };
 	/** Overall status derived from step states. */
 	status: "pending" | "running" | "complete";
+	/** Optional source metadata for template-compiled or definition-backed runs. */
+	source?: WorkflowRunSourceMetadata;
+}
+
+export interface WorkflowRunSourceMetadata {
+	kind: "definition" | "template";
+	mode: "guided" | "autonomous" | "explicit";
+	templateId?: string;
+	description?: string;
+	issueRef?: string | null;
+	artifactDir?: string | null;
+	branch?: string | null;
+	createdAt: string;
 }
 
 // ─── Constants ───────────────────────────────────────────────────────────
 
 const RUNS_DIR = "workflow-runs";
 const DEFS_DIR = "workflow-defs";
+const RUN_METADATA_FILENAME = "RUN.json";
 
 // ─── Helpers ─────────────────────────────────────────────────────────────
 
@@ -76,6 +95,55 @@ function deriveStatus(
 	return "pending";
 }
 
+/** Create .sf/<RUNS_DIR>/<runName>/<timestamp> under basePath; return its path. */
+function createRunDirectory(basePath: string, runName: string): string {
+	const runDir = join(basePath, ".sf", RUNS_DIR, runName, makeTimestamp());
+	mkdirSync(runDir, { recursive: true });
+	return runDir;
+}
+
+/**
+ * Populate a run directory with its definition, graph and optional metadata.
+ */
+function writeRunFiles(
+	runDir: string,
+	def: WorkflowDefinition,
+	options?: {
+		params?: Record<string, string>;
+		source?: WorkflowRunSourceMetadata;
+	},
+): void {
+	const params = options?.params;
+	const source = options?.source;
+	const writeText = (fileName: string, text: string): void =>
+		writeFileSync(join(runDir, fileName), text, "utf-8");
+
+	writeText("DEFINITION.yaml", stringify(def));
+	writeGraph(runDir, initializeGraph(def));
+
+	// PARAMS.json is written only for a non-empty parameter map.
+	if (params && Object.keys(params).length > 0) {
+		writeText("PARAMS.json", JSON.stringify(params, null, 2));
+	}
+
+	// Unlike PARAMS.json, the run metadata file ends with a newline.
+	if (source) {
+		writeText(RUN_METADATA_FILENAME, `${JSON.stringify(source, null, 2)}\n`);
+	}
+}
+
+/** Read the run metadata file of a run; undefined when absent or unparsable. */
+function readRunSource(runDir: string): WorkflowRunSourceMetadata | undefined {
+	const metadataPath = join(runDir, RUN_METADATA_FILENAME);
+	if (!existsSync(metadataPath)) return undefined;
+	try {
+		const raw = readFileSync(metadataPath, "utf-8");
+		return JSON.parse(raw) as WorkflowRunSourceMetadata | undefined;
+	} catch {
+		return undefined;
+	}
+}
+
 // ─── Public API ──────────────────────────────────────────────────────────
 
 /**
@@ -107,27 +175,48 @@ export function createRun(
 		? substituteParams(rawDef, overrides)
 		: substituteParams(rawDef); // still resolve default params if any
 
-	// Create the run directory
-	const timestamp = makeTimestamp();
-	const runDir = join(basePath, ".sf", RUNS_DIR, defName, timestamp);
-	mkdirSync(runDir, { recursive: true });
+	const runDir = createRunDirectory(basePath, defName);
+	writeRunFiles(runDir, def, {
+		params: overrides,
+		source: {
+			kind: "definition",
+			mode: "explicit",
+			createdAt: new Date().toISOString(),
+		},
+	});
 
-	// Freeze the definition as DEFINITION.yaml
-	writeFileSync(join(runDir, "DEFINITION.yaml"), stringify(def), "utf-8");
+	return runDir;
+}
 
-	// Initialize and write GRAPH.yaml
-	const graph = initializeGraph(def);
-	writeGraph(runDir, graph);
-
-	// Write PARAMS.json if overrides were provided
-	if (overrides && Object.keys(overrides).length > 0) {
-		writeFileSync(
-			join(runDir, "PARAMS.json"),
-			JSON.stringify(overrides, null, 2),
-			"utf-8",
+/**
+ * Create a new isolated run directory from an already-built definition.
+ *
+ * Purpose: let non-YAML authoring surfaces, such as `/sf start` templates,
+ * enter the same graph-backed runtime as custom YAML definitions.
+ * Throws: Error listing every validation error; no run directory is created then.
+ * Consumer: workflow template commands after resolving and compiling a template.
+ */
+export function createRunFromDefinition(
+	basePath: string,
+	runName: string,
+	definition: WorkflowDefinition,
+	source?: Omit<WorkflowRunSourceMetadata, "createdAt">, // createdAt is stamped below
+): string {
+	const validation = validateDefinition(definition); // validate before touching disk
+	if (!validation.valid) {
+		throw new Error(
+			`Invalid workflow definition for run "${runName}":\n  - ${validation.errors.join("\n  - ")}`,
 		);
 	}
-
+	const runDir = createRunDirectory(basePath, runName);
+	writeRunFiles(runDir, definition, {
+		source: source
+			? {
+					...source,
+					createdAt: new Date().toISOString(),
+				}
+			: undefined,
+	});
 	return runDir;
 }
 
@@ -186,6 +275,7 @@ export function listRuns(basePath: string, defName?: string): RunMetadata[] {
 					runDir,
 					steps: { total, completed, pending, active },
 					status: deriveStatus(graph),
+					source: readRunSource(runDir),
 				});
 			} catch {
 				// Skip runs with invalid/missing GRAPH.yaml
diff --git a/src/resources/extensions/sf/safety/evidence-collector.ts b/src/resources/extensions/sf/safety/evidence-collector.ts
index 54682754f..01d5ea9cf 100644
--- a/src/resources/extensions/sf/safety/evidence-collector.ts
+++ b/src/resources/extensions/sf/safety/evidence-collector.ts
@@ -7,6 +7,9 @@
  * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
  */
 
+import { appendFileSync, existsSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+
 // ─── Types ──────────────────────────────────────────────────────────────────
 
 export interface BashEvidence {
@@ -38,11 +41,22 @@ export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
 
 let unitEvidence: EvidenceEntry[] = [];
 
+// Disk persistence: unit context set by resetEvidence() at unit start.
+// Disk writes are skipped unless both currentUnitId and currentBasePath are set.
+let currentUnitId: string | undefined = undefined;
+let currentBasePath: string | undefined = undefined;
+
 // ─── Public API ─────────────────────────────────────────────────────────────
 
-/** Reset all evidence for a new unit. Call at unit start. */
-export function resetEvidence(): void {
+/**
+ * Reset in-memory evidence for a new unit (nothing on disk is truncated here). Call at unit start.
+ * @param unitId - The active unit ID (e.g. M006/S02/T03); omit to disable disk persistence
+ * @param basePath - Project root path for computing the evidence file path; omit to disable disk persistence
+ */
+export function resetEvidence(unitId?: string, basePath?: string): void {
 	unitEvidence = [];
+	currentUnitId = unitId;
+	currentBasePath = basePath;
 }
 
 /** Get a read-only view of all evidence collected for the current unit. */
@@ -65,6 +79,32 @@ export function getFilePaths(): string[] {
 		.map((e) => e.path);
 }
 
+// ─── Disk Persistence ──────────────────────────────────────────────────────
+
+/**
+ * Persist one evidence entry as a JSON line in .sf/active/{unitId}/evidence.jsonl.
+ *
+ * Why: in-memory evidence recorded at tool_call time is lost if runUnitPhase
+ * re-fires between tool_call and tool_execution_end; the on-disk log survives
+ * that mid-unit re-dispatch.
+ *
+ * The unit context comes from resetEvidence(unitId, basePath), which phases.ts
+ * runUnitPhase calls at unit start. Writes are best-effort and never throw.
+ */
+function saveEvidenceToDisk(entry: EvidenceEntry): void {
+	// Without a unit context there is nowhere to write.
+	if (!(currentUnitId && currentBasePath)) return;
+	try {
+		const evidenceDir = join(currentBasePath, ".sf", "active", currentUnitId);
+		if (!existsSync(evidenceDir)) mkdirSync(evidenceDir, { recursive: true });
+		const serialized = `${JSON.stringify(entry)}\n`;
+		// Append-only: each call adds one line; earlier lines are never rewritten.
+		appendFileSync(join(evidenceDir, "evidence.jsonl"), serialized);
+	} catch {
+		// Best-effort: disk write failures must not crash the agent.
+	}
+}
+
 // ─── Recording (called from register-hooks.ts) ─────────────────────────────
 
 /**
@@ -75,29 +115,34 @@ export function recordToolCall(
 	toolName: string,
 	input: Record<string, unknown>,
 ): void {
+	let entry: EvidenceEntry | undefined;
 	if (toolName === "bash" || toolName === "Bash") {
-		unitEvidence.push({
+		entry = {
 			kind: "bash",
 			toolCallId: "",
 			command: String(input.command ?? ""),
 			exitCode: -1,
 			outputSnippet: "",
 			timestamp: Date.now(),
-		});
+		};
 	} else if (toolName === "write" || toolName === "Write") {
-		unitEvidence.push({
+		entry = {
 			kind: "write",
 			toolCallId: "",
 			path: String(input.file_path ?? input.path ?? ""),
 			timestamp: Date.now(),
-		});
+		};
 	} else if (toolName === "edit" || toolName === "Edit") {
-		unitEvidence.push({
+		entry = {
 			kind: "edit",
 			toolCallId: "",
 			path: String(input.file_path ?? input.path ?? ""),
 			timestamp: Date.now(),
-		});
+		};
+	}
+	if (entry) {
+		unitEvidence.push(entry);
+		saveEvidenceToDisk(entry);
 	}
 }
 
@@ -121,11 +166,13 @@ export function recordToolResult(
 			entry.outputSnippet = text.slice(0, 500);
 			const exitMatch = text.match(/Command exited with code (\d+)/);
 			entry.exitCode = exitMatch ? Number(exitMatch[1]) : isError ? 1 : 0;
+			saveEvidenceToDisk(entry);
 		}
 	} else if (normalizedName === "write" || normalizedName === "edit") {
 		const entry = findLastUnresolved(normalizedName as "write" | "edit");
 		if (entry) {
 			entry.toolCallId = toolCallId;
+			saveEvidenceToDisk(entry);
 		}
 	}
 }
diff --git a/src/resources/extensions/sf/skills/researcher/SKILL.md b/src/resources/extensions/sf/skills/researcher/SKILL.md
index ddeff8bf1..6cf6e57d0 100644
--- a/src/resources/extensions/sf/skills/researcher/SKILL.md
+++ b/src/resources/extensions/sf/skills/researcher/SKILL.md
@@ -33,7 +33,7 @@ mcp_call server=serena tool=find_referencing_symbols arguments={contextLines=3,m
 mcp_call server=serena tool=read_file arguments={file_path="src/resources/extensions/subagent/index.ts"}
 
 # Search for pattern in files
-mcp_call server=serena tool=search_for_pattern arguments={pattern="call_scout",filePattern="*.ts",contextLines=3}
+mcp_call server=serena tool=search_for_pattern arguments={pattern="codebase_search",filePattern="*.ts",contextLines=3}
 
 # List directory
 mcp_call server=serena tool=list_dir arguments={path="src/resources/extensions/sf/skills/"}
diff --git a/src/resources/extensions/sf/slice-parallel-orchestrator.ts b/src/resources/extensions/sf/slice-parallel-orchestrator.ts
index 1db115794..56e125ed2 100644
--- a/src/resources/extensions/sf/slice-parallel-orchestrator.ts
+++ b/src/resources/extensions/sf/slice-parallel-orchestrator.ts
@@ -83,7 +83,7 @@ export function getSliceOrchestratorState(): SliceOrchestratorState | null {
 /**
  * Start parallel execution for eligible slices within a milestone.
  *
- * For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf auto"`
+ * For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf autonomous"`
  * with env SF_SLICE_LOCK=<SID> + SF_MILESTONE_LOCK=<MID> + SF_PARALLEL_WORKER=1.
  */
 export async function startSliceParallel(
@@ -328,7 +328,7 @@ function resolveSfBin(): string | null {
 
 /**
  * Spawn a worker process for a slice.
- * The worker runs `sf --mode json --print "/sf auto"` in the slice's worktree
+ * The worker runs `sf --mode json --print "/sf autonomous"` in the slice's worktree
  * with SF_SLICE_LOCK, SF_MILESTONE_LOCK, and SF_PARALLEL_WORKER set.
  */
 function spawnSliceWorker(
@@ -348,7 +348,7 @@ function spawnSliceWorker(
 	try {
 		child = spawn(
 			process.execPath,
-			[binPath, "--mode", "json", "--print", "/sf auto"],
+			[binPath, "--mode", "json", "--print", "/sf autonomous"],
 			{
 				cwd: worker.worktreePath,
 				env: {
diff --git a/src/resources/extensions/sf/tests/bundled-workflow-defs.test.ts b/src/resources/extensions/sf/tests/bundled-workflow-defs.test.ts
index 1caf9fc05..5e7b7fc0a 100644
--- a/src/resources/extensions/sf/tests/bundled-workflow-defs.test.ts
+++ b/src/resources/extensions/sf/tests/bundled-workflow-defs.test.ts
@@ -146,6 +146,67 @@ test("release-checklist.yaml passes validation", () => {
 	assert.equal(result.errors.length, 0);
 });
 
+// ─── product-tracking-lifecycle.yaml ────────────────────────────────────
+
+test("product-tracking-lifecycle.yaml passes validation", () => {
+	// The bundled definition must validate with no reported errors.
+	const { valid, errors } = validateDefinition(loadYaml("product-tracking-lifecycle.yaml"));
+	assert.equal(valid, true, `Invalid: ${errors.join("; ")}`);
+	assert.equal(errors.length, 0);
+});
+
+test("product-tracking-lifecycle.yaml: telemetry lifecycle is ordered and produces handoff artifacts", () => {
+	const parsed = loadYaml("product-tracking-lifecycle.yaml") as Record<
+		string,
+		unknown
+	>;
+	const steps = parsed.steps as Array<Record<string, unknown>>;
+	const stepIds = steps.map((step) => step.id);
+
+	assert.deepEqual(stepIds, [ // exact sequence: both order and count are checked
+		"model-product",
+		"audit-current-tracking",
+		"design-tracking-plan",
+		"generate-instrument-guide",
+		"implement-tracking",
+		"verify-tracking",
+	]);
+	assert.ok(
+		(parsed.description as string).includes("product telemetry"),
+		"workflow description should state the product telemetry purpose",
+	);
+
+	const guide = steps.find((step) => step.id === "generate-instrument-guide");
+	assert.ok(guide, "expected generate-instrument-guide step");
+	assert.ok(
+		(guide!.produces as string[]).includes(".telemetry/instrument.md"),
+		"instrument guide step should produce .telemetry/instrument.md",
+	);
+
+	const implementation = steps.find((step) => step.id === "implement-tracking");
+	assert.ok(implementation, "expected implement-tracking step");
+	assert.deepEqual(implementation!.requires, ["generate-instrument-guide"]); // sole prerequisite
+	assert.ok(
+		(implementation!.context_from as string[]).includes(
+			"generate-instrument-guide",
+		),
+		"implementation step should consume the instrumentation guide",
+	);
+	assert.equal(
+		(implementation!.verify as Record<string, unknown>).policy,
+		"content-heuristic",
+		"implementation step should verify artifacts instead of pausing unconditionally",
+	);
+
+	const verifyTracking = steps.find((step) => step.id === "verify-tracking");
+	assert.ok(verifyTracking, "expected verify-tracking step");
+	assert.equal(
+		(verifyTracking!.verify as Record<string, unknown>).policy,
+		"content-heuristic",
+		"verification step should verify artifacts instead of pausing unconditionally",
+	);
+});
+
 test("release-checklist.yaml: diamond dependencies and human-review", () => {
 	const parsed = loadYaml("release-checklist.yaml") as Record<string, unknown>;
 	const steps = parsed.steps as Array<Record<string, unknown>>;
@@ -199,6 +260,7 @@ test("no produces path contains '..'", () => {
 		"blog-post-pipeline.yaml",
 		"code-audit.yaml",
 		"release-checklist.yaml",
+		"product-tracking-lifecycle.yaml",
 	];
 
 	for (const file of files) {
diff --git a/src/resources/extensions/sf/tests/commands-workflow-custom.test.ts b/src/resources/extensions/sf/tests/commands-workflow-custom.test.ts
index 3026bfcc9..0189c1fba 100644
--- a/src/resources/extensions/sf/tests/commands-workflow-custom.test.ts
+++ b/src/resources/extensions/sf/tests/commands-workflow-custom.test.ts
@@ -6,7 +6,16 @@
  */
 
 import assert from "node:assert/strict";
-import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { execSync } from "node:child_process";
+import {
+	existsSync,
+	mkdirSync,
+	mkdtempSync,
+	readFileSync,
+	readdirSync,
+	rmSync,
+	writeFileSync,
+} from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, before, describe, it } from "node:test";
@@ -15,6 +24,7 @@ import {
 	getSfArgumentCompletions,
 	TOP_LEVEL_SUBCOMMANDS,
 } from "../commands/catalog.ts";
+import { loadRegistry } from "../workflow-templates.ts";
 
 // ─── Helpers ─────────────────────────────────────────────────────────────
 
@@ -27,6 +37,15 @@ function makeTmpBase(): string {
 	return dir;
 }
 
+function initGitRepo(base: string): void {
+	// Create a repo in `base` and set a commit identity; all git output is discarded.
+	const quiet = { cwd: base, stdio: "ignore" } as const;
+
+	execSync("git init", quiet);
+	execSync("git config user.email test@example.com", quiet);
+	execSync("git config user.name Test", quiet);
+}
+
 afterEach(() => {
 	// Restore cwd if changed during tests
 	if (savedCwd && process.cwd() !== savedCwd) {
@@ -69,12 +88,16 @@ function createMockCtx() {
 }
 
 function createMockPi() {
+	const messages: unknown[] = []; // every sendMessage() payload, in call order
 	return {
+		messages, // exposed so tests can assert on what was sent
 		registerCommand() {},
 		registerTool() {},
 		registerShortcut() {},
 		on() {},
-		sendMessage() {},
+		sendMessage(message: unknown) {
+			messages.push(message);
+		},
 	};
 }
 
@@ -122,6 +145,24 @@ describe("workflow catalog registration", () => {
 		assert.ok(labels.includes("model"), "should include model completion");
 	});
 
+	it("autonomous appears in TOP_LEVEL_SUBCOMMANDS and auto remains an alias", () => {
+		// Look up a top-level subcommand entry by its command name.
+		const lookup = (name: string) => TOP_LEVEL_SUBCOMMANDS.find((entry) => entry.cmd === name);
+		const autonomous = lookup("autonomous");
+		const auto = lookup("auto");
+		assert.ok(autonomous, "autonomous should be in TOP_LEVEL_SUBCOMMANDS");
+		assert.match(autonomous!.desc, /Autonomous mode/i);
+		assert.ok(auto, "auto alias should remain in TOP_LEVEL_SUBCOMMANDS");
+		assert.match(auto!.desc, /alias/i);
+	});
+
+	it("getSfArgumentCompletions supports autonomous flags", () => {
+		// Both flags must be offered after "autonomous ".
+		const offered = getSfArgumentCompletions("autonomous ").map((c: any) => c.label);
+		assert.ok(offered.includes("--verbose"), "should include verbose flag");
+		assert.ok(offered.includes("--debug"), "should include debug flag");
+	});
+
 	it("workflow appears in TOP_LEVEL_SUBCOMMANDS", () => {
 		const entry = TOP_LEVEL_SUBCOMMANDS.find((c) => c.cmd === "workflow");
 		assert.ok(entry, "workflow should be in TOP_LEVEL_SUBCOMMANDS");
@@ -189,6 +230,105 @@ describe("workflow catalog registration", () => {
 		);
 		assert.ok(!labels.includes("test-suite"), "should not include test-suite");
 	});
+
+	it("getSfArgumentCompletions('start ') includes every registry template", () => {
+		const offered = getSfArgumentCompletions("start ").map((c: any) => c.label);
+		// Every template id in the registry must be completable.
+		const templateIds = Object.keys(loadRegistry().templates);
+		for (const templateId of templateIds) {
+			assert.ok(offered.includes(templateId), `missing start completion: ${templateId}`);
+		}
+		assert.ok(offered.includes("resume"), "should include resume command");
+		assert.ok(offered.includes("--dry-run"), "should include dry-run flag");
+	});
+});
+
+// ─── /sf start Template Runs ─────────────────────────────────────────────
+
+describe("workflow template start command", () => {
+	// Runs the workflow handler with cwd set to `base` and returns what the mocks captured.
+	async function callStart(trimmed: string, base: string) {
+		process.chdir(base);
+		const handlers = await import("../commands/handlers/workflow.ts");
+		const mockCtx = createMockCtx();
+		const mockPi = createMockPi();
+		const handled = await handlers.handleWorkflowCommand(trimmed, mockCtx as any, mockPi as any);
+		const { notifications } = mockCtx;
+		return { handled, notifications, messages: mockPi.messages };
+	}
+
+	it("creates a graph-backed run for product-plan without sending a raw prompt", async () => {
+		const base = makeTmpBase();
+		initGitRepo(base); // NOTE(review): presumably the template start path needs a git repo — confirm
+		const { handled, notifications, messages } = await callStart(
+			"start product-plan plan the product",
+			base,
+		);
+
+		assert.ok(handled, "start command should be handled");
+		assert.equal(
+			messages.length,
+			0,
+			"normal template start should not send a raw prompt",
+		);
+		assert.ok(
+			notifications.some((n) =>
+				n.message.includes("Starting workflow: Product Plan"),
+			),
+			"should notify that product-plan started",
+		);
+
+		const runsRoot = join(base, ".sf", "workflow-runs", "product-plan");
+		assert.ok(existsSync(runsRoot), "product-plan run root should exist");
+		const [timestamp] = readdirSync(runsRoot); // first entry under the run root
+		assert.ok(timestamp, "should create timestamped run dir");
+		const runDir = join(runsRoot, timestamp);
+		assert.ok(existsSync(join(runDir, "DEFINITION.yaml")));
+		assert.ok(existsSync(join(runDir, "GRAPH.yaml")));
+		assert.ok(existsSync(join(runDir, "RUN.json")));
+
+		const runMeta = JSON.parse(readFileSync(join(runDir, "RUN.json"), "utf-8"));
+		assert.equal(runMeta.kind, "template");
+		assert.equal(runMeta.mode, "guided");
+		assert.equal(runMeta.templateId, "product-plan");
+	});
+
+	it("dry-run product-plan creates no workflow run", async () => {
+		const base = makeTmpBase(); // no git repo is initialized for this case
+		const { handled, notifications } = await callStart(
+			"start --dry-run product-plan plan the product",
+			base,
+		);
+
+		assert.ok(handled, "dry-run start command should be handled");
+		assert.ok(
+			notifications.some((n) => n.message.includes("DRY RUN")),
+			"should report dry-run output",
+		);
+		assert.ok(
+			!existsSync(join(base, ".sf", "workflow-runs", "product-plan")), // run root must not exist at all
+			"dry-run should not create a product-plan run",
+		);
+	});
+
+	it("full-project keeps the existing prompt-dispatch special case", async () => {
+		const base = makeTmpBase();
+		const { handled, messages } = await callStart(
+			"start full-project new app",
+			base,
+		);
+
+		assert.ok(handled, "full-project start should be handled");
+		assert.equal(
+			messages.length,
+			1, // exactly one payload captured by the mock's sendMessage
+			"full-project should still send a setup prompt",
+		);
+		assert.ok(
+			!existsSync(join(base, ".sf", "workflow-runs", "full-project")),
+			"full-project should not create a template run yet",
+		);
+	});
 });
 
 // ─── Command Handler Tests ───────────────────────────────────────────────
@@ -330,6 +470,41 @@ describe("workflow command handler", () => {
 		);
 	});
 
+	it("'/sf workflow list' shows template and definition source metadata", async () => {
+		const base = makeTmpBase();
+		writeDefinition(base, "deploy-pipeline", SIMPLE_DEF);
+		process.chdir(base);
+
+		const { createRun, createRunFromDefinition } = await import(
+			"../run-manager.ts"
+		);
+		createRun(base, "deploy-pipeline"); // createRun records kind "definition"
+		createRunFromDefinition(
+			base,
+			"product-plan",
+			{
+				version: 1,
+				name: "product-plan",
+				steps: [
+					{
+						id: "model",
+						name: "model",
+						prompt: "Model product",
+						requires: [],
+						produces: [],
+					},
+				],
+			},
+			{ kind: "template", mode: "guided", templateId: "product-plan" }, // createdAt is added by the callee
+		);
+
+		const { handled, notifications } = await callHandler("workflow list");
+		assert.ok(handled, "workflow list should be handled");
+		const output = notifications.map((n) => n.message).join("\n");
+		assert.match(output, /deploy-pipeline.*\[definition\]/);
+		assert.match(output, /product-plan.*\[template\]/);
+	});
+
 	it("non-workflow commands are not intercepted by custom workflow routing", async () => {
 		const { handleWorkflowCommand } = await import(
 			"../commands/handlers/workflow.ts"
diff --git a/src/resources/extensions/sf/tests/complete-slice.test.ts b/src/resources/extensions/sf/tests/complete-slice.test.ts
index 6fc710d19..d7bf3c525 100644
--- a/src/resources/extensions/sf/tests/complete-slice.test.ts
+++ b/src/resources/extensions/sf/tests/complete-slice.test.ts
@@ -151,7 +151,7 @@ console.log("\n=== complete-slice: schema v6 migration ===");
 	const versionRow = adapter
 		.prepare("SELECT MAX(version) as v FROM schema_version")
 		.get();
-	assertEq(versionRow?.["v"], 20, "schema version should be 20");
+	assertEq(versionRow?.["v"], 21, "schema version should be 21");
 
 	// Verify slices table has full_summary_md and full_uat_md columns
 	const cols = adapter.prepare("PRAGMA table_info(slices)").all();
diff --git a/src/resources/extensions/sf/tests/custom-engine-loop-integration.test.ts b/src/resources/extensions/sf/tests/custom-engine-loop-integration.test.ts
index a4221fa05..e34b2cf70 100644
--- a/src/resources/extensions/sf/tests/custom-engine-loop-integration.test.ts
+++ b/src/resources/extensions/sf/tests/custom-engine-loop-integration.test.ts
@@ -75,6 +75,7 @@ function writeDefinition(
 	runDir: string,
 	steps: GraphStep[],
 	name = "test-wf",
+	verifyByStep: Record<string, unknown> = {},
 ): void {
 	const def = {
 		version: 1,
@@ -86,6 +87,7 @@ function writeDefinition(
 			prompt: s.prompt ?? `Do ${s.id}`,
 			produces: `${s.id}/output.md`,
 			...(s.dependsOn?.length ? { requires: s.dependsOn } : {}),
+			...(verifyByStep[s.id] ? { verify: verifyByStep[s.id] } : {}),
 		})),
 	};
 	writeFileSync(join(runDir, "DEFINITION.yaml"), stringify(def));
@@ -477,6 +479,102 @@ describe("Custom engine loop integration", () => {
 		);
 	});
 
+	it("persists custom verification retries and stops after retry exhaustion", async () => {
+		_resetPendingResolve();
+
+		const runDir = makeTmpDir();
+		const graph = makeGraph([makeStep({ id: "only" })], "retry-wf");
+		writeGraph(runDir, graph);
+		writeDefinition(runDir, graph.steps, "retry-wf", {
+			only: { policy: "shell-command", command: "test -f missing-output" }, // checks for a file this test never creates
+		});
+
+		const ctx = makeMockCtx();
+		const pi = makeMockPi();
+		const s = makeLoopSession({
+			activeEngineId: "custom",
+			activeRunDir: runDir,
+			basePath: runDir,
+		});
+
+		const deps = makeMockDeps({
+			stopAuto: async (_ctx, _pi, reason) => {
+				deps.callLog.push(`stopAuto:${reason ?? "no-reason"}`);
+				s.active = false;
+			},
+		});
+
+		const loopPromise = autoLoop(ctx, pi, s, deps);
+
+		for (let i = 0; i < 4; i++) { // four agent turns, matching the four dispatches asserted below
+			await new Promise((r) => setTimeout(r, 80));
+			resolveAgentEnd({ messages: [{ role: "assistant" }] });
+		}
+
+		await loopPromise;
+
+		assert.equal(pi.calls.length, 4, "should dispatch through retry exhaustion");
+		assert.equal(
+			s.verificationRetryCount.get("custom-step/retry-wf/only"),
+			4,
+			"retry count should persist in the active session map",
+		);
+		assert.ok(
+			existsSync(join(runDir, "runtime", "custom-verify-retries.json")),
+			"retry count should persist under the workflow run directory",
+		);
+		assert.ok(
+			deps.callLog.some((entry) =>
+				entry.includes("requested retry 4 times without passing"),
+			),
+			"loop should stop with retry exhaustion reason",
+		);
+
+		const finalGraph = readGraph(runDir);
+		assert.equal(finalGraph.steps[0].status, "active"); // the step is left active, not completed
+	});
+
+	it("clears custom verification retry state after successful verification", async () => {
+		_resetPendingResolve();
+
+		const runDir = makeTmpDir();
+		const graph = makeGraph([makeStep({ id: "only" })], "clear-retry-wf");
+		writeGraph(runDir, graph);
+		writeDefinition(runDir, graph.steps, "clear-retry-wf"); // no verify policy is attached to the step
+
+		const ctx = makeMockCtx();
+		const pi = makeMockPi();
+		const s = makeLoopSession({
+			activeEngineId: "custom",
+			activeRunDir: runDir,
+			basePath: runDir,
+		});
+		s.verificationRetryCount.set("custom-step/clear-retry-wf/only", 2); // pre-seeded count the loop is expected to clear
+
+		const deps = makeMockDeps({
+			stopAuto: async (_ctx, _pi, reason) => {
+				deps.callLog.push(`stopAuto:${reason ?? "no-reason"}`);
+				s.active = false;
+			},
+		});
+
+		const loopPromise = autoLoop(ctx, pi, s, deps);
+
+		await new Promise((r) => setTimeout(r, 80));
+		resolveAgentEnd({ messages: [{ role: "assistant" }] });
+
+		await loopPromise;
+
+		assert.equal(
+			s.verificationRetryCount.has("custom-step/clear-retry-wf/only"),
+			false,
+		);
+		assert.equal(
+			existsSync(join(runDir, "runtime", "custom-verify-retries.json")),
+			false,
+		);
+	});
+
 	it("respects dependency ordering — step-b waits for step-a", async () => {
 		_resetPendingResolve();
 
diff --git a/src/resources/extensions/sf/tests/custom-workflow-engine.test.ts b/src/resources/extensions/sf/tests/custom-workflow-engine.test.ts
index 79729acc8..309ded9a1 100644
--- a/src/resources/extensions/sf/tests/custom-workflow-engine.test.ts
+++ b/src/resources/extensions/sf/tests/custom-workflow-engine.test.ts
@@ -135,7 +135,7 @@ describe("CustomWorkflowEngine.deriveState", () => {
 
 describe("CustomWorkflowEngine.resolveDispatch", () => {
 	it("returns dispatch for first pending step", async () => {
-		const { engine } = setupEngine(
+		const { engine, runDir } = setupEngine(
 			[
 				makeStep({ id: "step-1", prompt: "Do the first thing" }),
 				makeStep({ id: "step-2", dependsOn: ["step-1"] }),
@@ -154,6 +154,11 @@ describe("CustomWorkflowEngine.resolveDispatch", () => {
 			assert.equal(dispatch.step.unitId, "my-workflow/step-1");
 			assert.equal(dispatch.step.prompt, "Do the first thing");
 		}
+
+		const graph = readGraph(runDir);
+		assert.equal(graph.steps[0].status, "active");
+		assert.ok(graph.steps[0].startedAt);
+		assert.equal(graph.steps[1].status, "pending");
 	});
 
 	it("returns stop when all steps are complete", async () => {
@@ -217,6 +222,57 @@ describe("CustomWorkflowEngine.resolveDispatch", () => {
 			assert.equal(dispatch.step.unitId, "dep-wf/b");
 		}
 	});
+
+	it("re-dispatches an active step after restart instead of selecting another pending step", async () => {
+		const { engine, runDir } = setupEngine(
+			[
+				makeStep({
+					id: "a",
+					status: "active", // step "a" is already active before any dispatch in this test
+					startedAt: "2026-01-01T01:00:00.000Z",
+				}),
+				makeStep({ id: "b" }),
+			],
+			"resume-wf",
+		);
+
+		const state = await engine.deriveState("/unused");
+		const dispatch = await engine.resolveDispatch(state, {
+			basePath: "/unused",
+		});
+
+		assert.equal(dispatch.action, "dispatch");
+		if (dispatch.action === "dispatch") {
+			assert.equal(dispatch.step.unitId, "resume-wf/a");
+		}
+
+		const graph = readGraph(runDir);
+		assert.equal(graph.steps[0].status, "active");
+		assert.equal(graph.steps[0].startedAt, "2026-01-01T01:00:00.000Z"); // original start time is preserved
+		assert.equal(graph.steps[1].status, "pending");
+	});
+
+	it("does not select a second pending step while the first dispatch is active", async () => {
+		const { engine, runDir } = setupEngine(
+			[makeStep({ id: "a" }), makeStep({ id: "b" })], // "b" does not depend on "a"
+			"single-active-wf",
+		);
+
+		const state = await engine.deriveState("/unused");
+		const first = await engine.resolveDispatch(state, { basePath: "/unused" });
+		const second = await engine.resolveDispatch(state, { basePath: "/unused" }); // same state object reused for the second call
+
+		assert.equal(first.action, "dispatch");
+		assert.equal(second.action, "dispatch");
+		if (first.action === "dispatch" && second.action === "dispatch") {
+			assert.equal(first.step.unitId, "single-active-wf/a");
+			assert.equal(second.step.unitId, "single-active-wf/a");
+		}
+
+		const graph = readGraph(runDir);
+		assert.equal(graph.steps[0].status, "active");
+		assert.equal(graph.steps[1].status, "pending");
+	});
 });
 
 // ─── reconcile ───────────────────────────────────────────────────────────
diff --git a/src/resources/extensions/sf/tests/graph-operations.test.ts b/src/resources/extensions/sf/tests/graph-operations.test.ts
index e5e28f785..d85d44c02 100644
--- a/src/resources/extensions/sf/tests/graph-operations.test.ts
+++ b/src/resources/extensions/sf/tests/graph-operations.test.ts
@@ -24,6 +24,7 @@ import {
 	getNextPendingStep,
 	graphFromDefinition,
 	initializeGraph,
+	markStepActive,
 	markStepComplete,
 	readGraph,
 	type WorkflowGraph,
@@ -304,6 +305,49 @@ describe("markStepComplete", () => {
 	});
 });
 
+// ─── markStepActive ──────────────────────────────────────────────────────
+
+describe("markStepActive", () => {
+	it("returns new graph with step status 'active' and startedAt timestamp", () => {
+		const original = makeGraph([makeStep({ id: "a" }), makeStep({ id: "b" })]);
+
+		const updated = markStepActive(original, "a");
+
+		assert.equal(original.steps[0].status, "pending"); // the input graph must not be mutated
+		assert.equal(updated.steps[0].status, "active");
+		assert.ok(updated.steps[0].startedAt);
+		assert.ok(!Number.isNaN(Date.parse(updated.steps[0].startedAt!))); // startedAt must be a parseable date string
+		assert.equal(updated.steps[1].status, "pending");
+	});
+
+	it("preserves existing startedAt timestamp when redispatching active work", () => {
+		const graph = makeGraph([
+			makeStep({
+				id: "a",
+				status: "active",
+				startedAt: "2026-01-01T01:00:00.000Z",
+			}),
+		]);
+
+		const updated = markStepActive(graph, "a");
+
+		assert.equal(updated.steps[0].status, "active");
+		assert.equal(updated.steps[0].startedAt, "2026-01-01T01:00:00.000Z");
+	});
+
+	it("throws for unknown step ID", () => {
+		const graph = makeGraph([makeStep({ id: "a" })]);
+		assert.throws(
+			() => markStepActive(graph, "missing"),
+			(err: Error) => {
+				assert.ok(err.message.includes("Step not found"));
+				assert.ok(err.message.includes("missing")); // the message must name the offending id
+				return true;
+			},
+		);
+	});
+});
+
 // ─── expandIteration ─────────────────────────────────────────────────────
 
 describe("expandIteration", () => {
diff --git a/src/resources/extensions/sf/tests/integration/git-service.test.ts b/src/resources/extensions/sf/tests/integration/git-service.test.ts
index 8fa45fcb9..7fe1cdeeb 100644
--- a/src/resources/extensions/sf/tests/integration/git-service.test.ts
+++ b/src/resources/extensions/sf/tests/integration/git-service.test.ts
@@ -28,6 +28,7 @@ import {
 	writeIntegrationBranch,
 } from "../../git-service.ts";
 import { nativeAddAllWithExclusions } from "../../native-git-bridge.ts";
+import { loadRegistry } from "../../workflow-templates.ts";
 
 function run(command: string, cwd: string): string {
 	return execSync(command, {
@@ -1183,62 +1184,14 @@ describe("git-service", async () => {
 	test("Integration branch: rejects workflow-template branches", () => {
 		const repo = initBranchTestRepo();
 
-		// All 8 registered workflow templates should be rejected
-		writeIntegrationBranch(repo, "M001", "sf/hotfix/fix-login");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"hotfix branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/bugfix/null-pointer");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"bugfix branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/small-feature/add-button");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"small-feature branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/refactor/rename-module");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"refactor branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/spike/evaluate-lib");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"spike branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/security-audit/owasp-scan");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"security-audit branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/dep-upgrade/bump-react");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"dep-upgrade branch is not recorded",
-		);
-
-		writeIntegrationBranch(repo, "M001", "sf/full-project/new-app");
-		assert.deepStrictEqual(
-			readIntegrationBranch(repo, "M001"),
-			null,
-			"full-project branch is not recorded",
-		);
+		for (const id of Object.keys(loadRegistry().templates)) { // driven by the registry rather than a hard-coded list
+			writeIntegrationBranch(repo, "M001", `sf/${id}/example`);
+			assert.deepStrictEqual(
+				readIntegrationBranch(repo, "M001"),
+				null,
+				`${id} branch is not recorded`,
+			);
+		}
 
 		rmSync(repo, { recursive: true, force: true });
 	});
diff --git a/src/resources/extensions/sf/tests/md-importer.test.ts b/src/resources/extensions/sf/tests/md-importer.test.ts
index 98c146fa0..483b8de80 100644
--- a/src/resources/extensions/sf/tests/md-importer.test.ts
+++ b/src/resources/extensions/sf/tests/md-importer.test.ts
@@ -522,8 +522,8 @@ test("md-importer: schema v1→v2 migration", () => {
 		.get();
 	assert.deepStrictEqual(
 		version?.v,
-		16,
-		"new DB should be at schema version 16",
+		21,
+		"new DB should be at schema version 21",
 	);
 
 	// Artifacts table should exist
diff --git a/src/resources/extensions/sf/tests/memory-sleeper.test.ts b/src/resources/extensions/sf/tests/memory-sleeper.test.ts
new file mode 100644
index 000000000..900ee0251
--- /dev/null
+++ b/src/resources/extensions/sf/tests/memory-sleeper.test.ts
@@ -0,0 +1,113 @@
+import assert from "node:assert/strict";
+import { afterEach, describe, it } from "node:test";
+import type { ToolResultEvent } from "@singularity-forge/pi-coding-agent";
+import {
+	observeMemorySleeperToolResult,
+	resetMemorySleeper,
+} from "../memory-sleeper.ts";
+
+// Fixture: a `bash` tool_result event for the given command.
+// `text` falls back to "" and `isError` to false when omitted.
+function bashResult(args: { command: string; text?: string; isError?: boolean }): ToolResultEvent {
+	const text = args.text ?? "";
+	const isError = args.isError ?? false;
+	return {
+		type: "tool_result",
+		toolName: "bash",
+		toolCallId: "tool-1",
+		input: { command: args.command },
+		content: [{ type: "text", text }],
+		details: undefined,
+		isError,
+	} as ToolResultEvent;
+}
+
+function customResult(text: string): ToolResultEvent { // fixture: non-bash (`sf_auto`) tool result
+	return {
+		type: "tool_result",
+		toolName: "sf_auto",
+		toolCallId: "tool-2",
+		input: {},
+		content: [{ type: "text", text }], // `text` is the only content part
+		details: undefined,
+		isError: true, // this fixture is always reported as an error
+	} as ToolResultEvent;
+}
+
+describe("memory sleeper steering", () => {
+	afterEach(() => {
+		resetMemorySleeper();
+	});
+
+	it("steers bun commands toward Node/npm and dedupes the correction", () => {
+		const first = observeMemorySleeperToolResult(
+			bashResult({ command: "bun run test", isError: false }),
+		);
+		const second = observeMemorySleeperToolResult(
+			bashResult({ command: "bun run lint", isError: false }),
+		);
+		// Only the first bun command yields a steer; the second returns undefined.
+		assert.equal(first?.key, "bun-command");
+		assert.match(first?.content ?? "", /Node\/npm/);
+		assert.equal(second, undefined);
+	});
+
+	it("does not require an error to catch forbidden command choices", () => {
+		const steer = observeMemorySleeperToolResult(
+			bashResult({ command: "bunx biome check src", isError: false }),
+		);
+		// isError is false here, yet a warning-severity steer is still expected.
+		assert.equal(steer?.severity, "warning");
+		assert.match(steer?.content ?? "", /Replace Bun commands/);
+	});
+
+	it("steers on repeated identical bash failures", () => {
+		const first = observeMemorySleeperToolResult(
+			bashResult({
+				command: "npm run test:sf-light",
+				text: "suite failed",
+				isError: true,
+			}),
+		);
+		const second = observeMemorySleeperToolResult(
+			bashResult({
+				command: "npm run test:sf-light",
+				text: "suite failed again",
+				isError: true,
+			}),
+		);
+		const third = observeMemorySleeperToolResult(
+			bashResult({
+				command: "npm run test:sf-light",
+				text: "suite failed a third time",
+				isError: true,
+			}),
+		);
+		// A steer is expected on the second failure only, not the first or third.
+		assert.equal(first, undefined);
+		assert.match(second?.content ?? "", /same bash command has failed 2 times/);
+		assert.equal(third, undefined);
+	});
+
+	it("recognizes the dispatcher import failure from command extension startup", () => {
+		const steer = observeMemorySleeperToolResult(
+			customResult(
+				"Extension command:sf error: Cannot find module '/home/mhugo/.sf/agent/extensions/sf/commands/dispatcher.js' imported from index.js",
+			),
+		);
+		// The module-not-found text should map to the sf-dispatcher-import steer.
+		assert.equal(steer?.key, "sf-dispatcher-import");
+		assert.match(steer?.content ?? "", /extension sync\/build path/);
+	});
+
+	it("recognizes the exec-policy rules permission trap", () => {
+		const steer = observeMemorySleeperToolResult(
+			customResult(
+				"Failed to apply execpolicy amendment: failed to update rules file /home/mhugo/.codex/rules/default.rules: Permission denied",
+			),
+		);
+		// The rules-file permission error should map to exec-policy-permission.
+		assert.equal(steer?.key, "exec-policy-permission");
+		assert.match(steer?.content ?? "", /read-only symlink/);
+	});
+});
diff --git a/src/resources/extensions/sf/tests/memory-store.test.ts b/src/resources/extensions/sf/tests/memory-store.test.ts
index 973ef8e34..4a249e8ce 100644
--- a/src/resources/extensions/sf/tests/memory-store.test.ts
+++ b/src/resources/extensions/sf/tests/memory-store.test.ts
@@ -511,11 +511,11 @@ test("memory-store: schema includes memories table", () => {
 		"active_memories view should exist",
 	);
 
-	// Verify schema version is 16 (UOK gate/git/audit projection tables included)
+	// Verify schema version is current
 	const version = adapter
 		.prepare("SELECT MAX(version) as v FROM schema_version")
 		.get();
-	assert.deepStrictEqual(version?.["v"], 16, "schema version should be 16");
+	assert.deepStrictEqual(version?.["v"], 21, "schema version should be 21");
 
 	closeDatabase();
 });
diff --git a/src/resources/extensions/sf/tests/phases-merge-error-stops-auto.test.ts b/src/resources/extensions/sf/tests/phases-merge-error-stops-auto.test.ts
index 4b8c01af6..34d2fa16b 100644
--- a/src/resources/extensions/sf/tests/phases-merge-error-stops-auto.test.ts
+++ b/src/resources/extensions/sf/tests/phases-merge-error-stops-auto.test.ts
@@ -83,7 +83,7 @@ assertTrue(
 
 // Each non-conflict block should call ctx.ui.notify with error severity
 const notifyErrorPattern =
-	/Merge failed:.*Resolve and run \/sf auto to resume/g;
+	/Merge failed:.*Resolve and run \/sf autonomous to resume/g;
 const notifyCount = [...phasesSrc.matchAll(notifyErrorPattern)].length;
 assertTrue(
 	notifyCount >= 3,
diff --git a/src/resources/extensions/sf/tests/run-manager.test.ts b/src/resources/extensions/sf/tests/run-manager.test.ts
index 76b6d10b3..b550c7888 100644
--- a/src/resources/extensions/sf/tests/run-manager.test.ts
+++ b/src/resources/extensions/sf/tests/run-manager.test.ts
@@ -19,7 +19,7 @@ import { join } from "node:path";
 import { afterEach, describe, it } from "node:test";
 import { parse } from "yaml";
 
-import { createRun, listRuns } from "../run-manager.ts";
+import { createRun, createRunFromDefinition, listRuns } from "../run-manager.ts";
 
 // ─── Helpers ─────────────────────────────────────────────────────────────
 
@@ -183,6 +183,63 @@ describe("createRun", () => {
 	});
 });
 
+// ─── createRunFromDefinition ─────────────────────────────────────────────
+
+describe("createRunFromDefinition", () => {
+	it("creates a graph-backed run from an in-memory definition with source metadata", () => {
+		const base = makeTmpBase();
+		const runDir = createRunFromDefinition(
+			base,
+			"product-plan",
+			{
+				version: 1,
+				name: "product-plan",
+				description: "Compiled template run",
+				steps: [
+					{
+						id: "model",
+						name: "model",
+						prompt: "Model the product",
+						requires: [],
+						produces: [],
+					},
+					{
+						id: "scope",
+						name: "scope",
+						prompt: "Scope the product",
+						requires: ["model"],
+						produces: [],
+					},
+				],
+			},
+			{
+				kind: "template",
+				mode: "guided",
+				templateId: "product-plan",
+				description: "plan the product",
+				issueRef: "ISSUE-123",
+				artifactDir: ".sf/workflows/product-plan/example",
+				branch: "sf/product-plan/example",
+			},
+		);
+		// The run directory should hold the definition, graph, and run metadata files.
+		assert.ok(existsSync(join(runDir, "DEFINITION.yaml")));
+		assert.ok(existsSync(join(runDir, "GRAPH.yaml")));
+		assert.ok(existsSync(join(runDir, "RUN.json")));
+		// GRAPH.yaml should carry the definition name and the scope -> model dependency.
+		const graphContent = parse(readFileSync(join(runDir, "GRAPH.yaml"), "utf-8"));
+		assert.equal(graphContent.metadata.name, "product-plan");
+		assert.equal(graphContent.steps[1].depends_on[0], "model");
+		// listRuns should surface the source metadata that was passed at creation.
+		const runs = listRuns(base, "product-plan");
+		assert.equal(runs.length, 1);
+		assert.equal(runs[0].source?.kind, "template");
+		assert.equal(runs[0].source?.mode, "guided");
+		assert.equal(runs[0].source?.templateId, "product-plan");
+		assert.equal(runs[0].source?.description, "plan the product");
+	});
+});
+
 // ─── listRuns ────────────────────────────────────────────────────────────
 
 describe("listRuns", () => {
@@ -208,6 +265,7 @@ describe("listRuns", () => {
 		assert.equal(runs[0].steps.pending, 2);
 		assert.equal(runs[0].steps.active, 0);
 		assert.equal(runs[0].status, "pending");
+		assert.equal(runs[0].source?.mode, "explicit");
 	});
 
 	it("filters by definition name", () => {
diff --git a/src/resources/extensions/sf/tests/sf-db.test.ts b/src/resources/extensions/sf/tests/sf-db.test.ts
index 713d817d0..1b623c664 100644
--- a/src/resources/extensions/sf/tests/sf-db.test.ts
+++ b/src/resources/extensions/sf/tests/sf-db.test.ts
@@ -83,8 +83,8 @@ describe("sf.db", () => {
 			.get();
 		assert.deepStrictEqual(
 			version?.["version"],
-			16,
-			"schema version should be 16",
+			21,
+			"schema version should be 21",
 		);
 
 		// Check tables exist by querying them
diff --git a/src/resources/extensions/sf/tests/start-auto-detached.test.ts b/src/resources/extensions/sf/tests/start-auto-detached.test.ts
index ca876ffc5..c5199ecf0 100644
--- a/src/resources/extensions/sf/tests/start-auto-detached.test.ts
+++ b/src/resources/extensions/sf/tests/start-auto-detached.test.ts
@@ -35,6 +35,10 @@ test("interactive command entrypoints use startAutoDetached instead of awaiting
 		autoHandlerSrc.includes("startAutoDetached("),
 		"auto command handler should launch auto-mode through startAutoDetached",
 	);
+	assert.ok(
+		autoHandlerSrc.includes('trimmed === "autonomous"'),
+		"auto command handler should accept /sf autonomous",
+	);
 	assert.ok(
 		workflowHandlerSrc.includes("startAutoDetached("),
 		"workflow handler should launch auto-mode through startAutoDetached",
diff --git a/src/resources/extensions/sf/tests/tool-naming.test.ts b/src/resources/extensions/sf/tests/tool-naming.test.ts
index 8270a6b90..f9d46f07f 100644
--- a/src/resources/extensions/sf/tests/tool-naming.test.ts
+++ b/src/resources/extensions/sf/tests/tool-naming.test.ts
@@ -147,4 +147,48 @@ for (const { canonical, alias } of RENAME_MAP) {
 	}
 }
 
+// ─── High-signal tool rendering ──────────────────────────────────────────────
+
+console.log("\n── Tool naming: milestone planning renderer summarizes work ──");
+
+{
+	const planMilestoneTool = pi.tools.find(
+		(t: any) => t.name === "sf_plan_milestone",
+	);
+	assert.equal(typeof planMilestoneTool?.renderCall, "function");
+	assert.equal(typeof planMilestoneTool?.renderResult, "function");
+	// Identity theme: bold/fg return the text unchanged so the regexes match it.
+	const fakeTheme = {
+		bold: (text: string) => text,
+		fg: (_name: string, text: string) => text,
+	};
+	const callComponent = planMilestoneTool.renderCall(
+		{
+			milestoneId: "M008",
+			title: "Workflow polish",
+			slices: [{ sliceId: "S01", title: "Improve tool cards" }],
+		},
+		fakeTheme,
+	);
+	assert.match(callComponent.text, /M008: Workflow polish/);
+	assert.match(callComponent.text, /1 slice/);
+	// The result card is rendered from the `details` payload of a tool result.
+	const resultComponent = planMilestoneTool.renderResult(
+		{
+			details: {
+				milestoneId: "M008",
+				title: "Workflow polish",
+				sliceCount: 1,
+				firstSliceId: "S01",
+				firstSliceTitle: "Improve tool cards",
+			},
+		},
+		{},
+		fakeTheme,
+	);
+	assert.match(resultComponent.text, /M008 planned: Workflow polish/);
+	assert.match(resultComponent.text, /1 slice/);
+	assert.match(resultComponent.text, /next S01: Improve tool cards/);
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
diff --git a/src/resources/extensions/sf/tests/update-command.test.ts b/src/resources/extensions/sf/tests/update-command.test.ts
index d5255bade..e63acf3a7 100644
--- a/src/resources/extensions/sf/tests/update-command.test.ts
+++ b/src/resources/extensions/sf/tests/update-command.test.ts
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
 import test from "node:test";
 
 import { registerSFCommand } from "../commands.ts";
+import { handleUpdate } from "../commands-handlers.ts";
 
 function createMockPi() {
 	const commands = new Map<string, any>();
@@ -19,18 +20,47 @@ function createMockPi() {
 
 function _createMockCtx() {
 	const notifications: { message: string; level: string }[] = [];
+	let reloadCount = 0; // counts reload() calls; read through the getter below
 	return {
 		notifications,
+		get reloadCount() {
+			return reloadCount;
+		},
 		ui: {
 			notify(message: string, level: string) {
 				notifications.push({ message, level });
 			},
 			custom: async () => {},
 		},
+		reload: async () => {
+			reloadCount++; // record the call; the mock does nothing else
+		},
 		shutdown: async () => {},
 	};
 }
 
+test("/sf update reloads current session after successful install", async () => {
+	const ctx = _createMockCtx();
+	let installedCommand = "";
+	// Inject fixed version info and an installer that only records its command.
+	await handleUpdate(ctx as any, {
+		currentVersion: "1.0.0",
+		fetchLatestVersion: async () => "1.0.1",
+		install: (command) => {
+			installedCommand = command;
+		},
+	});
+	// Expect an sf-run@latest install, exactly one reload, and a reload notice.
+	assert.match(installedCommand, /sf-run@latest/);
+	assert.equal(ctx.reloadCount, 1);
+	assert.ok(
+		ctx.notifications.some((entry) =>
+			entry.message.includes("Reloaded current session"),
+		),
+		"successful update should reload the current session automatically",
+	);
+});
+
 test("/sf update appears in subcommand completions", () => {
 	const pi = createMockPi();
 	registerSFCommand(pi as any);
diff --git a/src/resources/extensions/sf/tests/workflow-template-compiler.test.ts b/src/resources/extensions/sf/tests/workflow-template-compiler.test.ts
new file mode 100644
index 000000000..198e0e64c
--- /dev/null
+++ b/src/resources/extensions/sf/tests/workflow-template-compiler.test.ts
@@ -0,0 +1,72 @@
+/**
+ * workflow-template-compiler.test.ts — Tests for `/sf start` template compilation.
+ *
+ * Verifies that registry templates compile into valid graph-runtime workflow
+ * definitions without requiring users to author YAML files.
+ */
+
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { validateDefinition } from "../definition-loader.ts";
+import { compileTemplateRun } from "../workflow-template-compiler.ts";
+import { loadWorkflowTemplate, resolveByName } from "../workflow-templates.ts";
+
+test("compileTemplateRun turns product-plan into a valid linear workflow definition", () => {
+	const match = resolveByName("product-plan");
+	assert.ok(match, "product-plan should resolve");
+	const workflowContent = loadWorkflowTemplate("product-plan");
+	assert.ok(workflowContent, "product-plan template should load");
+	// Compile in guided mode with a fixed description, issue, branch, and date.
+	const definition = compileTemplateRun({
+		templateId: match.id,
+		template: match.template,
+		workflowContent,
+		description: "plan the product we need to develop",
+		issueRef: "ISSUE-123",
+		artifactDir: ".sf/workflows/product-plan/260501-1-plan",
+		branch: "sf/product-plan/plan",
+		date: "2026-05-01",
+		mode: "guided",
+	});
+	// The result must validate and expose the four phases as a linear chain.
+	const validation = validateDefinition(definition);
+	assert.equal(
+		validation.valid,
+		true,
+		`definition should validate: ${validation.errors.join("; ")}`,
+	);
+	assert.equal(definition.name, "product-plan");
+	assert.deepEqual(
+		definition.steps.map((step) => step.id),
+		["model", "scope", "slice", "ready"],
+	);
+	assert.deepEqual(definition.steps[0].requires, []);
+	assert.deepEqual(definition.steps[1].requires, ["model"]);
+	assert.ok(
+		definition.steps[0].prompt.includes("Product Plan Workflow"),
+		"step prompt should include template content",
+	);
+	assert.ok(
+		definition.steps[0].prompt.includes("plan the product we need to develop"),
+		"step prompt should include user description",
+	);
+	assert.ok(
+		definition.steps[0].prompt.includes("Current phase: model"),
+		"step prompt should name current phase",
+	);
+	assert.ok(
+		definition.steps[0].prompt.includes("This is a guided /sf start run"),
+		"guided prompt should permit clarifying questions",
+	);
+	assert.equal(
+		definition.steps.find((step) => step.id === "scope")?.verify?.policy,
+		"human-review",
+		"scope phase should include a guided review gate",
+	);
+	assert.equal(
+		definition.steps.find((step) => step.id === "ready")?.verify?.policy,
+		"human-review",
+		"ready phase should include a guided review gate",
+	);
+});
diff --git a/src/resources/extensions/sf/tests/workflow-templates.test.ts b/src/resources/extensions/sf/tests/workflow-templates.test.ts
index d00228eff..b14e21822 100644
--- a/src/resources/extensions/sf/tests/workflow-templates.test.ts
+++ b/src/resources/extensions/sf/tests/workflow-templates.test.ts
@@ -5,12 +5,14 @@
 import assert from "node:assert/strict";
 import {
 	autoDetect,
+	formatStartUsage,
 	getTemplateInfo,
 	listTemplates,
 	loadRegistry,
 	loadWorkflowTemplate,
 	resolveByName,
 	scaffoldMilestoneSlices,
+	workflowTemplateCommandDefinitions,
 } from "../workflow-templates.ts";
 
 // ═══════════════════════════════════════════════════════════════════════════
@@ -22,7 +24,11 @@ console.log("\n── Registry Loading ──");
 {
 	const registry = loadRegistry();
 	assert.ok(registry !== null, "Registry should load");
-	assert.deepStrictEqual(registry.version, 1, "Registry version should be 1");
+	assert.deepStrictEqual(
+		registry.schemaVersion,
+		1,
+		"Registry schemaVersion should be 1",
+	);
 	assert.ok(
 		Object.keys(registry.templates).length >= 8,
 		"Should have at least 8 templates",
@@ -38,6 +44,8 @@ console.log("\n── Registry Loading ──");
 		"hotfix",
 		"security-audit",
 		"dep-upgrade",
+		"product-plan",
+		"product-tracking",
 	];
 	for (const id of expectedIds) {
 		assert.ok(
@@ -68,6 +76,10 @@ console.log("\n── Registry Loading ──");
 			Array.isArray(entry.triggers) && entry.triggers.length > 0,
 			`${id}: triggers should be non-empty array`,
 		);
+		assert.ok(
+			loadWorkflowTemplate(id) !== null,
+			`${id}: registered template file should load`,
+		);
 	}
 }
 
@@ -114,6 +126,22 @@ console.log("\n── Resolve by Name ──");
 		'Alias "deps" should map to dep-upgrade',
 	);
 
+	const telemetry = resolveByName("telemetry");
+	assert.ok(telemetry !== null, 'Should resolve "telemetry" alias');
+	assert.deepStrictEqual(
+		telemetry!.id,
+		"product-tracking",
+		'Alias "telemetry" should map to product-tracking',
+	);
+
+	const product = resolveByName("product");
+	assert.ok(product !== null, 'Should resolve "product" alias');
+	assert.deepStrictEqual(
+		product!.id,
+		"product-plan",
+		'Alias "product" should map to product-plan',
+	);
+
 	// No match
 	const missing = resolveByName("nonexistent-template");
 	assert.ok(missing === null, "Should return null for unknown template");
@@ -161,6 +189,28 @@ console.log("\n── Auto-Detection ──");
 		"Should include dep-upgrade in matches",
 	);
 
+	// Should detect product-tracking from product analytics phrasing
+	const trackingMatches = autoDetect("create product analytics tracking plan");
+	assert.ok(
+		trackingMatches.length > 0,
+		'Should detect matches for "product analytics tracking plan"',
+	);
+	assert.ok(
+		trackingMatches.some((m) => m.id === "product-tracking"),
+		"Should include product-tracking in matches",
+	);
+
+	// Should detect product-plan from planning phrasing
+	const productPlanMatches = autoDetect("plan the product we need to develop");
+	assert.ok(
+		productPlanMatches.length > 0,
+		'Should detect matches for "plan the product we need to develop"',
+	);
+	assert.ok(
+		productPlanMatches.some((m) => m.id === "product-plan"),
+		"Should include product-plan in matches",
+	);
+
 	// Multi-word triggers should score higher
 	const projectMatches = autoDetect("create a new project from scratch");
 	const projectMatch = projectMatches.find((m) => m.id === "full-project");
@@ -190,9 +240,36 @@ console.log("\n── List Templates ──");
 	assert.ok(output.includes("bugfix"), "Should list bugfix");
 	assert.ok(output.includes("spike"), "Should list spike");
 	assert.ok(output.includes("hotfix"), "Should list hotfix");
+	assert.ok(
+		output.includes("product-tracking"),
+		"Should list product-tracking",
+	);
+	assert.ok(output.includes("product-plan"), "Should list product-plan");
 	assert.ok(output.includes("/sf start"), "Should include usage hint");
 }
 
+// ═══════════════════════════════════════════════════════════════════════════
+// Start Usage and Completions
+// ═══════════════════════════════════════════════════════════════════════════
+
+console.log("\n── Start Usage and Completions ──");
+
+{
+	const registry = loadRegistry();
+	const commandDefs = workflowTemplateCommandDefinitions();
+	const commandIds = commandDefs.map((entry) => entry.cmd);
+	for (const id of Object.keys(registry.templates)) {
+		assert.ok(commandIds.includes(id), `Start completions should include ${id}`);
+	}
+	// Usage text must mention product-plan and the separate YAML workflow command.
+	const usage = formatStartUsage();
+	assert.ok(usage.includes("product-plan"), "Usage should include product-plan");
+	assert.ok(
+		usage.includes("/sf workflow run"),
+		"Usage should distinguish YAML workflow definitions",
+	);
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Template Info
 // ═══════════════════════════════════════════════════════════════════════════
@@ -236,6 +313,31 @@ console.log("\n── Load Workflow Template ──");
 		"Should contain hotfix title",
 	);
 
+	const productTrackingContent = loadWorkflowTemplate("product-tracking");
+	assert.ok(
+		productTrackingContent !== null,
+		"Should load product-tracking template",
+	);
+	assert.ok(
+		productTrackingContent!.includes("Product Tracking Workflow"),
+		"Should contain product-tracking title",
+	);
+	assert.ok(
+		productTrackingContent!.includes("Phase 5: Implement Tracking"),
+		"Should contain implementation phase",
+	);
+
+	const productPlanContent = loadWorkflowTemplate("product-plan");
+	assert.ok(productPlanContent !== null, "Should load product-plan template");
+	assert.ok(
+		productPlanContent!.includes("Product Plan Workflow"),
+		"Should contain product-plan title",
+	);
+	assert.ok(
+		productPlanContent!.includes("Phase 3: Plan Implementation Slices"),
+		"Should contain implementation slice planning phase",
+	);
+
 	const missingContent = loadWorkflowTemplate("nonexistent");
 	assert.ok(missingContent === null, "Should return null for unknown template");
 }
@@ -261,6 +363,19 @@ console.log("\n── Milestone Scaffolding ──");
 	assert.ok(featureSlices !== null, "Should scaffold via alias");
 	assert.equal(featureSlices![0].title, "Define the user-facing contract");
 
+	const productPlanSlices = scaffoldMilestoneSlices("product-plan");
+	assert.ok(
+		productPlanSlices !== null,
+		"Should scaffold product-plan milestone slices",
+	);
+	assert.equal(
+		productPlanSlices!.length,
+		4,
+		"Product-plan scaffold should create 4 slices",
+	);
+	assert.equal(productPlanSlices![0].title, "Model the product and value flow");
+	assert.ok(productPlanSlices![2].depends.includes("S02"));
+
 	const missingScaffold = scaffoldMilestoneSlices("nonexistent");
 	assert.equal(missingScaffold, null, "Unknown template should not scaffold");
 }
diff --git a/src/resources/extensions/sf/tests/workspace-index.test.ts b/src/resources/extensions/sf/tests/workspace-index.test.ts
index 7cc4cf9e6..27f9262cc 100644
--- a/src/resources/extensions/sf/tests/workspace-index.test.ts
+++ b/src/resources/extensions/sf/tests/workspace-index.test.ts
@@ -22,7 +22,7 @@ test("workspace index: indexes active milestone/slice/task and suggests commands
 	);
 	writeFileSync(
 		join(sDir, "S01-PLAN.md"),
-		`# S01: Demo Slice\n\n**Goal:** Demo\n**Demo:** Demo\n\n## Must-Haves\n- done\n\n## Tasks\n- [ ] **T01: Implement thing** \`est:10m\`\n  Task is in progress.\n`,
+		`# S01: Demo Slice\n\n**Goal:** Demo\n**Demo:** Demo\n\n## Must-Haves\n- done\n\n## Adversarial Review\n\n### Partner Review\nThe plan is scoped and executable.\n\n### Combatant Review\nThe main risk is covered by the task boundary.\n\n### Architect Review\nThe implementation path is acceptable for the fixture.\n\n## Tasks\n- [ ] **T01: Implement thing** \`est:10m\`\n  Task is in progress.\n`,
 	);
 	writeFileSync(
 		join(sDir, "tasks", "T01-PLAN.md"),
@@ -42,7 +42,7 @@ test("workspace index: indexes active milestone/slice/task and suggests commands
 		assert.ok(suggestions.some((item) => item.value === "M001/S01/T01"));
 
 		const commands = await getSuggestedNextCommands(base);
-		assert.ok(commands.includes("/sf auto"));
+		assert.ok(commands.includes("/sf autonomous"));
 		assert.ok(commands.includes("/sf doctor M001/S01"));
 		assert.ok(commands.includes("/sf status"));
 	} finally {
diff --git a/src/resources/extensions/sf/tools/plan-milestone.ts b/src/resources/extensions/sf/tools/plan-milestone.ts
index 45d7cdbc1..4087eedcf 100644
--- a/src/resources/extensions/sf/tools/plan-milestone.ts
+++ b/src/resources/extensions/sf/tools/plan-milestone.ts
@@ -82,6 +82,10 @@ export interface PlanMilestoneParams {
 
 export interface PlanMilestoneResult {
 	milestoneId: string;
+	title: string;
+	sliceCount: number;
+	firstSliceId?: string;
+	firstSliceTitle?: string;
 	roadmapPath: string;
 }
 
@@ -518,6 +522,10 @@ export async function handlePlanMilestone(
 
 	return {
 		milestoneId: params.milestoneId,
+		title: params.title,
+		sliceCount: params.slices?.length ?? 0,
+		firstSliceId: params.slices?.[0]?.sliceId,
+		firstSliceTitle: params.slices?.[0]?.title,
 		roadmapPath,
 	};
 }
diff --git a/src/resources/extensions/sf/tools/workflow-tool-executors.ts b/src/resources/extensions/sf/tools/workflow-tool-executors.ts
index 6019f3e5b..498e7d9ab 100644
--- a/src/resources/extensions/sf/tools/workflow-tool-executors.ts
+++ b/src/resources/extensions/sf/tools/workflow-tool-executors.ts
@@ -713,11 +713,20 @@ export async function executePlanMilestone(
 		}
 		return {
 			content: [
-				{ type: "text", text: `Planned milestone ${result.milestoneId}` },
+				{
+					type: "text",
+					text: `Planned milestone ${result.milestoneId}${
+						result.title ? `: ${result.title}` : ""
+					}`,
+				},
 			],
 			details: {
 				operation: "plan_milestone",
 				milestoneId: result.milestoneId,
+				title: result.title,
+				sliceCount: result.sliceCount,
+				firstSliceId: result.firstSliceId,
+				firstSliceTitle: result.firstSliceTitle,
 				roadmapPath: result.roadmapPath,
 			},
 		};
diff --git a/src/resources/extensions/sf/workflow-template-compiler.ts b/src/resources/extensions/sf/workflow-template-compiler.ts
new file mode 100644
index 000000000..126f8a1a8
--- /dev/null
+++ b/src/resources/extensions/sf/workflow-template-compiler.ts
@@ -0,0 +1,101 @@
+/**
+ * workflow-template-compiler.ts — Compile `/sf start` templates into run definitions.
+ *
+ * Purpose: let prompt-oriented workflow templates enter the same graph-backed
+ * runtime as YAML workflow definitions while preserving the ergonomic `/sf start`
+ * authoring surface.
+ */
+
+import type { WorkflowDefinition } from "./definition-loader.js";
+import type { TemplateEntry } from "./workflow-templates.js";
+
+export interface CompileTemplateRunInput {
+	templateId: string; // becomes the definition `name`; echoed in every step prompt
+	template: TemplateEntry; // supplies name, description, phases, and interaction gates
+	workflowContent: string; // template text appended verbatim to every step prompt
+	description: string; // prompt shows "(none provided)" when empty
+	issueRef: string | null; // prompt shows "(none)" when null or empty
+	artifactDir: string; // prompt shows "(none)" when empty
+	branch: string; // echoed in every step prompt as-is
+	date: string; // echoed in every step prompt as-is
+	mode?: "guided" | "autonomous" | "explicit"; // only "guided" adds the question text and review gates
+}
+
+function stepIdForPhase(phase: string, index: number): string {
+	// Slugify: lowercase, then collapse each non-alphanumeric run into one "-".
+	const hyphenated = phase.toLowerCase().replace(/[^a-z0-9]+/g, "-");
+	const trimmed = hyphenated.replace(/^-|-$/g, "");
+	// Cap at 40 chars; the cut can expose a new trailing hyphen, so strip it.
+	const capped = trimmed.slice(0, 40).replace(/-$/, "");
+	// A phase with no usable characters falls back to a 1-based positional id.
+	return capped || `phase-${index + 1}`;
+}
+
+/** Render the prompt text for one phase of a compiled template run. */
+function phasePrompt(input: CompileTemplateRunInput, phase: string): string {
+	// Guided runs add an instruction permitting clarifying questions; every
+	// other mode leaves an empty line in that slot.
+	const guidance =
+		input.mode === "guided"
+			? "This is a guided /sf start run: ask concise clarifying questions when product intent, scope, acceptance criteria, or readiness is unclear, and pause rather than guessing through important product decisions."
+			: "";
+	const header = [
+		`Run the "${phase}" phase of the ${input.template.name} workflow.`,
+		"",
+		`Template id: ${input.templateId}`,
+		`Description: ${input.description || "(none provided)"}`,
+		`Issue: ${input.issueRef || "(none)"}`,
+		`Artifact directory: ${input.artifactDir || "(none)"}`,
+		`Branch: ${input.branch}`,
+		`Date: ${input.date}`,
+	];
+	const instructions =
+		"Use the workflow template below as the source of truth. Execute only the current phase, produce the phase's required evidence, and leave enough notes for the next graph step to continue without re-deciding prior scope.";
+	const footer = [`Current phase: ${phase}`, "", "--- Workflow Template ---", input.workflowContent];
+	return [...header, "", instructions, guidance, "", ...footer].join("\n");
+}
+
+/** Decide whether a phase gets a human-review gate (guided runs only). */
+function hasGuidedReviewGate(input: CompileTemplateRunInput, phase: string): boolean {
+	if (input.mode !== "guided") return false;
+	const gates = input.template.interaction?.question_gates;
+	// Registry-configured gates take priority over the built-in default.
+	if (gates) return gates.includes(phase);
+	// Built-in default: product-plan pauses at its "scope" and "ready" phases.
+	return input.templateId === "product-plan" && (phase === "scope" || phase === "ready");
+}
+
+/**
+ * Compile a resolved `/sf start` template into a version-1 workflow definition.
+ *
+ * Every template phase becomes one step: its id is the slugified phase name,
+ * it requires only the step before it, and guided review gates are attached
+ * as a `human-review` verify policy. Intended caller (per the original
+ * header): `handleStart`.
+ */
+export function compileTemplateRun(
+	input: CompileTemplateRunInput,
+): WorkflowDefinition {
+	// Steps form a linear chain, so carry the previous step id forward.
+	let previousId: string | undefined;
+	return {
+		version: 1,
+		name: input.templateId,
+		description: input.template.description,
+		steps: input.template.phases.map((phase, index) => {
+			const id = stepIdForPhase(phase, index);
+			const requires = previousId === undefined ? [] : [previousId];
+			previousId = id;
+			return {
+				id,
+				name: phase,
+				prompt: phasePrompt(input, phase),
+				requires,
+				produces: [],
+				verify: hasGuidedReviewGate(input, phase)
+					? ({ policy: "human-review" } as const)
+					: undefined,
+			};
+		}),
+	};
+}
diff --git a/src/resources/extensions/sf/workflow-templates.ts b/src/resources/extensions/sf/workflow-templates.ts
index f23d0e153..d61b68a18 100644
--- a/src/resources/extensions/sf/workflow-templates.ts
+++ b/src/resources/extensions/sf/workflow-templates.ts
@@ -38,10 +38,14 @@ export interface TemplateEntry {
 	artifact_dir: string | null;
 	estimated_complexity: string;
 	requires_project: boolean;
+	interaction?: {
+		guided_default?: boolean;
+		question_gates?: string[];
+	};
 }
 
 export interface TemplateRegistry {
-	version: number;
+	schemaVersion: number;
 	templates: Record<string, TemplateEntry>;
 }
 
@@ -65,6 +69,11 @@ export interface MilestoneTemplateSliceScaffold {
 	observabilityImpact: string;
 }
 
+export interface WorkflowTemplateCommandDefinition {
+	cmd: string; // template id, i.e. the key under `templates` in the registry
+	desc: string; // the template entry's `description` text
+}
+
 // ─── Registry Cache ──────────────────────────────────────────────────────────
 
 let cachedRegistry: TemplateRegistry | null = null;
@@ -76,7 +85,7 @@ export function loadRegistry(): TemplateRegistry {
 	if (cachedRegistry) return cachedRegistry;
 
 	if (!existsSync(registryPath)) {
-		cachedRegistry = { version: 1, templates: {} };
+		cachedRegistry = { schemaVersion: 1, templates: {} };
 		return cachedRegistry;
 	}
 
@@ -84,11 +93,58 @@ export function loadRegistry(): TemplateRegistry {
 		const content = readFileSync(registryPath, "utf-8");
 		cachedRegistry = JSON.parse(content) as TemplateRegistry;
 	} catch {
-		cachedRegistry = { version: 1, templates: {} };
+		cachedRegistry = { schemaVersion: 1, templates: {} };
 	}
 	return cachedRegistry;
 }
 
+/**
+ * List every registered workflow template as a `{ cmd, desc }` pair.
+ *
+ * Entries come straight from `loadRegistry()`, one per template id in the
+ * registry's key order, with `desc` taken from the template's `description`.
+ * Used in this module by `formatStartUsage`.
+ */
+export function workflowTemplateCommandDefinitions(): WorkflowTemplateCommandDefinition[] {
+	const { templates } = loadRegistry();
+	const definitions: WorkflowTemplateCommandDefinition[] = [];
+	for (const [cmd, template] of Object.entries(templates)) {
+		definitions.push({ cmd, desc: template.description });
+	}
+	return definitions;
+}
+
+/**
+ * Build the `/sf start` usage/help text from the template registry.
+ *
+ * The template column is at least 16 characters wide and grows to fit the
+ * longest registered id, so descriptions stay aligned for any registry.
+ *
+ * @returns Multi-line usage text listing templates, examples, and flags.
+ */
+export function formatStartUsage(): string {
+	const defs = workflowTemplateCommandDefinitions();
+	const width = Math.max(16, ...defs.map(({ cmd }) => cmd.length));
+	const templates = defs
+		.map(({ cmd, desc }) => `  ${cmd.padEnd(width)} ${desc}`)
+		.join("\n");
+	return (
+		"Usage: /sf start <template> [description]\n\n" +
+		"/sf start templates are autoflow planning templates. " +
+		"YAML workflow definitions use /sf workflow run <definition>.\n\n" +
+		`Templates:\n${templates}\n\n` +
+		"Examples:\n" +
+		"  /sf start bugfix fix login button not responding\n" +
+		"  /sf start product-plan plan the product we need to develop\n" +
+		"  /sf start product-tracking create product analytics tracking plan\n" +
+		"  /sf start spike evaluate auth libraries\n\n" +
+		"Flags:\n" +
+		"  --dry-run       Preview what would happen without executing\n" +
+		"  --issue <ref>   Link to a GitHub issue\n\n" +
+		"Run /sf templates for detailed template info."
+	);
+}
+
 /**
  * Resolve a template by exact name or alias.
  * Returns null if no match found.
@@ -113,13 +169,6 @@ export function resolveByName(nameOrAlias: string): TemplateMatch | null {
 		}
 	}
 
-	// Fuzzy: prefix match on id
-	for (const [id, entry] of Object.entries(registry.templates)) {
-		if (id.startsWith(normalized) || normalized.startsWith(id)) {
-			return { id, template: entry, confidence: "high" };
-		}
-	}
-
 	// Common aliases
 	const aliases: Record<string, string> = {
 		bug: "bugfix",
@@ -138,6 +187,13 @@ export function resolveByName(nameOrAlias: string): TemplateMatch | null {
 		migration: "refactor",
 		project: "full-project",
 		full: "full-project",
+		product: "product-plan",
+		"product-planning": "product-plan",
+		"plan-product": "product-plan",
+		analytics: "product-tracking",
+		instrumentation: "product-tracking",
+		telemetry: "product-tracking",
+		tracking: "product-tracking",
 	};
 
 	const aliasMatch = aliases[normalized];
@@ -149,6 +205,13 @@ export function resolveByName(nameOrAlias: string): TemplateMatch | null {
 		};
 	}
 
+	// Fuzzy: prefix match on id
+	for (const [id, entry] of Object.entries(registry.templates)) {
+		if (id.startsWith(normalized) || normalized.startsWith(id)) {
+			return { id, template: entry, confidence: "high" };
+		}
+	}
+
 	return null;
 }
 
@@ -294,6 +357,69 @@ export function scaffoldMilestoneSlices(
 	if (!match) return null;
 
 	switch (match.id) {
+		case "product-plan":
+			return [
+				{
+					sliceId: "S01",
+					title: "Model the product and value flow",
+					risk: "medium",
+					depends: [],
+					demo: "The team can explain who the product serves, what value it creates, and which domain entities matter.",
+					goal: "Capture the product purpose, users, workflows, entities, value moments, and unknowns before implementation planning.",
+					successCriteria:
+						"A product model exists with named consumers, core journeys, and explicit open questions.",
+					proofLevel: "product contract",
+					integrationClosure:
+						"Product intent is connected to real code, docs, or requested user outcomes instead of generic assumptions.",
+					observabilityImpact:
+						"Important product questions and future telemetry needs are visible early.",
+				},
+				{
+					sliceId: "S02",
+					title: "Define the target capability boundary",
+					risk: "medium",
+					depends: ["S01"],
+					demo: "The intended product increment has a clear MVP boundary and non-goals.",
+					goal: "Translate the product model into a concrete capability, acceptance path, scope boundary, and release assumptions.",
+					successCriteria:
+						"The planned increment names what will ship, who will use it, and what is deliberately deferred.",
+					proofLevel: "scope contract",
+					integrationClosure:
+						"The capability boundary maps to production-facing surfaces and integration points.",
+					observabilityImpact:
+						"Success signals, diagnostics, and audit needs are captured before code work starts.",
+				},
+				{
+					sliceId: "S03",
+					title: "Plan implementation slices",
+					risk: "medium",
+					depends: ["S02"],
+					demo: "The product increment is broken into buildable slices with evidence and dependencies.",
+					goal: "Create a development plan with ordered slices, tests, integration closure, and verification commands.",
+					successCriteria:
+						"Every slice has purpose, consumer, acceptance evidence, and a bounded implementation surface.",
+					proofLevel: "slice plan",
+					integrationClosure:
+						"The plan shows how partial slices compose into the product capability.",
+					observabilityImpact:
+						"Risks, telemetry candidates, and release-readiness checks are attached to slices.",
+				},
+				{
+					sliceId: "S04",
+					title: "Confirm implementation readiness",
+					risk: "low",
+					depends: ["S03"],
+					demo: "The team can start implementation without re-deciding product scope.",
+					goal: "Review the plan for missing purpose, missing consumers, untestable behavior, and unresolved blockers.",
+					successCriteria:
+						"The product plan is ready for execution or records concrete blockers and follow-up decisions.",
+					proofLevel: "readiness review",
+					integrationClosure:
+						"Implementation can begin from the plan with clear proof obligations.",
+					observabilityImpact:
+						"Release, support, and analytics follow-ups are explicit instead of implicit.",
+				},
+			];
 		case "bugfix":
 		case "hotfix":
 			return [
diff --git a/src/resources/extensions/sf/workflow-templates/full-project.md b/src/resources/extensions/sf/workflow-templates/full-project.md
index e2f2e0ee3..eebf6a14b 100644
--- a/src/resources/extensions/sf/workflow-templates/full-project.md
+++ b/src/resources/extensions/sf/workflow-templates/full-project.md
@@ -13,7 +13,7 @@ research, planning, execution, and verification. Use for greenfield projects or
 major features that need the full planning apparatus.
 
 This template wraps the existing SF workflow for registry completeness.
-When selected, it routes to the standard /sf init → /sf auto pipeline.
+When selected, it routes to the standard /sf init → /sf autonomous pipeline.
 </purpose>
 
 <phases>
@@ -33,7 +33,7 @@ it should route to the standard SF workflow:
 
 1. If `.sf/` doesn't exist: Run `/sf init` to bootstrap the project
 2. If `.sf/` exists but no milestones: Start the discuss phase via `/sf discuss`
-3. If milestones exist: Resume via `/sf auto` or `/sf next`
+3. If milestones exist: Resume via `/sf autonomous` or `/sf next`
 
 The full SF workflow protocol is defined in `SF-WORKFLOW.md` and handles all
 phases, state tracking, and agent orchestration.
diff --git a/src/resources/extensions/sf/workflow-templates/product-plan.md b/src/resources/extensions/sf/workflow-templates/product-plan.md
new file mode 100644
index 000000000..e0ced3a04
--- /dev/null
+++ b/src/resources/extensions/sf/workflow-templates/product-plan.md
@@ -0,0 +1,83 @@
+# Product Plan Workflow
+
+<template_meta>
+name: product-plan
+version: 1
+requires_project: false
+artifact_dir: .sf/workflows/product-plan/
+</template_meta>
+
+<purpose>
+Plan the product increment before development starts. This workflow turns a
+product idea into a concrete product model, capability boundary, implementation
+slice plan, and readiness review so engineering work has a real consumer,
+acceptance path, and verification strategy.
+</purpose>
+
+<phases>
+1. model - Document the product, users, value flow, and domain entities
+2. scope - Define the target capability, MVP boundary, non-goals, and risks
+3. slice - Break the work into implementation slices with evidence
+4. ready - Confirm blockers, tests, release needs, and follow-up decisions
+</phases>
+
+<process>
+
+## Phase 1: Model Product
+
+**Goal:** Establish what product is being built and why it should exist.
+
+1. Read the user request, existing docs, README, product surfaces, routes, APIs, tests, and current plans if present.
+2. Identify users, customers, operators, organizations/accounts, core workflows, value moments, and domain entities.
+3. Separate confirmed product facts from assumptions and open questions.
+4. Produce `PRODUCT.md` in the artifact directory.
+
+**Completion evidence:**
+- The product model names real users or production consumers.
+- Core workflows and entities are explicit.
+- Unknowns are listed instead of guessed.
+
+## Phase 2: Scope Target Capability
+
+**Goal:** Turn the product model into a bounded increment that can be developed.
+
+1. Define the capability or MVP boundary in user-facing terms.
+2. Name what is in scope, out of scope, and deliberately deferred.
+3. Identify integration points, data needs, permissions, UX/API surfaces, and operational constraints.
+4. Define acceptance criteria and falsifiers for the planned capability.
+5. Produce `SCOPE.md` in the artifact directory.
+
+**Completion evidence:**
+- The target capability can be explained without implementation details.
+- Non-goals and release assumptions are explicit.
+- Acceptance criteria describe observable behavior.
+
+## Phase 3: Plan Implementation Slices
+
+**Goal:** Create a buildable plan that preserves product intent through implementation.
+
+1. Break the capability into ordered slices.
+2. For each slice, state purpose, consumer, files or modules likely affected, dependencies, tests, and integration closure.
+3. Identify telemetry, audit, docs, migration, or support work that must ship with the product.
+4. Produce `PLAN.md` in the artifact directory.
+
+**Completion evidence:**
+- Each slice has a real consumer and a verification path.
+- Dependencies and sequencing are explicit.
+- Cross-cutting concerns are recorded as slices or follow-up work.
+
+## Phase 4: Confirm Readiness
+
+**Goal:** Decide whether development can begin without re-deciding the product.
+
+1. Review the product model, scope, and slice plan for missing purpose, missing consumers, untestable behavior, and unresolved blockers.
+2. Decide whether product tracking should run now or later. If now, hand off to `/sf start product-tracking`.
+3. Record verification commands, release gates, remaining open questions, and implementation start criteria.
+4. Produce `READY.md` in the artifact directory.
+
+**Completion evidence:**
+- The plan is ready for implementation or blocked with named decisions.
+- Verification and release gates are known.
+- Telemetry and product-audit follow-ups are explicit.
+
+</process>
diff --git a/src/resources/extensions/sf/workflow-templates/product-tracking.md b/src/resources/extensions/sf/workflow-templates/product-tracking.md
new file mode 100644
index 000000000..62ad73374
--- /dev/null
+++ b/src/resources/extensions/sf/workflow-templates/product-tracking.md
@@ -0,0 +1,116 @@
+# Product Tracking Workflow
+
+<template_meta>
+name: product-tracking
+version: 1
+requires_project: false
+artifact_dir: .sf/workflows/product-tracking/
+</template_meta>
+
+<purpose>
+Make a product data-ready by turning product behavior into a versioned telemetry
+contract, implementation guide, and verified tracking code. The workflow protects
+product, growth, customer success, and engineering consumers from analytics that
+are missing, inconsistent, or impossible to trust.
+</purpose>
+
+<phases>
+1. model - Document what the product does, who uses it, and which entities matter
+2. audit - Capture the analytics and identity wiring that already exists
+3. plan - Design the target tracking plan and explicit delta from current state
+4. instrument - Generate SDK-specific implementation guidance
+5. implement - Add or update tracking code against the guide
+6. verify - Prove the tracking contract and implementation are usable
+</phases>
+
+<process>
+
+## Phase 1: Model Product
+
+**Goal:** Produce `.telemetry/product.md`, the product model used by every later phase.
+
+1. Inspect README, docs, routes, core domain modules, API handlers, UI flows, and tests.
+2. Identify users, organizations/accounts, key workflows, value moments, and domain entities.
+3. Write `.telemetry/product.md` with enough detail for a telemetry plan reviewer to understand why each event exists.
+4. If an installed skill named `product-tracking-skills:product-tracking-model-product` is available, use it. If it is unavailable, follow this phase directly.
+
+**Completion evidence:**
+- `.telemetry/product.md` exists.
+- It names real product consumers and entities, not generic analytics concepts.
+- Unknowns are marked as open questions instead of guessed.
+
+## Phase 2: Audit Current Tracking
+
+**Goal:** Produce `.telemetry/current-state.yaml` plus an audit note describing current SDK and instrumentation wiring.
+
+1. Search for analytics SDK imports, network endpoints, server-side emitters, identity calls, group/account calls, feature flag exposure calls, and test helpers.
+2. Record actual events, properties, user identity fields, group identity fields, destinations, and call sites.
+3. Document failure or absence explicitly. "No current tracking found" is a valid audit result if supported by code search evidence.
+4. If `product-tracking-skills:product-tracking-audit-current-tracking` is available, use it. If it is unavailable, follow this phase directly.
+
+**Completion evidence:**
+- `.telemetry/current-state.yaml` exists.
+- The audit names the files or modules inspected.
+- Missing or ambiguous wiring is represented as a gap, not silently ignored.
+
+## Phase 3: Design Tracking Plan
+
+**Goal:** Produce `.telemetry/tracking-plan.yaml` and `.telemetry/delta.md`.
+
+1. Use `.telemetry/product.md` and `.telemetry/current-state.yaml` as the source of truth.
+2. Define events, properties, entities, group hierarchy, identity rules, and lifecycle semantics.
+3. Prefer fewer high-value events over noisy implementation events.
+4. Write `.telemetry/delta.md` to separate what already exists from what must be added, renamed, removed, or fixed.
+5. If `product-tracking-skills:product-tracking-design-tracking-plan` is available, use it. If it is unavailable, follow this phase directly.
+
+**Completion evidence:**
+- `.telemetry/tracking-plan.yaml` exists.
+- `.telemetry/delta.md` exists.
+- Every event has a production consumer or decision it supports.
+
+## Phase 4: Generate Instrument Guide
+
+**Goal:** Produce `.telemetry/instrument.md`, the SDK-specific implementation guide for the target destination.
+
+1. Determine the destination from user input, existing SDKs, or the `tracking_destination` parameter. If unclear, document the assumed destination and confidence.
+2. Explain identify, group/account, track, feature exposure, and page/screen tracking patterns that apply to this repo.
+3. Include concrete examples in the repo's language and framework.
+4. Include privacy, PII, environment, batching, and server/client boundary constraints.
+5. If `product-tracking-skills:product-tracking-instrument-guide` is available, use it. If it is unavailable, follow this phase directly.
+
+**Completion evidence:**
+- `.telemetry/instrument.md` exists.
+- It is specific enough that an implementer can add code without re-deciding SDK semantics.
+- Unsupported capabilities of the destination are called out.
+
+## Phase 5: Implement Tracking
+
+**Goal:** Add or update typed tracking code using `.telemetry/instrument.md` and `.telemetry/tracking-plan.yaml`.
+
+1. Create or update a small tracking module/wrapper that centralizes SDK calls.
+2. Add typed event helpers or schemas where the repo language supports them.
+3. Wire identity and group calls at real login/account/session boundaries.
+4. Instrument the highest-value events from `.telemetry/delta.md`; do not sprinkle ad hoc calls outside the wrapper.
+5. Add focused tests for wrapper behavior, identity mapping, and at least one real consumer path.
+6. If `product-tracking-skills:product-tracking-implement-tracking` is available, use it. If it is unavailable, follow this phase directly.
+
+**Completion evidence:**
+- Tracking code exists in the repo, not only in docs.
+- Tests or smoke checks prove at least one event path and identity path.
+- `.telemetry/delta.md` is updated to show what remains.
+
+## Phase 6: Verify Tracking
+
+**Goal:** Prove the telemetry contract is coherent and the implementation can ship.
+
+1. Run the repo's focused tests, typecheck, lint, or build command relevant to the tracking changes.
+2. Review generated events against `.telemetry/tracking-plan.yaml` for naming, required properties, identity, and PII constraints.
+3. Write a short verification note in the artifact directory or `.telemetry/verification.md`.
+4. If verification cannot run, document the blocker and the exact command or environment needed.
+
+**Completion evidence:**
+- Verification commands and results are recorded.
+- Any remaining telemetry gaps are listed as explicit follow-up work.
+- The final state identifies what product questions can now be answered.
+
+</process>
diff --git a/src/resources/extensions/sf/workflow-templates/registry.json b/src/resources/extensions/sf/workflow-templates/registry.json
index 3b8474e85..136cec46b 100644
--- a/src/resources/extensions/sf/workflow-templates/registry.json
+++ b/src/resources/extensions/sf/workflow-templates/registry.json
@@ -1,5 +1,5 @@
 {
-	"version": 1,
+	"schemaVersion": 1,
 	"templates": {
 		"full-project": {
 			"name": "Full Project",
@@ -162,6 +162,57 @@
 			"artifact_dir": ".sf/active/",
 			"estimated_complexity": "low",
 			"requires_project": true
+		},
+		"product-plan": {
+			"name": "Product Plan",
+			"description": "Model the product, define the target capability, plan implementation slices, and confirm readiness before development starts",
+			"file": "product-plan.md",
+			"phases": ["model", "scope", "slice", "ready"],
+			"triggers": [
+				"product plan",
+				"plan product",
+				"plan the product",
+				"product planning",
+				"product development",
+				"define product",
+				"mvp",
+				"roadmap",
+				"what to build"
+			],
+			"artifact_dir": ".sf/workflows/product-plan/",
+			"estimated_complexity": "medium",
+			"requires_project": false,
+			"interaction": {
+				"guided_default": true,
+				"question_gates": ["scope", "ready"]
+			}
+		},
+		"product-tracking": {
+			"name": "Product Tracking",
+			"description": "Model the product, audit current analytics, design a tracking plan, generate an instrumentation guide, implement tracking, and verify the data contract",
+			"file": "product-tracking.md",
+			"phases": [
+				"model",
+				"audit",
+				"plan",
+				"instrument",
+				"implement",
+				"verify"
+			],
+			"triggers": [
+				"product tracking",
+				"product analytics",
+				"tracking plan",
+				"analytics instrumentation",
+				"instrument analytics",
+				"telemetry",
+				"event tracking",
+				"implement tracking",
+				"data-ready"
+			],
+			"artifact_dir": ".sf/workflows/product-tracking/",
+			"estimated_complexity": "medium",
+			"requires_project": false
 		}
 	}
 }
diff --git a/src/resources/extensions/sf/workspace-index.ts b/src/resources/extensions/sf/workspace-index.ts
index 3fa7b187b..dd163a4f5 100644
--- a/src/resources/extensions/sf/workspace-index.ts
+++ b/src/resources/extensions/sf/workspace-index.ts
@@ -358,7 +358,7 @@ export async function getSuggestedNextCommands(
 		index.active.phase === "executing" ||
 		index.active.phase === "summarizing"
 	)
-		commands.add("/sf auto");
+		commands.add("/sf autonomous");
 	if (scope) commands.add(`/sf doctor ${scope}`);
 	if (scope) commands.add(`/sf doctor fix ${scope}`);
 	commands.add("/sf status");
diff --git a/src/resources/extensions/sf/worktree-command.ts b/src/resources/extensions/sf/worktree-command.ts
index 4747bbd5b..23499bfdb 100644
--- a/src/resources/extensions/sf/worktree-command.ts
+++ b/src/resources/extensions/sf/worktree-command.ts
@@ -393,7 +393,7 @@ async function handleCreate(
 					`This worktree inherited existing SF milestones from the main branch.`,
 					``,
 					`  Continue — keep milestones and pick up where main left off`,
-					`  Start fresh — clear milestones so /sf auto starts a new project`,
+					`  Start fresh — clear milestones so /sf autonomous starts a new project`,
 				].join("\n"),
 				confirmLabel: "Continue",
 				declineLabel: "Start fresh",
@@ -408,7 +408,7 @@ async function handleCreate(
 			? `  ${CLR.muted("Auto-committed on previous branch before switching.")}`
 			: "";
 		const freshNote = clearedPlans
-			? `  ${CLR.ok("✓")} Cleared milestones — ${CLR.hint("/sf auto")} will start fresh.`
+			? `  ${CLR.ok("✓")} Cleared milestones — ${CLR.hint("/sf autonomous")} will start fresh.`
 			: "";
 		ctx.ui.notify(
 			[
diff --git a/src/resources/extensions/subagent/index.ts b/src/resources/extensions/subagent/index.ts
index b5a1bdeb3..478fed9bd 100644
--- a/src/resources/extensions/subagent/index.ts
+++ b/src/resources/extensions/subagent/index.ts
@@ -45,6 +45,16 @@ import { registerWorker, updateWorker } from "./worker-registry.js";
 const MAX_PARALLEL_TASKS = 8;
 const MAX_CONCURRENCY = 4;
 const COLLAPSED_ITEM_COUNT = 10;
+/**
+ * Bounds Sift-backed code search so a failed model/runtime path cannot leave the
+ * TUI showing an eternal running tool.
+ *
+ * Purpose: keep codebase exploration responsive when Sift stalls, builds a cold
+ * cache, or waits on an unavailable local model.
+ *
+ * Consumer: the `codebase_search` extension tool registered below.
+ */
+const CODEBASE_SEARCH_TIMEOUT_MS = 120_000;
 const liveSubagentProcesses = new Set<ChildProcess>();
 const AGENT_ALIASES: Record<string, string> = {
 	default: "worker",
@@ -118,6 +128,75 @@ async function stopLiveSubagents(): Promise<void> {
 	}
 }
 
+interface CodebaseSearchDetails {
+	operation?: string; // tool name; `execute` writes "codebase_search"
+	exitCode?: number; // Sift exit status; non-zero renders as a failure
+	timedOut?: boolean; // true when the timeout timer killed the process
+	aborted?: boolean; // true on the abort path (set for timeouts as well)
+	query?: string; // query text handed to Sift
+	scope?: string; // directory searched; also the spawn cwd
+	strategy?: string; // Sift `--strategy` value
+	timeoutMs?: number; // effective timeout applied to this run
+	siftBin?: string; // executable path (or bare name) that was spawned
+}
+
+/**
+ * Returns true when a Sift search result should render as failure.
+ *
+ * A result counts as failed when it timed out, was aborted, or carries a
+ * numeric non-zero `exitCode`; missing details render as success.
+ * Purpose: derive UI status from stable execution details rather than
+ * provider-specific `AgentToolResult` fields custom renderers may not receive.
+ * Consumer: `codebase_search.renderResult`.
+ */
+function isCodebaseSearchError(details: CodebaseSearchDetails | undefined): boolean {
+	return Boolean(
+		details?.timedOut ||
+			details?.aborted ||
+			(typeof details?.exitCode === "number" && details.exitCode !== 0),
+	);
+}
+
+/**
+ * Finds the Sift CLI executable used by the codebase search tool.
+ *
+ * Purpose: support normal PATH installs while preserving the common Cargo
+ * install location, so Sift works in fresh agent shells without extra setup.
+ *
+ * Consumer: `codebase_search.execute`.
+ */
+function resolveSiftBinary(): string {
+	const pathEnv = process.env.PATH ?? "";
+	for (const dir of pathEnv.split(path.delimiter)) {
+		// An empty entry would make path.join() yield the relative name "sift",
+		// which existsSync() resolves against the cwd rather than a PATH dir.
+		if (!dir) continue;
+		const candidate = path.join(dir, "sift");
+		// existsSync() reports failures as `false`, so no try/catch is needed.
+		if (fs.existsSync(candidate)) return candidate;
+	}
+
+	const cargoBin = path.join(os.homedir(), ".cargo", "bin", "sift");
+	return fs.existsSync(cargoBin) ? cargoBin : "sift";
+}
+
+/**
+ * Builds the exact Sift command argv for autonomous local retrieval.
+ *
+ * Returns `["search", "--strategy", strategy, "--agent", query, scope]`, in
+ * that order, as separate argv entries.
+ * Purpose: make the contract explicit: `codebase_search` is Sift-backed
+ * retrieval; `scout` remains the broader explorer subagent role.
+ * Consumer: `codebase_search.execute`.
+ */
+function buildCodebaseSearchArgs(
+	strategy: string,
+	query: string,
+	scope: string,
+): string[] {
+	return ["search", "--strategy", strategy, "--agent", query, scope];
+}
+
 function formatUsageStats(
 	usage: {
 		input: number;
@@ -2290,12 +2369,12 @@ export default function (pi: ExtensionAPI) {
 		},
 	});
 
-	// ── Scout Tool ─────────────────────────────────────────────────────────────────
-	// Wraps `sift search --agent` for Planner → Scout → Worker pipeline.
-	// The Scout subagent is a thin wrapper around sift's autonomous corpus exploration.
-	// Planner calls call_scout with a query; sift explores and returns snippet-bearing evidence.
+	// ── Codebase Search Tool ───────────────────────────────────────────────────────
+	// Sift-backed local retrieval. This is intentionally not named "scout":
+	// `scout` is the explorer subagent role; `codebase_search` is the retrieval
+	// primitive that scouts, planners, and parent agents can call for evidence.
 
-	const CallScoutParams = Type.Object({
+	const CodebaseSearchParams = Type.Object({
 		query: Type.String({
 			description:
 				"Natural-language query describing what to explore (e.g. 'find where the write gate tool_call hooks are registered')",
@@ -2312,53 +2391,99 @@ export default function (pi: ExtensionAPI) {
 					"Search strategy: 'path-hybrid' (default), 'page-index-hybrid', 'bm25', or 'path'",
 			}),
 		),
+		timeoutMs: Type.Optional(
+			Type.Number({
+				description:
+					"Maximum time in milliseconds to wait for Sift before aborting. Defaults to 120000; values below 1000 are raised to 1000.",
+			}),
+		),
 	});
 
 	pi.registerTool({
-		name: "call_scout",
-		label: "Scout",
+		name: "codebase_search",
+		label: "Code Search",
 		description: [
-			"Explore the codebase using sift's autonomous agent loop.",
-			" Spawns sift search --agent with the given query, returns snippet-bearing evidence.",
-			" Use this instead of grep/read when you need to understand the architecture",
-			" of an unfamiliar subsystem — sift's autonomous loop expands queries and",
-			" finds relevant code without you needing to know file paths ahead of time.",
-			" Planner calls this before writing an execution plan.",
+			"Run Sift local retrieval over a codebase scope.",
+			" Spawns `sift search --agent` with a natural-language query and returns snippet-bearing evidence.",
+			" Use this when grep is too literal and the agent needs hybrid BM25/vector/path retrieval",
+			" before planning or implementing. This is a search backend, not the scout role.",
 		].join(""),
 		promptGuidelines: [
-			"call_scout is for exploration only — it does not write or modify files.",
+			"codebase_search is for exploration only — it does not write or modify files.",
+			" Use the scout subagent for broad investigation; use codebase_search when Sift-backed local retrieval is the right primitive.",
 			" Be specific in your query: name functions, files, or concepts you expect to find.",
 			" Use the scope param to restrict search to a specific worktree or subsystem.",
 			" Review the returned evidence before planning — it may reveal things you missed.",
 		],
-		parameters: CallScoutParams,
+		parameters: CodebaseSearchParams,
+		renderCall(args, theme) {
+			const query = typeof args.query === "string" ? args.query : "";
+			const scope = typeof args.scope === "string" ? args.scope : process.cwd();
+			const strategy =
+				typeof args.strategy === "string" ? args.strategy : "path-hybrid";
+			const preview =
+				query.length > 90 ? `${query.slice(0, 89).trimEnd()}…` : query;
+			const scopeLabel =
+				scope.length > 70 ? `…${scope.slice(Math.max(0, scope.length - 69))}` : scope;
+
+			return new Text(
+				[
+					theme.fg("toolTitle", theme.bold("Code search is querying Sift")),
+					preview ? `  ${theme.fg("toolOutput", preview)}` : "",
+					`  ${theme.fg("muted", `scope: ${scopeLabel}`)}`,
+					`  ${theme.fg("muted", `strategy: ${strategy}`)}`,
+				]
+					.filter(Boolean)
+					.join("\n"),
+				0,
+				0,
+			);
+		},
+		renderResult(result, { expanded }, theme) {
+			const text =
+				result.content.find((item) => item.type === "text")?.text ??
+				"(code search returned no text)";
+			const details = result.details as CodebaseSearchDetails | undefined;
+			const isError = isCodebaseSearchError(details);
+			const icon = isError ? theme.fg("error", "✗") : theme.fg("success", "✓");
+			const status = details?.timedOut
+				? "timed out"
+				: details?.aborted
+					? "aborted"
+					: isError
+						? "failed"
+						: "done";
+			const lines = text.split("\n");
+			const maxLines = expanded ? lines.length : 12;
+			const shown = lines.slice(0, maxLines).join("\n");
+			const hidden = Math.max(0, lines.length - maxLines);
+			let rendered = `${icon} ${theme.fg("toolTitle", theme.bold(`Code search ${status}`))}`;
+			if (details?.strategy) {
+				rendered += theme.fg("muted", ` (${details.strategy})`);
+			}
+			rendered += `\n${theme.fg(isError ? "error" : "toolOutput", shown)}`;
+			if (hidden > 0) {
+				rendered += `\n${theme.fg("muted", `${hidden} more lines hidden · Ctrl+O expands`)}`;
+			}
+			return new Text(rendered, 0, 0);
+		},
 
 		async execute(_toolCallId, params, signal) {
 			const scope = params.scope ?? process.cwd();
 			const strategy = params.strategy ?? "path-hybrid";
 			const query = params.query;
+			const timeoutMs =
+				typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs)
+					? Math.max(1_000, params.timeoutMs)
+					: CODEBASE_SEARCH_TIMEOUT_MS;
 
-			// Resolve sift binary — check PATH first, then fall back to ~/.cargo/bin
-			const siftBin = (() => {
-				const pathEnv = process.env.PATH ?? "";
-				for (const dir of pathEnv.split(path.delimiter)) {
-					const candidate = path.join(dir, "sift");
-					try {
-						if (fs.existsSync(candidate)) return candidate;
-					} catch {
-						// continue
-					}
-				}
-				// Fallback to known install location
-				const homeBin = path.join(os.homedir(), ".cargo", "bin", "sift");
-				return fs.existsSync(homeBin) ? homeBin : "sift";
-			})();
-
-			const args = ["search", "--strategy", strategy, "--agent", query, scope];
+			const siftBin = resolveSiftBinary();
+			const args = buildCodebaseSearchArgs(strategy, query, scope);
 
 			const stderr: string[] = [];
 			const stdout: string[] = [];
 			let wasAborted = false;
+			let timedOut = false;
 
 			const proc = spawn(siftBin, args, {
 				cwd: scope,
@@ -2389,6 +2514,11 @@ export default function (pi: ExtensionAPI) {
 					}
 				}, 5000).unref?.();
 			};
+			const timeout = setTimeout(() => {
+				timedOut = true;
+				killProc();
+			}, timeoutMs);
+			timeout.unref?.();
 			if (signal) {
 				if (signal.aborted) killProc();
 				else signal.addEventListener("abort", killProc, { once: true });
@@ -2396,11 +2526,13 @@ export default function (pi: ExtensionAPI) {
 
 			const exitCode = await new Promise<number>((resolve) => {
 				proc.on("close", (code) => {
+					clearTimeout(timeout);
 					liveSubagentProcesses.delete(proc);
 					if (signal) signal.removeEventListener("abort", killProc);
 					resolve(code ?? 0);
 				});
 				proc.on("error", () => {
+					clearTimeout(timeout);
 					liveSubagentProcesses.delete(proc);
 					if (signal) signal.removeEventListener("abort", killProc);
 					resolve(1);
@@ -2408,20 +2540,25 @@ export default function (pi: ExtensionAPI) {
 			});
 
 			if (wasAborted) {
+				const text = timedOut
+					? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.`
+					: "Code search aborted.";
 				return {
 					content: [
 						{
 							type: "text",
-							text: "call_scout aborted.",
+							text,
 						},
 					],
 					details: {
-						operation: "call_scout",
+						operation: "codebase_search",
 						aborted: true,
+						timedOut,
 						siftBin,
 						query,
 						scope,
 						strategy,
+						timeoutMs,
 					} as Record<string, unknown>,
 				};
 			}
@@ -2432,7 +2569,7 @@ export default function (pi: ExtensionAPI) {
 			if (exitCode !== 0 && !out) {
 				const hint =
 					err.includes("not found") || err.includes("No such file")
-						? "\n\nHint: sift is not installed. Run: uv tool install sift"
+						? "\n\nHint: install rupurt/sift and ensure `sift` is on PATH."
 						: err
 							? `\n\nsift stderr: ${err.slice(0, 500)}`
 							: "";
@@ -2440,16 +2577,17 @@ export default function (pi: ExtensionAPI) {
 					content: [
 						{
 							type: "text",
-							text: `call_scout failed (exit ${exitCode}). Is sift installed?${hint}`,
+							text: `codebase_search failed (exit ${exitCode}). Is sift installed?${hint}`,
 						},
 					],
 					details: {
-						operation: "call_scout",
+						operation: "codebase_search",
 						exitCode,
 						siftBin,
 						query,
 						scope,
 						strategy,
+						timeoutMs,
 					} as Record<string, unknown>,
 				};
 			}
@@ -2462,12 +2600,13 @@ export default function (pi: ExtensionAPI) {
 					},
 				],
 				details: {
-					operation: "call_scout",
+					operation: "codebase_search",
 					query,
 					scope,
 					strategy,
 					exitCode,
 					siftBin,
+					timeoutMs,
 				},
 			};
 		},
diff --git a/src/resources/extensions/subagent/tests/node-launch.test.ts b/src/resources/extensions/subagent/tests/node-launch.test.ts
index 4a2290758..21104ce94 100644
--- a/src/resources/extensions/subagent/tests/node-launch.test.ts
+++ b/src/resources/extensions/subagent/tests/node-launch.test.ts
@@ -6,6 +6,10 @@ import { fileURLToPath } from "node:url";
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const subagentSrc = readFileSync(join(__dirname, "..", "index.ts"), "utf-8");
+const scoutAgentSrc = readFileSync(
+	join(__dirname, "..", "..", "..", "agents", "scout.md"),
+	"utf-8",
+);
 
 test("subagent launcher resolves Node command specs instead of shelling through bash", () => {
 	assert.match(subagentSrc, /function resolveSubagentLaunchSpec\(/);
@@ -24,9 +28,9 @@ test("normal subagent execution spawns the resolved Node command with argv array
 	assert.match(subagentSrc, /shell:\s*false/);
 });
 
-test("call_scout subprocesses are tracked and killed on abort", () => {
-	const scoutIdx = subagentSrc.indexOf('name: "call_scout"');
-	assert.ok(scoutIdx > 0, "call_scout tool must be registered");
+test("codebase_search subprocesses are tracked and killed on abort", () => {
+	const scoutIdx = subagentSrc.indexOf('name: "codebase_search"');
+	assert.ok(scoutIdx > 0, "codebase_search tool must be registered");
 	const scoutSrc = subagentSrc.slice(scoutIdx);
 
 	assert.match(scoutSrc, /liveSubagentProcesses\.add\(proc\)/);
@@ -35,6 +39,35 @@ test("call_scout subprocesses are tracked and killed on abort", () => {
 	assert.match(scoutSrc, /proc\.kill\("SIGKILL"\)/);
 });
 
+test("codebase_search has human-facing rendering and a bounded timeout", () => {
+	const scoutIdx = subagentSrc.indexOf('name: "codebase_search"');
+	assert.ok(scoutIdx > 0, "codebase_search tool must be registered");
+	const scoutSrc = subagentSrc.slice(scoutIdx);
+
+	assert.match(scoutSrc, /renderCall\(args, theme\)/);
+	assert.match(scoutSrc, /Code search is querying Sift/);
+	assert.match(scoutSrc, /renderResult\(result, \{ expanded \}, theme\)/);
+	assert.match(scoutSrc, /CODEBASE_SEARCH_TIMEOUT_MS/);
+	assert.match(scoutSrc, /setTimeout\(\(\) => \{/);
+	assert.match(scoutSrc, /Code search timed out after/);
+});
+
+test("codebase_search is documented as Sift backend, not scout role", () => {
+	assert.doesNotMatch(subagentSrc, /name: "call_scout"/);
+	assert.match(subagentSrc, /not named "scout"/);
+	assert.match(subagentSrc, /`scout` is the explorer subagent role/);
+	assert.match(subagentSrc, /resolveSiftBinary/);
+	assert.match(subagentSrc, /buildCodebaseSearchArgs/);
+	assert.match(subagentSrc, /install rupurt\/sift/);
+});
+
+test("scout agent can use Sift-backed codebase_search", () => {
+	assert.match(scoutAgentSrc, /tools: .*codebase_search/);
+	assert.match(scoutAgentSrc, /Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out/);
+	assert.match(scoutAgentSrc, /Sift-backed local retrieval tool/);
+	assert.match(scoutAgentSrc, /You are still the scout role; Sift is one tool you can use\./);
+});
+
 test("cmux launcher writes only explicit environment patch, not the full process env", () => {
 	assert.match(subagentSrc, /function writeNodeSubagentLauncher\(/);
 	assert.match(
diff --git a/src/resources/skills/create-workflow/SKILL.md b/src/resources/skills/create-workflow/SKILL.md
index 3bf25eb00..3e3babf7b 100644
--- a/src/resources/skills/create-workflow/SKILL.md
+++ b/src/resources/skills/create-workflow/SKILL.md
@@ -88,6 +88,7 @@ Available templates in `templates/`:
 - `blog-post-pipeline.yaml` — Linear chain with params and content-heuristic verification.
 - `code-audit.yaml` — Iterate-based fan-out with shell-command verification.
 - `release-checklist.yaml` — Diamond dependency graph with human-review verification.
+- `product-tracking-lifecycle.yaml` — Product telemetry lifecycle with model, audit, plan, instrument, implement, and verify phases.
 </templates_index>
 
 <output_conventions>
diff --git a/src/resources/skills/create-workflow/templates/product-tracking-lifecycle.yaml b/src/resources/skills/create-workflow/templates/product-tracking-lifecycle.yaml
new file mode 100644
index 000000000..87f32570c
--- /dev/null
+++ b/src/resources/skills/create-workflow/templates/product-tracking-lifecycle.yaml
@@ -0,0 +1,144 @@
+# Example: Product Tracking Lifecycle
+# Demonstrates: a linear product telemetry workflow with durable handoff
+# artifacts, context_from chaining, params, content-heuristic verification,
+# and an implementation verification step.
+
+version: 1
+name: product-tracking-lifecycle
+description: >-
+  Model a product, audit current analytics, design a product telemetry tracking
+  plan, generate SDK-specific instrumentation guidance, implement tracking, and
+  verify the resulting data contract.
+params:
+  tracking_destination: "existing analytics destination if one is present; otherwise generic HTTP"
+  verification_command: "npm test"
+steps:
+  - id: model-product
+    name: Model product telemetry surface
+    prompt: >-
+      Model the product so later telemetry decisions have a concrete source of
+      truth. Inspect README, docs, routes, domain modules, API handlers, UI
+      workflows, and tests. Write .telemetry/product.md in this workflow run
+      with users, accounts or groups, value moments, core entities, key
+      workflows, and open questions. Also mirror the file to the project root
+      .telemetry/product.md when this repository is the real implementation
+      target. Use product-tracking-skills:product-tracking-model-product if it
+      is installed; otherwise complete the phase directly.
+    requires: []
+    produces:
+      - .telemetry/product.md
+    verify:
+      policy: content-heuristic
+
+  - id: audit-current-tracking
+    name: Audit current tracking
+    prompt: >-
+      Audit the current analytics implementation. Search for analytics SDK
+      imports, tracking emitters, identify calls, group or account calls, event
+      constants, feature exposure calls, network endpoints, and tests. Write
+      .telemetry/current-state.yaml in this workflow run with actual events,
+      properties, identity fields, group fields, destinations, and call sites.
+      Include a short audit note in .telemetry/current-state.md. Mirror durable
+      artifacts to the project root .telemetry directory when this repository is
+      the real implementation target. Use
+      product-tracking-skills:product-tracking-audit-current-tracking if it is
+      installed; otherwise complete the phase directly.
+    requires:
+      - model-product
+    context_from:
+      - model-product
+    produces:
+      - .telemetry/current-state.yaml
+      - .telemetry/current-state.md
+    verify:
+      policy: content-heuristic
+
+  - id: design-tracking-plan
+    name: Design target tracking plan
+    prompt: >-
+      Design the target tracking plan from the product model and current-state
+      audit. Write .telemetry/tracking-plan.yaml with events, properties,
+      entities, identity rules, group hierarchy, and destination notes. Write
+      .telemetry/delta.md separating existing tracking from additions, fixes,
+      removals, and open questions. Every event must name the product question,
+      operational decision, or downstream consumer it supports. Mirror durable
+      artifacts to the project root .telemetry directory when this repository is
+      the real implementation target. Use
+      product-tracking-skills:product-tracking-design-tracking-plan if it is
+      installed; otherwise complete the phase directly.
+    requires:
+      - audit-current-tracking
+    context_from:
+      - model-product
+      - audit-current-tracking
+    produces:
+      - .telemetry/tracking-plan.yaml
+      - .telemetry/delta.md
+    verify:
+      policy: content-heuristic
+
+  - id: generate-instrument-guide
+    name: Generate instrumentation guide
+    prompt: >-
+      Generate an SDK-specific implementation guide for {{ tracking_destination }}.
+      Use the tracking plan and delta as inputs. Write .telemetry/instrument.md
+      with concrete identify, group or account, track, page or screen, and
+      feature exposure patterns that fit this repo. Include privacy, PII,
+      environment, server/client boundary, batching, and unsupported-capability
+      constraints. Mirror the guide to the project root .telemetry directory
+      when this repository is the real implementation target. Use
+      product-tracking-skills:product-tracking-instrument-guide if it is
+      installed; otherwise complete the phase directly.
+    requires:
+      - design-tracking-plan
+    context_from:
+      - design-tracking-plan
+    produces:
+      - .telemetry/instrument.md
+    verify:
+      policy: content-heuristic
+
+  - id: implement-tracking
+    name: Implement tracking
+    prompt: >-
+      Implement the highest-value tracking changes from .telemetry/delta.md using
+      .telemetry/instrument.md. Prefer a small centralized tracking module or
+      wrapper, typed event helpers or schemas where the repo language supports them,
+      and real identity or group wiring at login, account, or session boundaries.
+      Add focused tests for wrapper behavior, identity mapping, and at least one
+      real consumer path. Write .telemetry/implementation.md summarizing the changes
+      and update .telemetry/delta.md with what was implemented and what remains. Use
+      product-tracking-skills:product-tracking-implement-tracking if it is
+      installed; otherwise complete the phase directly.
+    requires:
+      - generate-instrument-guide
+    context_from:
+      - design-tracking-plan
+      - generate-instrument-guide
+    produces:
+      - .telemetry/implementation.md
+      - .telemetry/delta.md
+    verify:
+      policy: content-heuristic
+      minSize: 200
+
+  - id: verify-tracking
+    name: Verify tracking contract
+    prompt: >-
+      Verify the tracking implementation and contract. Run {{ verification_command }}
+      if it is appropriate for this repository; otherwise run the nearest focused
+      test, typecheck, lint, or build command and document why. Compare implemented
+      events against .telemetry/tracking-plan.yaml for names, required
+      properties, identity, group hierarchy, and PII constraints. Write
+      .telemetry/verification.md with commands, results, blockers, and remaining
+      follow-up work.
+    requires:
+      - implement-tracking
+    context_from:
+      - design-tracking-plan
+      - implement-tracking
+    produces:
+      - .telemetry/verification.md
+    verify:
+      policy: content-heuristic
+      minSize: 200
diff --git a/src/resources/skills/create-workflow/workflows/create-from-template.md b/src/resources/skills/create-workflow/workflows/create-from-template.md
index 57a85d02b..905f8e21c 100644
--- a/src/resources/skills/create-workflow/workflows/create-from-template.md
+++ b/src/resources/skills/create-workflow/workflows/create-from-template.md
@@ -15,6 +15,7 @@ List the available templates in `templates/`:
 2. **blog-post-pipeline.yaml** — Linear 3-step chain with `params` (topic, audience) and `content-heuristic` verification. Best for: workflows with sequential steps and configurable inputs.
 3. **code-audit.yaml** — 3 steps using `iterate` to fan out over a file list, with `shell-command` verification. Best for: workflows that process each item in a list.
 4. **release-checklist.yaml** — 4 steps with diamond dependencies and `human-review` verification. Best for: workflows with branching/merging dependency graphs.
+5. **product-tracking-lifecycle.yaml** — 6-step product telemetry lifecycle with `params`, `context_from`, documentation artifacts, implementation, and verification. Best for: product analytics and instrumentation work.
 
 Ask: "Which template would you like to start from?"
 
diff --git a/src/tests/auto-mode-piped.test.ts b/src/tests/auto-mode-piped.test.ts
index 95b2bb37b..79d56c9f7 100644
--- a/src/tests/auto-mode-piped.test.ts
+++ b/src/tests/auto-mode-piped.test.ts
@@ -1,6 +1,6 @@
 /**
- * Tests for `sf auto` routing — verifies that `auto` is recognized as a
- * subcommand alias for `headless auto` so it doesn't fall through to the
+ * Tests for autonomous routing — verifies that `autonomous` and `auto` are
+ * recognized as subcommand aliases for `headless auto` so they don't fall through to the
  * interactive TUI, which hangs when stdin/stdout are piped.
  *
  * Regression test for #2732.
@@ -23,13 +23,12 @@ const projectRoot = join(fileURLToPath(import.meta.url), "..", "..", "..");
  * interactive TUI code path. This is the definitive test — if cli.ts doesn't
  * handle 'auto', piped invocations will hang (#2732).
  */
-function cliSourceHandlesAutoBeforeTUI(): boolean {
+function cliSourceHandlesAutonomousBeforeTUI(): boolean {
 	const cliSource = readFileSync(join(projectRoot, "src", "cli.ts"), "utf-8");
 
-	// Find the position of the 'auto' subcommand handler
-	// It should appear as: messages[0] === 'auto'
+	// Find the position of the autonomous subcommand handler.
 	const autoHandlerMatch = cliSource.match(
-		/messages\[0\]\s*===\s*['"]auto['"]/,
+		/messages\[0\]\s*===\s*['"]autonomous['"]/,
 	);
 	if (!autoHandlerMatch) return false;
 
@@ -48,11 +47,11 @@ function cliSourceHandlesAutoBeforeTUI(): boolean {
 // Core regression test: `sf auto` must be handled before TUI (#2732)
 // ═══════════════════════════════════════════════════════════════════════════
 
-test("cli.ts handles `auto` subcommand before interactive TUI (#2732)", () => {
+test("cli.ts handles `autonomous` subcommand before interactive TUI (#2732)", () => {
 	assert.ok(
-		cliSourceHandlesAutoBeforeTUI(),
-		'cli.ts must route messages[0] === "auto" to a handler BEFORE ' +
-			"reaching `new InteractiveMode()`. Without this, `sf auto` with " +
+		cliSourceHandlesAutonomousBeforeTUI(),
+		'cli.ts must route messages[0] === "autonomous" to a handler BEFORE ' +
+			"reaching `new InteractiveMode()`. Without this, `sf autonomous` with " +
 			"piped stdin/stdout falls through to the TUI and hangs.",
 	);
 });
@@ -61,16 +60,15 @@ test("cli.ts handles `auto` subcommand before interactive TUI (#2732)", () => {
 // Verify the auto handler routes to headless (not a stub/no-op)
 // ═══════════════════════════════════════════════════════════════════════════
 
-test("cli.ts routes `auto` to headless runner", () => {
+test("cli.ts routes `autonomous` to headless runner", () => {
 	const cliSource = readFileSync(join(projectRoot, "src", "cli.ts"), "utf-8");
 
-	// The auto handler block should import or reference headless
-	// Look for the auto block and check it contains runHeadless or headless
+	// The autonomous handler block should import or reference headless.
 	const autoBlockRegex =
-		/messages\[0\]\s*===\s*['"]auto['"][\s\S]*?runHeadless/;
+		/messages\[0\]\s*===\s*['"]autonomous['"][\s\S]*?runHeadless/;
 	assert.ok(
 		autoBlockRegex.test(cliSource),
-		"`auto` subcommand handler must invoke runHeadless to delegate to headless mode",
+		"`autonomous` subcommand handler must invoke runHeadless to delegate to headless mode",
 	);
 });
 
diff --git a/src/tests/auto-piped-io.test.ts b/src/tests/auto-piped-io.test.ts
index 99c484fbc..0e58ed8d5 100644
--- a/src/tests/auto-piped-io.test.ts
+++ b/src/tests/auto-piped-io.test.ts
@@ -34,14 +34,14 @@ const EXPLICIT_SUBCOMMANDS = new Set([
  * Detect whether the current subcommand should be auto-redirected
  * to headless mode when stdout is not a TTY.
  *
- * Returns true when: the subcommand is "auto" AND stdout is piped.
+ * Returns true when: the subcommand is "auto" or "autonomous" AND stdout is piped.
  */
 function shouldRedirectAutoToHeadless(
 	subcommand: string | undefined,
 	stdoutIsTTY: boolean,
 ): boolean {
 	if (stdoutIsTTY) return false;
-	return subcommand === "auto";
+	return subcommand === "auto" || subcommand === "autonomous";
 }
 
 /**
@@ -69,6 +69,10 @@ test("redirects 'auto' to headless when stdout is piped", () => {
 	assert.ok(shouldRedirectAutoToHeadless("auto", false));
 });
 
+test("redirects 'autonomous' to headless when stdout is piped", () => {
+	assert.ok(shouldRedirectAutoToHeadless("autonomous", false));
+});
+
 test("does NOT redirect 'auto' when stdout is a TTY", () => {
 	assert.ok(!shouldRedirectAutoToHeadless("auto", true));
 });
diff --git a/src/tests/headless-cli-surface.test.ts b/src/tests/headless-cli-surface.test.ts
index bcf484769..9c23bd1ff 100644
--- a/src/tests/headless-cli-surface.test.ts
+++ b/src/tests/headless-cli-surface.test.ts
@@ -54,6 +54,7 @@ function parseHeadlessArgs(argv: string[]): HeadlessOptions {
 	};
 
 	const args = argv.slice(2);
+	let commandSeen = false;
 
 	for (let i = 0; i < args.length; i++) {
 		const arg = args[i];
@@ -107,8 +108,9 @@ function parseHeadlessArgs(argv: string[]): HeadlessOptions {
 			} else if (arg === "--bare") {
 				options.bare = true;
 			}
-		} else if (options.command === "auto") {
-			options.command = arg;
+		} else if (!commandSeen) {
+			options.command = arg === "autonomous" ? "auto" : arg;
+			commandSeen = true;
 		} else {
 			options.commandArgs.push(arg);
 		}
@@ -164,6 +166,37 @@ test("default output format is text", () => {
 	assert.equal(opts.json, false);
 });
 
+test("autonomous command is accepted as headless auto alias", () => {
+	const opts = parseHeadlessArgs(["node", "sf", "headless", "autonomous"]);
+	assert.equal(opts.command, "auto");
+	assert.deepEqual(opts.commandArgs, []);
+});
+
+test("autonomous command preserves command arguments", () => {
+	const opts = parseHeadlessArgs([
+		"node",
+		"sf",
+		"headless",
+		"autonomous",
+		"M001",
+		"extra-context",
+	]);
+	assert.equal(opts.command, "auto");
+	assert.deepEqual(opts.commandArgs, ["M001", "extra-context"]);
+});
+
+test("auto command preserves command arguments", () => {
+	const opts = parseHeadlessArgs([
+		"node",
+		"sf",
+		"headless",
+		"auto",
+		"M001",
+	]);
+	assert.equal(opts.command, "auto");
+	assert.deepEqual(opts.commandArgs, ["M001"]);
+});
+
 test("invalid --output-format value throws", () => {
 	assert.throws(
 		() =>
diff --git a/src/tests/headless-events.test.ts b/src/tests/headless-events.test.ts
index 7f73e0b41..e6eabda67 100644
--- a/src/tests/headless-events.test.ts
+++ b/src/tests/headless-events.test.ts
@@ -75,7 +75,7 @@ function parseHeadlessArgs(argv: string[]): HeadlessOptions {
 			}
 		} else if (!positionalStarted) {
 			positionalStarted = true;
-			options.command = arg;
+			options.command = arg === "autonomous" ? "auto" : arg;
 		} else {
 			options.commandArgs.push(arg);
 		}
diff --git a/src/tests/integration/web-workflow-controls-contract.test.ts b/src/tests/integration/web-workflow-controls-contract.test.ts
index 148025b3b..301461a03 100644
--- a/src/tests/integration/web-workflow-controls-contract.test.ts
+++ b/src/tests/integration/web-workflow-controls-contract.test.ts
@@ -32,18 +32,18 @@ test("planning + no auto → primary is /sf with label Plan", () => {
 	assert.equal(result.disabled, false);
 });
 
-test("executing + no auto → primary is /sf auto with label Start Auto", () => {
+test("executing + no auto → primary is /sf autonomous with label Start Autonomous", () => {
 	const result = deriveWorkflowAction(baseInput({ phase: "executing" }));
 	assert.ok(result.primary);
-	assert.equal(result.primary.command, "/sf auto");
-	assert.equal(result.primary.label, "Start Auto");
+	assert.equal(result.primary.command, "/sf autonomous");
+	assert.equal(result.primary.label, "Start Autonomous");
 });
 
-test("summarizing + no auto → primary is /sf auto with label Start Auto", () => {
+test("summarizing + no auto → primary is /sf autonomous with label Start Autonomous", () => {
 	const result = deriveWorkflowAction(baseInput({ phase: "summarizing" }));
 	assert.ok(result.primary);
-	assert.equal(result.primary.command, "/sf auto");
-	assert.equal(result.primary.label, "Start Auto");
+	assert.equal(result.primary.command, "/sf autonomous");
+	assert.equal(result.primary.label, "Start Autonomous");
 });
 
 test("auto active (not paused) → primary is /sf stop with destructive variant", () => {
@@ -52,15 +52,15 @@ test("auto active (not paused) → primary is /sf stop with destructive variant"
 	);
 	assert.ok(result.primary);
 	assert.equal(result.primary.command, "/sf stop");
-	assert.equal(result.primary.label, "Stop Auto");
+	assert.equal(result.primary.label, "Stop Autonomous");
 	assert.equal(result.primary.variant, "destructive");
 });
 
-test("auto paused → primary is /sf auto with label Resume Auto", () => {
+test("auto paused → primary is /sf autonomous with label Resume Autonomous", () => {
 	const result = deriveWorkflowAction(baseInput({ autoPaused: true }));
 	assert.ok(result.primary);
-	assert.equal(result.primary.command, "/sf auto");
-	assert.equal(result.primary.label, "Resume Auto");
+	assert.equal(result.primary.command, "/sf autonomous");
+	assert.equal(result.primary.label, "Resume Autonomous");
 	assert.equal(result.primary.variant, "default");
 });
 
diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts
index 43bbaed7b..512d40728 100644
--- a/src/tests/native-search.test.ts
+++ b/src/tests/native-search.test.ts
@@ -8,6 +8,10 @@ import {
 	registerNativeSearchHooks,
 	stripThinkingFromHistory,
 } from "../resources/extensions/search-the-web/native-search.ts";
+import {
+	getMiniMaxSearchApiKey,
+	resolveSearchProvider,
+} from "../resources/extensions/search-the-web/provider.ts";
 
 /**
  * Tests for native Anthropic web search injection.
@@ -1188,3 +1192,209 @@ test("stripThinkingFromHistory handles string content (no array)", () => {
 	stripThinkingFromHistory(messages);
 	assert.equal(messages[1].content, "just a string");
 });
+
+// ─── Minimax search tests (R115) ────────────────────────────────────────────
+
+test("getMiniMaxSearchApiKey returns MINIMAX_CODE_PLAN_KEY when set", async (t) => {
+	const keys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	process.env.MINIMAX_CODE_PLAN_KEY = "code-plan-key";
+	process.env.MINIMAX_CODING_API_KEY = "coding-key";
+	process.env.MINIMAX_API_KEY = "api-key";
+
+	// Should return first in priority order
+	assert.equal(
+		getMiniMaxSearchApiKey(),
+		"code-plan-key",
+		"Should return MINIMAX_CODE_PLAN_KEY (highest priority)",
+	);
+});
+
+test("getMiniMaxSearchApiKey falls back to MINIMAX_CODING_API_KEY", async (t) => {
+	const keys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	delete process.env.MINIMAX_CODE_PLAN_KEY;
+	process.env.MINIMAX_CODING_API_KEY = "coding-key";
+	process.env.MINIMAX_API_KEY = "api-key";
+
+	assert.equal(
+		getMiniMaxSearchApiKey(),
+		"coding-key",
+		"Should return MINIMAX_CODING_API_KEY when CODE_PLAN_KEY is unset",
+	);
+});
+
+test("getMiniMaxSearchApiKey falls back to MINIMAX_API_KEY", async (t) => {
+	const keys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	delete process.env.MINIMAX_CODE_PLAN_KEY;
+	delete process.env.MINIMAX_CODING_API_KEY;
+	process.env.MINIMAX_API_KEY = "api-key";
+
+	assert.equal(
+		getMiniMaxSearchApiKey(),
+		"api-key",
+		"Should return MINIMAX_API_KEY when higher priority keys are unset",
+	);
+});
+
+test("getMiniMaxSearchApiKey returns empty string when no keys set", async (t) => {
+	const keys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	delete process.env.MINIMAX_CODE_PLAN_KEY;
+	delete process.env.MINIMAX_CODING_API_KEY;
+	delete process.env.MINIMAX_API_KEY;
+
+	assert.equal(
+		getMiniMaxSearchApiKey(),
+		"",
+		"Should return empty string when no Minimax keys are set",
+	);
+});
+
+test("resolveSearchProvider returns minimax when MINIMAX_API_KEY is set and preference is auto", async (t) => {
+	// Snapshot the variables this test mutates so they can be restored exactly.
+	const keys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY", "TAVILY_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+
+	delete process.env.MINIMAX_CODE_PLAN_KEY;
+	delete process.env.MINIMAX_CODING_API_KEY;
+	delete process.env.TAVILY_API_KEY;
+	process.env.MINIMAX_API_KEY = "test-minimax-key";
+
+	// With no Tavily key, minimax should be selected in auto mode
+	const result = resolveSearchProvider();
+	assert.equal(
+		result,
+		"minimax",
+		"Should return minimax when Minimax key exists and Tavily does not",
+	);
+});
+
+test("resolveSearchProvider prefers tavily over minimax in auto mode", async (t) => {
+	const saved = ["TAVILY_API_KEY", "MINIMAX_API_KEY"].map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	process.env.TAVILY_API_KEY = "test-tavily-key";
+	process.env.MINIMAX_API_KEY = "test-minimax-key";
+
+	// Tavily should be preferred in auto mode
+	assert.equal(
+		resolveSearchProvider(),
+		"tavily",
+		"Should prefer tavily over minimax in auto mode",
+	);
+});
+
+test("resolveSearchProvider with explicit minimax preference returns minimax when key exists", async (t) => {
+	const saved = ["MINIMAX_API_KEY", "TAVILY_API_KEY"].map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	process.env.MINIMAX_API_KEY = "test-minimax-key";
+	delete process.env.TAVILY_API_KEY;
+
+	assert.equal(
+		resolveSearchProvider("minimax"),
+		"minimax",
+		"Should return minimax when explicitly preferred and key exists",
+	);
+});
+
+test("resolveSearchProvider minimax preference falls back when key missing", async (t) => {
+	const minimaxKeys = ["MINIMAX_CODE_PLAN_KEY", "MINIMAX_CODING_API_KEY", "MINIMAX_API_KEY"];
+	const keys = [...minimaxKeys, "TAVILY_API_KEY", "BRAVE_API_KEY"];
+	const saved = keys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+	for (const name of minimaxKeys) delete process.env[name];
+	delete process.env.TAVILY_API_KEY;
+	process.env.BRAVE_API_KEY = "test-brave-key";
+
+	// With explicit minimax preference but no key under any MiniMax variable, should fall back to brave
+	assert.equal(
+		resolveSearchProvider("minimax"),
+		"brave",
+		"Should fall back to brave when minimax preference is set but key is missing",
+	);
+});
+
+test("resolveSearchProvider returns null when no keys set", async (t) => {
+	// Provider credentials cleared for this test, including every MiniMax key variable.
+	const providerKeys = [
+		"TAVILY_API_KEY",
+		"MINIMAX_CODE_PLAN_KEY",
+		"MINIMAX_CODING_API_KEY",
+		"MINIMAX_API_KEY",
+		"BRAVE_API_KEY",
+		"SERPER_API_KEY",
+		"EXA_API_KEY",
+		"OLLAMA_API_KEY",
+	];
+	const saved = providerKeys.map((name) => [name, process.env[name]] as const);
+	t.after(() => {
+		// Assigning undefined would store the string "undefined", so unset absent keys.
+		for (const [name, value] of saved) {
+			if (value === undefined) delete process.env[name];
+			else process.env[name] = value;
+		}
+	});
+
+	for (const name of providerKeys) delete process.env[name];
+
+	const result = resolveSearchProvider();
+	assert.equal(
+		result,
+		null,
+		"Should return null when no search provider keys are set",
+	);
+});
diff --git a/src/tests/parse-cli-args.test.ts b/src/tests/parse-cli-args.test.ts
index f0dc8355c..74e0545d9 100644
--- a/src/tests/parse-cli-args.test.ts
+++ b/src/tests/parse-cli-args.test.ts
@@ -70,6 +70,14 @@ describe("parseCliArgs — short flags and basic options", () => {
 		assert.equal(parse("--no-session").noSession, true);
 	});
 
+	test("--all sets allSessions", () => {
+		assert.equal(parse("--all").allSessions, true);
+	});
+
+	test("-a sets allSessions", () => {
+		assert.equal(parse("-a").allSessions, true);
+	});
+
 	test("--model captures model id", () => {
 		assert.equal(parse("--model", "claude-opus-4-6").model, "claude-opus-4-6");
 	});
diff --git a/src/web/recovery-diagnostics-service.ts b/src/web/recovery-diagnostics-service.ts
index 0b0f745c3..c17d3fb49 100644
--- a/src/web/recovery-diagnostics-service.ts
+++ b/src/web/recovery-diagnostics-service.ts
@@ -248,7 +248,7 @@ function buildCommandSuggestions(
 
 	if (phase === "planning") add("/sf", "Open SF planning");
 	if (phase === "executing" || phase === "summarizing")
-		add("/sf auto", "Resume SF auto mode");
+		add("/sf autonomous", "Resume SF autonomous mode");
 	if (activeScope)
 		add(`/sf doctor ${activeScope}`, "Inspect scoped doctor report");
 	if (activeScope)
diff --git a/tsconfig.extensions.json b/tsconfig.extensions.json
index dbc2ef636..3cfdeff4a 100644
--- a/tsconfig.extensions.json
+++ b/tsconfig.extensions.json
@@ -22,5 +22,7 @@
     }
   },
   "include": ["src/resources/extensions"],
-  "exclude": []
+  "exclude": [
+    "src/resources/extensions/vectordrive/tests/**/*.ts"
+  ]
 }
diff --git a/web/components/sf/chat-mode.tsx b/web/components/sf/chat-mode.tsx
index c5cf9c4b4..8760f715c 100644
--- a/web/components/sf/chat-mode.tsx
+++ b/web/components/sf/chat-mode.tsx
@@ -46,10 +46,10 @@ const SF_ACTIONS: SFActionDef[] = [
   // ── Top 3 (standalone buttons) ──
   { label: "Discuss",   command: "/sf discuss",   icon: MessageCircle,     description: "Start guided milestone/slice discussion",                    category: "workflow",    disabledDuringAuto: true },
   { label: "Next",      command: "/sf next",      icon: Play,              description: "Execute next task, then pause",                              category: "workflow" },
-  { label: "Auto",      command: "/sf auto",      icon: Zap,               description: "Run all queued units continuously",                         category: "workflow" },
+  { label: "Autonomous", command: "/sf autonomous", icon: Zap,             description: "Run all queued product units continuously",                 category: "workflow" },
   // ── Overflow: Workflow ──
-  { label: "Stop",      command: "/sf stop",      icon: Square,            description: "Stop auto-mode gracefully",                                  category: "workflow" },
-  { label: "Pause",     command: "/sf pause",     icon: Pause,             description: "Pause auto-mode (preserves state)",                          category: "workflow" },
+  { label: "Stop",      command: "/sf stop",      icon: Square,            description: "Stop autonomous mode gracefully",                            category: "workflow" },
+  { label: "Pause",     command: "/sf pause",     icon: Pause,             description: "Pause autonomous mode (preserves state)",                    category: "workflow" },
   // ── Overflow: Visibility ──
   { label: "Status",    command: "/sf status",    icon: BarChart3,         description: "Show progress dashboard",                                    category: "visibility" },
   { label: "Visualize", command: "/sf visualize", icon: LayoutGrid,        description: "Interactive TUI (progress, deps, metrics, timeline)",        category: "visibility" },
@@ -2047,10 +2047,10 @@ export function ChatPane({ className, onOpenAction }: ChatPaneProps) {
     const autoPaused = state.boot?.auto?.paused ?? false
 
     if (autoActive && !autoPaused) {
-      return { label: "Stop Auto", icon: Square }
+      return { label: "Stop Autonomous", icon: Square }
     }
     if (autoPaused) {
-      return { label: "Resume Auto", icon: Play }
+      return { label: "Resume Autonomous", icon: Play }
     }
     if (phase === "complete") {
       return { label: "New Milestone", icon: Milestone }
@@ -2059,7 +2059,7 @@ export function ChatPane({ className, onOpenAction }: ChatPaneProps) {
       return { label: "Plan", icon: Play }
     }
     if (phase === "executing" || phase === "summarizing") {
-      return { label: "Start Auto", icon: Zap }
+      return { label: "Start Autonomous", icon: Zap }
     }
     if (phase === "pre-planning") {
       return { label: "Initialize Project", icon: Play }
diff --git a/web/lib/workflow-actions.ts b/web/lib/workflow-actions.ts
index 3ab07c689..a882f799c 100644
--- a/web/lib/workflow-actions.ts
+++ b/web/lib/workflow-actions.ts
@@ -64,9 +64,9 @@ export function deriveWorkflowAction(input: WorkflowActionInput): WorkflowAction
   let isNewMilestone = false
 
   if (autoActive && !autoPaused) {
-    primary = { label: "Stop Auto", command: "/sf stop", variant: "destructive" }
+    primary = { label: "Stop Autonomous", command: "/sf stop", variant: "destructive" }
   } else if (autoPaused) {
-    primary = { label: "Resume Auto", command: "/sf auto", variant: "default" }
+    primary = { label: "Resume Autonomous", command: "/sf autonomous", variant: "default" }
   } else {
     // Auto is not active
     if (phase === "complete") {
@@ -76,7 +76,7 @@ export function deriveWorkflowAction(input: WorkflowActionInput): WorkflowAction
     } else if (phase === "planning") {
       primary = { label: "Plan", command: "/sf", variant: "default" }
     } else if (phase === "executing" || phase === "summarizing") {
-      primary = { label: "Start Auto", command: "/sf auto", variant: "default" }
+      primary = { label: "Start Autonomous", command: "/sf autonomous", variant: "default" }
     } else if (phase === "pre-planning" && !hasMilestones) {
       primary = { label: "Initialize Project", command: "/sf", variant: "default" }
     } else if (phase === "blocked") {
@@ -84,7 +84,7 @@ export function deriveWorkflowAction(input: WorkflowActionInput): WorkflowAction
       disabled = true
       disabledReason = "Project is blocked — check blockers"
     } else if (phase === "paused") {
-      primary = { label: "Resume", command: "/sf auto", variant: "default" }
+      primary = { label: "Resume", command: "/sf autonomous", variant: "default" }
     } else if (phase === "validating-milestone") {
       primary = { label: "Validate", command: "/sf", variant: "default" }
     } else if (phase === "completing-milestone") {