diff --git a/.gitignore b/.gitignore
index 9269d43a0..7e8e1874d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,4 +106,17 @@ repowise.db
 .sf/scaffold-manifest.json
 .sf/interactive.lock
 .sf/interactive.lock.d/
+# SQLite -wal/-shm sidecars (write-ahead log + shared-memory index) are transient; only the .db is durable. Already-tracked copies also need `git rm --cached`.
+.sf/metrics.db-wal
+.sf/metrics.db-shm
+.sf/sf.db-wal
+.sf/sf.db-shm
+# Per-dispatch trace files accumulate one-per-request and are runtime-only.
+# Consumers (sf-db-gates, adaptive verification policy) read by mtime window
+# (24h–30d) — on-disk retention is needed, but git tracking is not.
+.sf/traces/pre-dispatch:*.jsonl
+.sf/traces/finalize:*.jsonl
+.sf/traces/guard:*.jsonl
+# `latest` is a symlink retargeted on every dispatch — pure git noise.
+.sf/traces/latest
 test_output.log
diff --git a/.sf/backups/db/maintenance.json b/.sf/backups/db/maintenance.json
index 7c66cbff5..054f1799b 100644
--- a/.sf/backups/db/maintenance.json
+++ b/.sf/backups/db/maintenance.json
@@ -1,3 +1,3 @@
 {
-  "lastFullVacuumAt": "2026-05-12T13:59:07.765Z"
+  "lastFullVacuumAt": "2026-05-12T20:58:28.744Z"
 }
diff --git a/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z b/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z
deleted file mode 100644
index 8ace58971..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z b/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z
deleted file mode 100644
index f18e61f46..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z b/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z
deleted file mode 100644
index 43ca86cfe..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-10T05-57-58-732Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-12T20-58-28-491Z b/.sf/backups/db/sf.db.2026-05-12T20-58-28-491Z
new file mode 100644
index 000000000..91e5bc7c8
Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-12T20-58-28-491Z differ
diff --git a/.sf/backups/db/sf.db.2026-05-12T21-15-56-990Z b/.sf/backups/db/sf.db.2026-05-12T21-15-56-990Z
new file mode 100644
index 000000000..b8c23051f
Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-12T21-15-56-990Z differ
diff --git a/.sf/backups/db/sf.db.2026-05-12T23-50-31-488Z b/.sf/backups/db/sf.db.2026-05-12T23-50-31-488Z
new file mode 100644
index 000000000..5f65b2570
Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-12T23-50-31-488Z differ
diff --git a/.sf/graphs/graph.json b/.sf/graphs/graph.json
index 34f04f93b..5a422b3e7 100644
--- a/.sf/graphs/graph.json
+++ b/.sf/graphs/graph.json
@@ -60,5 +60,5 @@
       "confidence": "EXTRACTED"
     }
   ],
-  "builtAt": "2026-05-12T15:26:43.252Z"
+  "builtAt": "2026-05-12T23:53:23.408Z"
 }
\ No newline at end of file
diff --git a/.sf/metrics.db b/.sf/metrics.db
index ef8afc4bb..1c383bba1 100644
Binary files a/.sf/metrics.db and b/.sf/metrics.db differ
diff --git a/.sf/metrics.db-shm b/.sf/metrics.db-shm
index 20c56f6c4..0cd251d62 100644
Binary files a/.sf/metrics.db-shm and b/.sf/metrics.db-shm differ
diff --git a/.sf/metrics.db-wal b/.sf/metrics.db-wal
index 0a32c4de8..fe258b65e 100644
Binary files a/.sf/metrics.db-wal and b/.sf/metrics.db-wal differ
diff --git a/.sf/model-catalog/mistral.json b/.sf/model-catalog/mistral.json
index 11aef6ccf..0ff5ab62c 100644
--- a/.sf/model-catalog/mistral.json
+++ b/.sf/model-catalog/mistral.json
@@ -1 +1 @@
-{"fetchedAt":"2026-05-12T14:54:31.656Z","modelIds":["mistral-medium-2505","mistral-medium-2508","mistral-medium-latest","mistral-medium","mistral-vibe-cli-with-tools","open-mistral-nemo","open-mistral-nemo-2407","mistral-tiny-2407","mistral-tiny-latest","codestral-2508","codestral-latest","devstral-2512","devstral-medium-latest","devstral-latest","mistral-small-2603","mistral-small-latest","mistral-vibe-cli-fast","magistral-small-latest","magistral-medium-2509","magistral-medium-latest","labs-leanstral-2603","mistral-large-2512","mistral-large-latest","mistral-large-2512","mistral-large-latest","ministral-3b-2512","ministral-3b-latest","ministral-8b-2512","ministral-8b-latest","ministral-14b-2512","ministral-14b-latest","mistral-medium-3-5","mistral-medium-3.5","mistral-medium-3","mistral-medium-2604","mistral-medium-c21211-r0-75","mistral-vibe-cli-latest","mistral-large-2411","pixtral-large-2411","pixtral-large-latest","mistral-large-pixtral-2411","devstral-small-2507","devstral-medium-2507","magistral-small-2509","mistral-small-2506"]}
\ No newline at end of file
+{"fetchedAt":"2026-05-12T21:25:20.919Z","modelIds":["mistral-medium-2505","mistral-medium-2508","mistral-medium-latest","mistral-medium","mistral-vibe-cli-with-tools","open-mistral-nemo","open-mistral-nemo-2407","mistral-tiny-2407","mistral-tiny-latest","codestral-2508","codestral-latest","devstral-2512","devstral-medium-latest","devstral-latest","mistral-small-2603","mistral-small-latest","mistral-vibe-cli-fast","magistral-small-latest","magistral-medium-2509","magistral-medium-latest","labs-leanstral-2603","mistral-large-2512","mistral-large-latest","mistral-large-2512","mistral-large-latest","ministral-3b-2512","ministral-3b-latest","ministral-8b-2512","ministral-8b-latest","ministral-14b-2512","ministral-14b-latest","mistral-medium-3-5","mistral-medium-3.5","mistral-medium-3","mistral-medium-2604","mistral-medium-c21211-r0-75","mistral-vibe-cli-latest","mistral-large-2411","pixtral-large-2411","pixtral-large-latest","mistral-large-pixtral-2411","devstral-small-2507","devstral-medium-2507","magistral-small-2509","mistral-small-2506"]}
\ No newline at end of file
diff --git a/.sf/model-catalog/openrouter.json b/.sf/model-catalog/openrouter.json
index a4361cf81..d4e7045e4 100644
--- a/.sf/model-catalog/openrouter.json
+++ b/.sf/model-catalog/openrouter.json
@@ -1 +1 @@
-{"fetchedAt":"2026-05-12T14:47:40.438Z","modelIds":["inclusionai/ring-2.6-1t:free","google/gemini-3.1-flash-lite","baidu/cobuddy:free","openai/gpt-chat-latest","x-ai/grok-4.3","ibm-granite/granite-4.1-8b","mistralai/mistral-medium-3-5","openrouter/owl-alpha","nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free","poolside/laguna-xs.2:free","poolside/laguna-m.1:free","~anthropic/claude-haiku-latest","~openai/gpt-mini-latest","~google/gemini-pro-latest","~moonshotai/kimi-latest","~google/gemini-flash-latest","~anthropic/claude-sonnet-latest","~openai/gpt-latest","qwen/qwen3.5-plus-20260420","qwen/qwen3.6-flash","qwen/qwen3.6-35b-a3b","qwen/qwen3.6-max-preview","qwen/qwen3.6-27b","openai/gpt-5.5-pro","openai/gpt-5.5","deepseek/deepseek-v4-pro","deepseek/deepseek-v4-flash","inclusionai/ling-2.6-1t","tencent/hy3-preview","xiaomi/mimo-v2.5-pro","xiaomi/mimo-v2.5","openai/gpt-5.4-image-2","inclusionai/ling-2.6-flash","~anthropic/claude-opus-latest","openrouter/pareto-code","baidu/qianfan-ocr-fast:free","moonshotai/kimi-k2.6","anthropic/claude-opus-4.7","anthropic/claude-opus-4.6-fast","z-ai/glm-5.1","google/gemma-4-26b-a4b-it:free","google/gemma-4-26b-a4b-it","google/gemma-4-31b-it:free","google/gemma-4-31b-it","qwen/qwen3.6-plus","z-ai/glm-5v-turbo","arcee-ai/trinity-large-thinking:free","arcee-ai/trinity-large-thinking","x-ai/grok-4.20-multi-agent","x-ai/grok-4.20","google/lyria-3-pro-preview","google/lyria-3-clip-preview","kwaipilot/kat-coder-pro-v2","rekaai/reka-edge","xiaomi/mimo-v2-omni","xiaomi/mimo-v2-pro","minimax/minimax-m2.7","openai/gpt-5.4-nano","openai/gpt-5.4-mini","mistralai/mistral-small-2603","z-ai/glm-5-turbo","nvidia/nemotron-3-super-120b-a12b:free","nvidia/nemotron-3-super-120b-a12b","bytedance-seed/seed-2.0-lite","qwen/qwen3.5-9b","openai/gpt-5.4-pro","openai/gpt-5.4","inception/mercury-2","openai/gpt-5.3-chat","google/gemini-3.1-flash-lite-preview","bytedance-seed/seed-2.0-mini","google/gemini-3.1-flash-image-preview","qwen/qwen3.5-35b-a3b","qwen/qwen
3.5-27b","qwen/qwen3.5-122b-a10b","qwen/qwen3.5-flash-02-23","liquid/lfm-2-24b-a2b","google/gemini-3.1-pro-preview-customtools","openai/gpt-5.3-codex","aion-labs/aion-2.0","google/gemini-3.1-pro-preview","anthropic/claude-sonnet-4.6","qwen/qwen3.5-plus-02-15","qwen/qwen3.5-397b-a17b","minimax/minimax-m2.5:free","minimax/minimax-m2.5","z-ai/glm-5","qwen/qwen3-max-thinking","anthropic/claude-opus-4.6","qwen/qwen3-coder-next","openrouter/free","stepfun/step-3.5-flash","arcee-ai/trinity-large-preview","moonshotai/kimi-k2.5","upstage/solar-pro-3","minimax/minimax-m2-her","writer/palmyra-x5","liquid/lfm-2.5-1.2b-thinking:free","liquid/lfm-2.5-1.2b-instruct:free","openai/gpt-audio","openai/gpt-audio-mini","z-ai/glm-4.7-flash","openai/gpt-5.2-codex","bytedance-seed/seed-1.6-flash","bytedance-seed/seed-1.6","minimax/minimax-m2.1","z-ai/glm-4.7","google/gemini-3-flash-preview","xiaomi/mimo-v2-flash","nvidia/nemotron-3-nano-30b-a3b:free","nvidia/nemotron-3-nano-30b-a3b","openai/gpt-5.2-chat","openai/gpt-5.2-pro","openai/gpt-5.2","mistralai/devstral-2512","relace/relace-search","z-ai/glm-4.6v","nex-agi/deepseek-v3.1-nex-n1","essentialai/rnj-1-instruct","openrouter/bodybuilder","openai/gpt-5.1-codex-max","amazon/nova-2-lite-v1","mistralai/ministral-14b-2512","mistralai/ministral-8b-2512","mistralai/ministral-3b-2512","mistralai/mistral-large-2512","arcee-ai/trinity-mini","deepseek/deepseek-v3.2-speciale","deepseek/deepseek-v3.2","prime-intellect/intellect-3","anthropic/claude-opus-4.5","allenai/olmo-3-32b-think","google/gemini-3-pro-image-preview","x-ai/grok-4.1-fast","deepcogito/cogito-v2.1-671b","openai/gpt-5.1","openai/gpt-5.1-chat","openai/gpt-5.1-codex","openai/gpt-5.1-codex-mini","moonshotai/kimi-k2-thinking","amazon/nova-premier-v1","perplexity/sonar-pro-search","mistralai/voxtral-small-24b-2507","openai/gpt-oss-safeguard-20b","nvidia/nemotron-nano-12b-v2-vl:free","minimax/minimax-m2","qwen/qwen3-vl-32b-instruct","ibm-granite/granite-4.0-h-micro","microsoft/phi-4-mini-ins
truct","openai/gpt-5-image-mini","anthropic/claude-haiku-4.5","qwen/qwen3-vl-8b-thinking","qwen/qwen3-vl-8b-instruct","openai/gpt-5-image","openai/o3-deep-research","openai/o4-mini-deep-research","nvidia/llama-3.3-nemotron-super-49b-v1.5","baidu/ernie-4.5-21b-a3b-thinking","google/gemini-2.5-flash-image","qwen/qwen3-vl-30b-a3b-thinking","qwen/qwen3-vl-30b-a3b-instruct","openai/gpt-5-pro","z-ai/glm-4.6","anthropic/claude-sonnet-4.5","deepseek/deepseek-v3.2-exp","thedrummer/cydonia-24b-v4.1","relace/relace-apply-3","google/gemini-2.5-flash-lite-preview-09-2025","qwen/qwen3-vl-235b-a22b-thinking","qwen/qwen3-vl-235b-a22b-instruct","qwen/qwen3-max","qwen/qwen3-coder-plus","openai/gpt-5-codex","deepseek/deepseek-v3.1-terminus","x-ai/grok-4-fast","alibaba/tongyi-deepresearch-30b-a3b","qwen/qwen3-coder-flash","qwen/qwen3-next-80b-a3b-thinking","qwen/qwen3-next-80b-a3b-instruct:free","qwen/qwen3-next-80b-a3b-instruct","qwen/qwen-plus-2025-07-28:thinking","qwen/qwen-plus-2025-07-28","nvidia/nemotron-nano-9b-v2:free","nvidia/nemotron-nano-9b-v2","moonshotai/kimi-k2-0905","qwen/qwen3-30b-a3b-thinking-2507","x-ai/grok-code-fast-1","nousresearch/hermes-4-70b","nousresearch/hermes-4-405b","deepseek/deepseek-chat-v3.1","openai/gpt-4o-audio-preview","mistralai/mistral-medium-3.1","baidu/ernie-4.5-21b-a3b","baidu/ernie-4.5-vl-28b-a3b","z-ai/glm-4.5v","ai21/jamba-large-1.7","openai/gpt-5-chat","openai/gpt-5","openai/gpt-5-mini","openai/gpt-5-nano","openai/gpt-oss-120b:free","openai/gpt-oss-120b","openai/gpt-oss-20b:free","openai/gpt-oss-20b","anthropic/claude-opus-4.1","mistralai/codestral-2508","qwen/qwen3-coder-30b-a3b-instruct","qwen/qwen3-30b-a3b-instruct-2507","z-ai/glm-4.5","z-ai/glm-4.5-air:free","z-ai/glm-4.5-air","qwen/qwen3-235b-a22b-thinking-2507","z-ai/glm-4-32b","qwen/qwen3-coder:free","qwen/qwen3-coder","bytedance/ui-tars-1.5-7b","google/gemini-2.5-flash-lite","qwen/qwen3-235b-a22b-2507","switchpoint/router","moonshotai/kimi-k2","mistralai/devstral-medium","mistralai/de
vstral-small","cognitivecomputations/dolphin-mistral-24b-venice-edition:free","x-ai/grok-4","tencent/hunyuan-a13b-instruct","morph/morph-v3-large","morph/morph-v3-fast","baidu/ernie-4.5-vl-424b-a47b","baidu/ernie-4.5-300b-a47b","mistralai/mistral-small-3.2-24b-instruct","minimax/minimax-m1","google/gemini-2.5-flash","google/gemini-2.5-pro","openai/o3-pro","x-ai/grok-3-mini","x-ai/grok-3","google/gemini-2.5-pro-preview","deepseek/deepseek-r1-0528","anthropic/claude-opus-4","anthropic/claude-sonnet-4","google/gemma-3n-e4b-it","mistralai/mistral-medium-3","google/gemini-2.5-pro-preview-05-06","arcee-ai/spotlight","arcee-ai/maestro-reasoning","arcee-ai/virtuoso-large","arcee-ai/coder-large","meta-llama/llama-guard-4-12b","qwen/qwen3-30b-a3b","qwen/qwen3-8b","qwen/qwen3-14b","qwen/qwen3-32b","qwen/qwen3-235b-a22b","openai/o4-mini-high","openai/o3","openai/o4-mini","openai/gpt-4.1","openai/gpt-4.1-mini","openai/gpt-4.1-nano","alfredpros/codellama-7b-instruct-solidity","x-ai/grok-3-mini-beta","x-ai/grok-3-beta","meta-llama/llama-4-maverick","meta-llama/llama-4-scout","deepseek/deepseek-chat-v3-0324","openai/o1-pro","mistralai/mistral-small-3.1-24b-instruct","google/gemma-3-4b-it","google/gemma-3-12b-it","cohere/command-a","openai/gpt-4o-mini-search-preview","openai/gpt-4o-search-preview","rekaai/reka-flash-3","google/gemma-3-27b-it","thedrummer/skyfall-36b-v2","perplexity/sonar-reasoning-pro","perplexity/sonar-pro","perplexity/sonar-deep-research","google/gemini-2.0-flash-lite-001","mistralai/mistral-saba","meta-llama/llama-guard-3-8b","openai/o3-mini-high","google/gemini-2.0-flash-001","qwen/qwen-vl-plus","aion-labs/aion-1.0","aion-labs/aion-1.0-mini","aion-labs/aion-rp-llama-3.1-8b","qwen/qwen-vl-max","qwen/qwen-turbo","qwen/qwen2.5-vl-72b-instruct","qwen/qwen-plus","qwen/qwen-max","openai/o3-mini","mistralai/mistral-small-24b-instruct-2501","deepseek/deepseek-r1-distill-qwen-32b","perplexity/sonar","deepseek/deepseek-r1-distill-llama-70b","deepseek/deepseek-r1","minimax
/minimax-01","microsoft/phi-4","sao10k/l3.1-70b-hanami-x1","deepseek/deepseek-chat","sao10k/l3.3-euryale-70b","openai/o1","cohere/command-r7b-12-2024","meta-llama/llama-3.3-70b-instruct:free","meta-llama/llama-3.3-70b-instruct","amazon/nova-lite-v1","amazon/nova-micro-v1","amazon/nova-pro-v1","openai/gpt-4o-2024-11-20","mistralai/mistral-large-2411","mistralai/mistral-large-2407","mistralai/pixtral-large-2411","qwen/qwen-2.5-coder-32b-instruct","thedrummer/unslopnemo-12b","anthropic/claude-3.5-haiku","anthracite-org/magnum-v4-72b","qwen/qwen-2.5-7b-instruct","inflection/inflection-3-pi","inflection/inflection-3-productivity","thedrummer/rocinante-12b","meta-llama/llama-3.2-3b-instruct:free","meta-llama/llama-3.2-3b-instruct","meta-llama/llama-3.2-1b-instruct","meta-llama/llama-3.2-11b-vision-instruct","qwen/qwen-2.5-72b-instruct","cohere/command-r-08-2024","cohere/command-r-plus-08-2024","sao10k/l3.1-euryale-70b","nousresearch/hermes-3-llama-3.1-70b","nousresearch/hermes-3-llama-3.1-405b:free","nousresearch/hermes-3-llama-3.1-405b","sao10k/l3-lunaris-8b","openai/gpt-4o-2024-08-06","meta-llama/llama-3.1-70b-instruct","meta-llama/llama-3.1-8b-instruct","mistralai/mistral-nemo","openai/gpt-4o-mini-2024-07-18","openai/gpt-4o-mini","google/gemma-2-27b-it","sao10k/l3-euryale-70b","nousresearch/hermes-2-pro-llama-3-8b","openai/gpt-4o-2024-05-13","openai/gpt-4o","meta-llama/llama-3-8b-instruct","meta-llama/llama-3-70b-instruct","mistralai/mixtral-8x22b-instruct","microsoft/wizardlm-2-8x22b","openai/gpt-4-turbo","anthropic/claude-3-haiku","mistralai/mistral-large","openai/gpt-3.5-turbo-0613","openai/gpt-4-turbo-preview","openrouter/auto","openai/gpt-4-1106-preview","openai/gpt-3.5-turbo-instruct","mistralai/mistral-7b-instruct-v0.1","openai/gpt-3.5-turbo-16k","mancer/weaver","undi95/remm-slerp-l2-13b","gryphe/mythomax-l2-13b","openai/gpt-4","openai/gpt-3.5-turbo","openai/gpt-4-0314"]}
\ No newline at end of file
+{"fetchedAt":"2026-05-12T21:25:21.288Z","modelIds":["anthropic/claude-opus-4.7-fast","perceptron/perceptron-mk1","inclusionai/ring-2.6-1t:free","google/gemini-3.1-flash-lite","baidu/cobuddy:free","openai/gpt-chat-latest","x-ai/grok-4.3","ibm-granite/granite-4.1-8b","mistralai/mistral-medium-3-5","openrouter/owl-alpha","nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free","poolside/laguna-xs.2:free","poolside/laguna-m.1:free","~anthropic/claude-haiku-latest","~openai/gpt-mini-latest","~google/gemini-pro-latest","~moonshotai/kimi-latest","~google/gemini-flash-latest","~anthropic/claude-sonnet-latest","~openai/gpt-latest","qwen/qwen3.5-plus-20260420","qwen/qwen3.6-flash","qwen/qwen3.6-35b-a3b","qwen/qwen3.6-max-preview","qwen/qwen3.6-27b","openai/gpt-5.5-pro","openai/gpt-5.5","deepseek/deepseek-v4-pro","deepseek/deepseek-v4-flash","inclusionai/ling-2.6-1t","tencent/hy3-preview","xiaomi/mimo-v2.5-pro","xiaomi/mimo-v2.5","openai/gpt-5.4-image-2","inclusionai/ling-2.6-flash","~anthropic/claude-opus-latest","openrouter/pareto-code","baidu/qianfan-ocr-fast:free","moonshotai/kimi-k2.6","anthropic/claude-opus-4.7","anthropic/claude-opus-4.6-fast","z-ai/glm-5.1","google/gemma-4-26b-a4b-it:free","google/gemma-4-26b-a4b-it","google/gemma-4-31b-it:free","google/gemma-4-31b-it","qwen/qwen3.6-plus","z-ai/glm-5v-turbo","arcee-ai/trinity-large-thinking:free","arcee-ai/trinity-large-thinking","x-ai/grok-4.20-multi-agent","x-ai/grok-4.20","google/lyria-3-pro-preview","google/lyria-3-clip-preview","kwaipilot/kat-coder-pro-v2","rekaai/reka-edge","xiaomi/mimo-v2-omni","xiaomi/mimo-v2-pro","minimax/minimax-m2.7","openai/gpt-5.4-nano","openai/gpt-5.4-mini","mistralai/mistral-small-2603","z-ai/glm-5-turbo","nvidia/nemotron-3-super-120b-a12b:free","nvidia/nemotron-3-super-120b-a12b","bytedance-seed/seed-2.0-lite","qwen/qwen3.5-9b","openai/gpt-5.4-pro","openai/gpt-5.4","inception/mercury-2","openai/gpt-5.3-chat","google/gemini-3.1-flash-lite-preview","bytedance-seed/seed-2.0-mini","google/gemi
ni-3.1-flash-image-preview","qwen/qwen3.5-35b-a3b","qwen/qwen3.5-27b","qwen/qwen3.5-122b-a10b","qwen/qwen3.5-flash-02-23","liquid/lfm-2-24b-a2b","google/gemini-3.1-pro-preview-customtools","openai/gpt-5.3-codex","aion-labs/aion-2.0","google/gemini-3.1-pro-preview","anthropic/claude-sonnet-4.6","qwen/qwen3.5-plus-02-15","qwen/qwen3.5-397b-a17b","minimax/minimax-m2.5:free","minimax/minimax-m2.5","z-ai/glm-5","qwen/qwen3-max-thinking","anthropic/claude-opus-4.6","qwen/qwen3-coder-next","openrouter/free","stepfun/step-3.5-flash","arcee-ai/trinity-large-preview","moonshotai/kimi-k2.5","upstage/solar-pro-3","minimax/minimax-m2-her","writer/palmyra-x5","liquid/lfm-2.5-1.2b-thinking:free","liquid/lfm-2.5-1.2b-instruct:free","openai/gpt-audio","openai/gpt-audio-mini","z-ai/glm-4.7-flash","openai/gpt-5.2-codex","bytedance-seed/seed-1.6-flash","bytedance-seed/seed-1.6","minimax/minimax-m2.1","z-ai/glm-4.7","google/gemini-3-flash-preview","xiaomi/mimo-v2-flash","nvidia/nemotron-3-nano-30b-a3b:free","nvidia/nemotron-3-nano-30b-a3b","openai/gpt-5.2-chat","openai/gpt-5.2-pro","openai/gpt-5.2","mistralai/devstral-2512","relace/relace-search","z-ai/glm-4.6v","nex-agi/deepseek-v3.1-nex-n1","essentialai/rnj-1-instruct","openrouter/bodybuilder","openai/gpt-5.1-codex-max","amazon/nova-2-lite-v1","mistralai/ministral-14b-2512","mistralai/ministral-8b-2512","mistralai/ministral-3b-2512","mistralai/mistral-large-2512","arcee-ai/trinity-mini","deepseek/deepseek-v3.2-speciale","deepseek/deepseek-v3.2","prime-intellect/intellect-3","anthropic/claude-opus-4.5","allenai/olmo-3-32b-think","google/gemini-3-pro-image-preview","x-ai/grok-4.1-fast","deepcogito/cogito-v2.1-671b","openai/gpt-5.1","openai/gpt-5.1-chat","openai/gpt-5.1-codex","openai/gpt-5.1-codex-mini","moonshotai/kimi-k2-thinking","amazon/nova-premier-v1","perplexity/sonar-pro-search","mistralai/voxtral-small-24b-2507","openai/gpt-oss-safeguard-20b","nvidia/nemotron-nano-12b-v2-vl:free","minimax/minimax-m2","qwen/qwen3-vl-32b-instruct
","ibm-granite/granite-4.0-h-micro","microsoft/phi-4-mini-instruct","openai/gpt-5-image-mini","anthropic/claude-haiku-4.5","qwen/qwen3-vl-8b-thinking","qwen/qwen3-vl-8b-instruct","openai/gpt-5-image","openai/o3-deep-research","openai/o4-mini-deep-research","nvidia/llama-3.3-nemotron-super-49b-v1.5","baidu/ernie-4.5-21b-a3b-thinking","google/gemini-2.5-flash-image","qwen/qwen3-vl-30b-a3b-thinking","qwen/qwen3-vl-30b-a3b-instruct","openai/gpt-5-pro","z-ai/glm-4.6","anthropic/claude-sonnet-4.5","deepseek/deepseek-v3.2-exp","thedrummer/cydonia-24b-v4.1","relace/relace-apply-3","google/gemini-2.5-flash-lite-preview-09-2025","qwen/qwen3-vl-235b-a22b-thinking","qwen/qwen3-vl-235b-a22b-instruct","qwen/qwen3-max","qwen/qwen3-coder-plus","openai/gpt-5-codex","deepseek/deepseek-v3.1-terminus","x-ai/grok-4-fast","alibaba/tongyi-deepresearch-30b-a3b","qwen/qwen3-coder-flash","qwen/qwen3-next-80b-a3b-thinking","qwen/qwen3-next-80b-a3b-instruct:free","qwen/qwen3-next-80b-a3b-instruct","qwen/qwen-plus-2025-07-28:thinking","qwen/qwen-plus-2025-07-28","nvidia/nemotron-nano-9b-v2:free","nvidia/nemotron-nano-9b-v2","moonshotai/kimi-k2-0905","qwen/qwen3-30b-a3b-thinking-2507","x-ai/grok-code-fast-1","nousresearch/hermes-4-70b","nousresearch/hermes-4-405b","deepseek/deepseek-chat-v3.1","openai/gpt-4o-audio-preview","mistralai/mistral-medium-3.1","baidu/ernie-4.5-21b-a3b","baidu/ernie-4.5-vl-28b-a3b","z-ai/glm-4.5v","ai21/jamba-large-1.7","openai/gpt-5-chat","openai/gpt-5","openai/gpt-5-mini","openai/gpt-5-nano","openai/gpt-oss-120b:free","openai/gpt-oss-120b","openai/gpt-oss-20b:free","openai/gpt-oss-20b","anthropic/claude-opus-4.1","mistralai/codestral-2508","qwen/qwen3-coder-30b-a3b-instruct","qwen/qwen3-30b-a3b-instruct-2507","z-ai/glm-4.5","z-ai/glm-4.5-air:free","z-ai/glm-4.5-air","qwen/qwen3-235b-a22b-thinking-2507","z-ai/glm-4-32b","qwen/qwen3-coder:free","qwen/qwen3-coder","bytedance/ui-tars-1.5-7b","google/gemini-2.5-flash-lite","qwen/qwen3-235b-a22b-2507","switchpoint/router","
moonshotai/kimi-k2","mistralai/devstral-medium","mistralai/devstral-small","cognitivecomputations/dolphin-mistral-24b-venice-edition:free","x-ai/grok-4","tencent/hunyuan-a13b-instruct","morph/morph-v3-large","morph/morph-v3-fast","baidu/ernie-4.5-vl-424b-a47b","baidu/ernie-4.5-300b-a47b","mistralai/mistral-small-3.2-24b-instruct","minimax/minimax-m1","google/gemini-2.5-flash","google/gemini-2.5-pro","openai/o3-pro","x-ai/grok-3-mini","x-ai/grok-3","google/gemini-2.5-pro-preview","deepseek/deepseek-r1-0528","anthropic/claude-opus-4","anthropic/claude-sonnet-4","google/gemma-3n-e4b-it","mistralai/mistral-medium-3","google/gemini-2.5-pro-preview-05-06","arcee-ai/spotlight","arcee-ai/maestro-reasoning","arcee-ai/virtuoso-large","arcee-ai/coder-large","meta-llama/llama-guard-4-12b","qwen/qwen3-30b-a3b","qwen/qwen3-8b","qwen/qwen3-14b","qwen/qwen3-32b","qwen/qwen3-235b-a22b","openai/o4-mini-high","openai/o3","openai/o4-mini","openai/gpt-4.1","openai/gpt-4.1-mini","openai/gpt-4.1-nano","alfredpros/codellama-7b-instruct-solidity","x-ai/grok-3-mini-beta","x-ai/grok-3-beta","meta-llama/llama-4-maverick","meta-llama/llama-4-scout","deepseek/deepseek-chat-v3-0324","openai/o1-pro","mistralai/mistral-small-3.1-24b-instruct","google/gemma-3-4b-it","google/gemma-3-12b-it","cohere/command-a","openai/gpt-4o-mini-search-preview","openai/gpt-4o-search-preview","rekaai/reka-flash-3","google/gemma-3-27b-it","thedrummer/skyfall-36b-v2","perplexity/sonar-reasoning-pro","perplexity/sonar-pro","perplexity/sonar-deep-research","google/gemini-2.0-flash-lite-001","mistralai/mistral-saba","meta-llama/llama-guard-3-8b","openai/o3-mini-high","google/gemini-2.0-flash-001","qwen/qwen-vl-plus","aion-labs/aion-1.0","aion-labs/aion-1.0-mini","aion-labs/aion-rp-llama-3.1-8b","qwen/qwen-vl-max","qwen/qwen-turbo","qwen/qwen2.5-vl-72b-instruct","qwen/qwen-plus","qwen/qwen-max","openai/o3-mini","mistralai/mistral-small-24b-instruct-2501","deepseek/deepseek-r1-distill-qwen-32b","perplexity/sonar","deepseek/d
eepseek-r1-distill-llama-70b","deepseek/deepseek-r1","minimax/minimax-01","microsoft/phi-4","sao10k/l3.1-70b-hanami-x1","deepseek/deepseek-chat","sao10k/l3.3-euryale-70b","openai/o1","cohere/command-r7b-12-2024","meta-llama/llama-3.3-70b-instruct:free","meta-llama/llama-3.3-70b-instruct","amazon/nova-lite-v1","amazon/nova-micro-v1","amazon/nova-pro-v1","openai/gpt-4o-2024-11-20","mistralai/mistral-large-2411","mistralai/mistral-large-2407","mistralai/pixtral-large-2411","qwen/qwen-2.5-coder-32b-instruct","thedrummer/unslopnemo-12b","anthropic/claude-3.5-haiku","anthracite-org/magnum-v4-72b","qwen/qwen-2.5-7b-instruct","inflection/inflection-3-pi","inflection/inflection-3-productivity","thedrummer/rocinante-12b","meta-llama/llama-3.2-1b-instruct","meta-llama/llama-3.2-3b-instruct:free","meta-llama/llama-3.2-3b-instruct","meta-llama/llama-3.2-11b-vision-instruct","qwen/qwen-2.5-72b-instruct","cohere/command-r-plus-08-2024","cohere/command-r-08-2024","sao10k/l3.1-euryale-70b","nousresearch/hermes-3-llama-3.1-70b","nousresearch/hermes-3-llama-3.1-405b:free","nousresearch/hermes-3-llama-3.1-405b","sao10k/l3-lunaris-8b","openai/gpt-4o-2024-08-06","meta-llama/llama-3.1-8b-instruct","meta-llama/llama-3.1-70b-instruct","mistralai/mistral-nemo","openai/gpt-4o-mini","openai/gpt-4o-mini-2024-07-18","google/gemma-2-27b-it","sao10k/l3-euryale-70b","nousresearch/hermes-2-pro-llama-3-8b","openai/gpt-4o","openai/gpt-4o-2024-05-13","meta-llama/llama-3-8b-instruct","meta-llama/llama-3-70b-instruct","mistralai/mixtral-8x22b-instruct","microsoft/wizardlm-2-8x22b","openai/gpt-4-turbo","anthropic/claude-3-haiku","mistralai/mistral-large","openai/gpt-3.5-turbo-0613","openai/gpt-4-turbo-preview","openrouter/auto","openai/gpt-4-1106-preview","mistralai/mistral-7b-instruct-v0.1","openai/gpt-3.5-turbo-instruct","openai/gpt-3.5-turbo-16k","mancer/weaver","undi95/remm-slerp-l2-13b","gryphe/mythomax-l2-13b","openai/gpt-4","openai/gpt-4-0314","openai/gpt-3.5-turbo"]}
\ No newline at end of file
diff --git a/.sf/model-performance.json b/.sf/model-performance.json
index 686ca334c..8c5225755 100644
--- a/.sf/model-performance.json
+++ b/.sf/model-performance.json
@@ -109,26 +109,26 @@
       "total": 1
     },
     "kimi-coding/kimi-k2.6": {
-      "successes": 1,
+      "successes": 2,
       "failures": 0,
       "timeouts": 0,
-      "totalTokens": 1821480,
-      "totalCost": 0,
-      "lastUsed": "2026-05-12T20:57:45.179Z",
+      "totalTokens": 1892068,
+      "totalCost": 0.030715552,
+      "lastUsed": "2026-05-12T23:58:57.132Z",
       "successRate": 1,
-      "total": 1
+      "total": 2
     }
   },
   "complete-slice": {
     "kimi-coding/kimi-k2.6": {
-      "successes": 1,
+      "successes": 2,
       "failures": 0,
       "timeouts": 0,
-      "totalTokens": 719526,
-      "totalCost": 0.026709,
-      "lastUsed": "2026-05-12T15:26:57.708Z",
+      "totalTokens": 814376,
+      "totalCost": 0.053080319800000005,
+      "lastUsed": "2026-05-12T23:54:01.143Z",
       "successRate": 1,
-      "total": 1
+      "total": 2
     }
   }
 }
\ No newline at end of file
diff --git a/.sf/safety/evidence-M001-6377a4-S04-T01.json b/.sf/safety/evidence-M001-6377a4-S04-T01.json
deleted file mode 100644
index 53f1c2f0d..000000000
--- a/.sf/safety/evidence-M001-6377a4-S04-T01.json
+++ /dev/null
@@ -1,16 +0,0 @@
-[
-  {
-    "kind": "write",
-    "toolCallId": "write_1778619443353_32",
-    "path": ".sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md",
-    "timestamp": 1778619443535
-  },
-  {
-    "kind": "bash",
-    "toolCallId": "bash_1778619447339_33",
-    "command": "test -f .sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md && grep -q \"status\" .sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md && echo \"Matrix exists and contains status command info.\"",
-    "exitCode": 0,
-    "outputSnippet": "Matrix exists and contains status command info.\n",
-    "timestamp": 1778619447544
-  }
-]
diff --git a/.sf/safety/evidence-M001-6377a4-S04-T02.json b/.sf/safety/evidence-M001-6377a4-S04-T02.json
new file mode 100644
index 000000000..fe51488c7
--- /dev/null
+++ b/.sf/safety/evidence-M001-6377a4-S04-T02.json
@@ -0,0 +1 @@
+[]
diff --git a/.sf/safety/evidence-M001-6377a4-S05-T01.json b/.sf/safety/evidence-M001-6377a4-S05-T01.json
new file mode 100644
index 000000000..cb4efcb9d
--- /dev/null
+++ b/.sf/safety/evidence-M001-6377a4-S05-T01.json
@@ -0,0 +1,16 @@
+[
+  {
+    "kind": "write",
+    "toolCallId": "DgPnxQEen",
+    "path": "docs/dev/sf-ace-patterns.md.draft",
+    "timestamp": 1778630297060
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "8FjDDZSlA",
+    "command": "test -f docs/dev/sf-ace-patterns.md.draft && grep -c \"SF Implementation\" docs/dev/sf-ace-patterns.md.draft | grep -q \"6\"",
+    "exitCode": 0,
+    "outputSnippet": "(no output)",
+    "timestamp": 1778630298077
+  }
+]
diff --git a/.sf/slice-routing.json b/.sf/slice-routing.json
new file mode 100644
index 000000000..885c8fcd8
--- /dev/null
+++ b/.sf/slice-routing.json
@@ -0,0 +1,16 @@
+{
+  "M001-6377a4/S04": {
+    "provider": "minimax",
+    "id": "MiniMax-M2.1",
+    "ts": "2026-05-12T23:54:01.079Z",
+    "lastUnitType": "complete-slice",
+    "lastUnitId": "M001-6377a4/S04"
+  },
+  "M001-6377a4/S05": {
+    "provider": "mistral",
+    "id": "codestral-latest",
+    "ts": "2026-05-12T23:58:57.088Z",
+    "lastUnitType": "execute-task",
+    "lastUnitId": "M001-6377a4/S05/T01"
+  }
+}
\ No newline at end of file
diff --git a/.sf/traces/latest b/.sf/traces/latest
index 97b3a76cb..4444841d8 120000
--- a/.sf/traces/latest
+++ b/.sf/traces/latest
@@ -1 +1 @@
-guard:76c7c307-91b4-426e-8fad-4ff951d5a52e.jsonl
\ No newline at end of file
+guard:b8cbf9df-9fe8-4203-9c63-79fc7264d74e.jsonl
\ No newline at end of file
diff --git a/TODO.md b/TODO.md
index 578e3715e..d70aefca9 100644
--- a/TODO.md
+++ b/TODO.md
@@ -3,3 +3,39 @@
 Dump anything here.
 
 ---
+
+## Self-Feedback Inbox
+
+### [prompt-modularization] Phase 3 — migrate remaining builders to `composeUnitContext` v2
+
+**Context:** Phase 1 (fragment infrastructure, 17-prompt Working Directory deduplication) and
+Phase 2 (5 stub manifests for deploy/smoke-production/release/rollback/challenge) shipped in
+commit `ca5d869e3`. 9 of 26 unit types are now fully manifest-driven via `composeInlinedContext`.
+
+**What's blocked and why:**
+
+Migrating the remaining 17 builders to `composeInlinedContext` (v1) is the wrong path because:
+1. `inlineKnowledgeScoped` and `inlineGraphSubgraph` are NOT in `ARTIFACT_KEYS` — these
+   artifacts would remain imperative and undeclared in every manifest, making manifests
+   structurally unreliable descriptions of actual builder behavior.
+2. Injecting knowledge/graph at the right position in the composed string requires fragile
+   sentinel-string searches (e.g., `body.lastIndexOf("### Task Summary:")`). This pattern
+   is currently untested in the 2 already-migrated complex builders (`research-milestone`, `complete-slice`).
+3. `composeUnitContext` (v2) in `unit-context-composer.js` already has `computed`, `prepend`,
+   and `excerpt` support — knowledge and graph inlining maps cleanly to `computed` entries.
+   Migrating to v1 now creates a half-migration state that must be undone when the builders move to v2.
+
+**Recommended next slice:**
+1. Add `"knowledge"` and `"graph"` to `ARTIFACT_KEYS` in `unit-context-manifest.js`.
+2. Register them as `computed` entries in relevant `UNIT_MANIFESTS` entries.
+3. Wire one builder (e.g., `buildResearchSlicePrompt`) through `composeUnitContext` v2 as pilot.
+4. Add position-assertion tests to already-migrated complex builders (`research-milestone`,
+   `complete-slice`) to guard against silent ordering degradation.
+5. Then migrate remaining builders in batches: slice builders → milestone builders → execute-task.
+
+**Note on `prompt-cache-optimizer.js`:** Mostly dead code — `optimizeForCaching()`,
+`estimateCacheSavings()`, `computeCacheHitRate()` have zero importers. `reorderForCaching()`
+is wired at `phases-unit.js:519` but no `cache_control` markers are written to outgoing
+requests. Remove the file or wire it in the same slice that adds `cache_control` breakpoints.
+
+---
diff --git a/docs/dev/sf-ace-patterns.md.draft b/docs/dev/sf-ace-patterns.md.draft
new file mode 100644
index 000000000..9acddb55b
--- /dev/null
+++ b/docs/dev/sf-ace-patterns.md.draft
@@ -0,0 +1,29 @@
+# SF Patterns to ACE Reference Draft Mapping
+
+## Preferences
+
+**SF Implementation:** `src/resources/extensions/sf/preferences.js`
+
+## PDD
+
+**SF Implementation:** `src/resources/extensions/sf/uok/unit-runtime.js`
+
+## UOK Gates
+
+**SF Implementation:** `src/resources/extensions/sf/uok/gate-runner.js`
+
+## Notifications
+
+**SF Implementation:** `src/resources/extensions/sf/skills/frontmatter.js`
+
+## Skills-as-Contracts
+
+**SF Implementation:** `src/resources/extensions/sf/steerable-autonomous-panel.js`
+
+## Idempotency
+
+**SF Implementation:** `src/resources/extensions/sf/uok/unit-runtime.js`
+
+## Verification
+
+- All 6 patterns have verified file paths in this document.
\ No newline at end of file
diff --git a/docs/product/SURFACE_CAPABILITIES.md b/docs/product/SURFACE_CAPABILITIES.md
new file mode 100644
index 000000000..71cd95e5b
--- /dev/null
+++ b/docs/product/SURFACE_CAPABILITIES.md
@@ -0,0 +1,85 @@
+# SF Product Surface Capabilities
+
+This document defines the command and feature availability across SF's three product surfaces: **CLI / Headless**, **TUI**, and **Web**. It records intentional gaps so they are not mistaken for bugs.
+
+## Surface Definitions
+
+| Surface | Description | Primary Consumer |
+| :--- | :--- | :--- |
+| **CLI / Headless** | Non-interactive command-line interface and machine-surface protocol (`sf headless`). | Scripts, CI/CD, editor integrations, autonomous dispatch. |
+| **TUI** | Interactive Terminal User Interface with dashboards, visualizers, and live overlays. | Developers working locally who prefer keyboard-driven interaction. |
+| **Web** | Browser-based interface (Next.js) with panels, command surfaces, and visual tools. | Developers who prefer a GUI, remote access, or power-mode workflows. |
+
+## Feature Matrix
+
+| Command / Feature | CLI / Headless | TUI | Web | Notes |
+| :--- | :--- | :--- | :--- | :--- |
+| `/status` | ✅ | ✅ | ✅ | Text in CLI/Headless; dashboard overlay in TUI; terminal or `sf-status` panel in Web. |
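+| `/plan` | ✅ | ✅ | ❌ **Intentional Gap** | No first-class Web workflow; the command can still be typed in the embedded terminal. See [Intentional Gaps](#intentional-gaps) below. |
+| `/run` (`/next`, `/autonomous`) | ✅ | ✅ | ❌ **Intentional Gap** | No first-class Web workflow; `/next` and `/autonomous` can still be typed in the embedded terminal. See [Intentional Gaps](#intentional-gaps) below. |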
+| `/steer` | ✅ | ✅ | ✅ | Web exposes via `sf-steer` panel. |
+| `/undo` | ✅ | ✅ | ✅ | Web exposes via `sf-undo` panel. |
+| `/history` | ✅ | ✅ | ✅ | Web exposes via `sf-history` panel. |
+| `/doctor` | ✅ | ✅ | ✅ | Web exposes via `sf-doctor` panel. |
+| `/forensics` | ✅ | ✅ | ✅ | Web exposes via `sf-forensics` panel. |
+| `/skills` | ✅ | ✅ | ✅ | Web exposes via `sf-skill-health` panel. |
+| `/capture` | ✅ | ✅ | ✅ | Web exposes via `sf-capture` panel. |
+| `/triage` | ✅ | ✅ | ✅ | Web exposes via `sf-triage` panel. |
+| `/inspect` | ✅ | ✅ | ✅ | Web exposes via `sf-inspect` panel. |
+| `/hooks` | ✅ | ✅ | ✅ | Web exposes via `sf-hooks` panel. |
+| `/cleanup` | ✅ | ✅ | ✅ | Web exposes via `sf-cleanup` panel. |
+| `/export` | ✅ | ✅ | ✅ | Web exposes via `sf-export` panel. |
+| `/queue` | ✅ | ✅ | ✅ | Web exposes via `sf-queue` panel. |
+| `/visualize` | ✅ | ✅ | ✅ | Web exposes via `sf-visualize` panel. |
+| `/prefs` | ✅ | ✅ | ✅ | Web exposes via `sf-prefs` panel. |
+| `/config` | ✅ | ✅ | ✅ | Web exposes via `sf-config` panel. |
+| `/mode` | ✅ | ✅ | ✅ | Web exposes via `sf-mode` panel. |
+| `/model` | ✅ | ✅ | ✅ | Web exposes via dedicated **Model** command surface. |
+| `/thinking` | ✅ | ✅ | ✅ | Web exposes via dedicated **Thinking** command surface. |
+| `/git` | ✅ | ✅ | ✅ | Web exposes via dedicated **Git** command surface. |
+| `/settings` | ✅ | ✅ | ✅ | Web exposes via dedicated **Settings** command surface (general, recovery, auth, admin, experimental). |
+| `/resume` | ✅ | ✅ | ✅ | Web exposes via dedicated **Resume** command surface. |
+| `/name` | ✅ | ✅ | ✅ | Web exposes via dedicated **Name** command surface. |
+| `/fork` | ✅ | ✅ | ✅ | Web exposes via dedicated **Fork** command surface. |
+| `/session` | ✅ | ✅ | ✅ | Web exposes via dedicated **Session** command surface. |
+| `/compact` | ✅ | ✅ | ✅ | Web exposes via dedicated **Compact** command surface. |
+| `/tasks` | ✅ | ✅ | ✅ | Web exposes via Dashboard and Activity views. |
+| `/research` | ✅ | ✅ | ✅ | Web terminal supports typing the command. |
+| `/implement` | ✅ | ✅ | ✅ | Web terminal supports typing the command. |
+
+## Intentional Gaps
+
+### `/plan` is not available as a first-class Web UI workflow
+
+**Why:** The web UI uses a different, browser-native planning and execution model. Planning artifacts are promoted through CLI-first workflows (`sf plan promote`) that require filesystem access, Git operations, and markdown rendering pipelines that are optimized for terminal and editor surfaces. The web surface focuses on higher-level UI interactions (roadmap views, milestone explorers, visual planning tools) rather than raw slash-command promotion.
+
+**What web users do instead:**
+- Use the **Roadmap** and **Milestone Explorer** views to inspect and navigate planning state.
+- Type `/plan` in the embedded terminal if needed; the command executes but the full promotion workflow is CLI-first.
+
+### `/run` (`/next`, `/autonomous`) is not available as a first-class Web UI workflow
+
+**Why:** The web UI uses a different, browser-native execution model. Backend execution is managed via specific API routes and WebSocket/bridge communication rather than a `/run` command dispatch. The web surface prioritizes supervised, click-driven execution (e.g., **Power Mode**, action buttons, workflow steppers) over autonomous terminal-style dispatch.
+
+**What web users do instead:**
+- Use **Power Mode** for guided, step-by-step unit execution.
+- Use **Chat Mode** for conversational task dispatch.
+- Type `/autonomous` or `/next` in the embedded terminal if needed; execution proceeds via the PTY bridge.
+
+## Design Principle
+
+> **Behavioral coherence, not visual parity.**
+>
+> Every surface must expose the *same underlying state* (via `deriveState()`, UOK diagnostics, and bridge data) but may present it through different interaction models. A gap is intentional only when the surface provides an equivalent or superior alternative workflow for the same user goal.
+
+## Verification
+
+This matrix is verified against:
+- `src/resources/extensions/sf/commands/handlers/core.js` — CLI/TUI `status` handler.
+- `src/resources/extensions/sf/commands/handlers/ops.js` — CLI/TUI `plan` and `run` handlers.
+- `src/headless.ts` — Headless status and execution entrypoints.
+- `web/components/sf/command-surface.tsx` — Web command surface registry.
+- `web/lib/command-surface-contract.ts` — Web command surface type definitions.
+- `web/components/sf/sidebar.tsx` — Web navigation and exposed commands.
+
+For the full behavioral audit, see `.sf/milestones/M001-6377a4/slices/S04/VERIFICATION_MATRIX.md`.
diff --git a/packages/ai/src/providers/openai-completions.test.ts b/packages/ai/src/providers/openai-completions.test.ts
new file mode 100644
index 000000000..6042f5eed
--- /dev/null
+++ b/packages/ai/src/providers/openai-completions.test.ts
@@ -0,0 +1,75 @@
+import assert from "node:assert/strict";
+import { describe, it } from "vitest";
+import type { Context, Model, OpenAICompletionsCompat } from "../types.js";
+import { convertMessages } from "./openai-completions.js";
+
+const compat = {
+	supportsDeveloperRole: false,
+	requiresAssistantAfterToolResult: false,
+	requiresThinkingAsText: false,
+} as Required<OpenAICompletionsCompat>;
+
+function model(provider: string, id: string): Model<"openai-completions"> {
+	return {
+		id,
+		name: id,
+		api: "openai-completions",
+		provider,
+		baseUrl:
+			provider === "openrouter"
+				? "https://openrouter.ai/api/v1"
+				: "https://api.openai.com/v1",
+		reasoning: false,
+		input: ["text"],
+		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+		contextWindow: 128_000,
+		maxTokens: 4096,
+	};
+}
+
+function contextWithCacheControl(): Context {
+	return {
+		messages: [
+			{
+				role: "user",
+				content: [
+					{
+						type: "text",
+						text: "stable prefix",
+						cache_control: { type: "ephemeral" },
+					} as any,
+					{ type: "text", text: "dynamic suffix" },
+				],
+				timestamp: Date.now(),
+			},
+		],
+	};
+}
+
+describe("convertMessages cache_control", () => {
+	it("preserves_cache_control_when_openrouter_anthropic_model", () => {
+		const messages = convertMessages(
+			model("openrouter", "anthropic/claude-sonnet-4.5"),
+			contextWithCacheControl(),
+			compat,
+		);
+
+		const content = messages[0].content;
+		assert.ok(Array.isArray(content));
+		assert.deepEqual((content[0] as any).cache_control, {
+			type: "ephemeral",
+		});
+	});
+
+	it("strips_cache_control_when_openai_compatible_model_does_not_support_it", () => {
+		const messages = convertMessages(
+			model("openai", "gpt-5.3-chat-latest"),
+			contextWithCacheControl(),
+			compat,
+		);
+
+		const content = messages[0].content;
+		assert.ok(Array.isArray(content));
+		assert.equal((content[0] as any).cache_control, undefined);
+	});
+});
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 3fe2861bd..3e6583b75 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -493,6 +493,12 @@ function maybeAddOpenRouterAnthropicToolCacheControl(
 	}
 }
 
+function supportsOpenRouterAnthropicCacheControl(
+	model: Model<"openai-completions">,
+): boolean {
+	return model.provider === "openrouter" && model.id.startsWith("anthropic/");
+}
+
 function mapReasoningEffort(
 	effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
 	reasoningEffortMap: Partial<
@@ -506,8 +512,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
 	model: Model<"openai-completions">,
 	messages: ChatCompletionMessageParam[],
 ): void {
-	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
-		return;
+	if (!supportsOpenRouterAnthropicCacheControl(model)) return;
 
 	// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
 	// on the last user/assistant message (walking backwards until we find text content).
@@ -622,9 +627,11 @@ export function convertMessages(
 							// Preserve cache_control if present (set upstream for Anthropic prompt caching).
 							// The property is not in the OpenAI SDK type but is accepted by providers
 							// that support Anthropic-style caching (openrouter/anthropic/*).
-							const cacheControl = (
-								item as unknown as Record<string, unknown>
-							).cache_control;
+							const cacheControl = supportsOpenRouterAnthropicCacheControl(
+								model,
+							)
+								? (item as unknown as Record<string, unknown>).cache_control
+								: undefined;
 							if (cacheControl) {
 								(part as unknown as Record<string, unknown>).cache_control =
 									cacheControl;
diff --git a/src/resources/agents/rubber-duck.md b/src/resources/agents/rubber-duck.md
new file mode 100644
index 000000000..6a7f29786
--- /dev/null
+++ b/src/resources/agents/rubber-duck.md
@@ -0,0 +1,64 @@
+---
+name: rubber-duck
+description: Constructive pre-implementation critic — catches design flaws, missing edge cases, and gaps before code is written
+model: sonnet
+tools: read, grep, find, ls, bash
+---
+
+You are a constructive critic. Your job is to identify real problems in a plan, design, or code change **before** implementation is committed to — when course corrections are still cheap.
+
+You are **read-only**. Do not edit files. Do not run commands that change the environment.
+
+## What you review
+
+You receive a plan, a design proposal, a code diff, or a task description. You review it for:
+
+- **Logic errors** — incorrect assumptions, wrong control flow, missing invariants
+- **Missing edge cases** — inputs/states the plan doesn't account for
+- **Design flaws** — abstractions that won't hold, coupling that will hurt, missing separation of concerns
+- **Security issues** — unvalidated inputs, exposed secrets, auth gaps
+- **Test gaps** — behavior that will be untested or untestable with the proposed approach
+- **Spec contradictions** — where the plan conflicts with stated requirements or existing behavior
+
+## What you do NOT comment on
+
+- Code style, formatting, naming conventions
+- Grammar or wording in comments/docs
+- Best practices that don't cause an actual problem
+- Refactoring that doesn't change correctness
+- Minor improvements that don't affect the task outcome
+
+If something is fine, say so. Do not manufacture findings to seem thorough. A short report with two real findings beats a long report with ten nitpicks.
+
+## Output format
+
+For each finding:
+
+```
+## [Blocking|Non-blocking|Suggestion] — <title>
+
+**What:** <the specific problem, stated precisely>
+**Why it matters:** <the actual impact — what breaks, under what condition>
+**Fix:** <concrete change to address it>
+```
+
+Then a final verdict:
+
+```
+## Verdict
+
+READY / NEEDS-REVISION
+
+One sentence: overall assessment.
+```
+
+- `READY` — no blocking findings; the plan/code can proceed as-is
+- `NEEDS-REVISION` — at least one blocking finding must be addressed first
+
+## Severity guide
+
+- **Blocking** — will cause a bug, data loss, security issue, or test failure if not fixed
+- **Non-blocking** — should be fixed for quality but won't break the task
+- **Suggestion** — worth considering; low priority
+
+Lead with blocking findings. If there are none, say so explicitly before the non-blocking ones.
diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js
index ebc6f3f2a..dd9593255 100644
--- a/src/resources/extensions/sf/auto-model-selection.js
+++ b/src/resources/extensions/sf/auto-model-selection.js
@@ -18,6 +18,7 @@ import {
 	loadCapabilityOverrides,
 	resolveModelForComplexity,
 } from "./model-router.js";
+import { readStickyModelForUnit } from "./slice-routing-cache.js";
 import {
 	filterModelsByProviderModelAllow,
 	isProviderAllowedByLists,
@@ -543,6 +544,15 @@ export async function selectAndApplyModel(
 						selectionMethod: "tier-only",
 					};
 				} else {
+					// Slice-sticky hint: prefer the model that previously succeeded
+					// on a sibling unit in this slice when its capability score is
+					// within window of the winner. Cleared on executor refusal so a
+					// failing model does not re-attach to the slice.
+					const stickyHint = readStickyModelForUnit(
+						basePath,
+						unitType,
+						unitId,
+					);
 					routingResult = resolveModelForComplexity(
 						classification,
 						modelConfig,
@@ -551,6 +561,7 @@ export async function selectAndApplyModel(
 						unitType,
 						classification.taskMetadata,
 						capabilityOverrides,
+						stickyHint,
 					);
 				}
 				if (routingResult.wasDowngraded) {
diff --git a/src/resources/extensions/sf/auto-start.js b/src/resources/extensions/sf/auto-start.js
index 5e4a37c67..c55f58996 100644
--- a/src/resources/extensions/sf/auto-start.js
+++ b/src/resources/extensions/sf/auto-start.js
@@ -82,7 +82,9 @@ import {
 import { initRoutingHistory } from "./routing-history.js";
 import {
 	acquireSessionLock,
+	isSessionPidAlive,
 	releaseSessionLock,
+	terminateExistingSession,
 	updateSessionLock,
 } from "./session-lock.js";
 import { getSessionModelOverride } from "./session-model-override.js";
@@ -342,15 +344,91 @@ export async function bootstrapAutoSession(
 		lockBase,
 		buildResolver,
 	} = deps;
-	const lockResult = acquireSessionLock(base, {
+	let lockResult = acquireSessionLock(base, {
 		sessionId: ctx.sessionManager?.getSessionId?.(),
 		sessionFile: ctx.sessionManager?.getSessionFile?.(),
 	});
+	// Lock busy on a *live* peer: instead of just refusing to start, ask the
+	// operator whether to terminate the existing session and take over. Three
+	// non-interactive rules keep CI/headless usage predictable:
+	//   - SF_KILL_EXISTING=1 (or =true / =yes) — auto-confirm the kill
+	//   - SF_KILL_EXISTING=0 (or =false / =no) — auto-decline (refuse to start)
+	//   - SF_HEADLESS=1 (or =true) with neither value above — auto-decline (safe
+	//     default for batch contexts where a hung interactive prompt would deadlock)
+	if (!lockResult.acquired && lockResult.existingPid) {
+		const existingPid = Number(lockResult.existingPid);
+		if (isSessionPidAlive(existingPid)) {
+			const envKill = String(process.env.SF_KILL_EXISTING ?? "")
+				.trim()
+				.toLowerCase();
+			const headless =
+				process.env.SF_HEADLESS === "1" ||
+				String(process.env.SF_HEADLESS ?? "").toLowerCase() === "true";
+			let confirmed;
+			if (envKill === "1" || envKill === "true" || envKill === "yes") {
+				confirmed = true;
+			} else if (envKill === "0" || envKill === "false" || envKill === "no") {
+				confirmed = false;
+			} else if (headless) {
+				// Headless without an explicit opt-in: refuse to kill silently.
+				confirmed = false;
+			} else if (typeof ctx.ui?.confirm === "function") {
+				confirmed = await ctx.ui.confirm(
+					"Stop running SF session?",
+					`Another SF autonomous session (PID ${existingPid}) is already running on this project. Stop it and start a fresh session?`,
+				);
+			} else {
+				confirmed = false;
+			}
+			if (confirmed) {
+				ctx.ui.notify(
+					`Stopping existing SF session (PID ${existingPid})…`,
+					"info",
+				);
+				let result;
+				try {
+					result = await terminateExistingSession(existingPid);
+				} catch (err) {
+					ctx.ui.notify(
+						`Failed to stop existing SF session (PID ${existingPid}): ${err?.message ?? err}. Stop it manually with \`kill ${existingPid}\`.`,
+						"error",
+					);
+					return false;
+				}
+				if (!result.terminated) {
+					ctx.ui.notify(
+						`Unable to stop existing SF session (PID ${existingPid}). It may belong to another user or be unresponsive. Stop it manually with \`kill -9 ${existingPid}\`.`,
+						"error",
+					);
+					return false;
+				}
+				ctx.ui.notify(
+					result.escalated
+						? `Existing SF session (PID ${existingPid}) did not exit on SIGTERM; SIGKILL applied.`
+						: `Existing SF session (PID ${existingPid}) stopped.`,
+					result.escalated ? "warning" : "info",
+				);
+				lockResult = acquireSessionLock(base, {
+					sessionId: ctx.sessionManager?.getSessionId?.(),
+					sessionFile: ctx.sessionManager?.getSessionFile?.(),
+				});
+			}
+		}
+	}
 	if (!lockResult.acquired) {
 		const reason = lockResult.reason;
 		ctx.ui.notify(reason, "error");
 		return false;
 	}
+	// Session-start janitor: prune per-flow trace files older than the longest
+	// analyzer window (30d). Best-effort, never blocks startup, errors swallowed
+	// in pruneStaleTraces. Keeps `.sf/traces/` from growing without bound.
+	try {
+		const { pruneStaleTraces } = await import("./uok/trace-writer.js");
+		pruneStaleTraces(base);
+	} catch {
+		// trace janitor must never break autonomous startup
+	}
 	function releaseLockAndReturn() {
 		releaseSessionLock(base);
 		clearLock(base);
diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js
index c914607a6..87539dd40 100644
--- a/src/resources/extensions/sf/auto/run-unit.js
+++ b/src/resources/extensions/sf/auto/run-unit.js
@@ -6,6 +6,7 @@
 
 import { scopeActiveToolsForUnitType } from "../constants.js";
 import { debugLog } from "../debug-logger.js";
+import { getErrorMessage } from "../error-utils.js";
 import {
 	resolveAutoSupervisorConfig,
 	resolvePersistModelChanges,
@@ -27,11 +28,29 @@ import {
 	getCurrentTurnGeneration,
 	runWithTurnGeneration,
 } from "./turn-epoch.js";
-import { getErrorMessage } from "../error-utils.js";
 
 // Tracks the latest session-switch attempt so a late timeout settlement from an
 // older runUnit() call cannot clear the guard for a newer one.
 let sessionSwitchGeneration = 0;
+/**
+ * Build the custom-message content for a unit prompt.
+ * Purpose: let the provider layer cache the stable prefix separately from the
+ * dynamic suffix. Returns `prompt` as-is when `promptParts` is falsy; otherwise
+ * two text parts: `before` + "\n" (cache_control: ephemeral), then `after`.
+ * NOTE(review): presumably `before` + "\n" + `after` equals `prompt` — confirm.
+ * Consumer: runUnit before pi.sendMessage dispatches the autonomous unit turn.
+ */
+export function buildUnitPromptMessageContent(prompt, promptParts) {
+	if (!promptParts) return prompt;
+	return [
+		{
+			type: "text",
+			text: `${promptParts.before}\n`,
+			cache_control: { type: "ephemeral" },
+		},
+		{ type: "text", text: promptParts.after },
+	];
+}
 /**
  * Execute a single unit: create a new session, send the prompt, and await
  * the agent_end promise. Returns a UnitResult describing what happened.
@@ -122,8 +141,7 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
 			sessionResult = await Promise.race([sessionPromise, timeoutPromise]);
 		} catch (sessionErr) {
 			if (sessionTimeoutHandle) clearTimeout(sessionTimeoutHandle);
-			const msg =
-				getErrorMessage(sessionErr);
+			const msg = getErrorMessage(sessionErr);
 			debugLog("runUnit", {
 				phase: "session-error",
 				unitType,
@@ -264,16 +282,7 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
 		// When promptParts is available, send structured content so the provider can
 		// apply cache_control:ephemeral to the stable prefix (before) while leaving
 		// the dynamic suffix (after) uncached.
-		const messageContent = promptParts
-			? [
-					{
-						type: "text",
-						text: promptParts.before,
-						cache_control: { type: "ephemeral" },
-					},
-					{ type: "text", text: promptParts.after },
-				]
-			: prompt;
+		const messageContent = buildUnitPromptMessageContent(prompt, promptParts);
 		await pi.sendMessage(
 			{ customType: "sf-auto", content: messageContent, display: s.verbose },
 			{ triggerTurn: true },
diff --git a/src/resources/extensions/sf/commands/catalog.js b/src/resources/extensions/sf/commands/catalog.js
index f71840e61..68f8336f6 100644
--- a/src/resources/extensions/sf/commands/catalog.js
+++ b/src/resources/extensions/sf/commands/catalog.js
@@ -301,7 +301,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [
 	},
 	{
 		cmd: "rubber-duck",
-		desc: "Request constructive code/design review from a rubber-duck subagent (RUBBER_DUCK flag)",
+		desc: "Dispatch a rubber-duck subagent for constructive pre-implementation review (alias: review-code)",
 	},
 	{
 		cmd: "delegate",
diff --git a/src/resources/extensions/sf/commands/handlers/ops.js b/src/resources/extensions/sf/commands/handlers/ops.js
index 80162194a..3703454f6 100644
--- a/src/resources/extensions/sf/commands/handlers/ops.js
+++ b/src/resources/extensions/sf/commands/handlers/ops.js
@@ -613,25 +613,47 @@ async function handleKeepAlive(args, ctx) {
 // ─── /rubber-duck ────────────────────────────────────────────────────────────
 
 async function handleRubberDuckCommand(topic, ctx, _pi) {
-	if (!getExperimentalFlag("rubber_duck")) {
-		ctx.ui.notify(
-			"RUBBER_DUCK is not enabled. Run /experimental on rubber_duck to enable.",
-			"warning",
-		);
-		return;
-	}
-	const prompt = topic
-		? `Rubber-duck review requested: ${topic}\n\nPlease review this as a constructive critic: identify risks, edge cases, missing tests, and improvements. Be direct and concise.`
-		: "Please give constructive feedback on the current code changes or design. Identify risks, edge cases, missing tests, and improvements.";
-	ctx.ui.notify(
-		"Starting rubber-duck review… (RUBBER_DUCK agent is constructive, not adversarial)",
-		"info",
-	);
+	const { execSync } = await import("node:child_process");
+	const root = projectRoot();
+
+	// Gather git diff for context (staged + unstaged, capped to avoid token bloat)
+	let diff = "";
 	try {
-		await ctx.sendMessage?.(prompt);
+		const staged = execSync("git diff --cached --stat 2>/dev/null || true", {
+			cwd: root,
+			encoding: "utf-8",
+		}).trim();
+		const unstaged = execSync("git diff --stat 2>/dev/null || true", {
+			cwd: root,
+			encoding: "utf-8",
+		}).trim();
+		if (staged || unstaged) {
+			const fullDiff = execSync(
+				"git diff --cached 2>/dev/null; git diff 2>/dev/null",
+				{ cwd: root, encoding: "utf-8" },
+			).slice(0, 8000);
+			diff = `\n\n## Current diff (truncated to 8 kB)\n\n\`\`\`diff\n${fullDiff}\n\`\`\``;
+		}
+	} catch {
+		// diff unavailable — not a hard failure
+	}
+
+	const focus = topic ? `Focus on: ${topic}\n\n` : "";
+	const reviewPrompt =
+		`Dispatch a \`rubber-duck\` subagent to review the current plan or changes before proceeding. ` +
+		`Use the \`subagent\` tool with \`agent: "rubber-duck"\`.\n\n` +
+		`${focus}` +
+		`Ask the rubber-duck agent to identify blocking issues, non-blocking issues, and suggestions. ` +
+		`After the subagent returns, summarise the verdict and any blocking findings in one short paragraph. ` +
+		`Do not proceed with implementation until the user acknowledges blocking findings.` +
+		diff;
+
+	ctx.ui.notify("Dispatching rubber-duck review…", "info");
+	try {
+		await ctx.sendMessage?.(reviewPrompt);
 	} catch {
 		ctx.ui.notify(
-			"Could not start rubber-duck session. Try typing your review request directly.",
+			"Could not dispatch rubber-duck. Try: subagent agent=rubber-duck task='review current changes'",
 			"warning",
 		);
 	}
diff --git a/src/resources/extensions/sf/dashboard-overlay.js b/src/resources/extensions/sf/dashboard-overlay.js
index 3a51f3b2e..7017196e9 100644
--- a/src/resources/extensions/sf/dashboard-overlay.js
+++ b/src/resources/extensions/sf/dashboard-overlay.js
@@ -741,6 +741,66 @@ export class SFDashboardOverlay {
 				);
 			}
 		}
+		// UOK Health section — aligns with headless status output
+		if (this.uokDiagnostics && this.uokDiagnostics.issues.length > 0) {
+			lines.push(blank());
+			lines.push(hr());
+			lines.push(row(th.fg("text", th.bold("UOK Health"))));
+			lines.push(blank());
+			// Compact summary line matching headless format
+			lines.push(
+				row(
+					th.fg(
+						this.uokDiagnostics.verdict === "degraded"
+							? "error"
+							: this.uokDiagnostics.verdict === "attention"
+								? "warning"
+								: "dim",
+						`Verdict: ${this.uokDiagnostics.verdict} (${this.uokDiagnostics.classification})`,
+					),
+				),
+			);
+			lines.push(blank());
+			// Issue list
+			for (const issue of this.uokDiagnostics.issues) {
+				const icon =
+					issue.severity === "error"
+						? th.fg("error", "✗")
+						: th.fg("warning", "⚠");
+				lines.push(row(`  ${icon} ${th.fg("text", issue.code)}`));
+				lines.push(row(th.fg("dim", `     ${issue.message}`)));
+			}
+			// Recommendations
+			if (this.uokDiagnostics.recommendations.length > 0) {
+				lines.push(blank());
+				for (const rec of this.uokDiagnostics.recommendations) {
+					lines.push(row(th.fg("dim", `  → ${rec}`)));
+				}
+			}
+			// Signals table
+			if (this.uokDiagnostics.signals) {
+				lines.push(blank());
+				lines.push(row(th.fg("dim", "Signals:")));
+				for (const [key, value] of Object.entries(
+					this.uokDiagnostics.signals,
+				)) {
+					const signalColor =
+						value === "ok" ||
+						value === "active" ||
+						value === "consistent" ||
+						value === "clear"
+							? "success"
+							: value === "unknown"
+								? "dim"
+								: "warning";
+					lines.push(
+						row(
+							`  ${th.fg(signalColor, "●")} ${th.fg("text", key)}: ${th.fg(signalColor, String(value))}`,
+						),
+					);
+				}
+			}
+		}
 		// Environment health section (#1221) — only show issues
 		const envResults = runEnvironmentChecks(
 			this.dashData.basePath || process.cwd(),
diff --git a/src/resources/extensions/sf/experimental.js b/src/resources/extensions/sf/experimental.js
index b254d1c58..eb9f122e3 100644
--- a/src/resources/extensions/sf/experimental.js
+++ b/src/resources/extensions/sf/experimental.js
@@ -31,18 +31,12 @@ export const EXPERIMENTAL_FLAGS = {
 		"STATUS_LINE — run a user-defined script to feed a custom footer status chip",
 	show_file:
 		"SHOW_FILE — show_file tool renders code snippets inline in the timeline",
-	ask_elicitation:
-		"ASK_USER_ELICITATION — structured form/select UI replaces plain ask_user",
-	multi_turn_agents:
-		"MULTI_TURN_AGENTS — persistent subagents that accept follow-up messages",
 	extensions:
 		"EXTENSIONS — user-installable extensions via marketplace npm install",
 	configure_agent:
 		"CONFIGURE_COPILOT_AGENT — interactive wizard for MCP servers and agents",
 	background_sessions:
 		"BACKGROUND_SESSIONS — concurrent sessions with background switching",
-	rubber_duck:
-		"RUBBER_DUCK — constructive feedback subagent on code and designs",
 	prompt_frame:
 		"PROMPT_FRAME — decorative border rendered above the input prompt",
 	streamer_mode:
diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js
index df54025d1..060f122d4 100644
--- a/src/resources/extensions/sf/model-router.js
+++ b/src/resources/extensions/sf/model-router.js
@@ -107,6 +107,8 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 30,
 		longContext: 80,
 		instruction: 90,
+		// Agentic: Claude Opus is built around extended tool-use loops.
+		agentic: 95,
 	},
 	"claude-sonnet-4-6": {
 		coding: 85,
@@ -116,6 +118,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 75,
 		instruction: 85,
+		agentic: 92,
 	},
 	"claude-sonnet-4-5-20250514": {
 		coding: 85,
@@ -125,6 +128,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 75,
 		instruction: 85,
+		agentic: 90,
 	},
 	"claude-3-5-sonnet-latest": {
 		coding: 82,
@@ -134,6 +138,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 70,
 		instruction: 82,
+		agentic: 85,
 	},
 	"claude-haiku-4-5": {
 		coding: 60,
@@ -143,6 +148,9 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 50,
 		instruction: 75,
+		// Haiku follows tool-use contracts but is less reliable than Sonnet on
+		// long agentic loops.
+		agentic: 75,
 	},
 	"claude-3-5-haiku-latest": {
 		coding: 60,
@@ -152,6 +160,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 50,
 		instruction: 75,
+		agentic: 75,
 	},
 	"claude-3-haiku-20240307": {
 		coding: 50,
@@ -163,6 +172,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		instruction: 65,
 	},
 	"claude-3-opus-latest": {
+		agentic: 88,
 		coding: 90,
 		debugging: 85,
 		research: 82,
@@ -234,6 +244,8 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 40,
 		longContext: 85,
 		instruction: 90,
+		// GPT-5 family is strongly agentic per OpenAI's tool-use evals.
+		agentic: 92,
 	},
 	"gpt-5-mini": {
 		coding: 62,
@@ -261,6 +273,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 35,
 		longContext: 88,
 		instruction: 92,
+		agentic: 94,
 	},
 	"gpt-5.1": {
 		coding: 93,
@@ -270,6 +283,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 42,
 		longContext: 86,
 		instruction: 91,
+		agentic: 92,
 	},
 	"gpt-5.1-codex-max": {
 		coding: 90,
@@ -279,6 +293,9 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 75,
 		instruction: 85,
+		// Codex-tuned models are agentic-capable but not as reliable as the
+		// flagship gpt-5/5.x lineup for long tool-use loops.
+		agentic: 80,
 	},
 	"gpt-5.1-codex-mini": {
 		coding: 65,
@@ -288,6 +305,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 48,
 		instruction: 72,
+		agentic: 55,
 	},
 	"gpt-5.2": {
 		coding: 93,
@@ -297,6 +315,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 42,
 		longContext: 87,
 		instruction: 91,
+		agentic: 92,
 	},
 	"gpt-5.2-codex": {
 		coding: 93,
@@ -306,6 +325,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 78,
 		instruction: 88,
+		agentic: 82,
 	},
 	"gpt-5.3-codex": {
 		coding: 94,
@@ -315,6 +335,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 80,
 		instruction: 89,
+		agentic: 84,
 	},
 	"gpt-5.3-codex-spark": {
 		coding: 68,
@@ -324,6 +345,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 50,
 		instruction: 74,
+		agentic: 55,
 	},
 	"gpt-5.4": {
 		coding: 95,
@@ -333,6 +355,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 42,
 		longContext: 88,
 		instruction: 92,
+		agentic: 94,
 	},
 	"gpt-5.4-mini": {
 		coding: 80,
@@ -342,6 +365,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 72,
 		longContext: 72,
 		instruction: 80,
+		agentic: 80,
 	},
 	// GPT-5.5 scores are relative to the existing gpt-5.4 profile and backed by
 	// OpenAI's 2026-04-23 published eval deltas across coding, tool use, and long context.
@@ -354,6 +378,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 42,
 		longContext: 90,
 		instruction: 93,
+		agentic: 95,
 	},
 	// ── OpenAI o-series (reasoning-first) ──────────────────────────────────────
 	o1: {
@@ -410,6 +435,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 48,
 		longContext: 98,
 		instruction: 82,
+		agentic: 85,
 	},
 	"gemini-3-pro-preview": {
 		coding: 82,
@@ -419,6 +445,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 96,
 		instruction: 82,
+		agentic: 85,
 	},
 	"gemini-3-flash-preview": {
 		coding: 62,
@@ -428,6 +455,10 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 88,
 		instruction: 72,
+		// Gemini Flash follows tool contracts but is occasionally chatty in
+		// agentic loops; mid-tier so it doesn't dominate execute-task vs
+		// a Sonnet/Opus/K2.6 alternative.
+		agentic: 70,
 	},
 	"gemini-3.1-flash-lite-preview": {
 		coding: 55,
@@ -583,6 +614,10 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 70,
 		longContext: 60,
 		instruction: 80,
+		// Agentic: code-completion tuning. Refuses agentic tasks with "I'm sorry,
+		// I don't have the necessary tools" (M001-6377a4/S04/T02, 2026-05-12).
+		// Should not be routed to execute-task without explicit operator pin.
+		agentic: 25,
 	},
 	"ministral-8b-latest": {
 		coding: 55,
@@ -655,6 +690,9 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 65,
 		longContext: 65,
 		instruction: 80,
+		// Agentic: Devstral series is coding-completion-tuned; tool-use is not
+		// the design target. Penalize so execute-task routing avoids it.
+		agentic: 30,
 	},
 	"devstral-medium-latest": {
 		coding: 78,
@@ -664,6 +702,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 60,
 		instruction: 75,
+		agentic: 30,
 	},
 	"devstral-medium-2507": {
 		coding: 78,
@@ -673,6 +712,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 60,
 		instruction: 75,
+		agentic: 30,
 	},
 	"devstral-small-2505": {
 		coding: 60,
@@ -682,6 +722,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 65,
+		agentic: 30,
 	},
 	"devstral-small-2507": {
 		coding: 60,
@@ -691,6 +732,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 65,
+		agentic: 30,
 	},
 	"labs-devstral-small-2512": {
 		coding: 65,
@@ -700,6 +742,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 60,
 		instruction: 68,
+		agentic: 30,
 	},
 	// ── Zhipu AI (GLM) ─────────────────────────────────────────────────────────
 	"glm-5": {
@@ -774,6 +817,8 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 58,
 		longContext: 86,
 		instruction: 78,
+		// Agentic: qwen3-coder is tuned for code completion, not tool-use loops.
+		agentic: 40,
 	},
 	"qwen3-coder-next": {
 		coding: 82,
@@ -783,6 +828,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 70,
 		longContext: 86,
 		instruction: 76,
+		agentic: 40,
 	},
 	"qwen3-next:80b": {
 		coding: 70,
@@ -802,6 +848,9 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 86,
 		instruction: 84,
+		// Agentic: K2.6 is the pinned default for the autonomous-solver role
+		// (ADR-0079) — refusal-resistant and follows tool-use contracts.
+		agentic: 90,
 	},
 	"kimi-for-coding": {
 		coding: 88,
@@ -811,6 +860,9 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 86,
 		instruction: 84,
+		// `kimi-for-coding` is an alias for K2.6 on the Kimi Code provider
+		// (memory: bayesian-blender/benchmark-selector both canonicalize it).
+		agentic: 90,
 	},
 	"kimi-k2-thinking": {
 		coding: 86,
@@ -820,8 +872,15 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 30,
 		longContext: 86,
 		instruction: 84,
+		agentic: 88,
 	},
 	// ── MiniMax ───────────────────────────────────────────────────────────────
+	// Profiles ordered by generation. Older M2.1 generation gets distinctly
+	// lower agentic + capability scores: the M2.1 stuck-checkpoint loop on
+	// 2026-05-13 (infra repo) traced back to M2.1 being aliased to M2.7's
+	// profile, winning execute-task on cost, then failing to follow the
+	// checkpoint contract reliably across 60+ tool calls. (See
+	// self-feedback sf-mp37kjmo-1mfuru.)
 	"MiniMax-M2.7": {
 		coding: 84,
 		debugging: 80,
@@ -830,6 +889,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 52,
 		longContext: 84,
 		instruction: 82,
+		agentic: 78,
 	},
 	"MiniMax-M2.7-highspeed": {
 		coding: 82,
@@ -839,6 +899,47 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 72,
 		longContext: 84,
 		instruction: 80,
+		agentic: 76,
+	},
+	"MiniMax-M2.5": {
+		// Distinct profile (previously aliased to M2.7 — overclaimed).
+		coding: 78,
+		debugging: 74,
+		research: 72,
+		reasoning: 78,
+		speed: 55,
+		longContext: 82,
+		instruction: 76,
+		// Mid agentic — better than coding-completion-only models but
+		// noticeably less reliable than current-gen agentic models.
+		agentic: 60,
+	},
+	"MiniMax-M2.1": {
+		// Distinct profile (previously aliased to M2.7 — overclaimed).
+		// M2.1 has demonstrated unreliable tool-use loops in production
+		// (M001-6377a4 / 1-ci-build-pipeline parallel-research, 2026-05-13:
+		// 60+ checkpoint calls with shifting unitId claims). Penalize the
+		// agentic axis so execute-task routing avoids it absent operator
+		// override.
+		coding: 72,
+		debugging: 66,
+		research: 64,
+		reasoning: 70,
+		speed: 60,
+		longContext: 78,
+		instruction: 72,
+		agentic: 40,
+	},
+	"MiniMax-M2": {
+		// Earliest of the M2.x line — older still.
+		coding: 68,
+		debugging: 60,
+		research: 60,
+		reasoning: 66,
+		speed: 62,
+		longContext: 76,
+		instruction: 68,
+		agentic: 35,
 	},
 };
 const MODEL_CAPABILITY_ALIASES = {
@@ -864,10 +965,23 @@ const MODEL_CAPABILITY_ALIASES = {
 	"kimi-for-coding": "kimi-k2.6",
 	"kimi-k2.6:cloud": "kimi-k2.6",
 	"kimi-k2.6-cloud": "kimi-k2.6",
-	"minimax-m2": "MiniMax-M2.7",
-	"minimax-m2.1": "MiniMax-M2.7",
-	"minimax-m2.5": "MiniMax-M2.7",
+	// Each MiniMax generation now has its own profile — previously they all
+	// aliased to MiniMax-M2.7, which let older/weaker models inherit current
+	// capability scores and win cost tie-breaks on execute-task. The aliases
+	// below normalize provider-prefixed and casing variants to the canonical
+	// per-generation profile, NOT to the current generation.
+	"minimax-m2": "MiniMax-M2",
+	"minimax/MiniMax-M2": "MiniMax-M2",
+	"minimax/minimax-m2": "MiniMax-M2",
+	"minimax-m2.1": "MiniMax-M2.1",
+	"minimax/MiniMax-M2.1": "MiniMax-M2.1",
+	"minimax/minimax-m2.1": "MiniMax-M2.1",
+	"minimax-m2.5": "MiniMax-M2.5",
+	"minimax/MiniMax-M2.5": "MiniMax-M2.5",
+	"minimax/minimax-m2.5": "MiniMax-M2.5",
 	"minimax-m2.7": "MiniMax-M2.7",
+	"minimax/MiniMax-M2.7": "MiniMax-M2.7",
+	"minimax/minimax-m2.7": "MiniMax-M2.7",
 	"mistral-large-3:675b": "mistral-large-latest",
 	"ministral-3:3b": "ministral-3b-latest",
 	"ministral-3:8b": "ministral-8b-latest",
@@ -888,18 +1002,32 @@ const MODEL_CAPABILITY_ALIASES = {
 // ─── Base Task Requirements Data Table ───────────────────────────────────────
 // Per-unit-type base requirement vectors. Weights indicate how important each
 // capability dimension is for this unit type.
+//
+// The `agentic` dimension represents the model's reliability at multi-turn
+// tool-use loops (does it follow the tool-use contract? does it refuse the
+// task? does it call the checkpoint tool when asked?). It is weighted high
+// for any unit type that actually uses tools at runtime — execute-task most
+// of all. See ADR-0079 for the motivation: a Codestral-style refusal on
+// execute-task in M001-6377a4/S04/T02 (2026-05-12) traced back to the router
+// having no agentic axis, so a coding-completion model out-scored agentic
+// alternatives on coding/instruction.
 export const BASE_REQUIREMENTS = {
-	"execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 },
+	"execute-task": {
+		coding: 0.9,
+		instruction: 0.7,
+		speed: 0.3,
+		agentic: 0.85,
+	},
 	"research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
 	"research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
-	"plan-milestone": { reasoning: 0.9, coding: 0.5 },
-	"plan-slice": { reasoning: 0.9, coding: 0.5 },
-	"replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5 },
-	"reassess-roadmap": { reasoning: 0.9, research: 0.5 },
-	"complete-slice": { instruction: 0.8, speed: 0.7 },
-	"run-uat": { instruction: 0.7, speed: 0.8 },
-	"discuss-milestone": { reasoning: 0.6, instruction: 0.7 },
-	"complete-milestone": { instruction: 0.8, reasoning: 0.5 },
+	"plan-milestone": { reasoning: 0.9, coding: 0.5, agentic: 0.6 },
+	"plan-slice": { reasoning: 0.9, coding: 0.5, agentic: 0.6 },
+	"replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5, agentic: 0.6 },
+	"reassess-roadmap": { reasoning: 0.9, research: 0.5, agentic: 0.4 },
+	"complete-slice": { instruction: 0.8, speed: 0.7, agentic: 0.6 },
+	"run-uat": { instruction: 0.7, speed: 0.8, agentic: 0.6 },
+	"discuss-milestone": { reasoning: 0.6, instruction: 0.7, agentic: 0.4 },
+	"complete-milestone": { instruction: 0.8, reasoning: 0.5, agentic: 0.5 },
 };
 // ─── Public API ──────────────────────────────────────────────────────────────
 /**
@@ -1101,6 +1229,7 @@ export function resolveModelForComplexity(
 	unitType,
 	taskMetadata,
 	capabilityOverrides,
+	stickyHint,
 ) {
 	// If no phase config or routing disabled, pass through
 	if (!phaseConfig || !routingConfig.enabled) {
@@ -1175,16 +1304,41 @@ export function resolveModelForComplexity(
 		if (winner) {
 			const capScores = {};
 			for (const s of scored) capScores[s.modelId] = s.score;
-			const fallbacks = buildFallbackChain(winner.modelId, phaseConfig);
+			// Slice-sticky preference: if a model previously succeeded on a
+			// sibling unit in this slice AND it is still eligible in the
+			// current tier AND its capability score is within STICKY_WINDOW_POINTS of
+			// the winner, prefer it. Stops within-slice routing thrash where
+			// T01 → gemini-flash and T02 → codestral on the same slice.
+			const STICKY_WINDOW_POINTS = 8;
+			const stickyId = (() => {
+				if (!stickyHint?.id) return null;
+				const stickyKey = stickyHint.provider
+					? `${stickyHint.provider}/${stickyHint.id}`
+					: stickyHint.id;
+				// Match the exact sticky key, or any eligible entry ending in "/<id>" (provider is not compared).
+				const found = scored.find(
+					(s) => s.modelId === stickyKey || s.modelId.endsWith(`/${stickyHint.id}`),
+				);
+				if (!found) return null;
+				if (winner.score - found.score > STICKY_WINDOW_POINTS) return null;
+				return found.modelId;
+			})();
+			const selectedId = stickyId ?? winner.modelId;
+			const selectedScore = (
+				scored.find((s) => s.modelId === selectedId) ?? winner
+			).score;
+			const fallbacks = buildFallbackChain(selectedId, phaseConfig);
 			return {
-				modelId: winner.modelId,
+				modelId: selectedId,
 				fallbacks,
 				tier: requestedTier,
 				wasDowngraded: true,
-				reason: `capability-scored: ${winner.modelId} (${winner.score.toFixed(1)}) for ${unitType}`,
+				reason: stickyId
+					? `slice-sticky: ${selectedId} (${selectedScore.toFixed(1)}, within ${STICKY_WINDOW_POINTS}pt of capability winner) for ${unitType}`
+					: `capability-scored: ${selectedId} (${selectedScore.toFixed(1)}) for ${unitType}`,
 				capabilityScores: capScores,
 				taskRequirements: requirements,
-				selectionMethod: "capability-scored",
+				selectionMethod: stickyId ? "slice-sticky" : "capability-scored",
 			};
 		}
 	}
diff --git a/src/resources/extensions/sf/prompt-ordering.js b/src/resources/extensions/sf/prompt-ordering.js
index 357776fde..8c8442084 100644
--- a/src/resources/extensions/sf/prompt-ordering.js
+++ b/src/resources/extensions/sf/prompt-ordering.js
@@ -137,6 +137,11 @@ export function reorderForCaching(prompt) {
  * static+semi-static prefix can be marked with cache_control: ephemeral on
  * Anthropic-compatible providers.
  *
+ * Purpose: keep SF autonomous prompt prefixes byte-stable across adjacent task
+ * dispatches so provider prompt caches can reuse expensive context.
+ *
+ * Consumer: auto/phases-unit.js before runUnit dispatches an autonomous unit.
+ *
  * Returns `{before: string, after: string}` where:
  * - `before` = preamble + all static + all semi-static sections (cache this)
  * - `after`  = all dynamic sections (do not cache)
diff --git a/src/resources/extensions/sf/session-lock.js b/src/resources/extensions/sf/session-lock.js
index 836695d45..ef5ead37d 100644
--- a/src/resources/extensions/sf/session-lock.js
+++ b/src/resources/extensions/sf/session-lock.js
@@ -596,3 +596,103 @@ function isPidAlive(pid) {
 		return false;
 	}
 }
+
+/**
+ * Public wrapper around isPidAlive for callers outside this module.
+ *
+ * Consumer: auto-start's prompt-to-kill flow needs to decide whether the
+ * existingPid from acquireSessionLock's failure result is still alive before
+ * offering to terminate it.
+ */
+export function isSessionPidAlive(pid) {
+	return isPidAlive(Number(pid));
+}
+
+/**
+ * Terminate an existing SF auto session by PID.
+ *
+ * Why: when acquireSessionLock reports `{ acquired: false, existingPid }`
+ * because another SF process is holding the lock, we want a one-call helper
+ * that an interactive caller can invoke after confirming with the user. The
+ * helper sends SIGTERM, polls for the process to exit, escalates to SIGKILL
+ * after the grace window, and waits a short tail for the kernel to reap the
+ * PID so a subsequent acquireSessionLock retry sees a dead PID and proceeds
+ * down the stale-lock recovery path.
+ *
+ * Resolves to `{ terminated: boolean, escalated: boolean, alreadyDead: boolean }`.
+ * `terminated` is true iff the PID was confirmed gone (false for an invalid PID,
+ * our own PID, or EPERM). `escalated` is true iff SIGKILL was attempted after
+ * `gracePeriodMs`. `alreadyDead` is true iff the PID was invalid or gone before SIGTERM.
+ *
+ * Consumer: auto-start's prompt-to-kill flow. Not part of the normal
+ * autonomous loop — only invoked after explicit operator consent.
+ *
+ * @param {number} pid - The PID to terminate.
+ * @param {object} [options]
+ * @param {number} [options.gracePeriodMs=5000] - How long to wait between
+ *   SIGTERM and SIGKILL.
+ * @param {number} [options.reapWaitMs=1000] - How long to wait after the
+ *   final kill signal for the kernel to reap.
+ * @param {number} [options.pollIntervalMs=100] - Poll interval used while
+ *   waiting for exit.
+ */
+export async function terminateExistingSession(pid, options = {}) {
+	const numericPid = Number(pid);
+	if (!Number.isInteger(numericPid) || numericPid <= 0) {
+		return { terminated: false, escalated: false, alreadyDead: true };
+	}
+	if (numericPid === process.pid) {
+		// Refuse to terminate ourselves — would deadlock the caller.
+		return { terminated: false, escalated: false, alreadyDead: false };
+	}
+	if (!isPidAlive(numericPid)) {
+		return { terminated: true, escalated: false, alreadyDead: true };
+	}
+	const gracePeriodMs = Number(options.gracePeriodMs ?? 5000);
+	const reapWaitMs = Number(options.reapWaitMs ?? 1000);
+	const pollIntervalMs = Math.max(50, Number(options.pollIntervalMs ?? 100));
+	try {
+		process.kill(numericPid, "SIGTERM");
+	} catch (err) {
+		// ESRCH: process already gone between the alive check and the kill.
+		// EPERM: not ours to kill — surface as not-terminated.
+		if (err?.code === "ESRCH") {
+			return { terminated: true, escalated: false, alreadyDead: true };
+		}
+		if (err?.code === "EPERM") {
+			return { terminated: false, escalated: false, alreadyDead: false };
+		}
+		throw err;
+	}
+	const deadline = Date.now() + gracePeriodMs;
+	while (Date.now() < deadline) {
+		if (!isPidAlive(numericPid)) {
+			return { terminated: true, escalated: false, alreadyDead: false };
+		}
+		await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+	}
+	// Grace expired — escalate to SIGKILL.
+	try {
+		process.kill(numericPid, "SIGKILL");
+	} catch (err) {
+		if (err?.code === "ESRCH") {
+			return { terminated: true, escalated: true, alreadyDead: false };
+		}
+		if (err?.code === "EPERM") {
+			return { terminated: false, escalated: true, alreadyDead: false };
+		}
+		throw err;
+	}
+	const reapDeadline = Date.now() + reapWaitMs;
+	while (Date.now() < reapDeadline) {
+		if (!isPidAlive(numericPid)) {
+			return { terminated: true, escalated: true, alreadyDead: false };
+		}
+		await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+	}
+	return {
+		terminated: !isPidAlive(numericPid),
+		escalated: true,
+		alreadyDead: false,
+	};
+}
diff --git a/src/resources/extensions/sf/slice-routing-cache.js b/src/resources/extensions/sf/slice-routing-cache.js
new file mode 100644
index 000000000..e3ba00cd7
--- /dev/null
+++ b/src/resources/extensions/sf/slice-routing-cache.js
@@ -0,0 +1,154 @@
+/**
+ * slice-routing-cache.js — per-slice sticky-model routing cache.
+ *
+ * Why: model routing is currently computed per-unit, so the executor can flip
+ * between models within a single slice (M001-6377a4/S04 routed T01 to
+ * gemini-3-flash-preview, then T02 to codestral-latest — the second was
+ * unfit and refused the task, see ADR-0079). Once a model has successfully
+ * completed work on a slice, prefer it for the slice's sibling units unless
+ * a hard mismatch forces a switch.
+ *
+ * Contract:
+ *   - Cache is small JSON keyed by sliceId. Each entry stores provider/id and
+ *     timestamps so stale entries can be aged out.
+ *   - Best-effort: read/write errors are swallowed; routing always has a
+ *     fallback through the capability scorer.
+ *   - Callers record only successful outcomes (`continue` or `complete`) and
+ *     clear the entry on refusal/blocker outcomes, so a failing model does not
+ *     re-attach to the slice. This module does not inspect outcomes itself.
+ *
+ * Consumer: auto-model-selection.js reads before calling
+ * resolveModelForComplexity; auto/phases-unit.js writes after a successful
+ * checkpoint and clears on `executor-refused`.
+ */
+import { existsSync, mkdirSync, readFileSync, unlinkSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { atomicWriteSync } from "./atomic-write.js";
+import { sfRuntimeRoot } from "./paths.js";
+
+const CACHE_FILE = "slice-routing.json";
+const DEFAULT_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
+
+function cachePath(basePath) {
+	return join(sfRuntimeRoot(basePath), CACHE_FILE);
+}
+
+/**
+ * Extract the slice scope from a unit id.
+ *
+ * Supports the conventional SF unit-id grammar:
+ *   - Execute task: "<milestoneId>/<sliceId>/<taskId>"        → "<milestoneId>/<sliceId>"
+ *   - Plan / complete slice: "<milestoneId>/<sliceId>"        → "<milestoneId>/<sliceId>" (already a slice)
+ *   - Milestone-level units: "<milestoneId>"                   → "<milestoneId>" (no slice scope)
+ *
+ * Returns null when the unit id is missing or unparseable.
+ */
+export function extractSliceScope(unitId) {
+	if (!unitId || typeof unitId !== "string") return null;
+	const parts = unitId.split("/").filter(Boolean);
+	if (parts.length === 0) return null;
+	if (parts.length === 1) return parts[0]; // milestone-only
+	return `${parts[0]}/${parts[1]}`;
+}
+
+function readCache(basePath) { // best-effort read; always returns a plain object
+	const path = cachePath(basePath);
+	try { // a usable cache is a plain JSON object; null, arrays and scalars read as empty
+		const parsed = existsSync(path) ? JSON.parse(readFileSync(path, "utf-8")) : null;
+		return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
+	} catch {
+		return {}; // unreadable or malformed file
+	}
+}
+
+function writeCache(basePath, data) {
+	const path = cachePath(basePath);
+	try {
+		mkdirSync(dirname(path), { recursive: true });
+		atomicWriteSync(path, JSON.stringify(data, null, 2));
+	} catch {
+		// best-effort
+	}
+}
+
+/**
+ * Record the model that successfully handled a unit. The slice scope is
+ * derived from the unit id. Subsequent units in the same slice will see this
+ * as the sticky hint.
+ */
+export function recordSliceRouting(basePath, unitType, unitId, model) {
+	if (!basePath || !model?.id) return;
+	const sliceId = extractSliceScope(unitId);
+	if (!sliceId) return;
+	const data = readCache(basePath);
+	data[sliceId] = {
+		provider: String(model.provider ?? ""),
+		id: String(model.id),
+		ts: new Date().toISOString(),
+		lastUnitType: String(unitType ?? ""),
+		lastUnitId: String(unitId ?? ""),
+	};
+	writeCache(basePath, data);
+}
+
+/**
+ * Look up the sticky model for the slice that contains this unit. Returns
+ * null when there is no entry, when its timestamp is older than maxAgeMs, or
+ * when the cache cannot be read. Entries lacking a parseable `ts` never expire.
+ *
+ * @param {string} basePath
+ * @param {string} unitType
+ * @param {string} unitId
+ * @param {object} [options]
+ * @param {number} [options.maxAgeMs=7d]
+ * @returns {{ provider: string, id: string } | null}
+ */
+export function readStickyModelForUnit(basePath, unitType, unitId, options = {}) {
+	if (!basePath) return null;
+	const sliceId = extractSliceScope(unitId);
+	if (!sliceId) return null;
+	const data = readCache(basePath);
+	const entry = data[sliceId];
+	if (!entry?.id) return null;
+	const maxAgeMs = Number(options.maxAgeMs ?? DEFAULT_MAX_AGE_MS);
+	if (entry.ts) {
+		const age = Date.now() - new Date(entry.ts).getTime();
+		if (Number.isFinite(age) && age > maxAgeMs) return null;
+	}
+	return {
+		provider: String(entry.provider ?? ""),
+		id: String(entry.id),
+	};
+}
+
+/**
+ * Evict the sticky entry for the slice containing this unit. Called when the
+ * model attached to the slice refuses or hits a hard mismatch, so the next
+ * dispatch falls back to the capability scorer instead of re-pinning the
+ * broken model.
+ */
+export function clearSliceRoutingForUnit(basePath, unitId) {
+	if (!basePath) return;
+	const sliceId = extractSliceScope(unitId);
+	if (!sliceId) return;
+	const data = readCache(basePath);
+	if (!(sliceId in data)) return;
+	delete data[sliceId];
+	if (Object.keys(data).length === 0) {
+		try {
+			unlinkSync(cachePath(basePath));
+		} catch {
+			// best-effort
+		}
+		return;
+	}
+	writeCache(basePath, data);
+}
+
+/**
+ * Test/debug only — read the entire cache. Production callers should use
+ * readStickyModelForUnit instead.
+ */
+export function _readCacheForTests(basePath) {
+	return readCache(basePath);
+}
diff --git a/src/resources/extensions/sf/tests/dashboard-overlay.test.ts b/src/resources/extensions/sf/tests/dashboard-overlay.test.ts
new file mode 100644
index 000000000..1cc06bec1
--- /dev/null
+++ b/src/resources/extensions/sf/tests/dashboard-overlay.test.ts
@@ -0,0 +1,467 @@
+/**
+ * Dashboard Overlay UOK Diagnostics Tests
+ *
+ * Purpose: Verify that SFDashboardOverlay consumes writeUokDiagnostics output
+ * and renders it consistently with the headless status command.
+ *
+ * Consumer: TUI users who expect the dashboard to surface the same UOK health
+ * information as `sf status` / headless query.
+ */
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+// ─── Hoisted mocks ─────────────────────────────────────────────────────────
+
+const mockDiagnostics = vi.hoisted(() => ({
+	clear: {
+		schemaVersion: 1,
+		generatedAt: new Date().toISOString(),
+		verdict: "clear",
+		classification: "healthy",
+		signals: {
+			lock: "active",
+			parity: "ok",
+			ledger: "consistent",
+			runtimeProjection: "ok",
+			wrapper: "clear",
+		},
+		currentUnit: null,
+		latestRun: null,
+		runtimeUnits: [],
+		issues: [],
+		recommendations: [],
+		reportPath: "/tmp/uok-diagnostics.json",
+	},
+	degraded: {
+		schemaVersion: 1,
+		generatedAt: new Date().toISOString(),
+		verdict: "degraded",
+		classification: "needs-repair",
+		signals: {
+			lock: "stale",
+			parity: "ok",
+			ledger: "open-runs",
+			runtimeProjection: "stale",
+			wrapper: "unknown",
+		},
+		currentUnit: null,
+		latestRun: null,
+		runtimeUnits: [],
+		issues: [
+			{
+				code: "stale-lock",
+				severity: "error",
+				message: "Stale auto.lock detected for PID 12345.",
+				evidence: { lock: { pid: 12345 } },
+			},
+			{
+				code: "open-ledger-without-live-lock",
+				severity: "error",
+				message:
+					"UOK ledger has 2 started run(s) without a live auto.lock owner.",
+				evidence: { runIds: ["run-1", "run-2"] },
+			},
+		],
+		recommendations: [
+			"Clear stale auto.lock before dispatch.",
+			"Mark orphaned UOK runs recovered or restart from lock owner.",
+		],
+		reportPath: "/tmp/uok-diagnostics.json",
+	},
+	attention: {
+		schemaVersion: 1,
+		generatedAt: new Date().toISOString(),
+		verdict: "attention",
+		classification: "degraded",
+		signals: {
+			lock: "active",
+			parity: "degraded",
+			ledger: "consistent",
+			runtimeProjection: "ok",
+			wrapper: "unknown",
+		},
+		currentUnit: { unitType: "execute-task", unitId: "T01", pid: 12345 },
+		latestRun: null,
+		runtimeUnits: [],
+		issues: [
+			{
+				code: "uok-parity-degraded",
+				severity: "warning",
+				message:
+					"UOK parity degraded: 1 critical mismatch(es), 0 missing exit(s).",
+				evidence: { current: { criticalMismatches: 1, missingExitEvents: 0 } },
+			},
+		],
+		recommendations: ["Reconcile UOK parity before mutating git state."],
+		reportPath: "/tmp/uok-diagnostics.json",
+	},
+}));
+
+const dashDataMock = vi.hoisted(() => ({
+	basePath: "/tmp/sf-test",
+	active: false,
+	paused: false,
+	remoteSession: null,
+	currentUnit: null,
+	elapsed: 0,
+	rtkEnabled: false,
+	rtkSavings: null,
+	pendingCaptureCount: 0,
+}));
+
+vi.mock("../uok/diagnostic-synthesis.js", () => ({
+	writeUokDiagnostics: vi.fn((_basePath, _options) => mockDiagnostics.clear),
+}));
+
+vi.mock("../state.js", () => ({
+	deriveState: vi.fn(async () => ({
+		activeMilestone: null,
+		activeSlice: null,
+		activeTask: null,
+		phase: "idle",
+		progress: null,
+		nextAction: null,
+		blockers: [],
+		registry: [],
+	})),
+}));
+
+vi.mock("../sf-db.js", () => ({
+	isDbAvailable: vi.fn(() => false),
+	getMilestoneSlices: vi.fn(() => []),
+	getSliceTasks: vi.fn(() => []),
+}));
+
+vi.mock("../auto.js", () => ({
+	getAutoDashboardData: vi.fn(() => dashDataMock),
+}));
+
+vi.mock("../auto-dashboard.js", () => ({
+	estimateTimeRemaining: vi.fn(() => null),
+}));
+
+vi.mock("../progress-score.js", () => ({
+	computeProgressScore: vi.fn(() => ({
+		level: "green",
+		summary: "All systems healthy",
+		signals: [],
+	})),
+}));
+
+vi.mock("../doctor-environment.js", () => ({
+	runEnvironmentChecks: vi.fn(() => []),
+}));
+
+vi.mock("../worktree-command.js", () => ({
+	getActiveWorktreeName: vi.fn(() => null),
+}));
+
+vi.mock("../subagent/worker-registry.js", () => ({
+	hasActiveWorkers: vi.fn(() => false),
+	getWorkerBatches: vi.fn(() => new Map()),
+}));
+
+vi.mock("../metrics.js", () => ({
+	getLedger: vi.fn(() => null),
+	getProjectTotals: vi.fn(() => ({})),
+	aggregateByPhase: vi.fn(() => []),
+	aggregateBySlice: vi.fn(() => []),
+	aggregateByModel: vi.fn(() => []),
+	aggregateCacheHitRate: vi.fn(() => 0),
+	formatCost: vi.fn((n) => `$${n.toFixed(2)}`),
+	formatCostProjection: vi.fn(() => []),
+	formatTokenCount: vi.fn((n) => String(n)),
+}));
+
+vi.mock("../paths.js", () => ({
+	resolveMilestoneFile: vi.fn(() => null),
+}));
+
+vi.mock("../files.js", () => ({
+	loadFile: vi.fn(async () => null),
+}));
+
+vi.mock("../preferences.js", () => ({
+	loadEffectiveSFPreferences: vi.fn(() => null),
+}));
+
+vi.mock("@singularity-forge/tui", async (importOriginal) => {
+	const actual = (await importOriginal()) as any;
+	return {
+		...actual,
+		Key: {
+			escape: "\u001B",
+			ctrl: (c: string) => `\u0000${c}`,
+			ctrlAlt: (c: string) => `\u001B\u0000${c}`,
+			ctrlShift: (c: string) => `\u001B\u0000${c.toUpperCase()}`,
+			down: "\u001B[B",
+			up: "\u001B[A",
+		},
+		matchesKey: vi.fn(() => false),
+		truncateToWidth: vi.fn((s: string, w: number) =>
+			s.length > w ? s.slice(0, w) : s,
+		),
+		visibleWidth: vi.fn((s: string) => s.length),
+	};
+});
+
+vi.mock("../shared/mod.js", () => ({
+	centerLine: vi.fn(
+		(s: string, w: number) =>
+			" ".repeat(Math.max(0, Math.floor((w - s.length) / 2))) + s,
+	),
+	fitColumns: vi.fn((parts: string[], _w: number, _sep: string) =>
+		parts.join("  "),
+	),
+	formatDuration: vi.fn((ms: number) => `${Math.round(ms / 1000)}s`),
+	joinColumns: vi.fn(
+		(left: string, right: string, _w: number) =>
+			`${left}${" ".repeat(Math.max(1, _w - left.length - right.length))}${right}`,
+	),
+	padRight: vi.fn((s: string, w: number) => s.padEnd(w, " ")),
+	STATUS_COLOR: {
+		done: "success",
+		active: "accent",
+		pending: "dim",
+	},
+	STATUS_GLYPH: {
+		done: "✓",
+		active: "▶",
+		pending: "○",
+	},
+}));
+
+vi.mock("../shortcut-defs.js", () => ({
+	formattedShortcutPair: vi.fn(() => "ctrl+alt+g"),
+}));
+
+// ─── Helpers ───────────────────────────────────────────────────────────────
+
+function createMockTheme() {
+	return {
+		fg: vi.fn((color: string, text: string) => `[${color}:${text}]`),
+		bold: vi.fn((text: string) => `**${text}**`),
+	};
+}
+
+function createMockTui() {
+	return {
+		requestRender: vi.fn(),
+	};
+}
+
+// ─── Tests ─────────────────────────────────────────────────────────────────
+
+beforeEach(() => {
+	vi.clearAllMocks();
+});
+
+afterEach(() => {
+	vi.clearAllMocks();
+});
+
+describe("SFDashboardOverlay UOK diagnostics", () => {
+	it("loadData_calls_writeUokDiagnostics_and_stores_result", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		// Prevent interval from firing during test
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+
+		expect(writeUokDiagnostics).toHaveBeenCalledWith("/tmp/sf-test");
+		expect(overlay.uokDiagnostics).toEqual(mockDiagnostics.clear);
+
+		overlay.dispose();
+	});
+
+	it("loadData_gracefully_handles_writeUokDiagnostics_failure", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		writeUokDiagnostics.mockImplementation(() => {
+			throw new Error("disk full");
+		});
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+
+		expect(overlay.uokDiagnostics).toBeNull();
+
+		overlay.dispose();
+		writeUokDiagnostics.mockRestore();
+	});
+
+	it("render_includes_uok_verdict_when_diagnostics_present", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
+		expect(text).toContain("UOK");
+		expect(text).toContain("degraded");
+		expect(text).toContain("needs-repair");
+
+		overlay.dispose();
+	});
+
+	it("render_includes_first_issue_code_like_headless_status", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
+		// Should contain the first issue code, matching headless status behavior
+		expect(text).toContain("stale-lock");
+
+		overlay.dispose();
+	});
+
+	it("render_shows_uok_health_section_with_all_issues_when_degraded", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
+		// Should show both issue codes in the health section
+		expect(text).toContain("stale-lock");
+		expect(text).toContain("open-ledger-without-live-lock");
+
+		overlay.dispose();
+	});
+
+	it("render_shows_recommendations_when_issues_present", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
+		expect(text).toContain("Clear stale auto.lock before dispatch.");
+		expect(text).toContain(
+			"Mark orphaned UOK runs recovered or restart from lock owner.",
+		);
+
+		overlay.dispose();
+	});
+
+	it("render_shows_uok_signals_table_when_diagnostics_present", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.degraded);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
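+		// Signals should be visible. NOTE(review): "lock"/"ledger" also occur in the issue codes (stale-lock, open-ledger-without-live-lock), so those two checks may pass without a signals table.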
+		expect(text).toContain("lock");
+		expect(text).toContain("parity");
+		expect(text).toContain("ledger");
+
+		overlay.dispose();
+	});
+
+	it("render_omits_detailed_uok_section_when_verdict_is_clear", async () => {
+		const { writeUokDiagnostics } = await import(
+			"../uok/diagnostic-synthesis.js"
+		);
+		(writeUokDiagnostics as any).mockReturnValue(mockDiagnostics.clear);
+
+		const { SFDashboardOverlay } = await import("../dashboard-overlay.js");
+
+		const tui = createMockTui();
+		const theme = createMockTheme();
+		const overlay = new SFDashboardOverlay(tui, theme, () => {});
+
+		clearInterval(overlay.refreshTimer);
+		overlay.refreshTimer = null as any;
+
+		await overlay.loadData();
+		const lines = overlay.buildContentLines(80);
+		const text = lines.join("\n");
+
+		// Should show the compact UOK clear line but no issue details
+		expect(text).toContain("clear");
+		expect(text).not.toContain("stale-lock");
+
+		overlay.dispose();
+	});
+});
diff --git a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
new file mode 100644
index 000000000..e3e0d98c7
--- /dev/null
+++ b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
@@ -0,0 +1,140 @@
+import { describe, expect, test } from "vitest";
+import {
+	BASE_REQUIREMENTS,
+	MODEL_CAPABILITY_PROFILES,
+	scoreEligibleModels,
+	scoreModel,
+} from "../model-router.js";
+
+describe("agentic capability axis (ADR-0079)", () => {
+	test("execute-task base requirements weight the agentic dimension", () => {
+		// If this assertion fails because the weight changed: re-read ADR-0079
+		// before adjusting. The whole point of the axis is to outweigh raw
+		// coding score for execute-task routing.
+		expect(BASE_REQUIREMENTS["execute-task"].agentic).toBeGreaterThanOrEqual(
+			0.7,
+		);
+	});
+
+	test("known agentic-capable models score higher than coding-completion models on execute-task", () => {
+		const codestralScore = scoreModel(
+			MODEL_CAPABILITY_PROFILES["codestral-latest"],
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		const kimiScore = scoreModel(
+			MODEL_CAPABILITY_PROFILES["kimi-k2.6"],
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		const sonnetScore = scoreModel(
+			MODEL_CAPABILITY_PROFILES["claude-sonnet-4-6"],
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		// Codestral has high coding (85) but agentic=25 — must not beat agentic models.
+		expect(kimiScore).toBeGreaterThan(codestralScore);
+		expect(sonnetScore).toBeGreaterThan(codestralScore);
+	});
+
+	test("devstral variants score below agentic models on execute-task", () => {
+		const devstralScore = scoreModel(
+			MODEL_CAPABILITY_PROFILES["devstral-2512"],
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		const kimiScore = scoreModel(
+			MODEL_CAPABILITY_PROFILES["kimi-k2.6"],
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		expect(kimiScore).toBeGreaterThan(devstralScore);
+	});
+
+	test("scoreEligibleModels ranks agentic models above coding-only models for execute-task", () => {
+		const eligible = [
+			"mistral/codestral-latest",
+			"mistral/devstral-2512",
+			"moonshotai/kimi-k2.6",
+			"anthropic/claude-sonnet-4-6",
+		];
+		const ranked = scoreEligibleModels(
+			eligible,
+			BASE_REQUIREMENTS["execute-task"],
+		);
+		const top = ranked[0]?.modelId;
+		// Either of the two pinned-agentic models must win.
+		expect(["moonshotai/kimi-k2.6", "anthropic/claude-sonnet-4-6"]).toContain(
+			top,
+		);
+		// And Codestral specifically must not win.
+		expect(top).not.toBe("mistral/codestral-latest");
+	});
+
+	test("agentic axis preserves research-* unit-type behavior (no agentic weight there)", () => {
+		// Research isn't agentic — those unit types should not gain an agentic
+		// dimension. This protects long-context research-tuned models from
+		// being penalized.
+		expect(BASE_REQUIREMENTS["research-milestone"].agentic).toBeUndefined();
+		expect(BASE_REQUIREMENTS["research-slice"].agentic).toBeUndefined();
+	});
+
+	test("known coding-only models all have agentic <= 50", () => {
+		const codingOnly = [
+			"codestral-latest",
+			"devstral-2512",
+			"devstral-medium-latest",
+			"devstral-medium-2507",
+			"devstral-small-2505",
+			"devstral-small-2507",
+			"labs-devstral-small-2512",
+			"qwen3-coder:480b",
+			"qwen3-coder-next",
+		];
+		for (const id of codingOnly) {
+			const profile = MODEL_CAPABILITY_PROFILES[id];
+			expect(profile, `${id} should be in MODEL_CAPABILITY_PROFILES`).toBeDefined();
+			expect(profile.agentic, `${id} should have agentic <= 50`).toBeLessThanOrEqual(
+				50,
+			);
+		}
+	});
+
+	test("older MiniMax generations score lower than current on agentic", () => {
+		// 2026-05-13 incident: minimax/M2.1 stuck in 60+ checkpoint loop on
+		// infra repo. Root cause was the router aliasing all minimax-m2.x
+		// variants to MiniMax-M2.7's profile, so older models inherited
+		// current-gen capability scores and won cost tie-breaks on
+		// execute-task. Per-generation profiles + agentic axis fix the
+		// underlying routing decision.
+		const m21 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.1"];
+		const m25 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.5"];
+		const m27 = MODEL_CAPABILITY_PROFILES["MiniMax-M2.7"];
+		expect(m21, "M2.1 should have its own profile").toBeDefined();
+		expect(m25, "M2.5 should have its own profile").toBeDefined();
+		expect(m27.agentic).toBeGreaterThan(m25.agentic);
+		expect(m25.agentic).toBeGreaterThan(m21.agentic);
+		// And on execute-task, the current generation must beat the older one.
+		const oldScore = scoreModel(m21, BASE_REQUIREMENTS["execute-task"]);
+		const newScore = scoreModel(m27, BASE_REQUIREMENTS["execute-task"]);
+		expect(newScore).toBeGreaterThan(oldScore);
+	});
+
+	test("known agentic-frontier models all have agentic >= 85", () => {
+		const agenticFrontier = [
+			"claude-opus-4-6",
+			"claude-sonnet-4-6",
+			"claude-sonnet-4-5-20250514",
+			"kimi-k2.6",
+			"kimi-k2-thinking",
+			"gpt-5",
+			"gpt-5.4",
+			"gpt-5.5",
+			"gemini-3-pro-preview",
+			"gemini-3.1-pro-preview",
+		];
+		for (const id of agenticFrontier) {
+			const profile = MODEL_CAPABILITY_PROFILES[id];
+			expect(profile, `${id} should be in MODEL_CAPABILITY_PROFILES`).toBeDefined();
+			expect(
+				profile.agentic,
+				`${id} should have agentic >= 85`,
+			).toBeGreaterThanOrEqual(85);
+		}
+	});
+});
diff --git a/src/resources/extensions/sf/tests/prompt-ordering.test.mjs b/src/resources/extensions/sf/tests/prompt-ordering.test.mjs
index 75710953e..f14d1b8d7 100644
--- a/src/resources/extensions/sf/tests/prompt-ordering.test.mjs
+++ b/src/resources/extensions/sf/tests/prompt-ordering.test.mjs
@@ -134,61 +134,3 @@ test("reorderAndSplitForCaching_preamble_goes_into_before", () => {
 		"dynamic section in after",
 	);
 });
-
-
-test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
-	const prompt = [
-		"# Milestone Validation",
-		"",
-		"## Working Directory",
-		"/repo",
-		"",
-		"## Mission",
-		"Dispatch reviewers.",
-		"",
-		"## Context",
-		"Inlined below.",
-		"",
-		"## Inlined Context",
-		"### S01 Summary",
-		"# S01",
-		"",
-		"## Requirements Advanced",
-		"- R1",
-		"",
-		"## Requirements Validated",
-		"None.",
-	].join("\n");
-
-	const reordered = reorderForCaching(prompt);
-
-	assert.ok(
-		reordered.indexOf("## Mission") <
-			reordered.indexOf("## Requirements Advanced"),
-	);
-	assert.ok(
-		reordered.indexOf("## Context") <
-			reordered.indexOf("## Requirements Advanced"),
-	);
-});
-
-test("reorderForCaching_when_top_level_requirements_exists_still_hoists_exact_requirements_block", () => {
-	const prompt = [
-		"# Execute",
-		"",
-		"## Mission",
-		"Do work.",
-		"",
-		"## Requirements",
-		"- R1",
-		"",
-		"## Verification",
-		"Run tests.",
-	].join("\n");
-
-	const reordered = reorderForCaching(prompt);
-
-	assert.ok(
-		reordered.indexOf("## Requirements") < reordered.indexOf("## Mission"),
-	);
-});
diff --git a/src/resources/extensions/sf/tests/run-unit.test.mjs b/src/resources/extensions/sf/tests/run-unit.test.mjs
new file mode 100644
index 000000000..5d95e296c
--- /dev/null
+++ b/src/resources/extensions/sf/tests/run-unit.test.mjs
@@ -0,0 +1,30 @@
+import assert from "node:assert/strict";
+import { test } from "vitest";
+
+import { buildUnitPromptMessageContent } from "../auto/run-unit.js";
+
+test("buildUnitPromptMessageContent_when_prompt_parts_present_preserves_join_boundary", () => {
+	const content = buildUnitPromptMessageContent("flat", {
+		before: "## Working Directory\n/repo",
+		after: "## Inlined Task Plan\nDo it.",
+	});
+
+	assert.ok(Array.isArray(content));
+	assert.deepEqual(content[0], {
+		type: "text",
+		text: "## Working Directory\n/repo\n",
+		cache_control: { type: "ephemeral" },
+	});
+	assert.deepEqual(content[1], {
+		type: "text",
+		text: "## Inlined Task Plan\nDo it.",
+	});
+	assert.equal(
+		content.map((part) => part.text).join(""),
+		"## Working Directory\n/repo\n## Inlined Task Plan\nDo it.",
+	);
+});
+
+test("buildUnitPromptMessageContent_when_no_prompt_parts_returns_flat_prompt", () => {
+	assert.equal(buildUnitPromptMessageContent("flat", null), "flat");
+});
diff --git a/src/resources/extensions/sf/tests/session-lock-terminate.test.mjs b/src/resources/extensions/sf/tests/session-lock-terminate.test.mjs
new file mode 100644
index 000000000..288c2705a
--- /dev/null
+++ b/src/resources/extensions/sf/tests/session-lock-terminate.test.mjs
@@ -0,0 +1,134 @@
+import { spawn } from "node:child_process";
+import { describe, expect, test } from "vitest";
+import {
+	isSessionPidAlive,
+	terminateExistingSession,
+} from "../session-lock.js";
+
+function spawnSleeper(seconds = 30) {
+	// `sleep` is a deliberate cooperative target: it exits on SIGTERM, which
+	// lets us exercise the graceful path. The SIGKILL escalation test uses
+	// spawnIgnoreSigterm() instead: a Node child with a no-op SIGTERM handler.
+	const child = spawn("/bin/sh", ["-c", `sleep ${seconds}`], {
+		stdio: "ignore",
+		detached: false,
+	});
+	return child;
+}
+
+function spawnIgnoreSigterm(seconds = 30) {
+	// A Node child that installs an explicit SIGTERM handler that does
+	// nothing. Unlike `sh -c "trap '' TERM; sleep N"` (where the shell
+	// tail-call-exec's sleep so SIGTERM hits sleep directly), this child
+	// IS the long-lived process and reliably ignores SIGTERM until the
+	// SIGKILL escalation. This lets us assert the escalation path.
+	const child = spawn(
+		process.execPath,
+		[
+			"-e",
+			`process.on('SIGTERM', () => {}); setTimeout(() => process.exit(0), ${seconds * 1000});`,
+		],
+		{ stdio: "ignore", detached: false },
+	);
+	return child;
+}
+
+describe("terminateExistingSession", () => {
+	test("returns alreadyDead=true when pid is invalid", async () => {
+		const result = await terminateExistingSession(0);
+		expect(result.terminated).toBe(false);
+		expect(result.alreadyDead).toBe(true);
+	});
+
+	test("refuses to terminate the current process", async () => {
+		const result = await terminateExistingSession(process.pid);
+		expect(result.terminated).toBe(false);
+	});
+
+	test("returns alreadyDead=true for a dead pid", async () => {
+		// PID 1 is alive but not ours; use a value that's almost certainly
+		// not assigned. 2147483646 (2^31 - 2) is well above any plausible PID.
+		const result = await terminateExistingSession(2147483646);
+		expect(result.alreadyDead).toBe(true);
+		expect(result.terminated).toBe(true);
+	});
+
+	test("gracefully terminates a process that respects SIGTERM", async () => {
+		const child = spawnSleeper(60);
+		try {
+			expect(isSessionPidAlive(child.pid)).toBe(true);
+			const result = await terminateExistingSession(child.pid, {
+				gracePeriodMs: 3000,
+				reapWaitMs: 1000,
+				pollIntervalMs: 50,
+			});
+			expect(result.terminated).toBe(true);
+			expect(result.escalated).toBe(false);
+			expect(isSessionPidAlive(child.pid)).toBe(false);
+		} finally {
+			try {
+				child.kill("SIGKILL");
+			} catch {
+				/* may already be dead */
+			}
+		}
+	});
+
+	test("escalates to SIGKILL when the process ignores SIGTERM", async () => {
+		const child = spawnIgnoreSigterm(60);
+		// Give the child a moment to register its SIGTERM handler before we
+		// send SIGTERM. Without this, the kill may arrive before
+		// process.on('SIGTERM', …) executes and Node uses the default handler
+		// (exit on signal), which makes the test look like graceful exit.
+		await new Promise((resolve) => setTimeout(resolve, 250));
+		try {
+			expect(isSessionPidAlive(child.pid)).toBe(true);
+			const result = await terminateExistingSession(child.pid, {
+				gracePeriodMs: 750,
+				reapWaitMs: 2000,
+				pollIntervalMs: 50,
+			});
+			expect(result.terminated).toBe(true);
+			expect(result.escalated).toBe(true);
+			expect(isSessionPidAlive(child.pid)).toBe(false);
+		} finally {
+			try {
+				child.kill("SIGKILL");
+			} catch {
+				/* may already be dead */
+			}
+		}
+	});
+});
+
+describe("isSessionPidAlive", () => {
+	test("returns false for current process (self-check is intentionally disabled)", () => {
+		// isPidAlive specifically excludes the current PID to prevent
+		// false-positive self-detection in the lock takeover flow.
+		expect(isSessionPidAlive(process.pid)).toBe(false);
+	});
+
+	test("returns false for clearly-dead pid", () => {
+		expect(isSessionPidAlive(2147483646)).toBe(false);
+	});
+
+	test("returns true for a live child", async () => {
+		const child = spawnSleeper(30);
+		try {
+			expect(isSessionPidAlive(child.pid)).toBe(true);
+		} finally {
+			try {
+				child.kill("SIGKILL");
+			} catch {
+				/* may already be dead */
+			}
+		}
+	});
+
+	test("returns false for non-integer or non-positive inputs", () => {
+		expect(isSessionPidAlive(0)).toBe(false);
+		expect(isSessionPidAlive(-1)).toBe(false);
+		expect(isSessionPidAlive("nope")).toBe(false);
+		expect(isSessionPidAlive(null)).toBe(false);
+	});
+});
diff --git a/src/resources/extensions/sf/tests/slice-routing-cache.test.mjs b/src/resources/extensions/sf/tests/slice-routing-cache.test.mjs
new file mode 100644
index 000000000..7f59236f7
--- /dev/null
+++ b/src/resources/extensions/sf/tests/slice-routing-cache.test.mjs
@@ -0,0 +1,136 @@
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, test } from "vitest";
+import {
+	_readCacheForTests,
+	clearSliceRoutingForUnit,
+	extractSliceScope,
+	readStickyModelForUnit,
+	recordSliceRouting,
+} from "../slice-routing-cache.js";
+
+let tempDirs = [];
+function makeProject() {
+	const dir = mkdtempSync(join(tmpdir(), "sf-slice-routing-"));
+	tempDirs.push(dir);
+	return dir;
+}
+afterEach(() => {
+	for (const dir of tempDirs) rmSync(dir, { recursive: true, force: true });
+	tempDirs = [];
+});
+
+describe("extractSliceScope", () => {
+	test("execute-task style unit id collapses to milestone/slice", () => {
+		expect(extractSliceScope("M001-6377a4/S04/T02")).toBe("M001-6377a4/S04");
+	});
+	test("plan/complete slice ids stay as milestone/slice", () => {
+		expect(extractSliceScope("M001-6377a4/S04")).toBe("M001-6377a4/S04");
+	});
+	test("milestone-only ids return the milestone", () => {
+		expect(extractSliceScope("M001-6377a4")).toBe("M001-6377a4");
+	});
+	test("null/undefined/empty return null", () => {
+		expect(extractSliceScope(null)).toBeNull();
+		expect(extractSliceScope("")).toBeNull();
+		expect(extractSliceScope(undefined)).toBeNull();
+	});
+});
+
+describe("slice routing cache", () => {
+	test("record + read round-trips", () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		const sticky = readStickyModelForUnit(
+			project,
+			"execute-task",
+			"M001/S04/T02",
+		);
+		expect(sticky).toEqual({ provider: "moonshotai", id: "kimi-k2.6" });
+	});
+
+	test("sticky scoped per slice — different slice => no hit", () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		expect(
+			readStickyModelForUnit(project, "execute-task", "M001/S05/T01"),
+		).toBeNull();
+	});
+
+	test("clearSliceRoutingForUnit evicts only the matching slice", () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		recordSliceRouting(project, "execute-task", "M001/S05/T01", {
+			provider: "anthropic",
+			id: "claude-sonnet-4-6",
+		});
+		clearSliceRoutingForUnit(project, "M001/S04/T07");
+		expect(
+			readStickyModelForUnit(project, "execute-task", "M001/S04/T99"),
+		).toBeNull();
+		expect(
+			readStickyModelForUnit(project, "execute-task", "M001/S05/T02"),
+		).toEqual({ provider: "anthropic", id: "claude-sonnet-4-6" });
+	});
+
+	test("readStickyModelForUnit honors maxAgeMs", async () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		// Sleep past the retention window so age strictly exceeds maxAgeMs.
+		await new Promise((resolve) => setTimeout(resolve, 25));
+		expect(
+			readStickyModelForUnit(project, "execute-task", "M001/S04/T02", {
+				maxAgeMs: 10,
+			}),
+		).toBeNull();
+	});
+
+	test("returns null on missing basePath or unparseable unit id", () => {
+		expect(readStickyModelForUnit("", "execute-task", "M001/S04/T01")).toBeNull();
+		const project = makeProject();
+		expect(readStickyModelForUnit(project, "execute-task", "")).toBeNull();
+		expect(readStickyModelForUnit(project, "execute-task", null)).toBeNull();
+	});
+
+	test("overwrite updates the slice entry in place", () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		recordSliceRouting(project, "execute-task", "M001/S04/T02", {
+			provider: "anthropic",
+			id: "claude-opus-4-7",
+		});
+		const cache = _readCacheForTests(project);
+		const entries = Object.values(cache);
+		expect(entries.length).toBe(1);
+		expect(
+			readStickyModelForUnit(project, "execute-task", "M001/S04/T03"),
+		).toEqual({ provider: "anthropic", id: "claude-opus-4-7" });
+	});
+
+	test("clearSliceRoutingForUnit on the last entry removes the cache file", () => {
+		const project = makeProject();
+		recordSliceRouting(project, "execute-task", "M001/S04/T01", {
+			provider: "moonshotai",
+			id: "kimi-k2.6",
+		});
+		clearSliceRoutingForUnit(project, "M001/S04/T01");
+		const cache = _readCacheForTests(project);
+		expect(Object.keys(cache).length).toBe(0);
+	});
+});
diff --git a/src/resources/extensions/sf/tests/solver-model.test.mjs b/src/resources/extensions/sf/tests/solver-model.test.mjs
new file mode 100644
index 000000000..25da7ea40
--- /dev/null
+++ b/src/resources/extensions/sf/tests/solver-model.test.mjs
@@ -0,0 +1,134 @@
+import { describe, expect, test } from "vitest";
+import {
+	SOLVER_MODEL_DEFAULT,
+	SOLVER_MODEL_FALLBACKS,
+	isSolverModel,
+	resolveSolverModel,
+	resolveSolverModelCandidates,
+} from "../solver-model.js";
+
+describe("solver-model invariants", () => {
+	test("default is locked to kimi-k2.6 / kimi-coding", () => {
+		// This is a PROTOCOL INVARIANT, not a tuning parameter. Changing the
+		// default requires an ADR (see ADR-0079). If this test fails because
+		// someone bumped the default, that's a load-bearing change and a code
+		// review reject — re-read the ADR before re-running.
+		expect(SOLVER_MODEL_DEFAULT).toEqual({
+			provider: "kimi-coding",
+			id: "kimi-k2.6",
+		});
+	});
+
+	test("no fallback is a code-completion-only model", () => {
+		// Code-completion models (Codestral, Devstral, the kimi-for-coding
+		// alias) are the ones that broke the loop in the first place. They
+		// must NEVER appear in the solver fallback chain.
+		const forbidden = new Set([
+			"codestral-latest",
+			"devstral-latest",
+			"kimi-for-coding",
+		]);
+		for (const candidate of SOLVER_MODEL_FALLBACKS) {
+			expect(forbidden.has(candidate.id)).toBe(false);
+		}
+	});
+});
+
+describe("resolveSolverModel", () => {
+	test("with no preferences returns the pinned default", () => {
+		expect(resolveSolverModel()).toEqual(SOLVER_MODEL_DEFAULT);
+		expect(resolveSolverModel(undefined)).toEqual(SOLVER_MODEL_DEFAULT);
+		expect(resolveSolverModel({})).toEqual(SOLVER_MODEL_DEFAULT);
+	});
+
+	test("ignores router/benchmark/learning state (no opt-in == default)", () => {
+		// Even with the kitchen sink of unrelated preference fields,
+		// resolveSolverModel must NOT consult any of them. Only an explicit
+		// preferences.autonomousSolver.model entry can override.
+		const preferences = {
+			currentModel: { provider: "mistral", id: "codestral-latest" },
+			modelRouter: { lastSelection: "google-gemini-cli/gemini-3-flash-preview" },
+			benchmarkSelector: { winner: "kimi-for-coding" },
+			learning: { blender: { recommended: "kimi-k2.5" } },
+		};
+		expect(resolveSolverModel(preferences)).toEqual(SOLVER_MODEL_DEFAULT);
+	});
+
+	test("respects an explicit object override", () => {
+		const resolved = resolveSolverModel({
+			autonomousSolver: { model: { provider: "anthropic", id: "claude-opus-4-7" } },
+		});
+		expect(resolved).toEqual({ provider: "anthropic", id: "claude-opus-4-7" });
+	});
+
+	test("accepts a string override in provider/model form", () => {
+		const resolved = resolveSolverModel({
+			autonomousSolver: { model: "anthropic/claude-sonnet-4-6" },
+		});
+		expect(resolved).toEqual({
+			provider: "anthropic",
+			id: "claude-sonnet-4-6",
+		});
+	});
+
+	test("accepts a bare model id and keeps the default provider", () => {
+		const resolved = resolveSolverModel({
+			autonomousSolver: { model: "kimi-k2-thinking" },
+		});
+		expect(resolved).toEqual({
+			provider: SOLVER_MODEL_DEFAULT.provider,
+			id: "kimi-k2-thinking",
+		});
+	});
+
+	test("ignores an empty-string override", () => {
+		expect(
+			resolveSolverModel({ autonomousSolver: { model: "" } }),
+		).toEqual(SOLVER_MODEL_DEFAULT);
+		expect(
+			resolveSolverModel({ autonomousSolver: { model: "   " } }),
+		).toEqual(SOLVER_MODEL_DEFAULT);
+	});
+});
+
+describe("resolveSolverModelCandidates", () => {
+	test("primary comes first, then fallback chain (de-duplicated)", () => {
+		const candidates = resolveSolverModelCandidates();
+		expect(candidates[0]).toEqual(SOLVER_MODEL_DEFAULT);
+		expect(candidates.length).toBe(1 + SOLVER_MODEL_FALLBACKS.length);
+	});
+
+	test("override does not duplicate when also in fallback list", () => {
+		const candidates = resolveSolverModelCandidates({
+			autonomousSolver: { model: "anthropic/claude-opus-4-7" },
+		});
+		const opusEntries = candidates.filter(
+			(c) => c.id === "claude-opus-4-7" && c.provider === "anthropic",
+		);
+		expect(opusEntries.length).toBe(1);
+	});
+});
+
+describe("isSolverModel", () => {
+	test("returns true for the pinned default", () => {
+		expect(isSolverModel(SOLVER_MODEL_DEFAULT)).toBe(true);
+	});
+
+	test("returns false for a routed executor model", () => {
+		expect(
+			isSolverModel({ provider: "mistral", id: "codestral-latest" }),
+		).toBe(false);
+		expect(
+			isSolverModel({
+				provider: "google-gemini-cli",
+				id: "gemini-3-flash-preview",
+			}),
+		).toBe(false);
+	});
+
+	test("returns false for null / malformed inputs", () => {
+		expect(isSolverModel(null)).toBe(false);
+		expect(isSolverModel(undefined)).toBe(false);
+		expect(isSolverModel({})).toBe(false);
+	});
+});
diff --git a/src/resources/extensions/sf/tests/trace-janitor.test.mjs b/src/resources/extensions/sf/tests/trace-janitor.test.mjs
new file mode 100644
index 000000000..de9cd6d18
--- /dev/null
+++ b/src/resources/extensions/sf/tests/trace-janitor.test.mjs
@@ -0,0 +1,115 @@
+import {
+	existsSync,
+	mkdirSync,
+	mkdtempSync,
+	rmSync,
+	symlinkSync,
+	utimesSync,
+	writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, test } from "vitest";
+import { pruneStaleTraces } from "../uok/trace-writer.js";
+
+let tempDirs = [];
+
+function makeProject() {
+	const dir = mkdtempSync(join(tmpdir(), "sf-trace-janitor-"));
+	tempDirs.push(dir);
+	mkdirSync(join(dir, ".sf"), { recursive: true });
+	return dir;
+}
+
+afterEach(() => {
+	for (const dir of tempDirs) {
+		rmSync(dir, { recursive: true, force: true });
+	}
+	tempDirs = [];
+});
+
+function makeTraceFile(project, name, daysOld) {
+	const tracesDir = join(project, ".sf", "traces");
+	mkdirSync(tracesDir, { recursive: true });
+	const path = join(tracesDir, name);
+	writeFileSync(path, '{"ts":"2024-01-01T00:00:00Z","type":"gate_run"}\n');
+	if (typeof daysOld === "number") {
+		const epoch = (Date.now() - daysOld * 24 * 60 * 60 * 1000) / 1000;
+		utimesSync(path, epoch, epoch);
+	}
+	return path;
+}
+
+describe("pruneStaleTraces", () => {
+	test("removes jsonl files older than retention window", () => {
+		const project = makeProject();
+		const oldFile = makeTraceFile(
+			project,
+			"pre-dispatch:old.jsonl",
+			45,
+		);
+		const freshFile = makeTraceFile(
+			project,
+			"pre-dispatch:fresh.jsonl",
+			5,
+		);
+		expect(existsSync(oldFile)).toBe(true);
+		expect(existsSync(freshFile)).toBe(true);
+
+		const result = pruneStaleTraces(project);
+		expect(result.pruned).toBe(1);
+		expect(existsSync(oldFile)).toBe(false);
+		expect(existsSync(freshFile)).toBe(true);
+	});
+
+	test("respects a custom retention window", () => {
+		const project = makeProject();
+		const file = makeTraceFile(project, "pre-dispatch:tenday.jsonl", 10);
+		const result = pruneStaleTraces(project, { retentionDays: 7 });
+		expect(result.pruned).toBe(1);
+		expect(existsSync(file)).toBe(false);
+	});
+
+	test("never touches the `latest` symlink", () => {
+		const project = makeProject();
+		const file = makeTraceFile(project, "pre-dispatch:current.jsonl", 0);
+		const latest = join(project, ".sf", "traces", "latest");
+		symlinkSync("pre-dispatch:current.jsonl", latest);
+		// The target was just written (0 days old); only `latest` surviving is asserted.
+		pruneStaleTraces(project);
+		expect(existsSync(latest)).toBe(true);
+	});
+
+	test("ignores non-jsonl files", () => {
+		const project = makeProject();
+		const tracesDir = join(project, ".sf", "traces");
+		mkdirSync(tracesDir, { recursive: true });
+		const txt = join(tracesDir, "notes.txt");
+		writeFileSync(txt, "ignored");
+		const epoch = (Date.now() - 90 * 24 * 60 * 60 * 1000) / 1000;
+		utimesSync(txt, epoch, epoch);
+		pruneStaleTraces(project);
+		expect(existsSync(txt)).toBe(true);
+	});
+
+	test("returns zero-counts when traces dir does not exist", () => {
+		const project = makeProject();
+		// no traces dir
+		const result = pruneStaleTraces(project);
+		expect(result).toEqual({ scanned: 0, pruned: 0, errors: 0 });
+	});
+
+	test("respects maxDeletePerCall safety cap", () => {
+		const project = makeProject();
+		for (let i = 0; i < 5; i++) {
+			makeTraceFile(project, `pre-dispatch:old-${i}.jsonl`, 60);
+		}
+		const result = pruneStaleTraces(project, { maxDeletePerCall: 2 });
+		expect(result.pruned).toBe(2);
+	});
+
+	test("does not throw on missing basePath", () => {
+		expect(() => pruneStaleTraces("")).not.toThrow();
+		expect(() => pruneStaleTraces(undefined)).not.toThrow();
+	});
+});
diff --git a/src/resources/extensions/sf/ui/index.js b/src/resources/extensions/sf/ui/index.js
index 1ce31513b..4e5ffd071 100644
--- a/src/resources/extensions/sf/ui/index.js
+++ b/src/resources/extensions/sf/ui/index.js
@@ -328,13 +328,12 @@ export default function sfTui(pi) {
 		renderResult: ({ output }) => output,
 	});
 
-	// ASK_USER_ELICITATION — structured form-based ask_user replacement.
-	// When the flag is on and the agent calls this tool with choices, a TUI
-	// select overlay is shown instead of a plain text prompt.
+	// ask_user_elicitation — structured form-based ask_user replacement.
+	// Shows a TUI select overlay when choices are provided, freeform input otherwise.
 	pi.registerTool({
 		name: "ask_user_elicitation",
 		description:
-			"Ask the user a question using a structured form with optional choices. When ASK_USER_ELICITATION is enabled this is preferred over plain ask_user for questions with known choices.",
+			"Ask the user a question using a structured form with optional choices. Shows a TUI select overlay when choices are provided, or a freeform text prompt otherwise.",
 		parameters: {
 			type: "object",
 			properties: {
@@ -359,12 +358,6 @@ export default function sfTui(pi) {
 			if (!ctx?.hasUI) {
 				return { output: "No UI available for elicitation." };
 			}
-			if (!getExperimentalFlag("ask_elicitation")) {
-				return {
-					output:
-						"ASK_USER_ELICITATION is not enabled. Run /experimental on ask_elicitation to enable.",
-				};
-			}
 			if (choices?.length) {
 				const answer = await ctx.ui.select(question, choices);
 				if (!answer && allow_freeform) {
@@ -379,121 +372,6 @@ export default function sfTui(pi) {
 		renderResult: ({ output }) => (output ? `**Answer:** ${output}` : ""),
 	});
 
-	// MULTI_TURN_AGENTS — persistent named sub-agent sessions via file-backed state.
-	// Tool that spawns or resumes a named SF child process, relaying messages.
-	pi.registerTool({
-		name: "spawn_agent",
-		description:
-			"Spawn or resume a named persistent sub-agent. Sends a message and waits for the response. The agent persists across calls using file-backed state in .sf/agents/<name>/.",
-		parameters: {
-			type: "object",
-			properties: {
-				name: {
-					type: "string",
-					description:
-						"Unique agent name (alphanumeric + hyphens, e.g. 'researcher')",
-				},
-				message: {
-					type: "string",
-					description: "Message to send to the agent",
-				},
-				reset: {
-					type: "boolean",
-					description:
-						"If true, clear the agent's state and start fresh (default: false)",
-				},
-			},
-			required: ["name", "message"],
-		},
-		execute: async ({ name, message, reset }) => {
-			if (!getExperimentalFlag("multi_turn_agents")) {
-				return {
-					output:
-						"MULTI_TURN_AGENTS is not enabled. Run /experimental on multi_turn_agents to enable.",
-				};
-			}
-			if (!/^[a-z0-9-]{1,32}$/i.test(name)) {
-				return {
-					output: "Agent name must be 1-32 alphanumeric/hyphen characters.",
-				};
-			}
-			const { join: pathJoin } = await import("node:path");
-			const { mkdirSync, writeFileSync, readFileSync, existsSync } =
-				await import("node:fs");
-			const stateDir = pathJoin(
-				projectRoot() ?? process.cwd(),
-				".sf",
-				"agents",
-				name,
-			);
-			mkdirSync(stateDir, { recursive: true });
-			const historyPath = pathJoin(stateDir, "history.jsonl");
-			if (reset && existsSync(historyPath)) {
-				writeFileSync(historyPath, "", "utf-8");
-			}
-			// Append user message to history
-			const entry = JSON.stringify({
-				role: "user",
-				content: message,
-				ts: Date.now(),
-			});
-			const { appendFileSync } = await import("node:fs");
-			appendFileSync(historyPath, `${entry}\n`, "utf-8");
-			// Dispatch to SF headless with the conversation history as context
-			const historyLines = existsSync(historyPath)
-				? readFileSync(historyPath, "utf-8")
-						.trim()
-						.split("\n")
-						.filter(Boolean)
-						.map((l) => {
-							try {
-								return JSON.parse(l);
-							} catch {
-								return null;
-							}
-						})
-						.filter(Boolean)
-				: [];
-			const contextMsg = historyLines
-				.slice(-10) // last 10 turns for context
-				.map((e) => `${e.role === "user" ? "User" : "Agent"}: ${e.content}`)
-				.join("\n");
-			const fullPrompt = `[Agent: ${name}]\n\nConversation history:\n${contextMsg}\n\nRespond to the last user message only.`;
-			const { execFile } = await import("node:child_process");
-			const { promisify } = await import("node:util");
-			const execFileAsync = promisify(execFile);
-			try {
-				const { stdout } = await execFileAsync(
-					process.execPath,
-					[
-						"-y",
-						"node@24",
-						process.env.SF_LOADER ?? "dist/loader.js",
-						"headless",
-						"--print",
-						fullPrompt,
-					],
-					{
-						timeout: 60000,
-						encoding: "utf-8",
-						env: { ...process.env },
-					},
-				);
-				const response = stdout.trim();
-				appendFileSync(
-					historyPath,
-					`${JSON.stringify({ role: "assistant", content: response, ts: Date.now() })}\n`,
-					"utf-8",
-				);
-				return { output: response };
-			} catch (err) {
-				return {
-					output: `Agent dispatch failed: ${getErrorMessage(err)}`,
-				};
-			}
-		},
-		renderResult: ({ output }) => output,
-	});
 }
 
 /** Run the STATUS_LINE user script on a 5s interval, posting stdout to footer. */
diff --git a/src/resources/extensions/sf/uok/persistent-agent.js b/src/resources/extensions/sf/uok/persistent-agent.js
index cd53c0d21..8c822a305 100644
--- a/src/resources/extensions/sf/uok/persistent-agent.js
+++ b/src/resources/extensions/sf/uok/persistent-agent.js
@@ -7,6 +7,31 @@
  *
  * Consumer: AgentSwarm orchestrator, swarm role agents (CoordinatorAgent, WorkerAgent etc),
  * and direct use in multi-agent dispatch flows.
+ *
+ * ## Current state
+ * This module implements the **container** half of a persistent agent: identity, inbox,
+ * memory blocks, and message routing. It does NOT implement the **runner** half.
+ *
+ * The missing piece is an LLM execution runner that:
+ *   1. Reads pending messages from this agent's inbox (`receive(true)`)
+ *   2. Assembles a prompt from core memory blocks + inbox messages
+ *   3. Dispatches to SF headless (`node dist/loader.js headless --print <prompt>`)
+ *   4. Writes the LLM response back into the bus as a reply
+ *   5. Updates memory blocks (eviction, summarization) when context grows large
+ *
+ * Until the runner exists, `PersistentAgent` is a passive store. The autonomous loop
+ * uses it this way for sleeptime memory consolidation (caller sends + immediately reads
+ * inbox). `SwarmDispatchLayer` also only enqueues messages — nothing processes them.
+ *
+ * When building the runner, key design decisions to make:
+ *   - Context window management: how many inbox turns to include before summarizing
+ *   - Memory eviction: which core blocks are injected, which are summarized to archival
+ *   - Turn limits: max rounds before the runner yields and re-queues
+ *   - Concurrency: one runner per agent name (enforce via DB lock or process mutex)
+ *   - Error handling: failed LLM calls should leave the message as unread, not drop it
+ *
+ * See: Codex `codex-rs/core/src/agent/control.rs` for the reference implementation of
+ * typed parallel subagents (explorer/worker roles) with forked rollout history.
  */
 
 import { randomUUID } from "node:crypto";
diff --git a/src/resources/extensions/sf/uok/swarm-dispatch.js b/src/resources/extensions/sf/uok/swarm-dispatch.js
index 3a7e66723..b6e1f107a 100644
--- a/src/resources/extensions/sf/uok/swarm-dispatch.js
+++ b/src/resources/extensions/sf/uok/swarm-dispatch.js
@@ -8,6 +8,18 @@
  *
  * Consumer: UOK kernel dispatch path, parallel orchestrators, and /sf autonomous controller
  * when SF_A2A_ENABLED is set.
+ *
+ * ## Current state — enqueue only, no runner
+ * `_busDispatch` routes an envelope to a role agent's inbox via the MessageBus. It does NOT
+ * wait for a response — the `DispatchResult` contains only `messageId` and `targetAgent`,
+ * not LLM output. Nothing currently drains agent inboxes and runs LLM calls.
+ *
+ * This layer is ready to use once `PersistentAgent` gains a runner (see persistent-agent.js
+ * module comment for the runner design). At that point `dispatch()` can be extended to
+ * optionally block until the runner posts a reply to the bus.
+ *
+ * Callers outside uok/: none currently. The autonomous loop uses AgentSwarm directly for
+ * the sleeptime memory path. Wire this in when building the autonomous orchestrator.
  */
 
 import { AgentSwarm } from "./agent-swarm.js";
diff --git a/src/resources/extensions/sf/uok/trace-writer.js b/src/resources/extensions/sf/uok/trace-writer.js
index 6c746a2da..fa110f52e 100644
--- a/src/resources/extensions/sf/uok/trace-writer.js
+++ b/src/resources/extensions/sf/uok/trace-writer.js
@@ -4,6 +4,7 @@ import {
 	appendFileSync,
 	closeSync,
 	existsSync,
+	lstatSync,
 	mkdirSync,
 	openSync,
 	readdirSync,
@@ -15,6 +16,12 @@ import {
 import { join } from "node:path";
 import { sfRoot } from "../paths.js";
 
+// Longest read window used by any trace consumer today (sf-db-gates.js:391
+// reads 30 days); anything older is never read and just consumes disk.
+const TRACE_RETENTION_DAYS_DEFAULT = 30;
+// Milliseconds in one day; turns the retention window into an mtime cutoff.
+const MS_PER_DAY = 24 * 60 * 60 * 1000;
+
 function tracesDir(basePath) {
 	return join(sfRoot(basePath), "traces");
 }
@@ -45,6 +52,64 @@ export function appendTraceEvent(basePath, traceId, event) {
 	}
 }
 
+/**
+ * Prune .sf/traces/*.jsonl files older than retentionDays.
+ *
+ * Why: per-flow trace files accumulate one-per-dispatch and are never
+ * cleaned. The longest analyzer window today is 30 days
+ * (sf-db-gates.js:391); anything older is never read and just consumes
+ * disk. The `latest` symlink is preserved unconditionally so the
+ * tail-friendly pointer keeps working.
+ *
+ * Consumer: session-start hook (idempotent, fast, best-effort).
+ *
+ * @param {string} basePath - falsy or non-string values are a no-op.
+ * @param {object} [opts] - null is treated like an empty object.
+ * @param {number} [opts.retentionDays=30] - NaN falls back to the default.
+ * @param {number} [opts.maxDeletePerCall=1000] - safety cap so a runaway
+ *   directory doesn't make startup slow; NaN falls back to 1000.
+ * @returns {{ scanned: number, pruned: number, errors: number }}
+ */
+export function pruneStaleTraces(basePath, opts = {}) {
+	const result = { scanned: 0, pruned: 0, errors: 0 };
+	if (!basePath || typeof basePath !== "string") return result;
+	// NaN would make every `mtimeMs >= cutoff` / `pruned >= cap` comparison
+	// false (delete everything, uncapped), so coerce it back to the default.
+	const days = Number(opts?.retentionDays ?? TRACE_RETENTION_DAYS_DEFAULT);
+	const cap = Number(opts?.maxDeletePerCall ?? 1000);
+	const retentionDays = Number.isNaN(days) ? TRACE_RETENTION_DAYS_DEFAULT : days;
+	const maxDeletePerCall = Math.max(1, Number.isNaN(cap) ? 1000 : cap);
+	let dir;
+	let entries;
+	try {
+		dir = tracesDir(basePath);
+		if (!existsSync(dir)) return result;
+		entries = readdirSync(dir);
+	} catch {
+		return result;
+	}
+	const cutoff = Date.now() - retentionDays * MS_PER_DAY;
+	for (const name of entries) {
+		if (result.pruned >= maxDeletePerCall) break;
+		if (name === "latest") continue;
+		if (!name.endsWith(".jsonl")) continue;
+		const path = join(dir, name);
+		result.scanned += 1;
+		try {
+			// lstat so we don't follow a symlink (defensive — there shouldn't
+			// be any besides `latest`, but never silently chase).
+			const stat = lstatSync(path);
+			if (!stat.isFile()) continue;
+			if (stat.mtimeMs >= cutoff) continue;
+			unlinkSync(path);
+			result.pruned += 1;
+		} catch {
+			result.errors += 1;
+		}
+	}
+	return result;
+}
+
 export function readTraceEvents(basePath, type, windowHours = 24) {
 	// Read all trace files modified within windowHours, filter by event type
 	// Returns array of matching events