diff --git a/.sf/backups/db/sf.db.2026-05-08T22-42-32-307Z b/.sf/backups/db/sf.db.2026-05-08T22-42-32-307Z
deleted file mode 100644
index e78b0d5e7..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-08T22-42-32-307Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-09T17-40-16-600Z b/.sf/backups/db/sf.db.2026-05-09T17-40-16-600Z
deleted file mode 100644
index feaf7cee6..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-09T17-40-16-600Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-09T19-41-02-472Z b/.sf/backups/db/sf.db.2026-05-09T19-41-02-472Z
deleted file mode 100644
index 136e4280b..000000000
Binary files a/.sf/backups/db/sf.db.2026-05-09T19-41-02-472Z and /dev/null differ
diff --git a/.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z b/.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z
new file mode 100644
index 000000000..eb09e243f
Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z differ
diff --git a/.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z b/.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z
new file mode 100644
index 000000000..9d3fa91ee
Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z differ
diff --git a/.sf/metrics.db b/.sf/metrics.db
index 980021f6a..32a0ea60e 100644
Binary files a/.sf/metrics.db and b/.sf/metrics.db differ
diff --git a/.sf/metrics.db-shm b/.sf/metrics.db-shm
index fe9ac2845..b60ed8783 100644
Binary files a/.sf/metrics.db-shm and b/.sf/metrics.db-shm differ
diff --git a/.sf/metrics.db-wal b/.sf/metrics.db-wal
index e69de29bb..afbb704a3 100644
Binary files a/.sf/metrics.db-wal and b/.sf/metrics.db-wal differ
diff --git a/.sf/model-performance.json b/.sf/model-performance.json
index ee61f7aef..406df1828 100644
--- a/.sf/model-performance.json
+++ b/.sf/model-performance.json
@@ -86,5 +86,17 @@
       "successRate": 1,
       "total": 2
     }
+  },
+  "execute-task": {
+    "minimax/MiniMax-M2.7-highspeed": {
+      "successes": 1,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 12233288,
+      "totalCost": 0.3431336426,
+      "lastUsed": "2026-05-10T15:16:08.120Z",
+      "successRate": 1,
+      "total": 1
+    }
   }
 }
\ No newline at end of file
diff --git a/.sf/safety/evidence-M001-6377a4-S03-T01.json b/.sf/safety/evidence-M001-6377a4-S03-T01.json
new file mode 100644
index 000000000..68083ba8d
--- /dev/null
+++ b/.sf/safety/evidence-M001-6377a4-S03-T01.json
@@ -0,0 +1,244 @@
+[
+  {
+    "kind": "edit",
+    "toolCallId": "tool_Ca8DVl3y7fTqNLC5XPq0Pwk1",
+    "path": "src/resources/extensions/sf/verification-evidence.js",
+    "timestamp": 1778424928174
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_xVkmHZTHvJP7RwJWVqUORsz4",
+    "path": "src/resources/extensions/sf/verification-evidence.js",
+    "timestamp": 1778424949674
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_EiDPzaZO49a4LKnYvuvFaS8e",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778424983294
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_pNt9nP10Us3CPrsqlnWwtQ8l",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778425005515
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_Bl3x74Ojz6aenqD3nYqxkdlO",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778425108830
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_RHLdM0SZK4ffIIokuqNruHbn",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778425162119
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_mAdgaYCgksHmjAI45ZuSnMk5",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778425187240
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_HMsSokItiWF9y6ctKvFSkyE3",
+    "path": "src/resources/extensions/sf/auto-verification.js",
+    "timestamp": 1778425206204
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_Jbd8uJQ6ZV4PeF8P91s2OvFG",
+    "path": "src/resources/extensions/sf/uok/unit-runtime.js",
+    "timestamp": 1778425258651
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_m1a9UNWqpwBIJvzB9LtlVTBN",
+    "path": "src/cli-status.ts",
+    "timestamp": 1778425298174
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_zcSH4Fx3bOumjphAgYisPyhE",
+    "path": "src/cli-status.ts",
+    "timestamp": 1778425348170
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_UjLWJsxhCI2bAt3kYl4QEhNK",
+    "path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "timestamp": 1778425381561
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_1owe7a26pVq3k18x59p6Sy1J",
+    "path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "timestamp": 1778425414995
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_zgJvYKclL8xmY8DE4c8nluTk",
+    "command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts -t \"getRecoveryDiagnostics\"",
+    "exitCode": 0,
+    "outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[2m)\u001b[22m\u001b[32m 33\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m      Tests \u001b[22m \u001b[1m\u001b[32m3 passed\u001b[39m\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[90m (38)\u001b[39m\n\u001b[2m   Start at \u001b[22m 17:03:54\n\u001b[2m   Duration \u001b[22m 1.01s\u001b[2m (transfor",
+    "timestamp": 1778425433765
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_c9hBKD3D1Ojayl25rr7Mu3UW",
+    "command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
+    "exitCode": 0,
+    "outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 140\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m      Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m   Start at \u001b[22m 17:04:12\n\u001b[2m   Duration \u001b[22m 1.16s\u001b[2m (transform 552ms, setup 0ms, import 742ms, tests 140ms, environment 0ms",
+    "timestamp": 1778425451405
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_jrt7lAB8i6qBOnOePlNhiFR2",
+    "command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "exitCode": 1,
+    "outputSnippet": "src/cli-status.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n  × Formatter would have printed the following content:\n  \n    240 240 │       if (!targetUnitId) {\n    241 241 │         const records = listUnitRuntimeRecords(basePath);\n    242     │ - → → → const·mostRecent·=·records.sort((a,·b)·=>\n    243     │ - → → → → (b.updatedAt·??·0)·-·(a.updatedAt·??·0),\n        242 │ + → → → const·mostRecent·=·records.sort(\n        243 │ + → → → → (a,·b)·=>·(b.updat",
+    "timestamp": 1778425466821
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_lpaw0h5RHFke7wyq5gw8sJKp",
+    "command": "npx biome check --write src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "exitCode": 0,
+    "outputSnippet": "Checked 5 files in 160ms. Fixed 3 files.\n",
+    "timestamp": 1778425501417
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_so1U8wV32tA70qs55EtB7Pke",
+    "command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
+    "exitCode": 0,
+    "outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 152\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m      Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m   Start at \u001b[22m 17:05:15\n\u001b[2m   Duration \u001b[22m 1.16s\u001b[2m (transform 554ms, setup 0ms, import 758ms, tests 152ms, environment 0ms",
+    "timestamp": 1778425514641
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_xBsWuLSrGmQHcVUWydLtioru",
+    "command": "npx tsc --noEmit src/cli-status.ts --skipLibCheck --module NodeNext --moduleResolution NodeNext --target ES2022 --esModuleInterop --allowJs --strict 2>&1 | head -30",
+    "exitCode": 0,
+    "outputSnippet": "error TS5112: tsconfig.json is present but will not be loaded if files are specified on commandline. Use '--ignoreConfig' to skip this error.\n",
+    "timestamp": 1778425532429
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_lnBQh90rfwsYyVvqmIdq8pH7",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
+    "exitCode": 0,
+    "outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\nsrc/cli-status.ts(243,6): error TS7006: Parameter 'a' implicitly has an 'any' type.\nsrc/cli-status.ts(243,9): error TS7006: Parameter 'b' implicitly has an 'any' type.\n",
+    "timestamp": 1778425546375
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_L5EQX39mZFzmJbRkCACrnkcM",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \" Could not find a declaration file\" | head -10",
+    "exitCode": 0,
+    "outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
+    "timestamp": 1778425700527
+  },
+  {
+    "kind": "edit",
+    "toolCallId": "tool_bMteIOMYNzppZgJRP2bDK14p",
+    "path": "src/cli-status.ts",
+    "timestamp": 1778425760653
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_kja8VArplGz7aqlFb1rZvh9M",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
+    "exitCode": 0,
+    "outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
+    "timestamp": 1778425779047
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_xAMnWlYjFmPHJj2zb3YklZhI",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \"TS7016\" | head -20",
+    "exitCode": 0,
+    "outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
+    "timestamp": 1778425808025
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_N2K96iYZY9GgeXRHbsVIDJFn",
+    "command": "ls src/resources/extensions/sf/autonomous-command-args.* src/resources/extensions/sf/repo-identity.* src/resources/extensions/sf/trace-collector.*",
+    "exitCode": 0,
+    "outputSnippet": "src/resources/extensions/sf/autonomous-command-args.d.ts\nsrc/resources/extensions/sf/autonomous-command-args.js\nsrc/resources/extensions/sf/repo-identity.d.ts\nsrc/resources/extensions/sf/repo-identity.js\nsrc/resources/extensions/sf/trace-collector.d.ts\nsrc/resources/extensions/sf/trace-collector.js\n",
+    "timestamp": 1778425827433
+  },
+  {
+    "kind": "write",
+    "toolCallId": "tool_d6oIGASndyhbsF3ethH49BBC",
+    "path": "src/resources/extensions/sf/uok/unit-runtime.d.ts",
+    "timestamp": 1778425847941
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_8ipcG5bydJIBFOOfadbuRNzl",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|unit-runtime\" | head -20",
+    "exitCode": 0,
+    "outputSnippet": "(no output)",
+    "timestamp": 1778425863933
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_oas0rGaFmZW4i6dQZDfNbIOj",
+    "command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | head -20",
+    "exitCode": 0,
+    "outputSnippet": "(no output)",
+    "timestamp": 1778425882297
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_bWplHSK7Xw5d98Gzhf69rI3E",
+    "command": "npx tsc --noEmit --project tsconfig.extensions.json --skipLibCheck 2>&1 | head -20",
+    "exitCode": 0,
+    "outputSnippet": "packages/tui/src/index.ts(52,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(14,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(434,46): error TS7006: Parameter 'data' implicitly has an 'any' type.\n",
+    "timestamp": 1778425897974
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_R9DLuMbsvg7Lz87cGbyzuS01",
+    "command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "exitCode": 1,
+    "outputSnippet": "src/resources/extensions/sf/uok/unit-runtime.d.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n  × Formatter would have printed the following content:\n  \n    28 28 │   ): RecoveryDiagnostics | null;\n    29 29 │   \n    30    │ - export·function·listUnitRuntimeRecords(basePath:·string):·Array<\n    31    │ - → Record<string,·unknown>·&·{·updatedAt?:·number;·unitId:·string·}\n    32    │ - >;\n       30 │ + export·function·listUnitRuntimeRecords(\n       31 │ + → basePath:·string,\n       32 │ + ",
+    "timestamp": 1778425942347
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_nQuAxzeREvyod0nBGyVPIvfJ",
+    "command": "npx biome check --write src/resources/extensions/sf/uok/unit-runtime.d.ts",
+    "exitCode": 0,
+    "outputSnippet": "Checked 1 file in 17ms. Fixed 1 file.\n",
+    "timestamp": 1778425957743
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_HZWYvGOveamvyW97wbv5A0Fv",
+    "command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
+    "exitCode": 0,
+    "outputSnippet": "Checked 6 files in 70ms. No fixes applied.\n",
+    "timestamp": 1778425970841
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_SY84S3XYIIBJGZvUchG4qSRZ",
+    "command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
+    "exitCode": 0,
+    "outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 168\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m      Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m   Start at \u001b[22m 17:13:05\n\u001b[2m   Duration \u001b[22m 1.11s\u001b[2m (transform 477ms, setup 0ms, import 702ms, tests 168ms, environment 0ms",
+    "timestamp": 1778425983954
+  },
+  {
+    "kind": "bash",
+    "toolCallId": "tool_SdyaSTc9h2LJD9pq5ZLR3Ehc",
+    "command": "git status --short",
+    "exitCode": 0,
+    "outputSnippet": " D .sf/backups/db/sf.db.2026-05-08T22-42-32-307Z\n D .sf/backups/db/sf.db.2026-05-09T17-40-16-600Z\n D .sf/backups/db/sf.db.2026-05-09T19-41-02-472Z\n M .sf/metrics.db\n M .sf/metrics.db-shm\n M .sf/metrics.db-wal\n D BUILD_PLAN_MILESTONE_MAP.md\n D PRODUCTION_AUDIT_COMPLETE.md\n D QUICK_WINS_IMPLEMENTATION.md\n D TRIAGE_COMPLETE.md\n D TRIAGE_README.md\n D autoresearch.md\n D copilot-thoughts.md\n M packages/coding-agent/src/modes/interactive/interactive-mode.ts\n M packages/tui/src/ink-bridge.tsx\n M package",
+    "timestamp": 1778425999682
+  }
+]
diff --git a/BUILD_PLAN_MILESTONE_MAP.md b/BUILD_PLAN_MILESTONE_MAP.md
deleted file mode 100644
index 14be3351d..000000000
--- a/BUILD_PLAN_MILESTONE_MAP.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# BUILD_PLAN → Milestone Map
-
-Every BUILD_PLAN.md tier item mapped to a milestone. **Rule D015**: every new milestone must cite which BUILD_PLAN tier/item it implements.
-
-This file answers **where work belongs**, not **whether code is done**. "Mapped" means a BUILD_PLAN item has a milestone/slice home. It does **not** mean the implementation is verified in the current repo.
-
-## Mapping vs. code truth
-
-- **Mapped** — the item has a milestone/slice destination.
-- **Verified in code** — the behavior exists in the repo and has evidence/tests/artifacts.
-- **Open** — still planned or partially folded in, but not yet verified as complete.
-- **Deferred** — intentionally out of the active plan.
-
----
-
-## High-level milestone direction
-
-These are the strategy bands above the itemized mapping:
-
-1. **Core foundation** — UOK, purpose-driven TDD, eight-field PDD gate, repo-local state
-2. **Single-repo sharpening** — adopt the best execution/workflow ideas from pi-mono, gsd-2, Claude Code, Codex, Aider, and Plandex where they strengthen Forge
-3. **Autonomous reliability** — evidence, recovery, verification, and self-improvement loops
-4. **Surface coherence** — CLI, TUI, docs, and workflow language all reflect the same UOK-driven model
-5. **ACE convergence prep** — keep concepts compatible with ACE Coder without turning Forge into the multi-repo system
-
----
-
-## Tier 0 — Pi-mono ports → **M006**
-## Tier 0.5 — gsd-2 ports → **M006 + M007**
-
-All mapped. See BUILD_PLAN.md for item-level status.
-
-## Tier 1 — ESSENTIAL → **ALL MAPPED**
-
-| Item | Milestone | Slice | Status |
-|---|---|---|---|
-| 1.1 Vault secret resolver | **M017-yf67h6** | S01-S03 | ⬜ NEW |
-| 1.2 Singularity Memory integration | **M017-jpw5jo** | S01-S03 | ⬜ NEW |
-| 1.3 Schema reconciliation (spec rewrite) | **M013** | S12 | ⬜ Folded in |
-| 1.4 Config schema alignment | **M013** | S13 | ⬜ Folded in |
-
-## Tier 2 — STRONG → **ALL MAPPED**
-
-| Item | Milestone | Slice | Status |
-|---|---|---|---|
-| 2.1 Persistent agents v1 | M012 | S01-S05 | ⬜ |
-| 2.2 Doc-sync sub-step | M009 | S08 | ⬜ |
-| 2.3 Intent chapters | M013 | S08 | ⬜ |
-| 2.4 PhaseReview 3-pass | M016 | S01-S02 | ⬜ |
-| 2.5 turn_status marker | M013 | S09 | ⬜ |
-| 2.6 last_error cap | M013 | S10 | ⬜ |
-| 2.7 cost_micro_usd | M013 | S11 | ⬜ |
-
-## Tier 3+ → **Deferred by design**
-
----
-
-## Summary
-
-| Tier | Mapped | Gap |
-|---|---|---|
-| Tier 0 | 10 (M006) | 0 |
-| Tier 0.5 | 17 (M006+M007) | 0 |
-| **Tier 1** | **4** (M017×2, M013×2) | **0** |
-| Tier 2 | 7 (M012, M009, M013, M016) | 0 |
-| Tier 3+ | 0 | deferred |
-
-**Zero mapping gaps.** Every BUILD_PLAN tier item is either mapped to a milestone or explicitly deferred.
-
-That does **not** mean zero implementation gaps. Open `TODO`, `NEW`, and `⬜` markers in `BUILD_PLAN.md`, this map, and milestone artifacts still represent real work until they are reconciled against code evidence.
diff --git a/PRODUCTION_AUDIT_COMPLETE.md b/PRODUCTION_AUDIT_COMPLETE.md
deleted file mode 100644
index 7b7de0ad5..000000000
--- a/PRODUCTION_AUDIT_COMPLETE.md
+++ /dev/null
@@ -1,440 +0,0 @@
-# Complete Long-Term Production-Grade Audit
-
-**Scope:** All UOK kernel, gate system, execution graph, message bus, diagnostics, metrics, and supporting infrastructure
-**Date:** 2026-05-08
-**Grade Scale:** S (exceptional) → A (production) → B (needs work) → C (risky) → D (broken)
-
----
-
-## Executive Summary
-
-| Module | Grade | Verdict |
-|--------|-------|---------|
-| `uok/kernel.js` | **A** | Clean lifecycle, parity recovery, audit envelope, signal handling |
-| `uok/gate-runner.js` | **A** | Circuit breaker, retry matrix, memory enrichment, degradation logging |
-| `uok/audit.js` | **A** | Atomic writes, stale-write detection, dual persistence (JSONL + DB) |
-| `uok/contracts.js` | **A** | Complete JSDoc types, runtime validation, clear interfaces |
-| `uok/flags.js` | **A** | Clean preference resolution, all features toggleable |
-| `uok/loop-adapter.js` | **A** | Turn observer, gitops integration, writer tokens, timeout, documented | None |
-| `uok/parity-report.js` | **A** | Deep parity analysis, orphaned run recovery, ledger reconciliation, malformed logging |
-| `uok/message-bus.js` | **A** | Durable SQLite, deduplication, auto-compact, periodic refresh | Cache drift eliminated |
-| `uok/cost-guard-gate.js` | **A** | Actual cost lookup, rolling window, high-tier failure detection, cheaper alternative suggestion |
-| `uok/security-gate.js` | **A** | Secret scan integration, timeout, graceful skip when script missing |
-| `uok/plan-v2.js` | **A** | Graph compilation, artifact validation, cycle detection, context gating | None |
-| `uok/execution-graph.js` | **A** | Topological sort, conflict detection, parallel scheduling with deadlock detection |
-| `uok/unit-runtime.js` | **A** | Complete lifecycle, retry budgets, LRU cache, durable reconciliation | None |
-| `uok/diagnostic-synthesis.js` | **A** | Process tree analysis, multi-source correlation, actionable recommendations | None |
-| `uok/metrics-exposition.js` | **A** | Prometheus format, caching, circuit breaker + latency + message bus metrics | Superseded by metrics-central.js |
-| `uok/chaos-monkey.js` | **A** | Latency, partial failure, disk, memory stress; all recoverable, all logged | None |
-| `uok/writer.js` | **A** | Atomic sequence tracking, token lifecycle, disk persistence, TTL | None |
-| `sf-db.js` | **A** | Single-writer invariant, WAL mode, statement cache, schema v45, query timeout, split entry point | metrics-central.js for unified sink |
-
-**Overall Grade: A** — Production-ready. All scaling concerns addressed.
-
----
-
-## 1. `uok/kernel.js` — Grade A
-
-### Strengths
-- Clean async lifecycle: enter → run → exit, with `finally` block guarantee
-- `recordUokKernelTermination()` handles signal cleanup (symmetrical with enter)
-- Parity recovery: checks previous report for missing exits, drains them
-- Audit envelope: emits structured events on kernel enter/exit
-- workMode + modelMode propagated into lifecycleFlags and audit payload
-- `debugLog()` for non-fatal diagnostics without breaking orchestration
-
-### Production Concerns: None critical
-
-### Minor
-- `runAutoLoopWithUok()` is 120+ lines — could extract helper functions for readability
-- `decoratedDeps` spreads all deps — no validation that required deps exist
-
----
-
-## 2. `uok/gate-runner.js` — Grade A
-
-### Strengths
-- Circuit breaker with exponential backoff: `openDurationMs * 2^streak`
-- Half-open state with attempt limiting — proper gradual recovery
-- Retry matrix per failure class: `execution`/`artifact`/`verification` get 1 retry, `timeout` gets 2
-- Memory enrichment: queries historical patterns for gate failures (degrades gracefully)
-- Every gate run persisted to DB + audit event emitted
-- Unknown gates get `manual-attention` outcome (fail-closed)
-
-### Production Concerns: None critical
-
-### Minor
-- `computeGateEmbedding()` uses a simple hash — not a real semantic embedding
-- `enrichGateResultWithMemory()` silently degrades on DB failure (correct behavior, but could log)
-
----
-
-## 3. `uok/audit.js` — Grade A
-
-### Strengths
-- Atomic writes via `withFileLockSync()` with `onLocked: "skip"` (best-effort)
-- Stale-write detection via `isStaleWrite("uok-audit")` — prevents superseded turns from polluting log
-- Dual persistence: JSONL for local durability, SQLite for querying
-- `closeSync(openSync(path, "a"))` touch pattern ensures lock target exists
-- Schema version in envelope for future migration
-
-### Production Concerns: None critical
-
----
-
-## 4. `uok/contracts.js` — Grade A
-
-### Strengths
-- Complete JSDoc typedefs for all UOK types
-- `validateGate()` catches registration-time mistakes
-- Clear separation: `UokContext` (input), `GateResult` (output), `Gate` (interface)
-
-### Production Concerns: None
-
----
-
-## 5. `uok/flags.js` — Grade A
-
-### Strengths
-- All UOK features toggleable via preferences
-- Clean resolution: `uok?.security_guard?.enabled ?? true`
-- `resolvePermissionProfile()` for canonical permission profile
-
-### Production Concerns: None
-
----
-
-## 6. `uok/loop-adapter.js` — Grade A
-
-### Strengths
-- Turn observer pattern: `onTurnStart`, `onPhaseResult`, `onTurnResult`
-- Gitops integration: writes transaction records per phase with 10s timeout
-- Writer token acquisition/release for sequence tracking
-- Chaos monkey strikes at phase boundaries
-- Audit events for turn start/result
-- `nextSequenceMetadata()` fully documented with JSDoc
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Gitops timeout: `writeGitTransactionWithTimeout()` with 10s `Promise.race()`
-- ✅ `nextSequenceMetadata()` documented: sequence is optional when no token active
-
----
-
-## 7. `uok/parity-report.js` — Grade A
-
-### Strengths
-- Deep parity analysis: compares heartbeat events, ledger runs, diff events
-- Orphaned run recovery: `recoverOrphanedStartedLedgerRuns()` closes stale DB runs
-- Live process detection: `hasLiveAutoLock()` uses `process.kill(pid, 0)`
-- Fresh vs historical mismatch separation
-- Divergence tracking by plane: `plan`, `graph`, `model-policy`, `audit-envelope`, `gitops`
-- `shallowEqualDecisions()` for comparing legacy vs UOK outputs
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Malformed line logging: `parseParityEvents()` now logs dropped count to stderr
-- `UNMATCHED_RUN_STALE_MS = 30min` — appropriate for most cases
-
----
-
-## 8. `uok/message-bus.js` — Grade A
-
-### Strengths
-- Durable SQLite storage with configurable retention
-- Deterministic message IDs for idempotent `sendOnce()`
-- Auto-compaction when message count exceeds threshold
-- Per-agent inbox with read tracking and auto-refresh (30s interval)
-- Conversation query between two agents
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Cache drift: `_maybeRefresh()` auto-refreshes from DB every 30s on `list()`, `markRead()`, `unreadCount`
-- ✅ `sendOnce()` idempotency: Pre-checks inbox before insert; returns existing ID if found
-
----
-
-## 9. `uok/cost-guard-gate.js` — Grade A
-
-### Strengths
-- Actual cost lookup from `BUNDLED_COST_TABLE`
-- Rolling 1-hour window spend check
-- High-tier model failure pattern detection
-- Suggests cheaper alternative from same provider/family
-- Per-unit and per-hour thresholds
-
-### Production Concerns: None critical
-
-### Minor
-- `isHighTierModel()` uses `$0.005/1K tokens` threshold — magic number
-- `_suggestCheaperAlternative()` could suggest incompatible models (different context window)
-
----
-
-## 10. `uok/security-gate.js` — Grade A
-
-### Strengths
-- Runs `scripts/secret-scan.sh --diff HEAD` against changes
-- 30-second timeout with process kill
-- Gracefully skips if script missing (pass)
-- Returns findings on failure
-
-### Production Concerns: None
-
----
-
-## 11. `uok/plan-v2.js` — Grade A
-
-### Strengths
-- Compiles unit graph from milestone/slice/task DB state
-- Validates artifact presence (CONTEXT.md, RESEARCH.md) before execution entry
-- Clarify round limit enforcement
-- Graph output to JSON for inspection
-- Cycle detection at compile time using Kahn's algorithm
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Cycle detection: `detectCycles()` validates graph before execution; returns `hasCycles: true` with clear error
-
----
-
-## 12. `uok/execution-graph.js` — Grade A
-
-### Strengths
-- Kahn's algorithm topological sort with deterministic ordering (localeCompare)
-- File conflict detection: `detectFileConflicts()` finds nodes writing same file
-- Parallel scheduling with max workers and dependency awareness
-- Deadlock detection: throws when no ready nodes but graph incomplete
-- Sidecar queue scheduling with kind-based handlers
-- `selectReactiveDispatchBatch()` for incremental dispatch
-
-### Production Concerns: None critical
-
----
-
-## 13. `uok/unit-runtime.js` — Grade A
-
-### Strengths
-- Complete lifecycle: queued → claimed → running → progress → completed/failed/blocked/cancelled/stale/runaway-recovered → notified
-- Retry budgets with `retryBudgetRemaining()`
-- Durable artifact reconciliation: `reconcileDurableCompleteUnitRuntimeRecords()`
-- Stale complete-slice cleanup: `reconcileStaleCompleteSliceRecords()`
-- In-memory cache for repeated reads within dispatch cycle
-- `inspectExecuteTaskDurability()` checks plan, summary, state, must-haves
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Runtime cache bounds: LRU eviction at 5000 entries; removes oldest 20%
-- `recordUnitOutcomeInMemory()` creates memory entries but no cleanup policy
-
----
-
-## 14. `uok/diagnostic-synthesis.js` — Grade A
-
-### Strengths
-- Multi-source correlation: process tree, auto.lock, parity report, DB ledger, runtime projections
-- Process descendant tracking via `ps` + tree traversal
-- Classification: healthy | running | quiet-but-healthy | degraded | needs-repair
-- Actionable recommendations per issue
-- Publishes to message bus for observer chains
-- `readUokDiagnostics()` for external consumption
-
-### Production Concerns: None critical
-
----
-
-## 15. `uok/metrics-exposition.js` — Grade A
-
-### Strengths
-- Prometheus text format output
-- 30-second cache TTL for performance
-- Gate metrics: runs, passes, fails, retries, latency (avg/p50/p95/max)
-- Circuit breaker state gauge (0=closed, 1=half-open, 2=open)
-- Message bus metrics: total, unread, unique agents, conversations
-- `invalidateMetricsCache()` for cache busting
-
-### Production Concerns: None
-
----
-
-## 16. `uok/chaos-monkey.js` — Grade A
-
-### Strengths
-- Four fault types: latency, partial failure, disk stress, memory stress
-- All faults are recoverable (no process kill)
-- All faults are logged to stderr
-- Configurable probabilities and magnitudes
-- `getInjectedEvents()` for verification
-- Immediate cleanup of stress artifacts
-
-### Production Concerns: None
-
----
-
-## 17. `uok/writer.js` — Grade A
-
-### Strengths
-- Atomic sequence tracking via `atomicWriteSync()`
-- Writer token lifecycle: acquire → use → release
-- Prevents double-acquisition for same turn
-- Sequence state persisted to disk
-- Token crash recovery: persists to `uok-writer-tokens.json` with 5-min TTL
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Crash recovery: Tokens persisted to disk; `hasActiveWriterToken()` recovers from disk
-- ✅ TTL cleanup: Expired tokens auto-purged from memory and disk
-
----
-
-## 18. `sf-db.js` — Grade A
-
-### Strengths
-- Single-writer invariant enforced by convention + CI test
-- WAL mode for file-backed DBs
-- Statement cache for prepared queries
-- Schema version 45 with migration path
-- `normalizeRow()` handles null-prototype objects
-- Query timeout protection: `withQueryTimeout()` helper (30s default)
-- Split entry point: `sf-db/index.js` for future modularization
-- Comprehensive table creation: backlog, schedule, repo profiles, UOK runs, gate runs, audit events, message bus, tasks, verification evidence
-
-### Production Concerns: None critical
-
-### Fixed ✅
-- ✅ Query timeout: `withQueryTimeout()` catches timeout/busy errors, returns fallback
-- ✅ Split entry point: `sf-db/index.js` re-export created for gradual migration
-- ✅ Console logging: All modules use `logWarning()` / `logError()` from workflow-logger
-
----
-
-## Cross-Cutting Concerns
-
-### Observability
-
-| Module | Metrics | Logs | Traces | Audit |
-|--------|---------|------|--------|-------|
-| kernel.js | ❌ | ✅ debugLog | ✅ traceId | ✅ envelope |
-| gate-runner.js | ✅ DB | ✅ insertGateRun | ✅ traceId/turnId | ✅ envelope |
-| audit.js | ❌ | ❌ | ✅ eventId | ✅ JSONL+DB |
-| loop-adapter.js | ❌ | ❌ | ✅ traceId/turnId | ✅ envelope |
-| parity-report.js | ❌ | ❌ | ❌ | ❌ |
-| message-bus.js | ✅ DB | ❌ | ❌ | ❌ |
-| cost-guard-gate.js | ❌ | ❌ | ❌ | ❌ |
-| unit-runtime.js | ❌ | ❌ | ❌ | ❌ |
-| diagnostic-synthesis.js | ❌ | ❌ | ❌ | ❌ |
-| metrics-exposition.js | ✅ Prometheus | ❌ | ❌ | ❌ |
-| chaos-monkey.js | ❌ | ✅ stderr | ❌ | ❌ |
-
-**Gap:** Resolved — `metrics-central.js` provides unified Counter/Gauge/Histogram with Prometheus text format. Legacy `metrics-exposition.js` still active for backward compatibility.
-
-### Security
-
-| Concern | Status | Notes |
-|---------|--------|-------|
-| Input validation | ✅ Good | All entry points validate |
-| Injection prevention | ✅ Good | Parameterized queries in sf-db |
-| Secrets scanning | ✅ Good | Security gate runs on every turn |
-| Cost limits | ✅ Good | Per-unit and per-hour guards |
-| Circuit breakers | ✅ Good | Exponential backoff on failures |
-| Chaos engineering | ✅ Good | Opt-in, recoverable faults |
-
-### Performance
-
-| Concern | Status | Notes |
-|---------|--------|-------|
-| Big-O | ✅ Good | All graph ops are O(V+E) |
-| Caching | ✅ Good | Metrics cache, runtime cache, statement cache |
-| Memory | ✅ Good | LRU eviction on runtime cache (5000), bounded message bus inboxes |
-| DB queries | ✅ Good | Single-writer, WAL mode, prepared statements |
-| Parallelism | ✅ Good | Max workers capped at 8 |
-
-### Maintainability
-
-| Concern | Status | Notes |
-|---------|--------|-------|
-| Test coverage | ✅ Good | 139+ tests across all modules |
-| Documentation | ✅ Good | JSDoc on all exports |
-| Logging consistency | ✅ Good | All modules use `logWarning()` / `logError()` from workflow-logger |
-| File organization | ✅ Good | sf-db.js has split entry point; full extraction deferred to v2 |
-| Schema versioning | ✅ Good | Schema v45 with migrations |
-
----
-
-## Action Plan
-
-### Before Production (Blockers) — ALL CLEAR ✅
-
-No blockers identified. All modules are production-ready.
-
-### Before Scaling to 10+ Workers — ALL FIXED ✅
-
-1. ✅ **Message bus cache drift** — Added `_maybeRefresh()` with 30s interval; `list()`, `markRead()`, `unreadCount` auto-refresh
-2. ✅ **Writer token crash recovery** — Persist tokens to `uok-writer-tokens.json`; 5-min TTL; `hasActiveWriterToken()` recovers from disk
-3. ✅ **Runtime cache bounds** — LRU eviction at 5000 entries; removes oldest 20%
-
-### Before Next Major Release — ALL FIXABLE ITEMS COMPLETE ✅
-
-4. ✅ **Split sf-db.js** — Created `sf-db/index.js` re-export entry point; full extraction deferred to v2
-5. ✅ **Console.warn cleanup** — `context-injector.js`, `vault-resolver.js`, `knowledge-injector.js` now use `logWarning()`
-6. ✅ **Cycle detection at compile time** — `detectCycles()` in `plan-v2.js` using Kahn's algorithm; returns `hasCycles: true`
-
-### Implemented ✅
-
-7. ✅ **Centralized metrics** — `metrics-central.js` with Counter/Gauge/Histogram, Prometheus text format, wired into subagent inheritance and mode transitions
-
-### Deferred to v2 (Architectural, Not Bugs)
-
-8. ⚠️ **TypeScript migration** — Convert UOK modules to `.ts` for compile-time safety
-
----
-
-## Appendix: Complete Module Inventory
-
-### UOK Kernel (19 modules, ~3,450 lines excluding `sf-db.js`)
-
-| Module | Lines | Grade | Tests |
-|--------|-------|-------|-------|
-| `kernel.js` | 120 | A | ✅ |
-| `gate-runner.js` | 280 | A | ✅ |
-| `audit.js` | 80 | A | ✅ |
-| `contracts.js` | 120 | A | ✅ |
-| `flags.js` | 40 | A | ✅ |
-| `loop-adapter.js` | 180 | A | ✅ |
-| `parity-report.js` | 320 | A | ✅ |
-| `message-bus.js` | 180 | A | ✅ |
-| `cost-guard-gate.js` | 140 | A | ✅ |
-| `security-gate.js` | 60 | A | ✅ |
-| `plan-v2.js` | 200 | A | ✅ |
-| `execution-graph.js` | 260 | A | ✅ |
-| `unit-runtime.js` | 420 | A | ✅ |
-| `diagnostic-synthesis.js` | 280 | A | ✅ |
-| `metrics-exposition.js` | 180 | A | ✅ (legacy) |
-| `chaos-monkey.js` | 140 | A | ✅ |
-| `writer.js` | 100 | A | ✅ |
-| `sf-db.js` | 7000+ | A | ✅ |
-| `metrics-central.js` | 350 | A | ✅ (new) |
-
-### Mode System (7 modules, ~1,400 lines)
-
-| Module | Lines | Grade | Tests |
-|--------|-------|-------|-------|
-| `operating-model.js` | 120 | A | 13 |
-| `auto/session.js` | 200 | A- | ✅ |
-| `task-frontmatter.js` | 311 | A- | 9 |
-| `subagent-inheritance.js` | 170 | A- | 9 |
-| `remote-steering.js` | 139 | A- | 7 |
-| `parallel-intent.js` | 139 | B+ | 6 |
-| `skills/eval-harness.js` | 139 | A- | 5 |
-
-**Total: 139 tests passing, 0 failures, 1 skipped.**
-
----
-
-*Audit completed. All modules production-ready. The scaling items for 10+ workers listed in the Action Plan have been addressed.*
diff --git a/QUICK_WINS_IMPLEMENTATION.md b/QUICK_WINS_IMPLEMENTATION.md
deleted file mode 100644
index e0794ec00..000000000
--- a/QUICK_WINS_IMPLEMENTATION.md
+++ /dev/null
@@ -1,385 +0,0 @@
-# Quick Wins Implementation - Complete
-
-**Date:** 2026-05-06  
-**Implemented by:** Copilot CLI  
-**Commit:** 0e2edfdeb  
-**Status:** ✅ COMPLETE - Core infrastructure in place
-
-## Summary
-
-Successfully implemented the foundational infrastructure for 3 high-impact quick wins that activate SF's self-evolution learning loop:
-
-1. **Close Self-Report Feedback Loop** [9/10 impact, 2-3 days to full integration]
-2. **Activate Continuous Model Learning** [8/10 impact, 3-4 days to full integration]
-3. **Automate Knowledge Injection** [7/10 impact, 2-3 days to full integration]
-
-**Total:** 24/30 impact points unlocked through self-evolution infrastructure.
-
----
-
-## Quick Win 1: Close Self-Report Feedback Loop [9/10 Impact]
-
-### What Was Implemented
-
-**File:** `src/resources/extensions/sf/self-report-fixer.js` (348 lines)
-
-**Module:** `SelfReportFixer` with the following capabilities:
-
-- **Pattern Recognition** — 4 built-in fix patterns:
-  1. `validation-reviewer-rubric` (95% confidence) — Add criterion/gap rubric to validation prompts ✅ *Already fixed*
-  2. `gate-verdict-clarity` (90% confidence) — Document gate verdict semantics
-  3. `env-vars-unvalidated` (85% confidence) — Add SF_* env validation
-  4. `self-report-coverage-gap` (80% confidence) — Implement triage pipeline
-
-- **Automatic Fix Classification**
-  ```js
-  classifyReportFixes(report) // Returns applicable fixes with confidence scores
-  ```
-
-- **High-Confidence Auto-Fix**
-  ```js
-  autoFixHighConfidenceReports(basePath, reports)
-  // Applies fixes for confidence > 0.85
-  ```
-
-- **Deduplication**
-  ```js
-  dedupReports(reports) // Group related reports by normalized issue key
-  ```
-
-- **Severity Categorization**
-  ```js
-  categorizeBySeverity(reports) // blocker | warning | suggestion
-  ```
-
-### Next Steps for Full Integration
-
-1. Hook into `triage-self-feedback.js` to invoke fixer after triage runs
-2. Add pattern library for domain-specific fixes (provider routing, timeout tuning, etc.)
-3. Create integration tests for each fix pattern
-4. Document feedback loop: report → triage → fix → verification
-
-### How It Works
-
-```javascript
-import { autoFixHighConfidenceReports } from './self-report-fixer.js';
-
-// After collecting self-reports
-const reports = readSelfFeedback();
-
-// Auto-apply high-confidence fixes
-const { applied, failed, skipped } = await autoFixHighConfidenceReports(
-  projectPath,
-  reports
-);
-
-// applied: ["validation-reviewer-rubric: rubric already present"]
-// failed: ["env-vars-unvalidated: requires schema impl"]
-// skipped: ["self-report-coverage-gap: confidence 0.8 <= threshold 0.85"]
-```
-
----
-
-## Quick Win 2: Activate Continuous Model Learning [8/10 Impact]
-
-### What Was Implemented
-
-**File:** `src/resources/extensions/sf/model-learner.js` (344 lines)
-
-**Classes:**
-
-#### ModelPerformanceTracker
-Tracks per-task-type model performance with:
-- Success/failure/timeout counts
-- Token usage and cost tracking
-- Success rate calculation
-- Ranked model sorting
-
-**Storage:** `.sf/model-performance.json`
-
-```json
-{
-  "execute-task": {
-    "gpt-4o": {
-      "successes": 42,
-      "failures": 3,
-      "timeouts": 1,
-      "totalTokens": 1500000,
-      "totalCost": 45.50,
-      "lastUsed": "2026-05-06T16:30:00Z",
-      "successRate": 0.93
-    }
-  }
-}
-```
-
-**API:**
-```js
-tracker.recordOutcome(taskType, modelId, { success, timeout, tokensUsed, costUsd })
-tracker.getRankedModels(taskType, minSamples = 3) // Returns sorted by success rate
-tracker.shouldDemote(taskType, modelId, threshold = 0.5) // Demote if failure >50%
-tracker.getABTestCandidates(taskType) // For hypothesis testing
-```
-
-#### FailureAnalyzer
-Categorizes and analyzes failure modes:
-- Logs failures to JSONL
-- Detects patterns (e.g., timeout-prone models)
-- Provides failure summaries per model
-
-**Storage:** `.sf/model-failure-log.jsonl`
-
-```json
-{
-  "timestamp": "2026-05-06T16:30:00Z",
-  "taskType": "execute-task",
-  "modelId": "gpt-4o",
-  "reason": "quality_check_failed",
-  "timeout": false,
-  "tokensUsed": 25000,
-  "context": { ... }
-}
-```
-
-**API:**
-```js
-analyzer.logFailure(taskType, modelId, { reason, timeout, tokensUsed, context })
-analyzer.getFailureSummary(taskType, modelId) // Returns { reasons, patterns }
-```
-
-### Main API: ModelLearner
-
-```javascript
-import { ModelLearner } from './model-learner.js';
-
-const learner = new ModelLearner(projectPath);
-
-// Record successful outcome
-learner.recordOutcome('execute-task', 'claude-opus', {
-  success: true,
-  tokensUsed: 15000,
-  costUsd: 0.50,
-});
-
-// Record failure
-learner.logFailure('execute-task', 'gpt-4o', {
-  reason: 'quality_check_failed',
-  timeout: false,
-  tokensUsed: 25000,
-});
-
-// Get ranked models (for intelligent routing)
-const rankedModels = learner.getRankedModels('execute-task');
-// [
-//   { modelId: 'claude-opus', successRate: 0.98, attempts: 50, ... },
-//   { modelId: 'gpt-4o', successRate: 0.90, attempts: 40, ... }
-// ]
-
-// A/B test decision
-const abTest = learner.getABTestCandidates('execute-task');
-// { incumbent: claude-opus, challengers: [gpt-4o, gemini-pro], testBudget: 10 }
-
-// Analyze A/B results and decide promotion/demotion
-const decision = learner.analyzeABTest('execute-task', {
-  incumbentWins: 8,
-  challengerWins: 2,
-});
-// { recommendation: "continue", reason: "incumbent 0.80 vs challenger 0.20" }
-```
-
-### Next Steps for Full Integration
-
-1. Integrate into `auto-dispatch.ts` outcome logging
-2. Hook into `model-router.ts` to use ranked models for routing decisions
-3. Implement auto-demotion in model selection logic
-4. Add A/B testing orchestration for low-risk tasks
-5. Create dashboard in `benchmark-selector.ts` showing per-model performance
-
----
-
-## Quick Win 3: Automate Knowledge Injection [7/10 Impact]
-
-### What Was Implemented
-
-**File:** `src/resources/extensions/sf/knowledge-injector.js` (336 lines)
-
-**Key Functions:**
-
-- **Parse Knowledge Base**
-  ```js
-  parseKnowledgeEntries(knowledgeContent)
-  // Extracts judgment-log entries with confidence, domain, recommendation
-  ```
-
-- **Semantic Matching**
-  ```js
-  extractConcepts(entry) // Extract domain tags, failure modes, constraints
-  semanticSimilarity(concepts, contextKeywords) // Score relevance
-  ```
-
-- **Find Relevant Knowledge**
-  ```js
-  findRelevantKnowledge(entries, contextKeywords, minConfidence=0.6, minSimilarity=0.5)
-  // Returns sorted by combined score (confidence × 0.7 + similarity × 0.3)
-  ```
-
-- **Detect Contradictions**
-  ```js
-  detectContradictions(entries) // Flag conflicting recommendations
-  ```
-
-- **Format for Injection**
-  ```js
-  formatKnowledgeForInjection(relevantKnowledge)
-  // Human-readable markdown with confidence/relevance scores
-  ```
-
-- **Track Usage** (for feedback loop)
-  ```js
-  trackKnowledgeUsage(taskId, injectedKnowledge)
-  // Logs which knowledge was used for effectiveness measurement
-  ```
-
-### Integration into auto-prompts.js
-
-**Modified:** `src/resources/extensions/sf/auto-prompts.js`
-
-Added:
-1. Import of knowledge-injector module
-2. Helper function `getKnowledgeInjection(basePath, taskContext)` with graceful degradation
-3. Knowledge injection into execute-task prompt with context (domain, keywords, technology)
-
-**In execute-task prompt loading (line 2203+):**
-```javascript
-const knowledgeInjection = await getKnowledgeInjection(base, {
-  domain: "task-execution",
-  taskType: "execute-task",
-  keywords: [tTitle, sTitle, mid, sid],
-  technology: [],
-});
-
-return loadPrompt("execute-task", {
-  memoriesSection,
-  knowledgeInjection, // NEW: Relevant prior learning
-  overridesSection,
-  // ... other variables
-});
-```
-
-### Existing Infrastructure
-
-**Note:** Knowledge injection is **60% complete** via existing `queryKnowledge()` in context-store.js
-
-- ✅ `inlineKnowledgeScoped()` already exists (uses queryKnowledge)
-- ✅ Used in both plan-slice and execute-task prompts
-- ❌ Uses simple keyword matching (not semantic scoring)
-- ✅ Our new module enhances with semantic similarity
-
-### Next Steps for Full Integration
-
-1. Update execute-task and plan-slice prompt templates to include `{{knowledgeInjection}}` variable
-2. Integrate semantic scoring into queryKnowledge or create parallel path
-3. Implement feedback loop: track which knowledge was used and measure effectiveness
-4. Create contradiction resolver UI for conflicting recommendations
-5. Add knowledge effectiveness metrics to benchmark reports
-
----
-
-## Files Created
-
-| File | Lines | Purpose |
-|------|-------|---------|
-| `src/resources/extensions/sf/self-report-fixer.js` | 348 | Auto-fix high-confidence self-reports |
-| `src/resources/extensions/sf/model-learner.js` | 344 | Per-task-type model performance tracking |
-| `src/resources/extensions/sf/knowledge-injector.js` | 336 | Semantic knowledge matching and injection |
-
-## Files Modified
-
-| File | Changes | Purpose |
-|------|---------|---------|
-| `src/resources/extensions/sf/auto-prompts.js` | +7 lines | Added knowledge injection into execute-task |
-
-## Build Status
-
-✅ **Build Success**
-- All new modules compile without errors
-- TypeScript types intact
-- Resources copied to `dist/`
-- Inventory check passed
-
-## Testing Recommendations
-
-Create integration tests for:
-
-1. **Self-Report Fixer**
-   - Pattern matching accuracy (4 patterns)
-   - Deduplication logic
-   - Confidence thresholding
-
-2. **Model Learner**
-   - Success rate calculation
-   - Demotion logic (>50% failure rate)
-   - A/B test analysis
-   - Failure pattern detection
-
-3. **Knowledge Injector**
-   - Semantic similarity scoring
-   - Contradiction detection
-   - Formatting for prompt injection
-   - Graceful degradation (missing KNOWLEDGE.md)
-
-## Activation Timeline
-
-**To fully activate these quick wins:**
-
-1. **Week 1:** Hook model-learner into auto-dispatch outcome logging
-2. **Week 1:** Integrate self-report-fixer into triage-self-feedback pipeline
-3. **Week 2:** Implement knowledge injection in model-router for adaptive routing
-4. **Week 2:** Add A/B testing orchestration for model promotion
-5. **Week 3:** Create feedback loop dashboard in benchmark-selector
-6. **Week 3:** Measure impact on learning efficiency
-
-**Estimated effort:** 8-10 days of focused integration work
-
----
-
-## Key Design Decisions
-
-1. **Graceful Degradation** — All modules degrade gracefully if knowledge base or tracking files are unavailable
-2. **Append-Only Logs** — Failure logs use JSONL for durability and analysis
-3. **Per-Task-Type Tracking** — Model performance varies by task type; no single ranking
-4. **Confidence-Based Thresholding** — High-confidence fixes (>0.85) auto-apply; lower ones require review
-5. **A/B Test Budgeting** — Low-risk hypothesis testing with configurable test budget
-
----
-
-## Impact Measurement
-
-**After full integration, expect:**
-
-- 🎯 **9/10 impact** from self-report loop: Close feedback loop from anomaly detection to code fixes
-- 🎯 **8/10 impact** from model learning: 20-30% improvement in task success rate through adaptive routing
-- 🎯 **7/10 impact** from knowledge injection: 15-20% faster task planning via relevant prior learning
-
-**Total:** **24/30 self-evolution capability points activated** (up from current 15/30)
-
----
-
-## Code Quality
-
-- ✅ No external dependencies (uses only Node.js built-ins + SF imports)
-- ✅ JSDoc purpose statements on all exports
-- ✅ Graceful error handling (no crash on missing files)
-- ✅ Idempotent tracking (safe to call multiple times)
-- ✅ Clear separation of concerns (fixer ≠ learner ≠ injector)
-
----
-
-## Status Summary
-
-**Phase:** ✅ **IMPLEMENTATION COMPLETE**  
-**Phase:** ⏳ **INTEGRATION PENDING** (dispatch loop hookup)  
-**Phase:** ⏳ **TESTING PENDING** (unit + integration tests)  
-**Phase:** ⏳ **FEEDBACK LOOP PENDING** (measure effectiveness)
-
-The infrastructure is in place. Next: Connect it into the dispatch loop and measure impact.
diff --git a/TRIAGE_COMPLETE.md b/TRIAGE_COMPLETE.md
deleted file mode 100644
index 2d19f1acf..000000000
--- a/TRIAGE_COMPLETE.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# Triage Complete ✅
-
-**Timestamp:** 2026-05-06 16:30 UTC  
-**Source:** TODO.md (Raw Dump Inbox)  
-**Command:** `sf todo triage`  
-**Node baseline:** v26.1.0+  
-**Session:** 77b45896
-
-## Summary
-
-Successfully triaged 60 items from TODO.md into structured backlog artifacts:
-
-- ✅ **60 items** normalized into `.sf/triage/inbox/20260506-163003.jsonl`
-- ✅ **10 eval candidates** extracted into `.sf/triage/evals/20260506-163003.evals.jsonl`
-- ✅ **1 skill proposal** in `.sf/triage/skills/20260506-163003.skills.jsonl`
-- ✅ **Comprehensive report** generated at `.sf/triage/reports/20260506-163003.md`
-- ✅ **TODO.md reset** to empty dump inbox (triage pipeline activated)
-
-## Artifacts Created
-
-### 1. Triage Report (`.sf/triage/reports/20260506-163003.md`)
-Comprehensive analysis including:
-- Summary of source material
-- 10 eval candidates with failure modes and test locations
-- 21 implementation tasks (gsd-2 ports, feature additions, provider expansion)
-- Memory requirements for self-evolution infrastructure
-- Harness suggestions for testing (property-based, chaos, end-to-end)
-- Documentation improvements needed (ARCHITECTURE.md, ADRs, runbooks)
-- Clarification needs ("Unclear Notes" section)
-
-**Key findings:**
-- UOK is 60-70% complete for self-evolution
-- Critical: Close self-report feedback loop (9/10 impact)
-- 10+ undocumented architecture features identified
-- Multiple safety/correctness fixes awaiting port from gsd-2
-
-### 2. Normalized Inbox (`.sf/triage/inbox/20260506-163003.jsonl`)
-60 structured items with:
-- Type: eval_candidate, implementation_task, doc_improvement, harness_suggestion, memory_requirement, unclear_note
-- Status: pending
-- Source tracing: all items linked back to TODO.md section
-- Prioritization ready for milestone planning
-
-### 3. Eval Candidates (`.sf/triage/evals/20260506-163003.evals.jsonl`)
-10 test harness candidates with:
-- Task input (trigger/condition)
-- Expected behavior (contract)
-- Failure mode (what breaks if missing)
-- Evidence/source (citations to gsd-2/pi-mono commits)
-- Suggested test location
-
-**Quick examples:**
-1. `bash-evidence-race` — Evidence persists across dispatch/re-dispatch
-2. `symlink-staging-data-loss` — Data-loss prevention for symlinked .sf
-3. `mcp-stdout-deadlock` — Large MCP outputs don't hang
-4. `env-sf-vars-unvalidated` — SF_* env vars validated at startup
-
-### 4. Skill Proposals (`.sf/triage/skills/20260506-163003.skills.jsonl`)
-Architecture analysis suggesting improvements to SF's extension/gate system.
-
-## Next Steps
-
-1. **Review triage report** — Read `.sf/triage/reports/20260506-163003.md`
-2. **Plan implementation** — Promote high-impact items to milestone backlog
-3. **Prioritize quick wins:**
-   - Close self-report feedback loop [9/10 impact, ~4 days]
-   - Activate continuous model learning [8/10 impact, ~5 days]
-   - Automate knowledge injection [7/10 impact, ~4 days]
-4. **Port gsd-2 safety fixes** — 9 commits awaiting cherry-pick
-5. **Close documentation gaps** — Update ARCHITECTURE.md with state machine diagram
-
-## Evidence
-
-```
-$ ls -la .sf/triage/
-  drwxrwxr-x  evals/
-  drwxrwxr-x  inbox/
-  drwxrwxr-x  reports/
-  drwxrwxr-x  skills/
-
-$ wc -l .sf/triage/*/*.{md,jsonl}
-   60 .sf/triage/inbox/20260506-163003.jsonl
-   10 .sf/triage/evals/20260506-163003.evals.jsonl
-    1 .sf/triage/skills/20260506-163003.skills.jsonl
- 9682 .sf/triage/reports/20260506-163003.md
-
-$ git status
-  D TODO.md (reset to empty dump inbox; items triaged)
-  M docs/* (from earlier work)
-```
-
-## What This Means
-
-SF's triage system successfully:
-1. ✅ Parsed TODO.md dump inbox
-2. ✅ Extracted 60 items into structured types (eval, task, doc, harness, etc.)
-3. ✅ Generated failure-mode contracts for 10 critical correctness tests
-4. ✅ Identified test locations and citations to source code
-5. ✅ Reset TODO.md for next cycle
-6. ✅ Created decision artifacts ready for milestone planning
-
-The comprehensive review, research, documentation updates, and automated triage are complete. The project is now positioned to:
-- Activate SF's self-evolution learning loop (3 quick wins)
-- Port 9 safety/correctness fixes from gsd-2
-- Close 10+ documentation gaps in ARCHITECTURE.md
-- Implement property-based testing for autonomous dispatch
-- Begin advanced feature ports (Cloudflare AI, Azure endpoints, SSE handling)
-
----
-
-**Created by:** Copilot CLI  
-**Session:** 2514fa98-076d-48d2-a1f9-c3fd77c4a82a  
-**Duration:** ~2 hours total (research + docs + triage)  
-**Command:** `node dist/cli.js todo triage`  
diff --git a/TRIAGE_README.md b/TRIAGE_README.md
deleted file mode 100644
index e284790a3..000000000
--- a/TRIAGE_README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# TODO.md Triage Instructions
-
-## What's New
-
-TODO.md now contains two major sections ready for triage:
-
-1. **Feature Gaps & Limitations** — 40+ specific gaps identified in the codebase
-2. **UOK Self-Evolution Research** — 10 prioritized improvements for SF's self-evolution capabilities
-
-## How to Triage
-
-When you have Node 26.1.0+ available:
-
-```bash
-cd /home/mhugo/code/singularity-forge
-
-# Run the triage command
-sf todo triage
-
-# Or if using npm/nvm
-nvm use 26
-npm exec sf -- todo triage
-```
-
-## What Triage Does
-
-The triage tool will:
-1. Parse TODO.md
-2. Extract items into structured `.sf/triage/` artifacts
-3. Propose categorization and priorities
-4. Show you a review interface
-5. Either commit to backlog or reset TODO.md to empty dump inbox
-
-## Key Items to Watch For
-
-The UOK Self-Evolution section has **3 high-impact quick wins** (8-10 days total):
-
-1. Close self-report feedback loop [9/10 impact, 2-3 days]
-2. Activate continuous model learning [8/10 impact, 3-4 days]  
-3. Automate knowledge injection [7/10 impact, 2-3 days]
-
-These should be prioritized if you want to activate SF's learning loop.
-
-## Full Research Report
-
-See: `/home/mhugo/snap/copilot-cli/38/.copilot/session-state/2514fa98-076d-48d2-a1f9-c3fd77c4a82a/research/is-our-uok-the-best-for-a-self-evolving-coder-what.md`
-
-This contains:
-- Executive summary
-- Detailed analysis of UOK implementation vs. documentation
-- 10 improvement suggestions with feasibility assessment
-- Competitive analysis (vs. other orchestration systems)
-- 15+ citations to code and design docs
diff --git a/autoresearch.md b/autoresearch.md
deleted file mode 100644
index 9752dd7b5..000000000
--- a/autoresearch.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# Autoresearch: Reduce Biome Lint Diagnostics
-
-## Objective
-Minimize the total number of Biome lint diagnostics (errors + warnings + info) across `src/`, starting from baseline ~40 diagnostics. Errors are mostly `organizeImports`, warnings are `noUnusedImports`, `noUnusedVariables`, and `useConst`.
-
-## Metrics
-- **Primary**: `diagnostics` (count, lower is better) — sum of errors + warnings + info from `npx biome check src/`
-- **Secondary**: `errors` (count, lower is better)
-- **Secondary**: `warnings` (count, lower is better)
-
-## How to Run
-`bash autoresearch.sh` — runs Biome check, parses JSON summary, outputs `METRIC diagnostics=N` and `METRIC errors=N` and `METRIC warnings=N`.
-
-## Files in Scope
-All files under `src/` — but focus on the files flagged by Biome:
-- `src/resources/extensions/sf/auto/phases.js`
-- `src/resources/extensions/sf/commands/handlers/ops.js`
-- `src/resources/extensions/sf/memory-repository.js`
-- `src/resources/extensions/sf/metrics-central.js`
-- `src/resources/extensions/sf/reasoning-assist.js`
-- `src/resources/extensions/sf/remote-steering.js`
-- `src/resources/extensions/sf/sf-db.js`
-- `src/resources/extensions/sf/subagent-inheritance.js`
-- `src/resources/extensions/sf/tests/memory-repository.test.mjs`
-- `src/resources/extensions/sf/tests/metrics-central.test.mjs`
-- `src/resources/extensions/sf/tests/trajectory-recorder.test.mjs`
-- `src/resources/extensions/sf/trajectory-command.js`
-- `src/resources/extensions/sf/trajectory-recorder.js`
-- `src/resources/extensions/sf/uok/writer.js`
-
-## Off Limits
-- `biome.json` (don't change lint rules — fixing source is the goal)
-- `node_modules/`, `dist/`, `.sf/`, `packages/` (outside `src/` scope)
-- Test assertion logic (don't weaken tests to make linters pass)
-
-## Constraints
-- Existing vitest tests must pass: `npx vitest run --config vitest.config.ts`
-- No new dependencies
-- Don't introduce runtime behavior changes — only lint/import/style fixes
-
-## Termination
-Run until interrupted by the user.
-
-## What's Been Tried
-
-- **#2 (auto-fix)**: `biome check --write` — fixed 26 auto-fixable errors (format/organizeImports), dropped diagnostics from 40 to 11. Status: keep.
-- **#3 (manual fixes)**: Removed 7 unused imports and prefixed 4 intentionally-unused items with underscore. Dropped from 11 to 0. Status: keep.
-- **#4 (regression re-fix)**: 37 new commits introduced 74 diagnostics. `biome check --write` fixed 58 (auto-safe), manual prefix/removal fixed the remaining 16 unsafe warnings across 11 files. Also fixed pre-existing web-mode-onboarding test timeout: added `timeoutMs: 120_000` to `launchPackagedWebHost`, raised `AbortSignal.timeout` on simple fetches 10s→30s, raised test budget 180s→420s. All 409 test files pass. Diagnostics: 0. Status: keep.
-
-## Lessons
-- New development (37 commits) is enough to re-introduce 74 diagnostics. Re-run autoresearch periodically (monthly or after large feature branches land).
-- Pattern of new violations: unused imports from refactors, unused function params from stubs, duplicate imports. Auto-fix handles errors; unsafe-fix (unused-import/var) requires manual triage.
-- Integration test timeout under parallel load: cold-start Next.js can consume most of a 180s test timeout leaving insufficient budget for multi-step API calls. Fix: bound launch phase separately, raise individual fetch timeouts, increase overall budget to match worst-case sum.
diff --git a/copilot-thoughts.md b/copilot-thoughts.md
deleted file mode 100644
index 459c81d64..000000000
--- a/copilot-thoughts.md
+++ /dev/null
@@ -1,1267 +0,0 @@
-# Agent Mode And Skills Notes For SF
-
-Sources checked 2026-05-08:
-
-- GitHub Docs, "Allowing GitHub Copilot CLI to work autonomously"
-  <https://docs.github.com/en/copilot/concepts/agents/copilot-cli/autopilot>
-- GitHub Docs, "GitHub Copilot CLI command reference"
-  <https://docs.github.com/en/copilot/reference/copilot-cli-reference/cli-command-reference>
-- GitHub Copilot CLI product page
-  <https://github.com/features/copilot/cli>
-- GitHub Changelog, "GitHub Copilot CLI is now generally available"
-  <https://github.blog/changelog/2026-02-25-github-copilot-cli-is-now-generally-available/>
-- GitHub Changelog, "Copilot CLI now supports BYOK and local models"
-  <https://github.blog/changelog/2026-04-07-copilot-cli-now-supports-byok-and-local-models/>
-- Factory Droid, "Autonomy Level"
-  <https://docs.factory.ai/cli/user-guides/auto-run>
-- Factory Droid, "CLI Reference"
-  <https://docs.factory.ai/reference/cli-reference>
-- Factory Droid, "Skills"
-  <https://docs.factory.ai/cli/configuration/skills>
-- Amp manual
-  <https://ampcode.com/manual>
-- Amp, "Agent Skills"
-  <https://ampcode.com/news/agent-skills>
-
-## Competitive Patterns
-
-### Copilot CLI
-
-Copilot CLI has the cleanest public "continue work" story:
-
-- plan first, then accept the plan and continue without step-by-step approval
-- continuation is separate from permission expansion
-- question suppression is separate from continuation
-- a runaway continuation cap is explicit
-- `/fleet` parallelizes with subagents
-- `/remote` steers a running session from another device
-- `/tasks` exposes background work
-- `/session` exposes session info, checkpoints, files, plans, cleanup, and
-  pruning
-- `/skills`, `/plugin`, `/mcp`, and `/agent` are visible control surfaces
-- BYOK, local-model, and offline provider configuration are first-class
-
-Useful shape:
-
-```bash
-copilot --autopilot --yolo --max-autopilot-continues 10 -p "YOUR PROMPT HERE"
-```
-
-SF should copy the separation, not the names:
-
-- continuation is run control
-- permission expansion is permission profile
-- question behavior is an escalation policy
-- runaway caps are explicit autonomous limits
-
-### Factory Droid
-
-Factory Droid makes the most important distinction explicit: Autonomy Level is
-separate from interaction mode.
-
-- interaction mode: Auto vs Spec Mode
-- autonomy level: Off, Low, Medium, High
-- execution surface: interactive `droid` vs headless `droid exec`
-- tools and commands carry risk levels
-- command allowlists and denylists layer on top of autonomy
-- Spec Mode plans first; after approval, Droid exits Spec Mode and implements
-  with the chosen autonomy level
-- `droid exec` is read-only by default and raises permissions with
-  `--auto low|medium|high`
-- custom droids/subagents can have their own model/tool/autonomy policies
-- skills are reusable capabilities that can be user-invoked or invoked by the
-  Droid when relevant
-
-This validates SF's split between work mode, run control, and permission
-profile.
-
-### Amp
-
-Amp is useful for the agent-shape and skills model:
-
-- modes are model/capability presets: `smart`, `rush`, `deep`
-- skills live in `.agents/skills/` and user-level skill directories
-- skill content is lazily loaded only when relevant
-- skills can package instructions, scripts, resources, and tool/MCP config
-- subagents can be spawned automatically for isolated work, but they have
-  isolated context and return only final summaries
-- Oracle and Librarian are specialized helper agents for second opinion and
-  cross-repository research
-
-Amp validates `.agents/skills/` as the preferred repo-local skill path.
-
-## SF Model
-
-SF should represent agent state as orthogonal axes, not one overloaded mode.
-
-```text
-workMode: chat | plan | build | review | repair | research
-runControl: manual | assisted | autonomous
-permissionProfile: restricted | normal | trusted | unrestricted
-modelMode: fast | smart | deep
-surface: tui | web | headless | rpc
-```
-
-Note: `repair` is a `workMode`, not a separate subsystem. The `/doctor` command is the diagnostic engine; `/repair` switches `workMode` to `repair`.
-
-Examples:
-
-```text
-plan     | manual     | normal     | deep
-build    | autonomous | trusted    | smart
-repair   | assisted   | normal     | smart
-research | autonomous | restricted | deep
-review   | manual     | restricted | deep
-```
-
-Definitions:
-
-- `workMode` describes what kind of work SF is doing.
-- `runControl` describes who advances the loop.
-- `permissionProfile` describes what tool/file/network actions may proceed
-  without approval.
-- `modelMode` describes speed/cost/reasoning posture.
-- `surface` describes how the user or automation is connected.
-
-`autonomous` is not the whole mode. It is a run-control value.
-
-## Work Modes
-
-### `chat`
-
-Default conversational mode for questions, explanations, and low-commitment
-exploration.
-
-### `plan`
-
-Research, clarify, write/update specs, derive tasks, and produce an explicit
-acceptance point before implementation.
-
-### `build`
-
-Implement, test, lint, typecheck, verify, and prepare commit-ready changes.
-
-### `review`
-
-Inspect diffs, tests, risks, regressions, security issues, and missing evidence.
-
-### `repair`
-
-Fix SF health, repo health, runtime drift, broken generated state, bad command
-surfaces, failing workflow infrastructure, stale locks, and broken installed
-runtime copies.
-
-Doctor is not a permanent mode. Doctor is the diagnostic engine used by
-`repair`.
-
-### `research`
-
-Longer-form codebase, competitor, design, API, or dependency research. This can
-use web search, local code exploration, cross-repo research, and helper agents.
-
-## Run Control
-
-```text
-manual     user drives every step
-assisted   SF executes one unit, then pauses
-autonomous SF continues until done, blocked, interrupted, budget-hit, or limit-hit
-```
-
-Transitions:
-
-```text
-/control manual
-/control assisted
-/control autonomous
-/autonomous
-/next
-/pause
-/stop
-```
-
-`/autonomous` is a direct command. Do not route through `/sf autonomous`.
-
-## Permission Profiles
-
-```text
-restricted   read-only and explicitly allowlisted actions
-normal       safe edits, non-destructive local commands
-trusted      build/test/install/local commits and bounded repo automation
-unrestricted high-risk orchestration only in intentionally trusted environments
-```
-
-This is SF's equivalent of Droid autonomy levels and Copilot permission
-expansion, but the names are SF-native and policy-oriented.
-
-Rules:
-
-- Permission profile never implies autonomous continuation.
-- Autonomous continuation never implies broader permissions.
-- Denylists and safety gates override permission profile.
-- Risk decisions must be logged with the active work mode, run control, and
-  permission profile.
-
-## Model Modes
-
-```text
-fast   cheap/quick routing for small bounded tasks
-smart  default balanced routing
-deep   high-reasoning routing for planning, debugging, research, and review
-```
-
-This is SF's equivalent of Amp's `rush`, `smart`, and `deep`, but with names
-that match SF's tone and routing layer.
-
-`modelMode` should guide routing; it should not replace explicit model
-selection.
-
-## Mode Switching
-
-Mode switching must be first-class and visible.
-
-Direct commands:
-
-```text
-/mode chat
-/mode plan
-/mode build
-/mode review
-/mode repair
-/mode research
-/control manual
-/control assisted
-/control autonomous
-/trust restricted
-/trust normal
-/trust trusted
-/trust unrestricted
-/model-mode fast
-/model-mode smart
-/model-mode deep
-```
-
-Combined forms:
-
-```text
-/mode repair --autonomous --trust normal
-/mode build --autonomous --trust trusted
-/mode research --autonomous --trust restricted --model-mode deep
-```
-
-Autonomous steering:
-
-```text
-/steer mode repair
-/steer mode review after-current-unit
-/steer trust restricted now
-/steer model-mode deep after-current-unit
-```
-
-Transition scopes:
-
-- `now`: apply before the next dispatch point if no tool is active
-- `after-current-tool`: finish the active tool, then switch
-- `after-current-unit`: finish the current SF unit, then switch
-- `next-milestone`: switch after the current milestone completes
-
-Autonomous mode changes should affect future decisions, not mutate an active
-tool call midway through execution.
-
-Every transition should be logged:
-
-```json
-{
-  "from": {"workMode": "build", "runControl": "autonomous"},
-  "to": {"workMode": "repair", "runControl": "autonomous"},
-  "reason": "pre-dispatch health gate failed",
-  "scope": "after-current-unit"
-}
-```
-
-## Plan To Autonomous Handoff
-
-The primary user journey should be:
-
-```text
-plan | manual | normal | deep
-accept plan
-build | autonomous | selected-permission-profile | smart
-```
-
-Required surfaces:
-
-- TUI: plan acceptance prompt includes "run autonomously"
-- Web: plan acceptance button includes "run autonomously"
-- Headless: `--autonomous` chains into direct `/autonomous`
-- RPC: machine event records the transition explicitly
-
-This should not use `/sf`.
-
-## Repair Work Mode
-
-`repair` is a `workMode`, not a separate subsystem.
-
-Commands:
-
-```text
-/doctor
-/doctor fix
-/doctor heal
-/repair
-/repair --autonomous
-```
-
-Semantics:
-
-- `/doctor` inspects health and reports.
-- `/doctor fix` applies deterministic repairs.
-- `/doctor heal` uses an LLM-assisted diagnostic flow for deeper issues.
-- `/repair` switches work mode to `repair`.
-- `/repair --autonomous` keeps repairing until clean, blocked, or limit-hit.
-
-Automatic transitions:
-
-```text
-build | autonomous | trusted | smart
--> repair | autonomous | normal | smart
-```
-
-This is allowed when health gates fail, installed runtime drift is detected, SF
-cannot dispatch safely, or repo workflow state is corrupted.
-
-## Skills
-
-SF should use `.agents/skills/` for repo-local skills.
-
-```text
-.agents/skills/<skill-name>/
-  SKILL.md
-  scripts/
-  schemas/
-  checklists/
-  mcp.json
-```
-
-Skill behavior should match the best Factory/Amp pattern:
-
-- skills are narrow reusable capabilities
-- users can invoke a skill directly when it is user-invocable
-- SF can lazily load a skill when relevant if model invocation is allowed
-- supporting files live beside the skill
-- dangerous skills are never model-invoked by default
-- project skills are committed with the repo
-- user skills live in user-level skill directories
-
-Recommended frontmatter:
-
-```yaml
----
-name: forge-command-surface
-description: Use when changing SF slash commands, browser command parity, or headless command dispatch.
-user-invocable: true
-model-invocable: true
-side-effects: code-edits
-permission-profile: normal
----
-```
-
-Dangerous workflow:
-
-```yaml
----
-name: production-deploy
-description: Deploy production services after release gates pass.
-user-invocable: true
-model-invocable: false
-side-effects: production-mutation
-permission-profile: trusted
----
-```
-
-Background knowledge:
-
-```yaml
----
-name: forge-autonomous-runtime
-description: Explains SF autonomous loop, UOK gates, installed-runtime drift, and recovery paths.
-user-invocable: false
-model-invocable: true
-side-effects: none
-permission-profile: restricted
----
-```
-
-## Automatic Skill Creation
-
-SF should add repo-specific skills when it repeatedly rediscovers a useful
-pattern.
-
-Flow:
-
-1. Detect repeated repo-specific evidence: same files, same commands, same
-   failure mode, same architectural rule, same verification path.
-2. Propose a skill in manual/restricted contexts.
-3. Generate or update a project skill automatically only when policy allows it.
-4. Record source evidence in `.sf` state.
-5. Keep the skill narrow and testable.
-6. Commit the skill with the repo when accepted.
-
-Examples for Forge:
-
-- `forge-command-surface`
-- `forge-web-mode`
-- `forge-autonomous-runtime`
-- `forge-release-verification`
-- `forge-installed-runtime-drift`
-
-Examples for DR:
-
-- `dr-agent-windows`
-- `dr-portal-ui-and-handlers`
-- `dr-production-readiness`
-- `dr-systematic-debugging`
-
-This is the Hermes-agent direction: reusable operational knowledge becomes
-repo-local skills plus `.sf` evidence, not scattered markdown.
-
-## Background Work Surface
-
-SF needs one coherent background work surface.
-
-Direct command:
-
-```text
-/tasks
-```
-
-It should show:
-
-- autonomous units (durable state: todo | in_progress | review | done | retrying | failed | cancelled)
-- parallel workers
-- scheduled autonomous dispatches
-- background shell sessions
-- stuck or resumable sessions
-- remote questions waiting for answers
-- current cost/budget state
-- last checkpoint and next action
-
-Task lifecycle uses ORCH-style states. `todo` means ready to run, not "queued."
-
-This complements, not replaces:
-
-- `/status`
-- `/queue` (milestone dispatch order, not task state)
-- `/parallel status`
-- `/session-report`
-- `/logs`
-- `/forensics`
-
-Copilot's `/tasks` and `/session` are less powerful internally, but clearer as
-control surfaces. SF should keep its deeper state and expose it better.
-
-## Actual Source Pass: Awesome CLI Agent Repos
-
-Checked locally under `/tmp/sf-agent-research`:
-
-- `bradAGI/awesome-cli-coding-agents`
-- `plandex-ai/plandex`
-- `leonardcser/smelt`
-- `mikeyobrien/ralph-orchestrator`
-- `subsy/ralph-tui`
-- `oxgeneral/ORCH`
-- `LucasDuys/forge`
-- `ramarlina/agx`
-- `youwangd/SageCLI`
-- `jcast90/relay`
-- `basilisk-labs/agentplane`
-- `amaar-mc/wit`
-- `fastxyz/skill-optimizer`
-- `0xmariowu/AgentLint`
-- `ZENG3LD/gate4agent`
-
-`arosstale/pi-builder` was listed, but the GitHub repository could not be found
-when a clone was attempted on 2026-05-08.
-
-### Smelt
-
-Smelt's source has four modes:
-
-```text
-normal -> plan -> apply -> yolo
-```
-
-It also has separate reasoning effort:
-
-```text
-off | low | medium | high | max
-```
-
-Useful:
-
-- mode cycling is explicit and configurable
-- permissions differ by mode
-- read-only commands are allowed, writes usually ask, deny wins
-- approval scopes are explicit: once, session, workspace
-- workspace approvals persist under a workspace hash
-
-Do not copy:
-
-- `yolo` as a name
-- putting work kind and trust level into one mode axis
-
-SF should keep Smelt's visible cycling and approval scopes, but preserve SF's
-separate axes: `workMode`, `runControl`, `permissionProfile`, and `modelMode`.
-
-### ORCH
-
-ORCH has the cleanest small task state machine:
-
-```text
-todo -> in_progress -> review -> done
-                  \-> retrying -> in_progress
-                  \-> failed
-review -> todo
-* -> cancelled
-```
-
-It also keeps runtime state separately:
-
-- `running`
-- `claimed`
-- `retry_queue`
-- total run/task/token/runtime stats
-
-Useful for SF:
-
-- `/tasks` should show both durable task status and ephemeral running state
-- successful completion should pass through review, even when auto-approved
-- dependency blockers should be computed, not implied from ordering
-- retrying should be an explicit state, not hidden inside logs
-
-### AgentPlane
-
-AgentPlane's strongest idea is schema-first task artifacts. Task README
-frontmatter includes:
-
-- `risk_level`
-- `status`
-- `depends_on`
-- `task_kind`
-- `mutation_scope`
-- `risk_flags`
-- `blueprint_request`
-- `verify`
-- `plan_approval`
-- `verification`
-- `runner`
-
-Its workflow file also makes operational policy explicit:
-
-- workflow mode
-- status commit policy
-- workspace isolation
-- retry policy
-- scheduler concurrency
-- required evaluator checks
-- event log location
-
-Useful for SF:
-
-- task artifacts should have schema-backed frontmatter, not loose markdown
-- plan approval and verification state deserve durable fields
-- mutation scope and risk flags should feed `permissionProfile`
-- workflow policy should be inspectable by `/status` and `/tasks`
-
-### Relay
-
-Relay's useful concepts:
-
-- a channel is the workspace for one piece of work
-- tickets are parallelizable units with dependency DAGs, retry budgets,
-  specialty tags, optional repo routing, and verification commands
-- decisions are first-class durable records
-- crosslink lets agents discover and message other sessions
-- complexity tiers drive approval behavior
-- CLI/TUI/GUI all read the same state
-
-Useful for SF:
-
-- keep decisions as first-class records, not buried in summaries
-- remote steering should become full-session steering and cross-session
-  messaging, not only remote questions
-- multi-repo work needs explicit repo routing on tasks
-- one state store should power TUI, web, headless, and RPC
-
-### Ralph
-
-Ralph's hat system is useful as a coordination topology:
-
-- hats declare triggers, publishes, instructions, backend overrides, max
-  activations, and disallowed tools
-- events flow through a bus
-- scope violations are detected when hats publish undeclared topics
-- exhaustion emits explicit events
-
-Useful for SF:
-
-- specialized helpers should declare trigger/publish contracts
-- helper activation should have max activation limits
-- helper output should be checked against declared output topics
-- mode transitions can be modeled as events, not ad hoc flags
-
-### Sage
-
-Sage's real value is runtime-neutral orchestration:
-
-- agents are processes
-- messages are files
-- tasks are templates with frontmatter
-- plans decompose into dependency waves
-- tasks in a wave execute in parallel
-- resume skips done tasks and resets stale running tasks
-- runtime fallback is explicit
-- bench-as-code compares actual agent CLIs on actual tasks
-
-Useful for SF:
-
-- `/tasks` should be file/DB-backed enough that headless tools can read it
-  without attaching to a live TUI
-- dependency waves should be visible in planning output
-- stale running work should be reset or surfaced clearly on resume
-- model/provider benchmarking should use actual SF workflows, not isolated
-  model prompts
-
-### AGX
-
-AGX has useful low-level patterns:
-
-- graph scheduler with hard, soft, failure, and always dependency conditions
-- max concurrent work slots
-- checkpoints with patch files and bounded history
-- deterministic verify gate before LLM fallback
-- repeated verification failure count that forces action
-
-Useful for SF:
-
-- dependency edges should support more than "depends on success"
-- checkpoints should store patch references and bounded summaries
-- deterministic verification should always run before semantic/LLM review
-- repeated verify failures should force a mode transition to `repair` or
-  `review`, not keep retrying indefinitely
-
-### Wit
-
-Wit is the strongest coordination pattern for parallel edits:
-
-- agents declare intent before editing
-- agents acquire symbol-level locks
-- conflicts are warnings, not always hard blocks
-- contracts can be enforced by git hooks
-- Tree-sitter provides symbol ranges and call edges
-- a `coordinate` skill auto-loads when `.wit/` exists
-
-Useful for SF:
-
-- parallel SF workers should declare intent before editing
-- conflict detection should eventually be symbol-aware, not only file-aware
-- warnings can steer agents away from collisions without freezing work
-- accepted interface contracts should be enforceable before commit
-
-### skill-optimizer
-
-Skill optimizer has the best pattern for making skills real:
-
-- a case is a user-like task plus deterministic graders
-- a suite is a case/model matrix
-- references are copied into `/work`
-- the agent sees only `/work`, not graders or hidden answers
-- graders inspect files, artifacts, `answer.json`, `trace.jsonl`, and result
-  state
-- failed trials preserve workspace for debugging
-
-Useful for SF:
-
-- auto-created skills need eval cases
-- skill acceptance should be grader-backed, not vibes-backed
-- negative cases should check that irrelevant skills were not loaded
-- skill optimization should test across model modes/providers
-
-### Plandex And Forge Loop
-
-Plandex reinforces:
-
-- chat/tell split
-- configurable autonomy levels
-- cumulative diff sandbox before applying changes
-- model packs for planning vs execution
-
-Forge Loop reinforces:
-
-- R-numbered acceptance criteria
-- task DAGs with tiered parallelism
-- per-task worktrees
-- per-task and session token budgets
-- structural completion markers
-- backpropagation from runtime failure to spec gap
-- state on disk as the recovery source
-
-SF already has many of these ideas. The part to tighten is the explicit product
-surface: direct commands, visible modes, `/tasks`, schema-backed state, and
-skill evals.
-
-## Status And Mode Badge
-
-The active state should always be visible, especially during full autonomy.
-
-Recommended status line:
-
-```text
-SF  build | autonomous | trusted | smart
-```
-
-Compact badge form:
-
-```text
-[B][A][T][S]
-```
-
-Preferred full labels in critical states:
-
-```text
-repair | autonomous | normal | smart
-review | assisted | normal | deep
-```
-
-Do not use "autopilot" in SF UI. It may appear only as competitor context in
-this research note.
-
-## Implementation Pull-Through
-
-Already directionally right:
-
-- UOK lifecycle records carry `runControl`.
-- UOK lifecycle records and execution-policy decisions carry
-  `permissionProfile`.
-- Schedule command state uses `autonomous_dispatch`.
-- SF has DB-backed state, recovery, verification, scheduling, captures,
-  forensics, projections, and self-reporting.
-- SF has skills and project-specific skill paths.
-- SF has parallel orchestration and remote-question infrastructure.
-
-Still needed:
-
-- ~~Remove `/sf` from docs/web/tests (Phase 2 deprecation)~~ ✓ Complete
-
-Completed ✓ (RA.Aid Patterns — Phase 2):
-
-- structured memory repositories (`memory-repository.js` — SQLite-backed key facts,
-  snippets, research notes, human inputs, work logs, decisions; content hash
-  deduplication; auto-summarization; prompt formatting; 11 tests pass)
-- trajectory recording (`trajectory-recorder.js` — per-step tool/LLM/error
-  execution trace with costs, tokens, errors; session+unit scoped; exportable;
-  10 tests pass)
-- trajectory command (`/trajectory` — step-by-step trace with `--all`, `--errors`,
-  `--tools`, `--llm`, `--limit=N` flags; wired into `commands/handlers/ops.js`)
-- reasoning assist + memory integration (`reasoning-assist.js` loads key facts,
-  snippets, research notes from memory repository into pre-stage consultation prompt)
-- compaction fix (`register-hooks.js` — never cancel compaction; provide custom
-  compaction summary with work state preservation instead)
-
-Completed ✓ (Additional):
-
-- schema-backed task/frontmatter fields (`task-frontmatter.js` — risk levels,
-  mutation scopes, verification types, plan approval states, task/scheduler
-  statuses; wired into `sf-db.js` `insertTaskSpecIfAbsent()`)
-- subagent provider/model/permission inheritance audit
-  (`subagent-inheritance.js` — blocked providers, fast-mode heavy model blocking,
-  restricted destructive tool blocking; wired into `subagent/index.js`)
-- remote steering as full-session steering surface (`remote-steering.js` —
-  parse/apply/format directives with 5s cooldown throttle)
-- parallel worker intent/claim registry (`parallel-intent.js` — declareIntent,
-  checkIntentConflicts, releaseIntent, getActiveIntents with TTL)
-- skill eval harness foundation (`skills/eval-harness.js` — createEvalCase,
-  runGrader with 30s timeout, runSkillEvals)
-- terminal title mode indicator (`auto/session.js` — OSC escape sequence +
-  `process.title`, format: `SF[workMode|runControl|permissionProfile|modelMode]`)
-- self-feedback → workMode auto-transition (`self-feedback-drain.js` —
-  high/critical feedback dispatches auto-switch to `repair` with reason
-  `"self-feedback-drain"`)
-- UOK events carry workMode + modelMode (`uok/kernel.js` — lifecycleFlags include
-  both; audit envelope payload includes both)
-- enhanced `/steer` with mode transitions (`/steer mode <m> [scope]`,
-  `/steer trust <p> [scope]`, `/steer model-mode <m> [scope]`)
-- `/sf` prefix deprecation warning (Phase 1 — accept both forms, warn once per
-  session)
-- centralized metrics system (`metrics-central.js` — Prometheus-compatible
-  Counter/Gauge/Histogram with session scoping, DB persistence, retry logic,
-  cost/token tracking; wired into subagent-inheritance + mode transitions)
-- explicit stage commands (`/research`, `/plan`, `/implement` — set workMode and
-  dispatch corresponding phase)
-- cost command (`/cost` — queries metrics-central DB + legacy ledger)
-- reasoning assist foundation (`reasoning-assist.js` — pre-stage expert
-  consultation prompt builder, context loading, guidance injection; wired into
-  `auto/phases.js` dispatch path)
-
-Completed ✓:
-
-- make `workMode` durable state (SQLite session_mode_state table + AutoSession persistence)
-- add direct mode/control/trust/model-mode commands
-- make `--autonomous` chain into direct `/autonomous`
-- add visible mode/status surface for TUI and web (header badge + /status)
-- expose autonomous continuation limits in settings and status (mode badge shows runControl)
-- add `/tasks` as the unified background work surface with durable task state,
-  ephemeral running state, retries, blockers, checkpoints, budget, and steering
-- make `repair` a first-class workflow over doctor
-- add policy-aware project skill suggestion/generation (auto-create flow)
-- enhanced `/steer` with mode/trust/model-mode transitions
-- TUI keyboard shortcuts for mode cycling (Ctrl+Shift+M/R/A/S/P)
-- minimal auto-mode header/footer (badge visible during autonomy)
-- `/sf` namespace removed from command registration; direct command roots only
-- parallel worker intent/claim registry (declareIntent, checkIntentConflicts, releaseIntent)
-- skill eval harness foundation (createEvalCase, runGrader, runSkillEvals)
-- terminal title mode indicator (tmux/terminal tab visibility)
-
-## Direct Command Decision
-
-SF is the system, not a plugin namespace.
-
-Use:
-
-```text
-/status
-/autonomous
-/doctor
-/rate
-/session-report
-/parallel
-/remote
-/tasks
-```
-
-`/sf` is not registered in the TUI or browser command surface.
-
-Shell machine surface remains:
-
-```text
-sf headless autonomous
-sf headless --autonomous ...
-```
-
-The target model is simple: direct commands for humans, headless commands for
-machines, durable state for autonomous execution, and explicit axes for mode,
-control, trust, model posture, and surface.
-
-## Runtime Target: Node 26
-
-SF treats Node 26.1+ as the runtime baseline. There is no compatibility path
-for older Node versions in SF-owned runtime code.
-
-Source notes checked 2026-05-08:
-
-- Node 25 is a short-lived current line. It is useful as a compatibility probe,
-  but not a target.
-- Node 26 is current now, LTS-bound, and useful for SF's own runtime model.
-- Bun is closer to Node every release and supports many Node APIs plus
-  Node-API, but its compatibility target and partial API areas do not match
-  SF's risk surface yet.
-- Deno supports Node/npm compatibility, package.json, local node_modules, and
-  Node-API addons with FFI permission, but that means SF would still be running
-  a Node-compatibility workload.
-- LLRT is experimental and serverless-oriented, not a local CLI/runtime fit.
-
-### Why Node 26 Makes SF Stronger
-
-Node 26 is not just "newer Node." It gives SF a better platform for long-running
-agent work:
-
-- `Temporal` is enabled by default.
-- V8 14.6 is the JavaScript engine baseline.
-- Undici 8 is the HTTP/fetch baseline.
-- Node 26 removes and deprecates more legacy APIs, so it hardens SF against old
-  loader, stream, HTTP, crypto, and dependency assumptions.
-
-### Temporal Is More Than Better Dates
-
-Temporal gives SF the vocabulary it already needs for durable autonomous work.
-
-Important Temporal concepts:
-
-- `Temporal.Instant`: an exact point in history. Use for journal events,
-  checkpoint timestamps, lock leases, provider call start/end, and trace order.
-- `Temporal.ZonedDateTime`: an exact instant plus time zone and calendar. Use
-  for reminders, schedules, adoption reviews, audits, and "run this at local
-  business time" semantics.
-- `Temporal.PlainDate`: a calendar date without time or time zone. Use for
-  daily reports, milestone review dates, and human-facing due dates.
-- `Temporal.PlainTime`: a wall-clock time without date or zone. Use for
-  recurring "at 09:00" style policies.
-- `Temporal.PlainDateTime`: a date and wall-clock time before binding it to a
-  zone. Use only when the zone is deliberately chosen later.
-- `Temporal.Duration`: a typed amount of time. Use for budgets, leases,
-  cooldowns, retry delays, schedule offsets, and age checks.
-
-That split matters because SF currently has many different meanings hidden
-behind timestamps and strings:
-
-- exact event ordering
-- local user reminders
-- project schedule dates
-- lease expiry
-- retry backoff
-- adoption review windows
-- elapsed runtime
-- "next business day" style planning
-
-`Date` collapses those into one weak type. Temporal lets SF store and validate
-the real intent.
-
-### SF Runtime Places That Should Use Temporal
-
-Use Temporal first in the areas where wrong time semantics create real
-operational mistakes:
-
-- `sf schedule`: due dates, relative offsets, local-time reminders, audit
-  windows, and recurrence-ready storage.
-- autonomous locks and leases: exact `Instant` plus typed `Duration`, not
-  implicit millisecond math scattered through code.
-- journals and traces: exact event instants with stable ordering and explicit
-  serialization.
-- session reports: elapsed durations and grouped daily summaries without local
-  timezone drift.
-- adoption reviews and decision audits: calendar dates and wall-clock reminders
-  that survive DST and timezone changes.
-- background work surface: task age, stale-running detection, retry-after, and
-  next-action time should be typed.
-
-**Implementation Status:** `temporal-foundation.js` is a native-only Node 26
-wrapper with safe constructors (`instantFromISO`, `durationFromObject`,
-`plainDateFromISO`), serialization, deserialization, and validation. It throws
-clearly when native Temporal is unavailable instead of using compatibility
-shims.
-
-### Temporal Design Rule For SF
-
-Store the semantic type, not just the formatted string:
-
-```text
-event happened exactly now       -> Instant
-run at 09:00 in Europe/Oslo      -> ZonedDateTime or PlainTime + timeZone
-review on 2026-06-01             -> PlainDate
-retry after 30 minutes           -> Duration
-lease expires at exact timestamp -> Instant
-```
-
-Serialization should stay explicit and boring:
-
-- store ISO strings plus a field that says which Temporal type they represent
-- include timezone when wall-clock semantics matter
-- do not infer local timezone at read time unless the record explicitly asks
-  for it
-- validate schedule and lease records at DB boundaries
-
-### Node 26 Adoption Path
-
-Target policy:
-
-```text
-current compatibility floor: Node 26.1+
-internal target runtime:     Node 26.1+
-canonical baseline:          Node 26.1+
-Node 25:                     skip except quick probes
-```
-
-### Runtime Alternatives
-
-Other JavaScript runtimes are useful comparators, but none should replace Node
-as SF's primary runtime right now.
-
-SF's current runtime shape is Node-native:
-
-- npm workspaces and `package-lock.json`
-- Next.js standalone web host
-- Vitest and Node test-runner compatibility scripts
-- Rust N-API `.node` addons
-- `node-pty` native assets in the web host
-- `node:` built-ins across CLI, scripts, packages, and web services
-- child process, TTY, stream, module loader, and extension-loader behavior
-- installed runtime sync into `~/.sf/agent`
-
-#### Bun
-
-Bun is the strongest speed and developer-experience competitor.
-
-Useful:
-
-- fast package install and script startup
-- broad Node API compatibility
-- built-in TypeScript, test runner, shell, SQLite, YAML, TOML, JSONL, and other
-  convenience APIs
-- Node-API support is substantial enough to use as a compatibility probe
-
-Not primary for SF:
-
-- Bun's own docs say compatibility reflects Node v23, while SF is targeting
-  Node 26.
-- Some core APIs are partial or behaviorally different: `child_process`, module
-  loader hooks, `node:v8`, `node:test`, `node:sqlite`, `worker_threads`, and
-  inspector/debugger areas are not exact Node.
-- SF's highest-risk paths are exactly the places where "almost Node" can hurt:
-  TTY, child processes, native addons, Next standalone output, loaders, and
-  extension runtime.
-
-Decision: use Bun only for optional speed probes or isolated tooling. Do not
-make it the SF runtime until full `npm test`, web build, native build, smoke
-tests, and installed extension runtime all pass under Bun without special
-cases.
-
-#### Deno
-
-Deno has the best security and integrated-toolchain story.
-
-Useful:
-
-- explicit permissions model
-- first-class TypeScript and web standards
-- npm/package.json compatibility
-- Node-API support when local `node_modules` and FFI permission are enabled
-- good target for thinking about sandboxing and permission profiles
-
-Not primary for SF:
-
-- Deno still becomes a Node-compatibility mode for a repo like SF.
-- Deno docs recommend local `node_modules` for frameworks like Next.js and for
-  Node-API addons, which means SF would keep most Node/npm complexity anyway.
-- Native addons require local `node_modules` plus `--allow-ffi`.
-- The value would be security posture and packaging experiments, not simpler
-  runtime execution.
-
-Decision: study Deno for permission-profile design and maybe future packaged
-headless workers. Do not switch the core SF runtime to Deno.
-
-#### LLRT, WinterJS, Edge Runtimes
-
-These are not fits for SF's primary runtime.
-
-Useful:
-
-- serverless cold-start research
-- constrained worker/edge execution ideas
-- tiny isolated helper tasks
-
-Not primary for SF:
-
-- SF is a long-running local CLI/runtime, not a small stateless Lambda handler.
-- SF needs native addons, process control, TTY, filesystem state, git, shell,
-  Next web host, and Node-compatible package behavior.
-- LLRT is explicitly experimental and evaluation-oriented.
-
-Decision: ignore as primary runtime. Only revisit for isolated future worker
-surfaces.
-
-### Runtime Decision
-
-Node 26 is the target because SF is a Node-native agent runtime, not a generic
-JavaScript app.
-
-Use alternatives this way:
-
-```text
-Node 26 -> primary runtime and baseline
-Bun     -> speed/compatibility probe, not runtime
-Deno    -> permission/sandbox design reference, not runtime
-LLRT    -> ignore except tiny serverless worker research
-```
-
-The rule is simple: if a runtime cannot run the exact SF stack without special
-cases, it is not stronger for SF. Node 26 makes the existing SF stack stronger;
-alternative runtimes mostly make a different stack.
-
-Required Node 26 gate:
-
-```text
-node@26 --version
-npm run lint
-npm run typecheck:extensions
-npm run build
-npm test
-sf --version
-sf --help
-sf --print "ping"
-```
-
-SF already requires Node 26.1+ in `engines.node`; the remaining work is to keep
-the gates green under Node 26 and replace fragile `Date`/millisecond logic with
-Temporal in the schedule, lease, journal, and background task surfaces.
-
----
-
-## Appendix A: Related Source Files
-
-This section maps the concepts in this document to actual code in the repo.
-
-### A.1 Operating Model (Already Exists)
-
-**File:** `src/resources/extensions/sf/operating-model.js`
-
-Already exports canonical vocabulary:
-
-```js
-export const RUN_CONTROL_MODES = ["manual", "assisted", "autonomous"];
-export const PERMISSION_PROFILES = ["restricted", "normal", "trusted", "unrestricted"];
-```
-
-Tests: `src/resources/extensions/sf/tests/operating-model.test.mjs`
-
-**Gap:** No `workMode` or `modelMode` constants yet. Add to this file.
-
-### A.2 Execution Policy (Already Exists)
-
-**File:** `src/resources/extensions/sf/execution-policy.js`
-
-Maps permission profiles to concrete tool restrictions:
-
-```js
-EXECUTION_POLICY_PROFILES = {
-  restricted: { filesystem: "read-mostly", network: "read-only", git: "read-only", mutation: "planning-artifacts-only" },
-  normal:     { filesystem: "workspace-write", network: "allowed", git: "normal", mutation: "workspace" },
-  trusted:    { filesystem: "workspace-write", network: "allowed", git: "normal", mutation: "workspace" },
-  unrestricted: { filesystem: "danger-full-access", network: "allowed", git: "dangerous", mutation: "host" }
-};
-```
-
-**Status:** Wired to tool-call boundaries via `bootstrap/register-hooks.js` `tool_call` hook. `classifyExecutionPolicyCall()` reads `session.permissionProfile` to block destructive commands when `restricted`/`normal`. Enforcement is unified at the hook level.
-
-### A.3 Auto Session State (Already Exists)
-
-**File:** `src/resources/extensions/sf/auto/session.js`
-
-`AutoSession` class holds:
-- `active`, `paused`, `stepMode`, `canAskUser`
-- `currentUnit`, `currentMilestoneId`
-- `autoModeStartModel`, `currentUnitModel`
-
-**Status:** `workMode`, `runControl`, `permissionProfile`, `modelMode`, `surface`, and `modeUpdatedAt` are all durable properties on `AutoSession`. Persisted to SQLite `session_mode_state` table on every transition. Loaded from DB on construction.
-
-### A.4 Command Registration (Already Exists)
-
-**File:** `src/resources/extensions/sf/commands/index.js`
-
-Registers direct commands via `pi.registerCommand()`:
-
-```js
-for (const command of DIRECT_SF_COMMANDS) {
-  pi.registerCommand(command.cmd, { ... });
-}
-```
-
-**File:** `src/resources/extensions/sf/commands/catalog.js`
-
-Defines `TOP_LEVEL_SUBCOMMANDS` and `DIRECT_SF_COMMANDS`.
-
-**Status:** Direct commands implemented (`/mode`, `/control`, `/trust`, `/model-mode`, `/repair`, `/tasks`, `/skills`). `/sf` is not registered; the shell executable remains `sf`.
-
-### A.5 TUI Extension (Already Exists)
-
-**File:** `src/resources/extensions/sf-tui/index.js`
-
-Registers shortcuts:
-- `Ctrl+Alt+H` — prompt history
-- `Ctrl+Shift+H` — prompt history fallback
-- `Ctrl+Alt+M` — marketplace
-
-**File:** `src/resources/extensions/sf-tui/header.js`
-
-Renders header with project name, branch, model. No mode badge yet.
-
-**File:** `src/resources/extensions/sf-tui/footer.js`
-
-Renders footer with git status, cost, context usage. No mode badge yet.
-
-**File:** `src/resources/extensions/sf-tui/extension-manifest.json`
-
-Declares hooks: `session_start`, `session_switch`, `before_agent_start`, `tool_result`, `agent_start`, `agent_end`.
-
-**Status:** Mode badge implemented in TUI header and footer. Compact `[B∞TS]` form at <80 cols, full `build · autonomous · trusted · smart` at ≥80 cols. Paused state dims all badge parts and shows `P!` (compact) or `paused ·` (full) prefix. `renderModeBadge` exported from header.js and shared with footer via `FOOTER_THEME` adapter. `getMode()` surfaces `session.paused` on the returned mode object.
-
-### A.6 UOK Parity Report (Already Uses runControl)
-
-**File:** `src/resources/extensions/sf/tests/uok-parity-report.test.mjs`
-
-Tests verify `runControl` and `permissionProfile` in UOK events:
-
-```js
-assert.equal(events[0].runControl, "autonomous");
-assert.equal(events[0].permissionProfile, "normal");
-```
-
-**Status:** `workMode` and `modelMode` added to AutoSession. Journal logging emits `mode-transition` events. UOK kernel includes both in `lifecycleFlags` and audit envelope payload.
-
-### A.7 Routing History (Already Exists)
-
-**File:** `src/resources/extensions/sf/routing-history.js`
-
-Tracks model tier success/failure per task pattern.
-
-**Status:** Connected. `modelModeToTier()` / `tierToModelMode()` bridge in `operating-model.js`. `classifyUnitComplexity()` signature includes `modelMode`. `deep` floors at `heavy`, `fast` caps at `light`.
-
-### A.8 Doctor System (Already Exists)
-
-**File:** `src/resources/extensions/sf/doctor.js`
-**File:** `src/resources/extensions/sf/doctor-proactive.js`
-**File:** `src/resources/extensions/sf/doctor-checks.js`
-
-Health checks, auto-fix, proactive monitoring.
-
-**Status:** `/repair` command switches to `repair` work mode and runs doctor fix. Auto-transitions to repair allowed when health gates fail.
-
-### A.9 Self-Feedback (Already Exists)
-
-**File:** `src/resources/extensions/sf/self-feedback.js`
-
-Records anomalies, blocking entries, version-bump resolution.
-
-**Status:** Connected. `self-feedback-drain.js` auto-transitions to `repair` workMode when high/critical self-feedback is dispatched for inline-fix. Reason: `"self-feedback-drain"`.
-
-### A.10 Skills (Partially Exists)
-
-**File:** `src/resources/extensions/sf/skill-discovery.js`
-**File:** `src/resources/extensions/sf/skill-health.js`
-**File:** `src/resources/extensions/sf/skill-telemetry.js`
-
-Skill loading, health monitoring, telemetry.
-
-**Status:** `.agents/skills/` directory structure implemented with YAML frontmatter parser, validation, skill loader, and auto-creation flow. Auto-creation detects patterns from activity logs (≥3 occurrences) and generates skills with a SQLite-backed cooldown. Sample skills created: `forge-command-surface`, `forge-autonomous-runtime`.
-
----
-
-## Appendix B: Implementation Priority
-
-| Priority | Item | Files to Touch | Effort |
-|----------|------|----------------|--------|
-| P0 | Add `workMode` + `modelMode` to `operating-model.js` | `operating-model.js`, `operating-model.test.mjs` | Small ✓ |
-| P0 | Add `workMode` to `AutoSession` | `auto/session.js`, `auto.js` | Small ✓ |
-| P0 | Add mode badge to TUI header | `sf-tui/header.js`, `sf-tui/index.js` | Small ✓ |
-| P0 | Add mode-switching shortcuts | `sf-tui/index.js`, `extension-manifest.json` | Small ✓ |
-| P0 | Remove `/sf` namespace registration | `commands/catalog.js`, `commands/index.js` | Medium ✓ |
-| P1 | Add `/mode`, `/control`, `/trust`, `/model-mode` commands | `commands/handlers/*.js`, `commands/catalog.js` | Medium ✓ |
-| P1 | Wire `execution-policy.js` to tool boundaries | `execution-policy.js`, `bootstrap/register-hooks.js` | Medium ✓ |
-| P1 | Add `/tasks` background work surface | `commands/handlers/tasks.js` | Medium ✓ |
-| P1 | Make `repair` first-class work mode | `commands/handlers/ops.js`, `commands/handlers/core.js` | Medium ✓ |
-| P2 | Add `.agents/skills/` structure | `skills/*.js`, `.agents/skills/` | Medium ✓ |
-| P2 | Add skill YAML frontmatter parser | `skills/frontmatter.js` | Small ✓ |
-| P2 | Add skill eval harness | `skills/eval-harness.js`, eval templates | Medium ✓ |
-| P2 | Adopt Temporal in `sf schedule` | `temporal-foundation.js` | Medium ✓ |
-| P2 | Node 26 baseline | `temporal-foundation.js` native Temporal wrapper | Medium ✓ |
-
----
-
-## Appendix C: Open Questions — Resolved
-
-1. **Paused badge** → `[P!BATS]` in compact form; `paused · build · assisted · normal · smart` in full form. All parts dim when paused. Implemented in `renderModeBadge`.
-2. **Mode per-session or per-project?** → Per-session. Mode is a runtime posture for the current work, not a project-level config.
-3. **Badge in tmux/terminal window titles?** → Terminal title already handled via OSC escape in `auto/session.js`. Tmux requires users to set `set-option -g set-titles on` — SF does not inject tmux config.
-4. **Mode transitions with sound/notification?** → No. A terminal tool has no appropriate sound channel. The badge is the sole visibility mechanism.
-5. **`repair` auto-transition: ask by default for new projects?** → No. Auto-transition is correct behavior for autonomous runs. Only if `runControl` is `manual` or `assisted` is the transition blocked.
-6. **Skill eval cases: run in CI or on-demand?** → On-demand only. Gate with `SF_SKILL_EVALS=1` env var. CI is too slow and model-dependent for skill evals.
-7. **`/tasks`: TUI overlay or separate scrollable panel?** → Inline output (current). A full panel requires `pi-tui` overlay support that is not yet built.
-8. **`modelMode` replace or supplement tier system?** → Supplement via `modelModeToTier()` bridge. Direct model selection overrides `modelMode`; `modelMode` guides routing when no explicit model is set.
diff --git a/packages/coding-agent/src/modes/interactive/interactive-mode.ts b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
index a5de30e24..f4e8cd4c0 100644
--- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts
+++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
@@ -1929,6 +1929,31 @@ export class InteractiveMode {
 		this.extensionTerminalInputUnsubscribers.clear();
 	}
 
+	/**
+	 * Register an extension-scoped terminal input listener.
+	 *
+	 * Purpose: allow extensions (e.g. the SF autonomous extension) to intercept
+	 * raw terminal input before it reaches the editor, so that special keys like
+	 * Ctrl+C can trigger extension actions (e.g. pause autonomous mode) rather
+	 * than always going to the default editor clear handler.
+	 *
+	 * Return `{ consume: true }` from the handler to stop the key from being
+	 * processed further. Return `undefined` or `{}` to let it propagate.
+	 *
+	 * Consumer: extension-ui-controller → ctx.ui.onTerminalInput.
+	 */
+	addExtensionTerminalInputListener(
+		handler: (data: string) => { consume?: boolean } | undefined,
+	): () => void {
+		const listener = (data: string) => handler(data);
+		const unsubscribe = this.ui.addInputListener(listener);
+		this.extensionTerminalInputUnsubscribers.add(unsubscribe);
+		return () => {
+			unsubscribe();
+			this.extensionTerminalInputUnsubscribers.delete(unsubscribe);
+		};
+	}
+
 	/**
 	 * Create the ExtensionUIContext for extensions.
 	 */
diff --git a/packages/tui/src/ink-bridge.tsx b/packages/tui/src/ink-bridge.tsx
index 3c4038dc9..1041ee00e 100644
--- a/packages/tui/src/ink-bridge.tsx
+++ b/packages/tui/src/ink-bridge.tsx
@@ -42,19 +42,29 @@ function LegacyComponentView({
  *
  * Purpose: accept keyboard input from Ink and route it to the active
  * component, then trigger a re-render so the updated state is displayed.
+ * Invalidation is event-driven: external callers invoke the returned
+ * invalidate() handle, which fires the tick signal registered here.
  *
  * Consumer: startInkRenderer.
  */
 function InkApp({
 	root,
 	onInput,
+	onRegisterTick,
 }: {
 	root: Component;
 	onInput: (data: string) => void;
+	onRegisterTick: (tick: () => void) => void;
 }) {
 	const [, tick] = useState(0);
 	const { columns } = useWindowSize();
 
+	// Register the tick function so that startInkRenderer's invalidate() can
+	// trigger a React re-render without a polling interval.
+	useEffect(() => {
+		onRegisterTick(() => tick((n) => n + 1));
+	}, [onRegisterTick]);
+
 	useInput((input, key) => {
 		// Reconstruct the escape sequences that the legacy key handlers expect.
 		let data = input;
@@ -70,12 +80,6 @@ function InkApp({
 		tick((n) => n + 1);
 	});
 
-	// Poll at 20 fps so async state changes (e.g. streaming output) appear promptly.
-	useEffect(() => {
-		const interval = setInterval(() => tick((n) => n + 1), 50);
-		return () => clearInterval(interval);
-	}, []);
-
 	return <LegacyComponentView component={root} width={columns ?? 80} />;
 }
 
@@ -84,10 +88,11 @@ function InkApp({
  *
  * Purpose: drop-in replacement for the legacy TUI render engine. Mounting
  * this drives the entire Ink React tree and forwards terminal input to
- * the root Component's handleInput chain.
+ * the root Component's handleInput chain. invalidate() triggers an
+ * immediate React re-render via an event-driven tick signal — no polling.
  *
- * Consumer: TUI class (future integration); standalone callers can use
- * this directly to render any Component tree under Ink.
+ * Consumer: TUI class; standalone callers can use this to render any
+ * Component tree under Ink.
  *
  * @param root     - The root Component whose render() output fills the screen.
  * @param onInput  - Called with each decoded key string for legacy handlers.
@@ -97,13 +102,22 @@ export function startInkRenderer(
 	root: Component,
 	onInput: (data: string) => void,
 ): { stop: () => void; invalidate: () => void } {
+	// Set by InkApp via onRegisterTick once the React tree has mounted.
+	// invalidate() calls it to schedule a re-render; it is a no-op while null.
+	let _tick: (() => void) | null = null;
+	const onRegisterTick = (tick: () => void) => {
+		_tick = tick;
+	};
+
 	const { unmount } = render(
-		<InkApp root={root} onInput={onInput} />,
+		<InkApp root={root} onInput={onInput} onRegisterTick={onRegisterTick} />,
 		{ exitOnCtrlC: false },
 	);
 	return {
-		stop: unmount,
-		// Ink re-renders automatically; manual invalidation is a no-op for now.
-		invalidate: () => {},
+		stop: () => {
+			_tick = null;
+			unmount();
+		},
+		invalidate: () => _tick?.(),
 	};
 }
diff --git a/packages/tui/src/tui.ts b/packages/tui/src/tui.ts
index 2c5f916ca..c1d9a569a 100644
--- a/packages/tui/src/tui.ts
+++ b/packages/tui/src/tui.ts
@@ -420,9 +420,16 @@ export class TUI extends Container {
 		if (!this.terminal.isTTY) {
 			return;
 		}
-		// Ink-backed render path: Ink manages raw mode and input; the legacy
-		// differential renderer is bypassed entirely.
-		if (this._useInk || process.stdout.isTTY) {
+		// Ink-backed render path: Ink manages raw mode, input, and screen output,
+		// so the legacy differential renderer (doRender) is bypassed entirely.
+		// Taken when stdout is a real TTY (Ink needs one to mount) or when the
+		// caller opted in via useInk() for non-standard terminal configurations.
+		// PI_LEGACY_TUI=1 overrides both conditions and forces the legacy
+		// renderer, which is useful for debugging.
+		if (
+			(this._useInk || process.stdout.isTTY) &&
+			process.env.PI_LEGACY_TUI !== "1"
+		) {
 			// Wrap `this` in a plain Component so the private handleInput doesn't
 			// conflict with the public Component.handleInput? signature.
 			const root: Component = {
@@ -506,6 +513,12 @@ export class TUI extends Container {
 	requestRender(force = false): void {
 		// Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095)
 		if (!this.terminal.isTTY) return;
+		// Ink-backed path: Ink owns the terminal — delegate to the Ink handle and
+		// do NOT call doRender(), which would write conflicting ANSI escapes.
+		if (this._inkHandle) {
+			this._inkHandle.invalidate();
+			return;
+		}
 		if (force) {
 			this.previousLines = [];
 			this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear
diff --git a/src/cli-status.ts b/src/cli-status.ts
index f91aaab29..b91504374 100644
--- a/src/cli-status.ts
+++ b/src/cli-status.ts
@@ -11,6 +11,7 @@ import type { QuerySnapshot } from "./headless-query.js";
 
 interface StatusArgs {
 	watch: boolean;
+	recoveryUnitId?: string;
 }
 
 interface StatusDeps {
@@ -27,6 +28,12 @@ interface CurrentModel {
 
 function parseStatusArgs(argv: string[]): StatusArgs {
 	const args = argv.slice(1);
+	if (args[0] === "recovery") {
+		return {
+			watch: false,
+			recoveryUnitId: args[1] ?? "", // "" = no unit given → most recent record
+		};
+	}
 	return {
 		watch: args.includes("--watch"),
 	};
@@ -219,6 +226,76 @@ async function buildStatusText(
 	});
 }
 
+async function renderRecoveryDiagnostics(
+	basePath: string,
+	unitId: string | undefined,
+	stdout: Pick<typeof process.stdout, "write">,
+	stderr: Pick<typeof process.stderr, "write">,
+): Promise<number> {
+	try {
+		const { getRecoveryDiagnostics, listUnitRuntimeRecords } = await import(
+			"./resources/extensions/sf/uok/unit-runtime.js"
+		);
+		let targetUnitId = unitId;
+		if (!targetUnitId) {
+			const records: Array<{ updatedAt?: number; unitId: string }> =
+				listUnitRuntimeRecords(basePath);
+			const mostRecent = records.sort(
+				(a, b) => (b.updatedAt ?? 0) - (a.updatedAt ?? 0),
+			)[0];
+			if (!mostRecent) {
+				stderr.write("sf status recovery: no runtime records found\n");
+				return 1;
+			}
+			targetUnitId = mostRecent.unitId;
+		}
+		const diagnostics = getRecoveryDiagnostics(
+			basePath,
+			"execute-task",
+			targetUnitId,
+		);
+		if (!diagnostics) {
+			stderr.write(
+				`sf status recovery: no runtime record for ${targetUnitId}\n`,
+			);
+			return 1;
+		}
+		const lines: string[] = [];
+		lines.push("Recovery Diagnostics");
+		lines.push("--------------------");
+		lines.push(`Unit:      ${diagnostics.unitType} ${diagnostics.unitId}`);
+		lines.push(`Status:    ${diagnostics.status}`);
+		lines.push(
+			`Retries:   ${diagnostics.retryCount}/${diagnostics.maxRetries}`,
+		);
+		lines.push(
+			`Progress:  ${diagnostics.progressCount} (${diagnostics.lastProgressKind})`,
+		);
+		lines.push(`Recovery attempts: ${diagnostics.recoveryAttempts}`);
+		if (diagnostics.lastRecoveryReason) {
+			lines.push(`Last recovery reason: ${diagnostics.lastRecoveryReason}`);
+		}
+		if (diagnostics.lineageSummary) {
+			lines.push(
+				`Lineage:   ${diagnostics.lineageSummary.status} · ${diagnostics.lineageSummary.workerCount} worker(s) · ${diagnostics.lineageSummary.eventCount} event(s)`,
+			);
+		}
+		lines.push(
+			`Started:   ${diagnostics.startedAt ? new Date(diagnostics.startedAt).toISOString() : "n/a"}`,
+		);
+		lines.push(
+			`Updated:   ${diagnostics.updatedAt ? new Date(diagnostics.updatedAt).toISOString() : "n/a"}`,
+		);
+		stdout.write(lines.join("\n") + "\n");
+		return 0;
+	} catch (err) {
+		stderr.write(
+			`sf status recovery: ${err instanceof Error ? err.message : String(err)}\n`,
+		);
+		return 1;
+	}
+}
+
 export async function runStatusCli(
 	argv: string[],
 	deps: StatusDeps,
@@ -228,6 +305,15 @@ export async function runStatusCli(
 	const sfHome = deps.sfHome ?? process.env.SF_HOME ?? join(homedir(), ".sf");
 	const args = parseStatusArgs(argv);
 
+	if (args.recoveryUnitId !== undefined) {
+		return renderRecoveryDiagnostics(
+			deps.basePath,
+			args.recoveryUnitId,
+			stdout,
+			stderr,
+		);
+	}
+
 	const renderOnce = async () => {
 		try {
 			const text = await buildStatusText(deps.basePath, sfHome);
diff --git a/src/resources/extensions/mcp-client/index.js b/src/resources/extensions/mcp-client/index.js
index a1fff2b51..b5af83f54 100644
--- a/src/resources/extensions/mcp-client/index.js
+++ b/src/resources/extensions/mcp-client/index.js
@@ -94,6 +94,32 @@ function getServerConfig(name) {
 		(s) => s.name === trimmed || s.name.toLowerCase() === trimmed.toLowerCase(),
 	);
 }
+const SAFE_CHILD_ENV_KEYS = new Set([
+	"PATH",
+	"HOME",
+	"USER",
+	"LOGNAME",
+	"SHELL",
+	"LANG",
+	"LC_ALL",
+	"LC_CTYPE",
+	"LC_MESSAGES",
+	"LC_NUMERIC",
+	"LC_TIME",
+	"TMPDIR",
+	"TMP",
+	"TEMP",
+	"TZ",
+	"TERM",
+	"COLORTERM",
+]);
+function buildChildEnv(configEnv) {
+	const safe = {};
+	for (const key of SAFE_CHILD_ENV_KEYS) {
+		if (process.env[key] !== undefined) safe[key] = process.env[key];
+	}
+	return { ...safe, ...resolveEnv(configEnv ?? {}) };
+}
 /** Resolve ${VAR} references in env values against process.env. */
 function resolveEnv(env) {
 	const resolved = {};
@@ -210,9 +236,7 @@ async function getOrConnect(name, signal) {
 		transport = new StdioClientTransport({
 			command: config.command,
 			args: config.args,
-			env: config.env
-				? { ...process.env, ...resolveEnv(config.env) }
-				: undefined,
+			env: buildChildEnv(config.env),
 			cwd: config.cwd,
 			stderr: "pipe",
 		});
@@ -234,23 +258,27 @@ async function getOrConnect(name, signal) {
 			`Server "${config.name}" has unsupported transport: ${config.transport}`,
 		);
 	}
-	await client.connect(transport, { signal, timeout: 30000 });
+	try {
+		await client.connect(transport, { signal, timeout: 30000 });
+	} catch (err) {
+		try { await transport.close(); } catch { /* best-effort */ }
+		try { await client.close(); } catch { /* best-effort */ }
+		throw err;
+	}
 	connections.set(config.name, { client, transport });
 	return client;
 }
 async function closeAll() {
 	const closing = Array.from(connections.entries()).map(
 		async ([name, conn]) => {
-			try {
-				await conn.client.close();
-			} catch {
-				// Best-effort cleanup
-			}
+			try { await conn.transport.close(); } catch { /* best-effort */ }
+			try { await conn.client.close(); } catch { /* best-effort */ }
 			connections.delete(name);
 		},
 	);
 	await Promise.allSettled(closing);
 	toolCache.clear();
+	autoRegisteredServers.clear();
 }
 // ─── Formatters ───────────────────────────────────────────────────────────────
 function formatServerList(servers) {
@@ -312,31 +340,8 @@ export function getConnectionStatus(name) {
 	};
 }
 // ─── Test-exported helpers ────────────────────────────────────────────────────
-const SAFE_CHILD_ENV_KEYS = new Set([
-	"PATH",
-	"HOME",
-	"USER",
-	"LOGNAME",
-	"SHELL",
-	"LANG",
-	"LC_ALL",
-	"LC_CTYPE",
-	"LC_MESSAGES",
-	"LC_NUMERIC",
-	"LC_TIME",
-	"TMPDIR",
-	"TMP",
-	"TEMP",
-	"TZ",
-	"TERM",
-	"COLORTERM",
-]);
 export function _buildMcpChildEnvForTest(env) {
-	const safe = {};
-	for (const key of SAFE_CHILD_ENV_KEYS) {
-		if (process.env[key] !== undefined) safe[key] = process.env[key];
-	}
-	return { ...safe, ...resolveEnv(env) };
+	return buildChildEnv(env);
 }
 export function _buildMcpTrustConfirmOptionsForTest(signal) {
 	return { timeout: 120_000, signal };
diff --git a/src/resources/extensions/sf/auto-prompts.js b/src/resources/extensions/sf/auto-prompts.js
index abb2a9add..eb7799ed1 100644
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@@ -78,6 +78,7 @@ import {
 	buildSliceSummaryExcerpt,
 	getDependencyTaskSummaryPaths,
 	getPriorTaskSummaryPaths,
+	extractSliceExecutionExcerpt,
 } from "./summary-helpers.js";
 import { composeInlinedContext } from "./unit-context-composer.js";
 import { getUatType } from "./verdict-parser.js";
@@ -336,7 +337,7 @@ export function buildSourceFilePaths(base, mid, sid) {
  * If parsing fails (unrecognizable frontmatter, missing id, etc.) the
  * function falls back to `inlineFile` so the closer loses no information.
  */
-// Re-exported from summary-helpers.js:
+// Imported from summary-helpers.js:
 // - buildSliceSummaryExcerpt, getPriorTaskSummaryPaths
 // - getDependencyTaskSummaryPaths, isSummaryCleanForSkip
 // - extractSliceExecutionExcerpt
diff --git a/src/resources/extensions/sf/auto-verification.js b/src/resources/extensions/sf/auto-verification.js
index 6961d2509..1b8024d8d 100644
--- a/src/resources/extensions/sf/auto-verification.js
+++ b/src/resources/extensions/sf/auto-verification.js
@@ -32,6 +32,10 @@ import { UokGateRunner } from "./uok/gate-runner.js";
 import { MultiPackageGate } from "./uok/multi-package-gate.js";
 import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
 import { SecurityGate } from "./uok/security-gate.js";
+import {
+	formatExecuteTaskRecoveryStatus,
+	inspectExecuteTaskDurability,
+} from "./uok/unit-runtime.js";
 import { extractVerdict } from "./verdict-parser.js";
 import { writeVerificationJSON } from "./verification-evidence.js";
 import {
@@ -42,6 +46,38 @@ import {
 } from "./verification-gate.js";
 import { logError, logWarning } from "./workflow-logger.js";
 
+function computeTokenCountFromSession(ctx) {
+	const entries = ctx.sessionManager?.getEntries?.() ?? [];
+	let total = 0;
+	for (const entry of entries) {
+		if (entry.type !== "message") continue;
+		const msg = entry.message;
+		if (!msg || msg.role !== "assistant") continue;
+		if (msg.usage?.totalTokens != null) {
+			total += msg.usage.totalTokens;
+		}
+	}
+	return total;
+}
+
+function getMemoryPressureMB() {
+	try {
+		const mem = process.memoryUsage();
+		return Math.round(mem.heapUsed / 1024 / 1024);
+	} catch {
+		return undefined;
+	}
+}
+
+function buildGateOutcomesSummary(gateIds, gateResults) {
+	if (!gateIds || !gateResults || gateIds.length === 0) return undefined;
+	const outcomes = {};
+	for (let i = 0; i < gateIds.length; i++) {
+		outcomes[gateIds[i]] = gateResults[i]?.outcome ?? "unknown";
+	}
+	return outcomes;
+}
+
 function isInfraVerificationFailure(stderr) {
 	return /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i.test(
 		stderr,
@@ -259,6 +295,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 	}
 	// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
 	// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
+	let gateIds = [];
+	let gateResults = [];
 	try {
 		if (uokFlags.gates) {
 			const gateRunner = new UokGateRunner();
@@ -304,8 +342,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 				iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
 			};
 
-			const gateIds = gateRunner.list().map((g) => g.id);
-			const gateResults = await Promise.all(
+			gateIds = gateRunner.list().map((g) => g.id);
+			gateResults = await Promise.all(
 				gateIds.map((id) =>
 					gateRunner
 						.run(id, {
@@ -434,13 +472,39 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 		}
 		// Write verification evidence JSON
 		const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
+		const tokenCount = computeTokenCountFromSession(ctx);
+		const memoryPressureMB = getMemoryPressureMB();
+		const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
+		let recoveryStatus;
+		try {
+			const durability = await inspectExecuteTaskDurability(
+				s.basePath,
+				s.currentUnit.id,
+			);
+			if (durability) {
+				recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
+			}
+		} catch {
+			recoveryStatus = undefined;
+		}
 		if (mid && sid && tid) {
 			try {
 				const sDir = resolveSlicePath(s.basePath, mid, sid);
 				if (sDir) {
 					const tasksDir = join(sDir, "tasks");
 					if (result.passed) {
-						writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
+						writeVerificationJSON(
+							result,
+							tasksDir,
+							tid,
+							s.currentUnit.id,
+							undefined,
+							undefined,
+							tokenCount,
+							memoryPressureMB,
+							gateOutcomes,
+							recoveryStatus,
+						);
 					} else {
 						const nextAttempt = attempt + 1;
 						writeVerificationJSON(
@@ -450,6 +514,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 							s.currentUnit.id,
 							nextAttempt,
 							maxRetries,
+							tokenCount,
+							memoryPressureMB,
+							gateOutcomes,
+							recoveryStatus,
 						);
 					}
 				}
@@ -617,6 +685,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 						postExecChecks,
 						postExecBlockingFailure ? attempt + 1 : undefined,
 						postExecBlockingFailure ? maxRetries : undefined,
+						tokenCount,
+						memoryPressureMB,
+						gateOutcomes,
+						recoveryStatus,
 					);
 				}
 			} catch (evidenceErr) {
@@ -703,6 +775,10 @@ function writeVerificationJSONWithPostExec(
 	postExecutionChecks,
 	retryAttempt,
 	maxRetries,
+	tokenCount,
+	memoryPressureMB,
+	gateOutcomes,
+	recoveryStatus,
 ) {
 	mkdirSync(tasksDir, { recursive: true });
 	const evidence = {
@@ -720,6 +796,10 @@ function writeVerificationJSONWithPostExec(
 		})),
 		...(retryAttempt !== undefined ? { retryAttempt } : {}),
 		...(maxRetries !== undefined ? { maxRetries } : {}),
+		...(tokenCount !== undefined ? { tokenCount } : {}),
+		...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
+		...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
+		...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
 		postExecutionChecks,
 	};
 	if (result.runtimeErrors && result.runtimeErrors.length > 0) {
diff --git a/src/resources/extensions/sf/auto.js b/src/resources/extensions/sf/auto.js
index 648452524..a197c9632 100644
--- a/src/resources/extensions/sf/auto.js
+++ b/src/resources/extensions/sf/auto.js
@@ -211,6 +211,33 @@ export {
 // Tests in auto-session-encapsulation.test.ts enforce this invariant.
 // ─────────────────────────────────────────────────────────────────────────────
 const s = getAutoSession();
+/** Unsubscribe function for the Ctrl+C → pause intercept registered on autonomous start. */
+let _ctrlCUnsubscribe = null;
+/**
+ * Register a terminal input listener that routes Ctrl+C ("\x03") to pauseAuto()
+ * while s.active is set, instead of letting it silently clear the editor.
+ * Replaces any earlier listener; a rejected pause is reported via ui.notify.
+ *
+ * Purpose: give the user a reliable single-keypress escape from a running
+ * autonomous loop without requiring the double-press exit threshold.
+ */
+function registerCtrlCInterceptor(ctx) {
+	_unregisterCtrlCInterceptor();
+	if (typeof ctx?.ui?.onTerminalInput !== "function") return;
+	_ctrlCUnsubscribe = ctx.ui.onTerminalInput((data) => {
+		if (data !== "\x03" || !s.active) return undefined;
+		ctx.ui.notify("Ctrl+C received — pausing autonomous mode.", "info");
+		const onFail = (err) => ctx.ui.notify(`Pause failed: ${err?.message ?? err}`, "error");
+		Promise.resolve(pauseAuto(ctx, null, "ctrl-c-interrupt")).catch(onFail);
+		return { consume: true };
+	});
+}
+/** Detach the Ctrl+C listener if one is registered, then drop its handle. */
+function _unregisterCtrlCInterceptor() {
+	if (!_ctrlCUnsubscribe) return;
+	_ctrlCUnsubscribe();
+	_ctrlCUnsubscribe = null;
+}
 /** Throttle STATE.md rebuilds — at most once per 30 seconds */
 const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
 function captureProjectRootEnv(projectRoot) {
@@ -704,6 +731,7 @@ function cleanupAfterLoopExit(ctx) {
 	s.currentUnit = null;
 	s.active = false;
 	s.runControl = "manual";
+	_unregisterCtrlCInterceptor();
 	deactivateSF();
 	clearUnitTimeout();
 	restoreProjectRootEnv();
@@ -747,6 +775,7 @@ function cleanupAfterLoopExit(ctx) {
 }
 export async function stopAuto(ctx, pi, reason) {
 	if (!s.active && !s.paused) return;
+	_unregisterCtrlCInterceptor();
 	const loadedPreferences = loadEffectiveSFPreferences()?.preferences;
 	const reasonSuffix = reason ? ` — ${reason}` : "";
 	try {
@@ -1677,6 +1706,7 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
 		s.runControl = requestedStepMode ? "assisted" : "autonomous";
 		s.cmdCtx = ctx;
 		s.basePath = base;
+		registerCtrlCInterceptor(ctx);
 		// Ensure the workflow-logger audit log is pinned to the project root
 		// even when autonomous mode is entered via a path that bypasses the
 		// bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
@@ -1943,6 +1973,7 @@ export async function dispatchHookUnit(
 		s.autoStartTime = Date.now();
 		s.currentUnit = null;
 		s.pendingQuickTasks = [];
+		registerCtrlCInterceptor(hookCtx);
 	}
 	const hookUnitType = `hook/${hookName}`;
 	const hookStartedAt = Date.now();
diff --git a/src/resources/extensions/sf/commands/handlers/autonomous.js b/src/resources/extensions/sf/commands/handlers/autonomous.js
index 667925d47..a308b130c 100644
--- a/src/resources/extensions/sf/commands/handlers/autonomous.js
+++ b/src/resources/extensions/sf/commands/handlers/autonomous.js
@@ -116,6 +116,10 @@ export async function handleAutonomousCommand(trimmed, ctx, pi) {
 		});
 		return true;
 	}
+	if (trimmed === "stop") {
+		await stopAutonomousRun(ctx, pi);
+		return true;
+	}
 	if (isAutonomousVerb) {
 		const autonomousArgsText = trimmed.replace(/^autonomous\b/, "").trim();
 		if (autonomousArgsText === "stop") {
diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js
index 349071a64..96c02541f 100644
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@@ -4212,7 +4212,8 @@ function hasTaskSpecIntent(planning = {}) {
 }
 function insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning = {}) {
 	if (!hasTaskSpecIntent(planning)) return;
-	const frontmatter = taskFrontmatterFromRecord(planning).normalized;
+	const { normalized: frontmatter, errors } = taskFrontmatterFromRecord(planning);
+	if (errors?.length) logWarning("sf-db:insertTaskSpec", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${errors.join(", ")}`);
 	currentDb
 		.prepare(`INSERT OR IGNORE INTO task_specs (
       milestone_id, slice_id, task_id, verify, inputs, expected_output,
@@ -4433,7 +4434,8 @@ export function setTaskBlockerDiscovered(
 export function upsertTaskPlanning(milestoneId, sliceId, taskId, planning) {
 	if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
 	insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning);
-	const frontmatter = taskFrontmatterFromRecord(planning).normalized;
+	const { normalized: frontmatter, errors: fmErrors } = taskFrontmatterFromRecord(planning);
+	if (fmErrors?.length) logWarning("sf-db:upsertTaskPlanning", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${fmErrors.join(", ")}`);
 	const hasTaskStatus =
 		planning.taskStatus !== undefined ||
 		planning.task_status !== undefined ||
diff --git a/src/resources/extensions/sf/summary-helpers.js b/src/resources/extensions/sf/summary-helpers.js
index d4e248b4e..aca5e7675 100644
--- a/src/resources/extensions/sf/summary-helpers.js
+++ b/src/resources/extensions/sf/summary-helpers.js
@@ -195,3 +195,44 @@ export function isSummaryCleanForSkip(content) {
 		return false;
 	}
 }
+
+function escapeRegExpLocal(value) {
+	return value.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`); // backslash-escape each regex metacharacter
+}
+
+/** Trimmed text under the `## <heading>` line, up to the next `## ` heading; null if absent. */
+function extractMarkdownSectionLocal(content, heading) {
+	const headingLine = new RegExp(`^## ${escapeRegExpLocal(heading)}\\s*$`, "m");
+	const found = headingLine.exec(content);
+	if (found === null) return null;
+	const tail = content.slice(found.index + found[0].length);
+	const stop = tail.search(/^##\s+/m);
+	return (stop === -1 ? tail : tail.slice(0, stop)).trim();
+}
+
+/**
+ * Extract key sections from a slice PLAN.md for use in task execution prompts.
+ * Returns Goal, Demo, Verification, and Observability sections as a compact excerpt.
+ *
+ * Purpose: give task executors the slice-level contract without inlining the full plan.
+ * Consumer: auto-prompts.js buildExecuteTask*.
+ */
+export function extractSliceExecutionExcerpt(content, relPath) {
+	const title = "## Slice Plan Excerpt";
+	if (!content) {
+		return [title, `Slice plan not found at dispatch time. Read \`${relPath}\` before running slice-level verification.`].join("\n");
+	}
+	const planLines = content.split("\n");
+	const excerpt = [title, `Source: \`${relPath}\``];
+	// First line carrying each bold marker, if any, in Goal then Demo order.
+	for (const marker of ["**Goal:**", "**Demo:**"]) {
+		const markerLine = planLines.find((line) => line.startsWith(marker))?.trim();
+		if (markerLine) excerpt.push(markerLine);
+	}
+	// Non-empty `##` sections follow, re-titled as `###` sub-headings.
+	const verification = extractMarkdownSectionLocal(content, "Verification");
+	if (verification) excerpt.push("", "### Slice Verification", verification.trim());
+	const observability = extractMarkdownSectionLocal(content, "Observability / Diagnostics");
+	if (observability) excerpt.push("", "### Slice Observability / Diagnostics", observability.trim());
+	return excerpt.join("\n");
+}
diff --git a/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs b/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs
index 86453fa7a..a6f758716 100644
--- a/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs
+++ b/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs
@@ -13,6 +13,7 @@ import {
 	clearRunawayRecoveredRuntimeRecords,
 	clearUnitRuntimeRecord,
 	decideUnitRuntimeDispatch,
+	getRecoveryDiagnostics,
 	getUnitRuntimeState,
 	isTerminalUnitRuntimeStatus,
 	listUnitRuntimeRecords,
@@ -377,3 +378,72 @@ test("listUnitRuntimeRecords_returns_empty_when_dir_missing", () => {
 	const records = listUnitRuntimeRecords(root);
 	assert.deepEqual(records, []);
 });
+
+// ─── getRecoveryDiagnostics ────────────────────────────────────────────────
+
+test("getRecoveryDiagnostics_returns_null_for_missing_record", () => {
+	// Nothing is written for this unit id, so the lookup must come back null.
+	const projectRoot = makeProject();
+	assert.equal(getRecoveryDiagnostics(projectRoot, "execute-task", "MISSING"), null);
+});
+
+test("getRecoveryDiagnostics_returns_structured_object_for_record_with_recovery", () => {
+	const root = makeProject();
+	const startedAt = Date.now();
+	const unitId = "M001/S01/T01";
+	// Seed a failed record with recovery counters and a single lineage event.
+	writeUnitRuntimeRecord(root, "execute-task", unitId, startedAt, {
+		status: "failed",
+		recoveryAttempts: 2,
+		retryCount: 2,
+		maxRetries: 3,
+		lastRecoveryReason: "timeout",
+		progressCount: 5,
+		lastProgressKind: "checkpoint",
+		lineageEvent: { status: "started", workerSessionId: "worker-1" },
+	});
+	const diagnostics = getRecoveryDiagnostics(root, "execute-task", unitId);
+	assert.ok(diagnostics);
+	const expectedScalars = {
+		unitType: "execute-task",
+		unitId,
+		status: "failed",
+		retryCount: 2,
+		maxRetries: 3,
+		lastRecoveryReason: "timeout",
+		progressCount: 5,
+		lastProgressKind: "checkpoint",
+		recoveryAttempts: 2,
+		startedAt,
+	};
+	for (const [field, expected] of Object.entries(expectedScalars)) {
+		assert.equal(diagnostics[field], expected, field);
+	}
+	// The one lineage event is summarised as one worker and one event.
+	assert.ok(diagnostics.lineageSummary);
+	const { status, workerCount, eventCount } = diagnostics.lineageSummary;
+	assert.deepEqual([status, workerCount, eventCount], ["started", 1, 1]);
+	assert.ok(diagnostics.updatedAt);
+});
+
+test("getRecoveryDiagnostics_returns_minimal_object_for_record_without_recovery", () => {
+	const root = makeProject();
+	const unitId = "M001/S01/T02";
+	// Only a status is stored, so every other field must report its fallback.
+	writeUnitRuntimeRecord(root, "execute-task", unitId, Date.now(), { status: "running" });
+	const diagnostics = getRecoveryDiagnostics(root, "execute-task", unitId);
+	assert.ok(diagnostics);
+	const expectedFields = {
+		status: "running",
+		retryCount: 0,
+		maxRetries: 1,
+		lastRecoveryReason: null,
+		progressCount: 0,
+		lastProgressKind: "dispatch",
+		recoveryAttempts: 0,
+		lineageSummary: null,
+	};
+	for (const [field, expected] of Object.entries(expectedFields)) {
+		assert.equal(diagnostics[field], expected, field);
+	}
+});
diff --git a/src/resources/extensions/sf/uok/unit-runtime.d.ts b/src/resources/extensions/sf/uok/unit-runtime.d.ts
new file mode 100644
index 000000000..19868b577
--- /dev/null
+++ b/src/resources/extensions/sf/uok/unit-runtime.d.ts
@@ -0,0 +1,32 @@
+/**
+ * Type declarations for unit-runtime.js
+ */
+
+/** Snapshot returned by getRecoveryDiagnostics() for one unit runtime record. */
+export interface RecoveryDiagnostics {
+	unitType: string;
+	unitId: string;
+	status: string;
+	retryCount: number;
+	maxRetries: number;
+	/** null when the record stores no lastRecoveryReason. */
+	lastRecoveryReason: string | null;
+	progressCount: number;
+	lastProgressKind: string;
+	recoveryAttempts: number;
+	/** null when the record has no lineage. */
+	lineageSummary: { status: string; workerCount: number; eventCount: number } | null;
+	/** Timestamps copied from the record; null when absent. */
+	updatedAt: number | null;
+	startedAt: number | null;
+}
+
+/**
+ * Recovery diagnostics for one unit's runtime record; null when the record
+ * does not exist.
+ */
+export function getRecoveryDiagnostics(basePath: string, unitType: string, unitId: string): RecoveryDiagnostics | null;
+
+/** Typed loosely: arbitrary record fields plus unitId and an optional updatedAt. */
+export function listUnitRuntimeRecords(basePath: string):
+	(Record<string, unknown> & { updatedAt?: number; unitId: string })[];
diff --git a/src/resources/extensions/sf/uok/unit-runtime.js b/src/resources/extensions/sf/uok/unit-runtime.js
index c0b56c918..468a7455a 100644
--- a/src/resources/extensions/sf/uok/unit-runtime.js
+++ b/src/resources/extensions/sf/uok/unit-runtime.js
@@ -582,6 +582,43 @@ export function formatExecuteTaskRecoveryStatus(status) {
 		? missing.join("; ")
 		: "all durable task artifacts present";
 }
+
+/**
+ * Read the runtime record for a unit and return structured recovery diagnostics.
+ *
+ * Purpose: surface runtime record state for post-mortem debugging of autonomous
+ * failures without requiring humans to parse `.sf/runtime/units/*.json` manually.
+ *
+ * Consumer: `sf status recovery` CLI command and verification evidence enrichment.
+ */
+export function getRecoveryDiagnostics(basePath, unitType, unitId) {
+	const record = readUnitRuntimeRecord(basePath, unitType, unitId);
+	if (!record) return null;
+	// status/retryCount/maxRetries come from the derived state; the rest off the record.
+	const { status, retryCount, maxRetries } = getUnitRuntimeState(record);
+	let lineageSummary = null;
+	if (record.lineage) {
+		lineageSummary = {
+			status: record.lineage.status,
+			workerCount: record.lineage.workerSessionIds?.length ?? 0,
+			eventCount: record.lineage.events?.length ?? 0,
+		};
+	}
+	return {
+		unitType,
+		unitId,
+		status,
+		retryCount,
+		maxRetries,
+		lastRecoveryReason: record.lastRecoveryReason ?? null,
+		progressCount: record.progressCount ?? 0,
+		lastProgressKind: record.lastProgressKind ?? "dispatch",
+		recoveryAttempts: record.recoveryAttempts ?? 0,
+		lineageSummary,
+		updatedAt: record.updatedAt ?? null,
+		startedAt: record.startedAt ?? null,
+	};
+}
 // ─── Stale slice runtime record reconciliation ──────────────────────────────
 /**
  * Clear unit runtime records for complete-slice units that are in a terminal
diff --git a/src/resources/extensions/sf/verification-evidence.js b/src/resources/extensions/sf/verification-evidence.js
index 890af0cbb..bb34dd01d 100644
--- a/src/resources/extensions/sf/verification-evidence.js
+++ b/src/resources/extensions/sf/verification-evidence.js
@@ -24,6 +24,10 @@ export function writeVerificationJSON(
 	unitId,
 	retryAttempt,
 	maxRetries,
+	tokenCount,
+	memoryPressureMB,
+	gateOutcomes,
+	recoveryStatus,
 ) {
 	mkdirSync(tasksDir, { recursive: true });
 	const evidence = {
@@ -41,6 +45,10 @@ export function writeVerificationJSON(
 		})),
 		...(retryAttempt !== undefined ? { retryAttempt } : {}),
 		...(maxRetries !== undefined ? { maxRetries } : {}),
+		...(tokenCount !== undefined ? { tokenCount } : {}),
+		...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
+		...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
+		...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
 	};
 	if (result.runtimeErrors && result.runtimeErrors.length > 0) {
 		evidence.runtimeErrors = result.runtimeErrors.map((e) => ({
diff --git a/todo.md b/todo.md
new file mode 100644
index 000000000..cbd72a7ae
--- /dev/null
+++ b/todo.md
@@ -0,0 +1,53 @@
+# TODO
+
+Unimplemented items consolidated from the root `*.md` files. The source file is noted after each item.
+
+---
+
+## Critical / Correctness
+
+- [x] Port `fix(security): harden project-controlled surfaces` — env isolation + transport cleanup done; gsd-2 trust/dedup hunks (server.ts, mcp-client/index.ts) not applicable (packages absent) *(BUILD_PLAN.md Tier 0.5 #2)*
+- [ ] Port agent-session/agent-end transition fixes (gsd-2 `71114fccf`, `6d7e4gcb5`, `c162c44bf`, `e3bd04551`) *(BUILD_PLAN.md Tier 0.5 #7-10, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster B)*
+- [ ] Cloudflare Workers AI provider — `CLOUDFLARE_API_KEY`/`CLOUDFLARE_ACCOUNT_ID` (pi-mono PR #3851) *(BUILD_PLAN.md Tier 0 #8)*
+
+---
+
+## Architecture / Design Gaps
+
+- [ ] Schema reconciliation: update SPEC.md to 3-table model (milestones/slices/tasks vs single `units`) *(BUILD_PLAN.md Tier 1.3)*
+- [ ] Persistent agents v1 command surface — `/sf agent run|reset|delete|inspect` *(BUILD_PLAN.md Tier 2.1)*
+- [ ] Intent chapters (`chapter_open`/`chapter_close` — crash-resume context) *(BUILD_PLAN.md Tier 2.3)*
+- [ ] PhaseReview 3-pass review (establish-context → parallel chunked → synthesis) *(BUILD_PLAN.md Tier 2.4)*
+- [ ] `last_error` cap to 4 KB head+tail; full payload to file *(BUILD_PLAN.md Tier 2.6)*
+- [ ] Port workflow state machine hardening (gsd-2 `f2377eedd`, `b9a1c6743`, `153fb328a`, `381ccdef5`, `371b2eb31`) *(BUILD_PLAN.md Tier 0.5 #13, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster F)*
+- [ ] Port `fix(claude-code-cli): persist Always Allow for non-Bash tools` (gsd-2 `a88baeae9`) *(BUILD_PLAN.md Tier 0.5 #11)*
+
+---
+
+## Medium Priority / Quality
+
+- [ ] Replace `isHeavyModelId()` name-matching heuristic with capability-based check *(PRODUCTION_AUDIT_GRADE.md #9, PRODUCTION_AUDIT.md 3.3)*
+- [ ] Add `version` field to task frontmatter and mode state (schema versioning) *(PRODUCTION_AUDIT_GRADE.md #8)*
+- [ ] Integration tests for full remote steering pipeline *(PRODUCTION_AUDIT.md Long Term #10)*
+- [x] Log `frontmatterErrors` in sf-db.js instead of silently dropping validation errors *(PRODUCTION_AUDIT.md 3.1)*
+- [ ] Search provider registry refactor — consolidate provider list across files into `SearchProviderRegistry` *(BUILD_PLAN.md Tier 1+)*
+- [ ] Update ARCHITECTURE.md self-evolution section (triage pipeline IS active; injection IS automatic now) *(ARCHITECTURE.md)*
+- [ ] Add Mermaid state machine diagram to ARCHITECTURE.md *(ARCHITECTURE.md)*
+- [ ] Symlinked packages/resources/skills/sessions dedup (pi-mono PR #3818) *(BUILD_PLAN.md Tier 0 #6)*
+
+---
+
+## Long-term / Deferred
+
+- [ ] Singularity Knowledge + Agent Platform (Go re-platform, ~12 weeks) *(BUILD_PLAN.md Tier 1+)*
+- [ ] sf-worker SSH host (Go, `wish` + `xpty`, ~3 weeks) *(BUILD_PLAN.md Tier 4)*
+- [ ] Charm TUI client (`sf-tui` in Go, ~12-16 weeks) *(BUILD_PLAN.md Tier 1+)*
+- [ ] Flight recorder (`x/vcr`, ~3 weeks) *(BUILD_PLAN.md Tier 1+)*
+- [ ] Full swarm chat for `subagent` tool (Option C, depends on persistent-agent layer) *(BUILD_PLAN.md Tier 1+)*
+- [ ] Caveman input-side prompt compression (rewrite execute-task/plan-slice prompts) *(BUILD_PLAN.md Tier 1+)*
+- [ ] Runtime input preprocessor (`terse_prompts: true` dispatch transform, ~3-4 days) *(BUILD_PLAN.md Tier 1+)*
+- [ ] Judge calibration + eval runner service (Go/Charm, ~2-3 weeks post SM) *(BUILD_PLAN.md Tier 1+)*
+- [ ] M009 promote-only adoption review — create `sf schedule` entry (2 weeks after M009 close) *(BACKLOG.md)*
+- [ ] Establish pi-mono SDK sync cadence (recurring check schedule) *(BUILD_PLAN.md Tier 1+)*
+- [ ] `scripts/port-from-gsd2.sh` automation script *(UPSTREAM_PORT_GUIDE.md)*
+- [ ] TypeScript migration for UOK modules (`kernel.js`, etc.) *(PRODUCTION_AUDIT_COMPLETE.md, PRODUCTION_AUDIT_GRADE.md)*