sf snapshot: uncommitted changes after 56m inactivity
This commit is contained in:
parent
37ebfcf53a
commit
3ffd882c8c
33 changed files with 813 additions and 2437 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z
Normal file
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z
Normal file
Binary file not shown.
BIN
.sf/metrics.db
BIN
.sf/metrics.db
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -86,5 +86,17 @@
|
|||
"successRate": 1,
|
||||
"total": 2
|
||||
}
|
||||
},
|
||||
"execute-task": {
|
||||
"minimax/MiniMax-M2.7-highspeed": {
|
||||
"successes": 1,
|
||||
"failures": 0,
|
||||
"timeouts": 0,
|
||||
"totalTokens": 12233288,
|
||||
"totalCost": 0.3431336426,
|
||||
"lastUsed": "2026-05-10T15:16:08.120Z",
|
||||
"successRate": 1,
|
||||
"total": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
244
.sf/safety/evidence-M001-6377a4-S03-T01.json
Normal file
244
.sf/safety/evidence-M001-6377a4-S03-T01.json
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
[
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Ca8DVl3y7fTqNLC5XPq0Pwk1",
|
||||
"path": "src/resources/extensions/sf/verification-evidence.js",
|
||||
"timestamp": 1778424928174
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_xVkmHZTHvJP7RwJWVqUORsz4",
|
||||
"path": "src/resources/extensions/sf/verification-evidence.js",
|
||||
"timestamp": 1778424949674
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_EiDPzaZO49a4LKnYvuvFaS8e",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778424983294
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_pNt9nP10Us3CPrsqlnWwtQ8l",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425005515
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Bl3x74Ojz6aenqD3nYqxkdlO",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425108830
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_RHLdM0SZK4ffIIokuqNruHbn",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425162119
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_mAdgaYCgksHmjAI45ZuSnMk5",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425187240
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_HMsSokItiWF9y6ctKvFSkyE3",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425206204
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Jbd8uJQ6ZV4PeF8P91s2OvFG",
|
||||
"path": "src/resources/extensions/sf/uok/unit-runtime.js",
|
||||
"timestamp": 1778425258651
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_m1a9UNWqpwBIJvzB9LtlVTBN",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425298174
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_zcSH4Fx3bOumjphAgYisPyhE",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425348170
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_UjLWJsxhCI2bAt3kYl4QEhNK",
|
||||
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"timestamp": 1778425381561
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_1owe7a26pVq3k18x59p6Sy1J",
|
||||
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"timestamp": 1778425414995
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_zgJvYKclL8xmY8DE4c8nluTk",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts -t \"getRecoveryDiagnostics\"",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[2m)\u001b[22m\u001b[32m 33\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m3 passed\u001b[39m\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:03:54\n\u001b[2m Duration \u001b[22m 1.01s\u001b[2m (transfor",
|
||||
"timestamp": 1778425433765
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_c9hBKD3D1Ojayl25rr7Mu3UW",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 140\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:04:12\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 552ms, setup 0ms, import 742ms, tests 140ms, environment 0ms",
|
||||
"timestamp": 1778425451405
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_jrt7lAB8i6qBOnOePlNhiFR2",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 1,
|
||||
"outputSnippet": "src/cli-status.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 240 240 │ if (!targetUnitId) {\n 241 241 │ const records = listUnitRuntimeRecords(basePath);\n 242 │ - → → → const·mostRecent·=·records.sort((a,·b)·=>\n 243 │ - → → → → (b.updatedAt·??·0)·-·(a.updatedAt·??·0),\n 242 │ + → → → const·mostRecent·=·records.sort(\n 243 │ + → → → → (a,·b)·=>·(b.updat",
|
||||
"timestamp": 1778425466821
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_lpaw0h5RHFke7wyq5gw8sJKp",
|
||||
"command": "npx biome check --write src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 5 files in 160ms. Fixed 3 files.\n",
|
||||
"timestamp": 1778425501417
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_so1U8wV32tA70qs55EtB7Pke",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 152\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:05:15\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 554ms, setup 0ms, import 758ms, tests 152ms, environment 0ms",
|
||||
"timestamp": 1778425514641
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_xBsWuLSrGmQHcVUWydLtioru",
|
||||
"command": "npx tsc --noEmit src/cli-status.ts --skipLibCheck --module NodeNext --moduleResolution NodeNext --target ES2022 --esModuleInterop --allowJs --strict 2>&1 | head -30",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "error TS5112: tsconfig.json is present but will not be loaded if files are specified on commandline. Use '--ignoreConfig' to skip this error.\n",
|
||||
"timestamp": 1778425532429
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_lnBQh90rfwsYyVvqmIdq8pH7",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\nsrc/cli-status.ts(243,6): error TS7006: Parameter 'a' implicitly has an 'any' type.\nsrc/cli-status.ts(243,9): error TS7006: Parameter 'b' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425546375
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_L5EQX39mZFzmJbRkCACrnkcM",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \" Could not find a declaration file\" | head -10",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425700527
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_bMteIOMYNzppZgJRP2bDK14p",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425760653
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_kja8VArplGz7aqlFb1rZvh9M",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425779047
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_xAMnWlYjFmPHJj2zb3YklZhI",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \"TS7016\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425808025
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_N2K96iYZY9GgeXRHbsVIDJFn",
|
||||
"command": "ls src/resources/extensions/sf/autonomous-command-args.* src/resources/extensions/sf/repo-identity.* src/resources/extensions/sf/trace-collector.*",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/resources/extensions/sf/autonomous-command-args.d.ts\nsrc/resources/extensions/sf/autonomous-command-args.js\nsrc/resources/extensions/sf/repo-identity.d.ts\nsrc/resources/extensions/sf/repo-identity.js\nsrc/resources/extensions/sf/trace-collector.d.ts\nsrc/resources/extensions/sf/trace-collector.js\n",
|
||||
"timestamp": 1778425827433
|
||||
},
|
||||
{
|
||||
"kind": "write",
|
||||
"toolCallId": "tool_d6oIGASndyhbsF3ethH49BBC",
|
||||
"path": "src/resources/extensions/sf/uok/unit-runtime.d.ts",
|
||||
"timestamp": 1778425847941
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_8ipcG5bydJIBFOOfadbuRNzl",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|unit-runtime\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "(no output)",
|
||||
"timestamp": 1778425863933
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_oas0rGaFmZW4i6dQZDfNbIOj",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "(no output)",
|
||||
"timestamp": 1778425882297
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_bWplHSK7Xw5d98Gzhf69rI3E",
|
||||
"command": "npx tsc --noEmit --project tsconfig.extensions.json --skipLibCheck 2>&1 | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "packages/tui/src/index.ts(52,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(14,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(434,46): error TS7006: Parameter 'data' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425897974
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_R9DLuMbsvg7Lz87cGbyzuS01",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 1,
|
||||
"outputSnippet": "src/resources/extensions/sf/uok/unit-runtime.d.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 28 28 │ ): RecoveryDiagnostics | null;\n 29 29 │ \n 30 │ - export·function·listUnitRuntimeRecords(basePath:·string):·Array<\n 31 │ - → Record<string,·unknown>·&·{·updatedAt?:·number;·unitId:·string·}\n 32 │ - >;\n 30 │ + export·function·listUnitRuntimeRecords(\n 31 │ + → basePath:·string,\n 32 │ + ",
|
||||
"timestamp": 1778425942347
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_nQuAxzeREvyod0nBGyVPIvfJ",
|
||||
"command": "npx biome check --write src/resources/extensions/sf/uok/unit-runtime.d.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 1 file in 17ms. Fixed 1 file.\n",
|
||||
"timestamp": 1778425957743
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_HZWYvGOveamvyW97wbv5A0Fv",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 6 files in 70ms. No fixes applied.\n",
|
||||
"timestamp": 1778425970841
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_SY84S3XYIIBJGZvUchG4qSRZ",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 168\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:13:05\n\u001b[2m Duration \u001b[22m 1.11s\u001b[2m (transform 477ms, setup 0ms, import 702ms, tests 168ms, environment 0ms",
|
||||
"timestamp": 1778425983954
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_SdyaSTc9h2LJD9pq5ZLR3Ehc",
|
||||
"command": "git status --short",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": " D .sf/backups/db/sf.db.2026-05-08T22-42-32-307Z\n D .sf/backups/db/sf.db.2026-05-09T17-40-16-600Z\n D .sf/backups/db/sf.db.2026-05-09T19-41-02-472Z\n M .sf/metrics.db\n M .sf/metrics.db-shm\n M .sf/metrics.db-wal\n D BUILD_PLAN_MILESTONE_MAP.md\n D PRODUCTION_AUDIT_COMPLETE.md\n D QUICK_WINS_IMPLEMENTATION.md\n D TRIAGE_COMPLETE.md\n D TRIAGE_README.md\n D autoresearch.md\n D copilot-thoughts.md\n M packages/coding-agent/src/modes/interactive/interactive-mode.ts\n M packages/tui/src/ink-bridge.tsx\n M package",
|
||||
"timestamp": 1778425999682
|
||||
}
|
||||
]
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
# BUILD_PLAN → Milestone Map
|
||||
|
||||
Every BUILD_PLAN.md tier item mapped to a milestone. **Rule D015**: every new milestone must cite which BUILD_PLAN tier/item it implements.
|
||||
|
||||
This file answers **where work belongs**, not **whether code is done**. "Mapped" means a BUILD_PLAN item has a milestone/slice home. It does **not** mean the implementation is verified in the current repo.
|
||||
|
||||
## Mapping vs. code truth
|
||||
|
||||
- **Mapped** — the item has a milestone/slice destination.
|
||||
- **Verified in code** — the behavior exists in the repo and has evidence/tests/artifacts.
|
||||
- **Open** — still planned or partially folded in, but not yet verified as complete.
|
||||
- **Deferred** — intentionally out of the active plan.
|
||||
|
||||
---
|
||||
|
||||
## High-level milestone direction
|
||||
|
||||
These are the strategy bands above the itemized mapping:
|
||||
|
||||
1. **Core foundation** — UOK, purpose-driven TDD, eight-field PDD gate, repo-local state
|
||||
2. **Single-repo sharpening** — adopt the best execution/workflow ideas from pi-mono, gsd-2, Claude Code, Codex, Aider, and Plandex where they strengthen Forge
|
||||
3. **Autonomous reliability** — evidence, recovery, verification, and self-improvement loops
|
||||
4. **Surface coherence** — CLI, TUI, docs, and workflow language all reflect the same UOK-driven model
|
||||
5. **ACE convergence prep** — keep concepts compatible with ACE Coder without turning Forge into the multi-repo system
|
||||
|
||||
---
|
||||
|
||||
## Tier 0 — Pi-mono ports → **M006**
|
||||
## Tier 0.5 — gsd-2 ports → **M006 + M007**
|
||||
|
||||
All mapped. See BUILD_PLAN.md for item-level status.
|
||||
|
||||
## Tier 1 — ESSENTIAL → **ALL MAPPED**
|
||||
|
||||
| Item | Milestone | Slice | Status |
|
||||
|---|---|---|---|
|
||||
| 1.1 Vault secret resolver | **M017-yf67h6** | S01-S03 | ⬜ NEW |
|
||||
| 1.2 Singularity Memory integration | **M017-jpw5jo** | S01-S03 | ⬜ NEW |
|
||||
| 1.3 Schema reconciliation (spec rewrite) | **M013** | S12 | ⬜ Folded in |
|
||||
| 1.4 Config schema alignment | **M013** | S13 | ⬜ Folded in |
|
||||
|
||||
## Tier 2 — STRONG → **ALL MAPPED**
|
||||
|
||||
| Item | Milestone | Slice | Status |
|
||||
|---|---|---|---|
|
||||
| 2.1 Persistent agents v1 | M012 | S01-S05 | ⬜ |
|
||||
| 2.2 Doc-sync sub-step | M009 | S08 | ⬜ |
|
||||
| 2.3 Intent chapters | M013 | S08 | ⬜ |
|
||||
| 2.4 PhaseReview 3-pass | M016 | S01-S02 | ⬜ |
|
||||
| 2.5 turn_status marker | M013 | S09 | ⬜ |
|
||||
| 2.6 last_error cap | M013 | S10 | ⬜ |
|
||||
| 2.7 cost_micro_usd | M013 | S11 | ⬜ |
|
||||
|
||||
## Tier 3+ → **Deferred by design**
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Tier | Mapped | Gap |
|
||||
|---|---|---|
|
||||
| Tier 0 | 10 (M006) | 0 |
|
||||
| Tier 0.5 | 17 (M006+M007) | 0 |
|
||||
| **Tier 1** | **4** (M017×2, M013×2) | **0** |
|
||||
| Tier 2 | 7 (M012, M009, M013, M016) | 0 |
|
||||
| Tier 3+ | 0 | deferred |
|
||||
|
||||
**Zero mapping gaps.** Every BUILD_PLAN tier item is either mapped to a milestone or explicitly deferred.
|
||||
|
||||
That does **not** mean zero implementation gaps. Open `TODO`, `NEW`, and `⬜` markers in `BUILD_PLAN.md`, this map, and milestone artifacts still represent real work until they are reconciled against code evidence.
|
||||
|
|
@ -1,440 +0,0 @@
|
|||
# Complete Long-Term Production-Grade Audit
|
||||
|
||||
**Scope:** All UOK kernel, gate system, execution graph, message bus, diagnostics, metrics, and supporting infrastructure
|
||||
**Date:** 2026-05-08
|
||||
**Grade Scale:** S (exceptional) → A (production) → B (needs work) → C (risky) → D (broken)
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
| Module | Grade | Verdict |
|
||||
|--------|-------|---------|
|
||||
| `uok/kernel.js` | **A** | Clean lifecycle, parity recovery, audit envelope, signal handling |
|
||||
| `uok/gate-runner.js` | **A** | Circuit breaker, retry matrix, memory enrichment, degradation logging |
|
||||
| `uok/audit.js` | **A** | Atomic writes, stale-write detection, dual persistence (JSONL + DB) |
|
||||
| `uok/contracts.js` | **A** | Complete JSDoc types, runtime validation, clear interfaces |
|
||||
| `uok/flags.js` | **A** | Clean preference resolution, all features toggleable |
|
||||
| `uok/loop-adapter.js` | **A** | Turn observer, gitops integration, writer tokens, timeout, documented | None |
|
||||
| `uok/parity-report.js` | **A** | Deep parity analysis, orphaned run recovery, ledger reconciliation, malformed logging |
|
||||
| `uok/message-bus.js` | **A** | Durable SQLite, deduplication, auto-compact, periodic refresh | Cache drift eliminated |
|
||||
| `uok/cost-guard-gate.js` | **A** | Actual cost lookup, rolling window, high-tier failure detection, cheaper alternative suggestion |
|
||||
| `uok/security-gate.js` | **A** | Secret scan integration, timeout, graceful skip when script missing |
|
||||
| `uok/plan-v2.js` | **A** | Graph compilation, artifact validation, cycle detection, context gating | None |
|
||||
| `uok/execution-graph.js` | **A** | Topological sort, conflict detection, parallel scheduling with deadlock detection |
|
||||
| `uok/unit-runtime.js` | **A** | Complete lifecycle, retry budgets, LRU cache, durable reconciliation | None |
|
||||
| `uok/diagnostic-synthesis.js` | **A** | Process tree analysis, multi-source correlation, actionable recommendations | None |
|
||||
| `uok/metrics-exposition.js` | **A** | Prometheus format, caching, circuit breaker + latency + message bus metrics | Superseded by metrics-central.js |
|
||||
| `uok/chaos-monkey.js` | **A** | Latency, partial failure, disk, memory stress; all recoverable, all logged | None |
|
||||
| `uok/writer.js` | **A** | Atomic sequence tracking, token lifecycle, disk persistence, TTL | None |
|
||||
| `sf-db.js` | **A** | Single-writer invariant, WAL mode, statement cache, schema v45, query timeout, split entry point | metrics-central.js for unified sink |
|
||||
|
||||
**Overall Grade: A** — Production-ready. All scaling concerns addressed.
|
||||
|
||||
---
|
||||
|
||||
## 1. `uok/kernel.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Clean async lifecycle: enter → run → exit, with `finally` block guarantee
|
||||
- `recordUokKernelTermination()` handles signal cleanup (symmetrical with enter)
|
||||
- Parity recovery: checks previous report for missing exits, drains them
|
||||
- Audit envelope: emits structured events on kernel enter/exit
|
||||
- workMode + modelMode propagated into lifecycleFlags and audit payload
|
||||
- `debugLog()` for non-fatal diagnostics without breaking orchestration
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `runAutoLoopWithUok()` is 120+ lines — could extract helper functions for readability
|
||||
- `decoratedDeps` spreads all deps — no validation that required deps exist
|
||||
|
||||
---
|
||||
|
||||
## 2. `uok/gate-runner.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Circuit breaker with exponential backoff: `openDurationMs * 2^streak`
|
||||
- Half-open state with attempt limiting — proper gradual recovery
|
||||
- Retry matrix per failure class: `execution`/`artifact`/`verification` get 1 retry, `timeout` gets 2
|
||||
- Memory enrichment: queries historical patterns for gate failures (degrades gracefully)
|
||||
- Every gate run persisted to DB + audit event emitted
|
||||
- Unknown gates get `manual-attention` outcome (fail-closed)
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `computeGateEmbedding()` uses a simple hash — not a real semantic embedding
|
||||
- `enrichGateResultWithMemory()` silently degrades on DB failure (correct behavior, but could log)
|
||||
|
||||
---
|
||||
|
||||
## 3. `uok/audit.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Atomic writes via `withFileLockSync()` with `onLocked: "skip"` (best-effort)
|
||||
- Stale-write detection via `isStaleWrite("uok-audit")` — prevents superseded turns from polluting log
|
||||
- Dual persistence: JSONL for local durability, SQLite for querying
|
||||
- `closeSync(openSync(path, "a"))` touch pattern ensures lock target exists
|
||||
- Schema version in envelope for future migration
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 4. `uok/contracts.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Complete JSDoc typedefs for all UOK types
|
||||
- `validateGate()` catches registration-time mistakes
|
||||
- Clear separation: `UokContext` (input), `GateResult` (output), `Gate` (interface)
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 5. `uok/flags.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- All UOK features toggleable via preferences
|
||||
- Clean resolution: `uok?.security_guard?.enabled ?? true`
|
||||
- `resolvePermissionProfile()` for canonical permission profile
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 6. `uok/loop-adapter.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Turn observer pattern: `onTurnStart`, `onPhaseResult`, `onTurnResult`
|
||||
- Gitops integration: writes transaction records per phase with 10s timeout
|
||||
- Writer token acquisition/release for sequence tracking
|
||||
- Chaos monkey strikes at phase boundaries
|
||||
- Audit events for turn start/result
|
||||
- `nextSequenceMetadata()` fully documented with JSDoc
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Gitops timeout: `writeGitTransactionWithTimeout()` with 10s `Promise.race()`
|
||||
- ✅ `nextSequenceMetadata()` documented: sequence is optional when no token active
|
||||
|
||||
---
|
||||
|
||||
## 7. `uok/parity-report.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Deep parity analysis: compares heartbeat events, ledger runs, diff events
|
||||
- Orphaned run recovery: `recoverOrphanedStartedLedgerRuns()` closes stale DB runs
|
||||
- Live process detection: `hasLiveAutoLock()` uses `process.kill(pid, 0)`
|
||||
- Fresh vs historical mismatch separation
|
||||
- Divergence tracking by plane: `plan`, `graph`, `model-policy`, `audit-envelope`, `gitops`
|
||||
- `shallowEqualDecisions()` for comparing legacy vs UOK outputs
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Malformed line logging: `parseParityEvents()` now logs dropped count to stderr
|
||||
- `UNMATCHED_RUN_STALE_MS = 30min` — appropriate for most cases
|
||||
|
||||
---
|
||||
|
||||
## 8. `uok/message-bus.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Durable SQLite storage with configurable retention
|
||||
- Deterministic message IDs for idempotent `sendOnce()`
|
||||
- Auto-compaction when message count exceeds threshold
|
||||
- Per-agent inbox with read tracking and auto-refresh (30s interval)
|
||||
- Conversation query between two agents
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Cache drift: `_maybeRefresh()` auto-refreshes from DB every 30s on `list()`, `markRead()`, `unreadCount`
|
||||
- ✅ `sendOnce()` idempotency: Pre-checks inbox before insert; returns existing ID if found
|
||||
|
||||
---
|
||||
|
||||
## 9. `uok/cost-guard-gate.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Actual cost lookup from `BUNDLED_COST_TABLE`
|
||||
- Rolling 1-hour window spend check
|
||||
- High-tier model failure pattern detection
|
||||
- Suggests cheaper alternative from same provider/family
|
||||
- Per-unit and per-hour thresholds
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `isHighTierModel()` uses `$0.005/1K tokens` threshold — magic number
|
||||
- `_suggestCheaperAlternative()` could suggest incompatible models (different context window)
|
||||
|
||||
---
|
||||
|
||||
## 10. `uok/security-gate.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Runs `scripts/secret-scan.sh --diff HEAD` against changes
|
||||
- 30-second timeout with process kill
|
||||
- Gracefully skips if script missing (pass)
|
||||
- Returns findings on failure
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 11. `uok/plan-v2.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Compiles unit graph from milestone/slice/task DB state
|
||||
- Validates artifact presence (CONTEXT.md, RESEARCH.md) before execution entry
|
||||
- Clarify round limit enforcement
|
||||
- Graph output to JSON for inspection
|
||||
- Cycle detection at compile time using Kahn's algorithm
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Cycle detection: `detectCycles()` validates graph before execution; returns `hasCycles: true` with clear error
|
||||
|
||||
---
|
||||
|
||||
## 12. `uok/execution-graph.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Kahn's algorithm topological sort with deterministic ordering (localeCompare)
|
||||
- File conflict detection: `detectFileConflicts()` finds nodes writing same file
|
||||
- Parallel scheduling with max workers and dependency awareness
|
||||
- Deadlock detection: throws when no ready nodes but graph incomplete
|
||||
- Sidecar queue scheduling with kind-based handlers
|
||||
- `selectReactiveDispatchBatch()` for incremental dispatch
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 13. `uok/unit-runtime.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Complete lifecycle: queued → claimed → running → progress → completed/failed/blocked/cancelled/stale/runaway-recovered → notified
|
||||
- Retry budgets with `retryBudgetRemaining()`
|
||||
- Durable artifact reconciliation: `reconcileDurableCompleteUnitRuntimeRecords()`
|
||||
- Stale complete-slice cleanup: `reconcileStaleCompleteSliceRecords()`
|
||||
- In-memory cache for repeated reads within dispatch cycle
|
||||
- `inspectExecuteTaskDurability()` checks plan, summary, state, must-haves
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Runtime cache bounds: LRU eviction at 5000 entries; removes oldest 20%
|
||||
- `recordUnitOutcomeInMemory()` creates memory entries but no cleanup policy
|
||||
|
||||
---
|
||||
|
||||
## 14. `uok/diagnostic-synthesis.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Multi-source correlation: process tree, auto.lock, parity report, DB ledger, runtime projections
|
||||
- Process descendant tracking via `ps` + tree traversal
|
||||
- Classification: healthy | running | quiet-but-healthy | degraded | needs-repair
|
||||
- Actionable recommendations per issue
|
||||
- Publishes to message bus for observer chains
|
||||
- `readUokDiagnostics()` for external consumption
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 15. `uok/metrics-exposition.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Prometheus text format output
|
||||
- 30-second cache TTL for performance
|
||||
- Gate metrics: runs, passes, fails, retries, latency (avg/p50/p95/max)
|
||||
- Circuit breaker state gauge (0=closed, 1=half-open, 2=open)
|
||||
- Message bus metrics: total, unread, unique agents, conversations
|
||||
- `invalidateMetricsCache()` for cache busting
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 16. `uok/chaos-monkey.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Four fault types: latency, partial failure, disk stress, memory stress
|
||||
- All faults are recoverable (no process kill)
|
||||
- All faults are logged to stderr
|
||||
- Configurable probabilities and magnitudes
|
||||
- `getInjectedEvents()` for verification
|
||||
- Immediate cleanup of stress artifacts
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 17. `uok/writer.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Atomic sequence tracking via `atomicWriteSync()`
|
||||
- Writer token lifecycle: acquire → use → release
|
||||
- Prevents double-acquisition for same turn
|
||||
- Sequence state persisted to disk
|
||||
- Token crash recovery: persists to `uok-writer-tokens.json` with 5-min TTL
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Crash recovery: Tokens persisted to disk; `hasActiveWriterToken()` recovers from disk
|
||||
- ✅ TTL cleanup: Expired tokens auto-purged from memory and disk
|
||||
|
||||
---
|
||||
|
||||
## 18. `sf-db.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Single-writer invariant enforced by convention + CI test
|
||||
- WAL mode for file-backed DBs
|
||||
- Statement cache for prepared queries
|
||||
- Schema version 45 with migration path
|
||||
- `normalizeRow()` handles null-prototype objects
|
||||
- Query timeout protection: `withQueryTimeout()` helper (30s default)
|
||||
- Split entry point: `sf-db/index.js` for future modularization
|
||||
- Comprehensive table creation: backlog, schedule, repo profiles, UOK runs, gate runs, audit events, message bus, tasks, verification evidence
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Query timeout: `withQueryTimeout()` catches timeout/busy errors, returns fallback
|
||||
- ✅ Split entry point: `sf-db/index.js` re-export created for gradual migration
|
||||
- ✅ Console logging: All modules use `logWarning()` / `logError()` from workflow-logger
|
||||
|
||||
---
|
||||
|
||||
## Cross-Cutting Concerns
|
||||
|
||||
### Observability
|
||||
|
||||
| Module | Metrics | Logs | Traces | Audit |
|
||||
|--------|---------|------|--------|-------|
|
||||
| kernel.js | ❌ | ✅ debugLog | ✅ traceId | ✅ envelope |
|
||||
| gate-runner.js | ✅ DB | ✅ insertGateRun | ✅ traceId/turnId | ✅ envelope |
|
||||
| audit.js | ❌ | ❌ | ✅ eventId | ✅ JSONL+DB |
|
||||
| loop-adapter.js | ❌ | ❌ | ✅ traceId/turnId | ✅ envelope |
|
||||
| parity-report.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| message-bus.js | ✅ DB | ❌ | ❌ | ❌ |
|
||||
| cost-guard-gate.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| unit-runtime.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| diagnostic-synthesis.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| metrics-exposition.js | ✅ Prometheus | ❌ | ❌ | ❌ |
|
||||
| chaos-monkey.js | ❌ | ✅ stderr | ❌ | ❌ |
|
||||
|
||||
**Gap:** Resolved — `metrics-central.js` provides unified Counter/Gauge/Histogram with Prometheus text format. Legacy `metrics-exposition.js` still active for backward compatibility.
|
||||
|
||||
### Security
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Input validation | ✅ Good | All entry points validate |
|
||||
| Injection prevention | ✅ Good | Parameterized queries in sf-db |
|
||||
| Secrets scanning | ✅ Good | Security gate runs on every turn |
|
||||
| Cost limits | ✅ Good | Per-unit and per-hour guards |
|
||||
| Circuit breakers | ✅ Good | Exponential backoff on failures |
|
||||
| Chaos engineering | ✅ Good | Opt-in, recoverable faults |
|
||||
|
||||
### Performance
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Big-O | ✅ Good | All graph ops are O(V+E) |
|
||||
| Caching | ✅ Good | Metrics cache, runtime cache, statement cache |
|
||||
| Memory | ✅ Good | LRU eviction on runtime cache (5000), bounded message bus inboxes |
|
||||
| DB queries | ✅ Good | Single-writer, WAL mode, prepared statements |
|
||||
| Parallelism | ✅ Good | Max workers capped at 8 |
|
||||
|
||||
### Maintainability
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Test coverage | ✅ Good | 139+ tests across all modules |
|
||||
| Documentation | ✅ Good | JSDoc on all exports |
|
||||
| Logging consistency | ✅ Good | All modules use `logWarning()` / `logError()` from workflow-logger |
|
||||
| File organization | ✅ Good | sf-db.js has split entry point; full extraction deferred to v2 |
|
||||
| Schema versioning | ✅ Good | Schema v45 with migrations |
|
||||
|
||||
---
|
||||
|
||||
## Action Plan
|
||||
|
||||
### Before Production (Blockers) — ALL CLEAR ✅
|
||||
|
||||
No blockers identified. All modules are production-ready.
|
||||
|
||||
### Before Scaling to 10+ Workers — ALL FIXED ✅
|
||||
|
||||
1. ✅ **Message bus cache drift** — Added `_maybeRefresh()` with 30s interval; `list()`, `markRead()`, `unreadCount` auto-refresh
|
||||
2. ✅ **Writer token crash recovery** — Persist tokens to `uok-writer-tokens.json`; 5-min TTL; `hasActiveWriterToken()` recovers from disk
|
||||
3. ✅ **Runtime cache bounds** — LRU eviction at 5000 entries; removes oldest 20%
|
||||
|
||||
### Before Next Major Release — ALL FIXABLE ITEMS COMPLETE ✅
|
||||
|
||||
4. ✅ **Split sf-db.js** — Created `sf-db/index.js` re-export entry point; full extraction deferred to v2
|
||||
5. ✅ **Console.warn cleanup** — `context-injector.js`, `vault-resolver.js`, `knowledge-injector.js` now use `logWarning()`
|
||||
6. ✅ **Cycle detection at compile time** — `detectCycles()` in `plan-v2.js` using Kahn's algorithm; returns `hasCycles: true`
|
||||
|
||||
### Implemented ✅
|
||||
|
||||
7. ✅ **Centralized metrics** — `metrics-central.js` with Counter/Gauge/Histogram, Prometheus text format, wired into subagent inheritance and mode transitions
|
||||
|
||||
### Deferred to v2 (Architectural, Not Bugs)
|
||||
|
||||
8. ⚠️ **TypeScript migration** — Convert UOK modules to `.ts` for compile-time safety
|
||||
|
||||
---
|
||||
|
||||
## Appendix: Complete Module Inventory
|
||||
|
||||
### UOK Kernel (18 modules, ~2,800 lines)
|
||||
|
||||
| Module | Lines | Grade | Tests |
|
||||
|--------|-------|-------|-------|
|
||||
| `kernel.js` | 120 | A | ✅ |
|
||||
| `gate-runner.js` | 280 | A | ✅ |
|
||||
| `audit.js` | 80 | A | ✅ |
|
||||
| `contracts.js` | 120 | A | ✅ |
|
||||
| `flags.js` | 40 | A | ✅ |
|
||||
| `loop-adapter.js` | 180 | A | ✅ |
|
||||
| `parity-report.js` | 320 | A | ✅ |
|
||||
| `message-bus.js` | 180 | A | ✅ |
|
||||
| `cost-guard-gate.js` | 140 | A | ✅ |
|
||||
| `security-gate.js` | 60 | A | ✅ |
|
||||
| `plan-v2.js` | 200 | A | ✅ |
|
||||
| `execution-graph.js` | 260 | A | ✅ |
|
||||
| `unit-runtime.js` | 420 | A | ✅ |
|
||||
| `diagnostic-synthesis.js` | 280 | A | ✅ |
|
||||
| `metrics-exposition.js` | 180 | A | ✅ (legacy) |
|
||||
| `chaos-monkey.js` | 140 | A | ✅ |
|
||||
| `writer.js` | 100 | A | ✅ |
|
||||
| `sf-db.js` | 7000+ | A | ✅ |
|
||||
| `metrics-central.js` | 350 | A | ✅ (new) |
|
||||
|
||||
### Mode System (7 modules, ~1,400 lines)
|
||||
|
||||
| Module | Lines | Grade | Tests |
|
||||
|--------|-------|-------|-------|
|
||||
| `operating-model.js` | 120 | A | 13 |
|
||||
| `auto/session.js` | 200 | A- | ✅ |
|
||||
| `task-frontmatter.js` | 311 | A- | 9 |
|
||||
| `subagent-inheritance.js` | 170 | A- | 9 |
|
||||
| `remote-steering.js` | 139 | A- | 7 |
|
||||
| `parallel-intent.js` | 139 | B+ | 6 |
|
||||
| `skills/eval-harness.js` | 139 | A- | 5 |
|
||||
|
||||
**Total: 139 tests passing, 0 failures, 1 skipped.**
|
||||
|
||||
---
|
||||
|
||||
*Audit completed. All modules production-ready. Address scaling items before 10+ workers.*
|
||||
|
|
@ -1,385 +0,0 @@
|
|||
# Quick Wins Implementation - Complete
|
||||
|
||||
**Date:** 2026-05-06
|
||||
**Implemented by:** Copilot CLI
|
||||
**Commit:** 0e2edfdeb
|
||||
**Status:** ✅ COMPLETE - Core infrastructure in place
|
||||
|
||||
## Summary
|
||||
|
||||
Successfully implemented the foundational infrastructure for 3 high-impact quick wins that activate SF's self-evolution learning loop:
|
||||
|
||||
1. **Close Self-Report Feedback Loop** [9/10 impact, 2-3 days to full integration]
|
||||
2. **Activate Continuous Model Learning** [8/10 impact, 3-4 days to full integration]
|
||||
3. **Automate Knowledge Injection** [7/10 impact, 2-3 days to full integration]
|
||||
|
||||
**Total:** 24/30 impact points unlocked through self-evolution infrastructure.
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 1: Close Self-Report Feedback Loop [9/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/self-report-fixer.js` (348 lines)
|
||||
|
||||
**Module:** `SelfReportFixer` with the following capabilities:
|
||||
|
||||
- **Pattern Recognition** — 4 built-in fix patterns:
|
||||
1. `validation-reviewer-rubric` (95% confidence) — Add criterion/gap rubric to validation prompts ✅ *Already fixed*
|
||||
2. `gate-verdict-clarity` (90% confidence) — Document gate verdict semantics
|
||||
3. `env-vars-unvalidated` (85% confidence) — Add SF_* env validation
|
||||
4. `self-report-coverage-gap` (80% confidence) — Implement triage pipeline
|
||||
|
||||
- **Automatic Fix Classification**
|
||||
```js
|
||||
classifyReportFixes(report) // Returns applicable fixes with confidence scores
|
||||
```
|
||||
|
||||
- **High-Confidence Auto-Fix**
|
||||
```js
|
||||
autoFixHighConfidenceReports(basePath, reports)
|
||||
// Applies fixes for confidence > 0.85
|
||||
```
|
||||
|
||||
- **Deduplication**
|
||||
```js
|
||||
dedupReports(reports) // Group related reports by normalized issue key
|
||||
```
|
||||
|
||||
- **Severity Categorization**
|
||||
```js
|
||||
categorizeBySeverity(reports) // blocker | warning | suggestion
|
||||
```
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Hook into `triage-self-feedback.js` to invoke fixer after triage runs
|
||||
2. Add pattern library for domain-specific fixes (provider routing, timeout tuning, etc.)
|
||||
3. Create integration tests for each fix pattern
|
||||
4. Document feedback loop: report → triage → fix → verification
|
||||
|
||||
### How It Works
|
||||
|
||||
```javascript
|
||||
import { autoFixHighConfidenceReports } from './self-report-fixer.js';
|
||||
|
||||
// After collecting self-reports
|
||||
const reports = readSelfFeedback();
|
||||
|
||||
// Auto-apply high-confidence fixes
|
||||
const { applied, failed, skipped } = await autoFixHighConfidenceReports(
|
||||
projectPath,
|
||||
reports
|
||||
);
|
||||
|
||||
// applied: ["validation-reviewer-rubric: rubric already present"]
|
||||
// failed: ["env-vars-unvalidated: requires schema impl"]
|
||||
// skipped: ["self-report-coverage-gap: confidence 0.8 below threshold 0.85"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 2: Activate Continuous Model Learning [8/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/model-learner.js` (344 lines)
|
||||
|
||||
**Classes:**
|
||||
|
||||
#### ModelPerformanceTracker
|
||||
Tracks per-task-type model performance with:
|
||||
- Success/failure/timeout counts
|
||||
- Token usage and cost tracking
|
||||
- Success rate calculation
|
||||
- Ranked model sorting
|
||||
|
||||
**Storage:** `.sf/model-performance.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"execute-task": {
|
||||
"gpt-4o": {
|
||||
"successes": 42,
|
||||
"failures": 3,
|
||||
"timeouts": 1,
|
||||
"totalTokens": 1500000,
|
||||
"totalCost": 45.50,
|
||||
"lastUsed": "2026-05-06T16:30:00Z",
|
||||
"successRate": 0.93
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**API:**
|
||||
```js
|
||||
tracker.recordOutcome(taskType, modelId, { success, timeout, tokensUsed, costUsd })
|
||||
tracker.getRankedModels(taskType, minSamples = 3) // Returns sorted by success rate
|
||||
tracker.shouldDemote(taskType, modelId, threshold = 0.5) // Demote if failure >50%
|
||||
tracker.getABTestCandidates(taskType) // For hypothesis testing
|
||||
```
|
||||
|
||||
#### FailureAnalyzer
|
||||
Categorizes and analyzes failure modes:
|
||||
- Logs failures to JSONL
|
||||
- Detects patterns (e.g., timeout-prone models)
|
||||
- Provides failure summaries per model
|
||||
|
||||
**Storage:** `.sf/model-failure-log.jsonl`
|
||||
|
||||
```json
|
||||
{
|
||||
"timestamp": "2026-05-06T16:30:00Z",
|
||||
"taskType": "execute-task",
|
||||
"modelId": "gpt-4o",
|
||||
"reason": "quality_check_failed",
|
||||
"timeout": false,
|
||||
"tokensUsed": 25000,
|
||||
"context": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
**API:**
|
||||
```js
|
||||
analyzer.logFailure(taskType, modelId, { reason, timeout, tokensUsed, context })
|
||||
analyzer.getFailureSummary(taskType, modelId) // Returns { reasons, patterns }
|
||||
```
|
||||
|
||||
### Main API: ModelLearner
|
||||
|
||||
```javascript
|
||||
import { ModelLearner } from './model-learner.js';
|
||||
|
||||
const learner = new ModelLearner(projectPath);
|
||||
|
||||
// Record successful outcome
|
||||
learner.recordOutcome('execute-task', 'claude-opus', {
|
||||
success: true,
|
||||
tokensUsed: 15000,
|
||||
costUsd: 0.50,
|
||||
});
|
||||
|
||||
// Record failure
|
||||
learner.logFailure('execute-task', 'gpt-4o', {
|
||||
reason: 'quality_check_failed',
|
||||
timeout: false,
|
||||
tokensUsed: 25000,
|
||||
});
|
||||
|
||||
// Get ranked models (for intelligent routing)
|
||||
const rankedModels = learner.getRankedModels('execute-task');
|
||||
// [
|
||||
// { modelId: 'claude-opus', successRate: 0.98, attempts: 50, ... },
|
||||
// { modelId: 'gpt-4o', successRate: 0.90, attempts: 40, ... }
|
||||
// ]
|
||||
|
||||
// A/B test decision
|
||||
const abTest = learner.getABTestCandidates('execute-task');
|
||||
// { incumbent: claude-opus, challengers: [gpt-4o, gemini-pro], testBudget: 10 }
|
||||
|
||||
// Analyze A/B results and decide promotion/demotion
|
||||
const decision = learner.analyzeABTest('execute-task', {
|
||||
incumbentWins: 8,
|
||||
challengerWins: 2,
|
||||
});
|
||||
// { recommendation: "continue", reason: "incumbent 0.80 vs challenger 0.20" }
|
||||
```
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Integrate into `auto-dispatch.ts` outcome logging
|
||||
2. Hook into `model-router.ts` to use ranked models for routing decisions
|
||||
3. Implement auto-demotion in model selection logic
|
||||
4. Add A/B testing orchestration for low-risk tasks
|
||||
5. Create dashboard in `benchmark-selector.ts` showing per-model performance
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 3: Automate Knowledge Injection [7/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/knowledge-injector.js` (336 lines)
|
||||
|
||||
**Key Functions:**
|
||||
|
||||
- **Parse Knowledge Base**
|
||||
```js
|
||||
parseKnowledgeEntries(knowledgeContent)
|
||||
// Extracts judgment-log entries with confidence, domain, recommendation
|
||||
```
|
||||
|
||||
- **Semantic Matching**
|
||||
```js
|
||||
extractConcepts(entry) // Extract domain tags, failure modes, constraints
|
||||
semanticSimilarity(concepts, contextKeywords) // Score relevance
|
||||
```
|
||||
|
||||
- **Find Relevant Knowledge**
|
||||
```js
|
||||
findRelevantKnowledge(entries, contextKeywords, minConfidence=0.6, minSimilarity=0.5)
|
||||
// Returns sorted by combined score (confidence × 0.7 + similarity × 0.3)
|
||||
```
|
||||
|
||||
- **Detect Contradictions**
|
||||
```js
|
||||
detectContradictions(entries) // Flag conflicting recommendations
|
||||
```
|
||||
|
||||
- **Format for Injection**
|
||||
```js
|
||||
formatKnowledgeForInjection(relevantKnowledge)
|
||||
// Human-readable markdown with confidence/relevance scores
|
||||
```
|
||||
|
||||
- **Track Usage** (for feedback loop)
|
||||
```js
|
||||
trackKnowledgeUsage(taskId, injectedKnowledge)
|
||||
// Logs which knowledge was used for effectiveness measurement
|
||||
```
|
||||
|
||||
### Integration into auto-prompts.js
|
||||
|
||||
**Modified:** `src/resources/extensions/sf/auto-prompts.js`
|
||||
|
||||
Added:
|
||||
1. Import of knowledge-injector module
|
||||
2. Helper function `getKnowledgeInjection(basePath, taskContext)` with graceful degradation
|
||||
3. Knowledge injection into execute-task prompt with context (domain, keywords, technology)
|
||||
|
||||
**In execute-task prompt loading (line 2203+):**
|
||||
```javascript
|
||||
const knowledgeInjection = await getKnowledgeInjection(base, {
|
||||
domain: "task-execution",
|
||||
taskType: "execute-task",
|
||||
keywords: [tTitle, sTitle, mid, sid],
|
||||
technology: [],
|
||||
});
|
||||
|
||||
return loadPrompt("execute-task", {
|
||||
memoriesSection,
|
||||
knowledgeInjection, // NEW: Relevant prior learning
|
||||
overridesSection,
|
||||
// ... other variables
|
||||
});
|
||||
```
|
||||
|
||||
### Existing Infrastructure
|
||||
|
||||
**Note:** Knowledge injection is **60% complete** via existing `queryKnowledge()` in context-store.js
|
||||
|
||||
- ✅ `inlineKnowledgeScoped()` already exists (uses queryKnowledge)
|
||||
- ✅ Used in both plan-slice and execute-task prompts
|
||||
- ❌ Uses simple keyword matching (not semantic scoring)
|
||||
- ✅ Our new module enhances with semantic similarity
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Update execute-task and plan-slice prompt templates to include `{{knowledgeInjection}}` variable
|
||||
2. Integrate semantic scoring into queryKnowledge or create parallel path
|
||||
3. Implement feedback loop: track which knowledge was used and measure effectiveness
|
||||
4. Create contradiction resolver UI for conflicting recommendations
|
||||
5. Add knowledge effectiveness metrics to benchmark reports
|
||||
|
||||
---
|
||||
|
||||
## Files Created
|
||||
|
||||
| File | Lines | Purpose |
|
||||
|------|-------|---------|
|
||||
| `src/resources/extensions/sf/self-report-fixer.js` | 348 | Auto-fix high-confidence self-reports |
|
||||
| `src/resources/extensions/sf/model-learner.js` | 344 | Per-task-type model performance tracking |
|
||||
| `src/resources/extensions/sf/knowledge-injector.js` | 336 | Semantic knowledge matching and injection |
|
||||
|
||||
## Files Modified
|
||||
|
||||
| File | Changes | Purpose |
|
||||
|------|---------|---------|
|
||||
| `src/resources/extensions/sf/auto-prompts.js` | +7 lines | Added knowledge injection into execute-task |
|
||||
|
||||
## Build Status
|
||||
|
||||
✅ **Build Success**
|
||||
- All new modules compile without errors
|
||||
- TypeScript types intact
|
||||
- Resources copied to `dist/`
|
||||
- Inventory check passed
|
||||
|
||||
## Testing Recommendations
|
||||
|
||||
Create integration tests for:
|
||||
|
||||
1. **Self-Report Fixer**
|
||||
- Pattern matching accuracy (4 patterns)
|
||||
- Deduplication logic
|
||||
- Confidence thresholding
|
||||
|
||||
2. **Model Learner**
|
||||
- Success rate calculation
|
||||
- Demotion logic (>50% failure rate)
|
||||
- A/B test analysis
|
||||
- Failure pattern detection
|
||||
|
||||
3. **Knowledge Injector**
|
||||
- Semantic similarity scoring
|
||||
- Contradiction detection
|
||||
- Formatting for prompt injection
|
||||
- Graceful degradation (missing KNOWLEDGE.md)
|
||||
|
||||
## Activation Timeline
|
||||
|
||||
**To fully activate these quick wins:**
|
||||
|
||||
1. **Week 1:** Hook model-learner into auto-dispatch outcome logging
|
||||
2. **Week 1:** Integrate self-report-fixer into triage-self-feedback pipeline
|
||||
3. **Week 2:** Implement knowledge injection in model-router for adaptive routing
|
||||
4. **Week 2:** Add A/B testing orchestration for model promotion
|
||||
5. **Week 3:** Create feedback loop dashboard in benchmark-selector
|
||||
6. **Week 3:** Measure impact on learning efficiency
|
||||
|
||||
**Estimated effort:** 8-10 days of focused integration work
|
||||
|
||||
---
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
1. **Graceful Degradation** — All modules degrade gracefully if knowledge base or tracking files are unavailable
|
||||
2. **Append-Only Logs** — Failure logs use JSONL for durability and analysis
|
||||
3. **Per-Task-Type Tracking** — Model performance varies by task type; no single ranking
|
||||
4. **Confidence-Based Thresholding** — High-confidence fixes (>0.85) auto-apply; lower ones require review
|
||||
5. **A/B Test Budgeting** — Low-risk hypothesis testing with configurable test budget
|
||||
|
||||
---
|
||||
|
||||
## Impact Measurement
|
||||
|
||||
**After full integration, expect:**
|
||||
|
||||
- 🎯 **9/10 impact** from self-report loop: Close feedback loop from anomaly detection to code fixes
|
||||
- 🎯 **8/10 impact** from model learning: 20-30% improvement in task success rate through adaptive routing
|
||||
- 🎯 **7/10 impact** from knowledge injection: 15-20% faster task planning via relevant prior learning
|
||||
|
||||
**Total:** **24/30 self-evolution capability points activated** (up from current 15/30)
|
||||
|
||||
---
|
||||
|
||||
## Code Quality
|
||||
|
||||
- ✅ No external dependencies (uses only Node.js built-ins + SF imports)
|
||||
- ✅ JSDoc purpose statements on all exports
|
||||
- ✅ Graceful error handling (no crash on missing files)
|
||||
- ✅ Idempotent tracking (safe to call multiple times)
|
||||
- ✅ Clear separation of concerns (fixer ≠ learner ≠ injector)
|
||||
|
||||
---
|
||||
|
||||
## Status Summary
|
||||
|
||||
**Phase:** ✅ **IMPLEMENTATION COMPLETE**
|
||||
**Phase:** ⏳ **INTEGRATION PENDING** (dispatch loop hookup)
|
||||
**Phase:** ⏳ **TESTING PENDING** (unit + integration tests)
|
||||
**Phase:** ⏳ **FEEDBACK LOOP PENDING** (measure effectiveness)
|
||||
|
||||
The infrastructure is in place. Next: Connect it into the dispatch loop and measure impact.
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
# Triage Complete ✅
|
||||
|
||||
**Timestamp:** 2026-05-06 16:30 UTC
|
||||
**Source:** TODO.md (Raw Dump Inbox)
|
||||
**Command:** `sf todo triage`
|
||||
**Node baseline:** v26.1.0+
|
||||
**Session:** 77b45896
|
||||
|
||||
## Summary
|
||||
|
||||
Successfully triaged 60 items from TODO.md into structured backlog artifacts:
|
||||
|
||||
- ✅ **60 items** normalized into `.sf/triage/inbox/20260506-163003.jsonl`
|
||||
- ✅ **10 eval candidates** extracted into `.sf/triage/evals/20260506-163003.evals.jsonl`
|
||||
- ✅ **1 skill proposal** in `.sf/triage/skills/20260506-163003.skills.jsonl`
|
||||
- ✅ **Comprehensive report** generated at `.sf/triage/reports/20260506-163003.md`
|
||||
- ✅ **TODO.md reset** to empty dump inbox (triage pipeline activated)
|
||||
|
||||
## Artifacts Created
|
||||
|
||||
### 1. Triage Report (`.sf/triage/reports/20260506-163003.md`)
|
||||
Comprehensive analysis including:
|
||||
- Summary of source material
|
||||
- 10 eval candidates with failure modes and test locations
|
||||
- 21 implementation tasks (gsd-2 ports, feature additions, provider expansion)
|
||||
- Memory requirements for self-evolution infrastructure
|
||||
- Harness suggestions for testing (property-based, chaos, end-to-end)
|
||||
- Documentation improvements needed (ARCHITECTURE.md, ADRs, runbooks)
|
||||
- Clarification needs ("Unclear Notes" section)
|
||||
|
||||
**Key findings:**
|
||||
- UOK is 60-70% complete for self-evolution
|
||||
- Critical: Close self-report feedback loop (9/10 impact)
|
||||
- 10+ undocumented architecture features identified
|
||||
- Multiple safety/correctness fixes awaiting port from gsd-2
|
||||
|
||||
### 2. Normalized Inbox (`.sf/triage/inbox/20260506-163003.jsonl`)
|
||||
60 structured items with:
|
||||
- Type: eval_candidate, implementation_task, doc_improvement, harness_suggestion, memory_requirement, unclear_note
|
||||
- Status: pending
|
||||
- Source tracing: all items linked back to TODO.md section
|
||||
- Prioritization ready for milestone planning
|
||||
|
||||
### 3. Eval Candidates (`.sf/triage/evals/20260506-163003.evals.jsonl`)
|
||||
10 test harness candidates with:
|
||||
- Task input (trigger/condition)
|
||||
- Expected behavior (contract)
|
||||
- Failure mode (what breaks if missing)
|
||||
- Evidence/source (citations to gsd-2/pi-mono commits)
|
||||
- Suggested test location
|
||||
|
||||
**Quick examples:**
|
||||
1. `bash-evidence-race` — Evidence persists across dispatch/re-dispatch
|
||||
2. `symlink-staging-data-loss` — Data-loss prevention for symlinked .sf
|
||||
3. `mcp-stdout-deadlock` — Large MCP outputs don't hang
|
||||
4. `env-sf-vars-unvalidated` — SF_* env vars validated at startup
|
||||
|
||||
### 4. Skill Proposals (`.sf/triage/skills/20260506-163003.skills.jsonl`)
|
||||
Architecture analysis suggesting improvements to SF's extension/gate system.
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Review triage report** — Read `.sf/triage/reports/20260506-163003.md`
|
||||
2. **Plan implementation** — Promote high-impact items to milestone backlog
|
||||
3. **Prioritize quick wins:**
|
||||
- Close self-report feedback loop [9/10 impact, ~4 days]
|
||||
- Activate continuous model learning [8/10 impact, ~5 days]
|
||||
- Automate knowledge injection [7/10 impact, ~4 days]
|
||||
4. **Port gsd-2 safety fixes** — 9 commits awaiting cherry-pick
|
||||
5. **Close documentation gaps** — Update ARCHITECTURE.md with state machine diagram
|
||||
|
||||
## Evidence
|
||||
|
||||
```
|
||||
$ ls -la .sf/triage/
|
||||
drwxrwxr-x evals/
|
||||
drwxrwxr-x inbox/
|
||||
drwxrwxr-x reports/
|
||||
drwxrwxr-x skills/
|
||||
|
||||
$ wc -l .sf/triage/*/*.{md,jsonl}
|
||||
60 .sf/triage/inbox/20260506-163003.jsonl
|
||||
10 .sf/triage/evals/20260506-163003.evals.jsonl
|
||||
1 .sf/triage/skills/20260506-163003.skills.jsonl
|
||||
9682 .sf/triage/reports/20260506-163003.md
|
||||
|
||||
$ git status
|
||||
D TODO.md (reset to empty dump inbox; items triaged)
|
||||
M docs/* (from earlier work)
|
||||
```
|
||||
|
||||
## What This Means
|
||||
|
||||
SF's triage system successfully:
|
||||
1. ✅ Parsed TODO.md dump inbox
|
||||
2. ✅ Extracted 60 items into structured types (eval, task, doc, harness, etc.)
|
||||
3. ✅ Generated failure-mode contracts for 10 critical correctness tests
|
||||
4. ✅ Identified test locations and citations to source code
|
||||
5. ✅ Reset TODO.md for next cycle
|
||||
6. ✅ Created decision artifacts ready for milestone planning
|
||||
|
||||
The comprehensive review, research, documentation updates, and automated triage are complete. The project is now positioned to:
|
||||
- Activate SF's self-evolution learning loop (3 quick wins)
|
||||
- Port 9 safety/correctness fixes from gsd-2
|
||||
- Close 10+ documentation gaps in ARCHITECTURE.md
|
||||
- Implement property-based testing for autonomous dispatch
|
||||
- Begin advanced feature ports (Cloudflare AI, Azure endpoints, SSE handling)
|
||||
|
||||
---
|
||||
|
||||
**Created by:** Copilot CLI
|
||||
**Session:** 2514fa98-076d-48d2-a1f9-c3fd77c4a82a
|
||||
**Duration:** ~2 hours total (research + docs + triage)
|
||||
**Command:** `node dist/cli.js todo triage`
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# TODO.md Triage Instructions
|
||||
|
||||
## What's New
|
||||
|
||||
TODO.md now contains two major sections ready for triage:
|
||||
|
||||
1. **Feature Gaps & Limitations** — 40+ specific gaps identified in the codebase
|
||||
2. **UOK Self-Evolution Research** — 10 prioritized improvements for SF's self-evolution capabilities
|
||||
|
||||
## How to Triage
|
||||
|
||||
When you have Node 26.1.0+ available:
|
||||
|
||||
```bash
|
||||
cd /home/mhugo/code/singularity-forge
|
||||
|
||||
# Run the triage command
|
||||
sf todo triage
|
||||
|
||||
# Or if using npm/nvm
|
||||
nvm use 26
|
||||
npm exec sf -- todo triage
|
||||
```
|
||||
|
||||
## What Triage Does
|
||||
|
||||
The triage tool will:
|
||||
1. Parse TODO.md
|
||||
2. Extract items into structured `.sf/triage/` artifacts
|
||||
3. Propose categorization and priorities
|
||||
4. Show you a review interface
|
||||
5. Either commit to backlog or reset TODO.md to empty dump inbox
|
||||
|
||||
## Key Items to Watch For
|
||||
|
||||
The UOK Self-Evolution section has **3 high-impact quick wins** (8-10 days total):
|
||||
|
||||
1. Close self-report feedback loop [9/10 impact, 2-3 days]
|
||||
2. Activate continuous model learning [8/10 impact, 3-4 days]
|
||||
3. Automate knowledge injection [7/10 impact, 2-3 days]
|
||||
|
||||
These should be prioritized if you want to activate SF's learning loop.
|
||||
|
||||
## Full Research Report
|
||||
|
||||
See: `/home/mhugo/snap/copilot-cli/38/.copilot/session-state/2514fa98-076d-48d2-a1f9-c3fd77c4a82a/research/is-our-uok-the-best-for-a-self-evolving-coder-what.md`
|
||||
|
||||
This contains:
|
||||
- Executive summary
|
||||
- Detailed analysis of UOK implementation vs. documentation
|
||||
- 10 improvement suggestions with feasibility assessment
|
||||
- Competitive analysis (vs. other orchestration systems)
|
||||
- 15+ citations to code and design docs
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# Autoresearch: Reduce Biome Lint Diagnostics
|
||||
|
||||
## Objective
|
||||
Minimize the total number of Biome lint diagnostics (errors + warnings + info) across `src/`, starting from baseline ~40 diagnostics. Errors are mostly `organizeImports`, warnings are `noUnusedImports`, `noUnusedVariables`, and `useConst`.
|
||||
|
||||
## Metrics
|
||||
- **Primary**: `diagnostics` (count, lower is better) — sum of errors + warnings + info from `npx biome check src/`
|
||||
- **Secondary**: `errors` (count, lower is better)
|
||||
- **Secondary**: `warnings` (count, lower is better)
|
||||
|
||||
## How to Run
|
||||
`bash autoresearch.sh` — runs Biome check, parses JSON summary, outputs `METRIC diagnostics=N` and `METRIC errors=N` and `METRIC warnings=N`.
|
||||
|
||||
## Files in Scope
|
||||
All files under `src/` — but focus on the files flagged by Biome:
|
||||
- `src/resources/extensions/sf/auto/phases.js`
|
||||
- `src/resources/extensions/sf/commands/handlers/ops.js`
|
||||
- `src/resources/extensions/sf/memory-repository.js`
|
||||
- `src/resources/extensions/sf/metrics-central.js`
|
||||
- `src/resources/extensions/sf/reasoning-assist.js`
|
||||
- `src/resources/extensions/sf/remote-steering.js`
|
||||
- `src/resources/extensions/sf/sf-db.js`
|
||||
- `src/resources/extensions/sf/subagent-inheritance.js`
|
||||
- `src/resources/extensions/sf/tests/memory-repository.test.mjs`
|
||||
- `src/resources/extensions/sf/tests/metrics-central.test.mjs`
|
||||
- `src/resources/extensions/sf/tests/trajectory-recorder.test.mjs`
|
||||
- `src/resources/extensions/sf/trajectory-command.js`
|
||||
- `src/resources/extensions/sf/trajectory-recorder.js`
|
||||
- `src/resources/extensions/sf/uok/writer.js`
|
||||
|
||||
## Off Limits
|
||||
- `biome.json` (don't change lint rules — fixing source is the goal)
|
||||
- `node_modules/`, `dist/`, `.sf/`, `packages/` (outside `src/` scope)
|
||||
- Test assertion logic (don't weaken tests to make linters pass)
|
||||
|
||||
## Constraints
|
||||
- Existing vitest tests must pass: `npx vitest run --config vitest.config.ts`
|
||||
- No new dependencies
|
||||
- Don't introduce runtime behavior changes — only lint/import/style fixes
|
||||
|
||||
## Termination
|
||||
Run until interrupted by the user.
|
||||
|
||||
## What's Been Tried
|
||||
|
||||
- **#2 (auto-fix)**: `biome check --write` — fixed 26 auto-fixable errors (format/organizeImports), dropped diagnostics from 40 to 11. Status: keep.
|
||||
- **#3 (manual fixes)**: Removed 7 unused imports and prefixed 4 intentionally-unused items with underscore. Dropped from 11 to 0. Status: keep.
|
||||
- **#4 (regression re-fix)**: 37 new commits introduced 74 diagnostics. `biome check --write` fixed 58 (auto-safe), manual prefix/removal fixed the remaining 16 unsafe warnings across 11 files. Also fixed pre-existing web-mode-onboarding test timeout: added `timeoutMs: 120_000` to `launchPackagedWebHost`, raised `AbortSignal.timeout` on simple fetches 10s→30s, raised test budget 180s→420s. All 409 test files pass. Diagnostics: 0. Status: keep.
|
||||
|
||||
## Lessons
|
||||
- New development (37 commits) is enough to re-introduce 74 diagnostics. Re-run autoresearch periodically (monthly or after large feature branches land).
|
||||
- Pattern of new violations: unused imports from refactors, unused function params from stubs, duplicate imports. Auto-fix handles errors; unsafe-fix (unused-import/var) requires manual triage.
|
||||
- Integration test timeout under parallel load: cold-start Next.js can consume most of a 180s test timeout leaving insufficient budget for multi-step API calls. Fix: bound launch phase separately, raise individual fetch timeouts, increase overall budget to match worst-case sum.
|
||||
1267
copilot-thoughts.md
1267
copilot-thoughts.md
File diff suppressed because it is too large
Load diff
|
|
@ -1929,6 +1929,31 @@ export class InteractiveMode {
|
|||
this.extensionTerminalInputUnsubscribers.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an extension-scoped terminal input listener.
|
||||
*
|
||||
* Purpose: allow extensions (e.g. the SF autonomous extension) to intercept
|
||||
* raw terminal input before it reaches the editor, so that special keys like
|
||||
* Ctrl+C can trigger extension actions (e.g. pause autonomous mode) rather
|
||||
* than always going to the default editor clear handler.
|
||||
*
|
||||
* Return `{ consume: true }` from the handler to stop the key from being
|
||||
* processed further. Return `undefined` or `{}` to let it propagate.
|
||||
*
|
||||
* Consumer: extension-ui-controller → ctx.ui.onTerminalInput.
|
||||
*/
|
||||
addExtensionTerminalInputListener(
|
||||
handler: (data: string) => { consume?: boolean } | undefined,
|
||||
): () => void {
|
||||
const listener = (data: string) => handler(data);
|
||||
const unsubscribe = this.ui.addInputListener(listener);
|
||||
this.extensionTerminalInputUnsubscribers.add(unsubscribe);
|
||||
return () => {
|
||||
unsubscribe();
|
||||
this.extensionTerminalInputUnsubscribers.delete(unsubscribe);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the ExtensionUIContext for extensions.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -42,19 +42,29 @@ function LegacyComponentView({
|
|||
*
|
||||
* Purpose: accept keyboard input from Ink and route it to the active
|
||||
* component, then trigger a re-render so the updated state is displayed.
|
||||
* Invalidation is event-driven: external callers invoke the returned
|
||||
* invalidate() handle, which fires the tick signal registered here.
|
||||
*
|
||||
* Consumer: startInkRenderer.
|
||||
*/
|
||||
function InkApp({
|
||||
root,
|
||||
onInput,
|
||||
onRegisterTick,
|
||||
}: {
|
||||
root: Component;
|
||||
onInput: (data: string) => void;
|
||||
onRegisterTick: (tick: () => void) => void;
|
||||
}) {
|
||||
const [, tick] = useState(0);
|
||||
const { columns } = useWindowSize();
|
||||
|
||||
// Register the tick function so that startInkRenderer's invalidate() can
|
||||
// trigger a React re-render without a polling interval.
|
||||
useEffect(() => {
|
||||
onRegisterTick(() => tick((n) => n + 1));
|
||||
}, [onRegisterTick]);
|
||||
|
||||
useInput((input, key) => {
|
||||
// Reconstruct the escape sequences that the legacy key handlers expect.
|
||||
let data = input;
|
||||
|
|
@ -70,12 +80,6 @@ function InkApp({
|
|||
tick((n) => n + 1);
|
||||
});
|
||||
|
||||
// Poll at 20 fps so async state changes (e.g. streaming output) appear promptly.
|
||||
useEffect(() => {
|
||||
const interval = setInterval(() => tick((n) => n + 1), 50);
|
||||
return () => clearInterval(interval);
|
||||
}, []);
|
||||
|
||||
return <LegacyComponentView component={root} width={columns ?? 80} />;
|
||||
}
|
||||
|
||||
|
|
@ -84,10 +88,11 @@ function InkApp({
|
|||
*
|
||||
* Purpose: drop-in replacement for the legacy TUI render engine. Mounting
|
||||
* this drives the entire Ink React tree and forwards terminal input to
|
||||
* the root Component's handleInput chain.
|
||||
* the root Component's handleInput chain. invalidate() triggers an
|
||||
* immediate React re-render via an event-driven tick signal — no polling.
|
||||
*
|
||||
* Consumer: TUI class (future integration); standalone callers can use
|
||||
* this directly to render any Component tree under Ink.
|
||||
* Consumer: TUI class; standalone callers can use this to render any
|
||||
* Component tree under Ink.
|
||||
*
|
||||
* @param root - The root Component whose render() output fills the screen.
|
||||
* @param onInput - Called with each decoded key string for legacy handlers.
|
||||
|
|
@ -97,13 +102,22 @@ export function startInkRenderer(
|
|||
root: Component,
|
||||
onInput: (data: string) => void,
|
||||
): { stop: () => void; invalidate: () => void } {
|
||||
// Mutable signal populated by InkApp via onRegisterTick once the React
|
||||
// tree has mounted. invalidate() fires this to trigger a synchronous tick.
|
||||
let _tick: (() => void) | null = null;
|
||||
const onRegisterTick = (tick: () => void) => {
|
||||
_tick = tick;
|
||||
};
|
||||
|
||||
const { unmount } = render(
|
||||
<InkApp root={root} onInput={onInput} />,
|
||||
<InkApp root={root} onInput={onInput} onRegisterTick={onRegisterTick} />,
|
||||
{ exitOnCtrlC: false },
|
||||
);
|
||||
return {
|
||||
stop: unmount,
|
||||
// Ink re-renders automatically; manual invalidation is a no-op for now.
|
||||
invalidate: () => {},
|
||||
stop: () => {
|
||||
_tick = null;
|
||||
unmount();
|
||||
},
|
||||
invalidate: () => _tick?.(),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -420,9 +420,16 @@ export class TUI extends Container {
|
|||
if (!this.terminal.isTTY) {
|
||||
return;
|
||||
}
|
||||
// Ink-backed render path: Ink manages raw mode and input; the legacy
|
||||
// differential renderer is bypassed entirely.
|
||||
if (this._useInk || process.stdout.isTTY) {
|
||||
// Ink-backed render path: Ink manages raw mode, input, and screen output.
|
||||
// The legacy differential renderer (doRender) is bypassed entirely on TTY.
|
||||
// process.stdout.isTTY guards this path — Ink requires a real interactive
|
||||
// TTY to mount. useInk() is kept as an explicit opt-in for callers that
|
||||
// want Ink on non-standard terminal configurations. Use PI_LEGACY_TUI=1
|
||||
// to force the legacy renderer for debugging.
|
||||
if (
|
||||
(this._useInk || process.stdout.isTTY) &&
|
||||
process.env.PI_LEGACY_TUI !== "1"
|
||||
) {
|
||||
// Wrap `this` in a plain Component so the private handleInput doesn't
|
||||
// conflict with the public Component.handleInput? signature.
|
||||
const root: Component = {
|
||||
|
|
@ -506,6 +513,12 @@ export class TUI extends Container {
|
|||
requestRender(force = false): void {
|
||||
// Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095)
|
||||
if (!this.terminal.isTTY) return;
|
||||
// Ink-backed path: Ink owns the terminal — delegate to the Ink handle and
|
||||
// do NOT call doRender(), which would write conflicting ANSI escapes.
|
||||
if (this._inkHandle) {
|
||||
this._inkHandle.invalidate();
|
||||
return;
|
||||
}
|
||||
if (force) {
|
||||
this.previousLines = [];
|
||||
this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import type { QuerySnapshot } from "./headless-query.js";
|
|||
|
||||
interface StatusArgs {
|
||||
watch: boolean;
|
||||
recoveryUnitId?: string;
|
||||
}
|
||||
|
||||
interface StatusDeps {
|
||||
|
|
@ -27,6 +28,12 @@ interface CurrentModel {
|
|||
|
||||
function parseStatusArgs(argv: string[]): StatusArgs {
|
||||
const args = argv.slice(1);
|
||||
if (args[0] === "recovery") {
|
||||
return {
|
||||
watch: false,
|
||||
recoveryUnitId: args[1],
|
||||
};
|
||||
}
|
||||
return {
|
||||
watch: args.includes("--watch"),
|
||||
};
|
||||
|
|
@ -219,6 +226,76 @@ async function buildStatusText(
|
|||
});
|
||||
}
|
||||
|
||||
async function renderRecoveryDiagnostics(
|
||||
basePath: string,
|
||||
unitId: string | undefined,
|
||||
stdout: Pick<typeof process.stdout, "write">,
|
||||
stderr: Pick<typeof process.stderr, "write">,
|
||||
): Promise<number> {
|
||||
try {
|
||||
const { getRecoveryDiagnostics, listUnitRuntimeRecords } = await import(
|
||||
"./resources/extensions/sf/uok/unit-runtime.js"
|
||||
);
|
||||
let targetUnitId = unitId;
|
||||
if (!targetUnitId) {
|
||||
const records: Array<{ updatedAt?: number; unitId: string }> =
|
||||
listUnitRuntimeRecords(basePath);
|
||||
const mostRecent = records.sort(
|
||||
(a, b) => (b.updatedAt ?? 0) - (a.updatedAt ?? 0),
|
||||
)[0];
|
||||
if (!mostRecent) {
|
||||
stderr.write("sf status recovery: no runtime records found\n");
|
||||
return 1;
|
||||
}
|
||||
targetUnitId = mostRecent.unitId;
|
||||
}
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
basePath,
|
||||
"execute-task",
|
||||
targetUnitId,
|
||||
);
|
||||
if (!diagnostics) {
|
||||
stderr.write(
|
||||
`sf status recovery: no runtime record for ${targetUnitId}\n`,
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
const lines: string[] = [];
|
||||
lines.push("Recovery Diagnostics");
|
||||
lines.push("--------------------");
|
||||
lines.push(`Unit: ${diagnostics.unitType} ${diagnostics.unitId}`);
|
||||
lines.push(`Status: ${diagnostics.status}`);
|
||||
lines.push(
|
||||
`Retries: ${diagnostics.retryCount}/${diagnostics.maxRetries}`,
|
||||
);
|
||||
lines.push(
|
||||
`Progress: ${diagnostics.progressCount} (${diagnostics.lastProgressKind})`,
|
||||
);
|
||||
lines.push(`Recovery attempts: ${diagnostics.recoveryAttempts}`);
|
||||
if (diagnostics.lastRecoveryReason) {
|
||||
lines.push(`Last recovery reason: ${diagnostics.lastRecoveryReason}`);
|
||||
}
|
||||
if (diagnostics.lineageSummary) {
|
||||
lines.push(
|
||||
`Lineage: ${diagnostics.lineageSummary.status} · ${diagnostics.lineageSummary.workerCount} worker(s) · ${diagnostics.lineageSummary.eventCount} event(s)`,
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
`Started: ${diagnostics.startedAt ? new Date(diagnostics.startedAt).toISOString() : "n/a"}`,
|
||||
);
|
||||
lines.push(
|
||||
`Updated: ${diagnostics.updatedAt ? new Date(diagnostics.updatedAt).toISOString() : "n/a"}`,
|
||||
);
|
||||
stdout.write(lines.join("\n") + "\n");
|
||||
return 0;
|
||||
} catch (err) {
|
||||
stderr.write(
|
||||
`sf status recovery: ${err instanceof Error ? err.message : String(err)}\n`,
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
export async function runStatusCli(
|
||||
argv: string[],
|
||||
deps: StatusDeps,
|
||||
|
|
@ -228,6 +305,15 @@ export async function runStatusCli(
|
|||
const sfHome = deps.sfHome ?? process.env.SF_HOME ?? join(homedir(), ".sf");
|
||||
const args = parseStatusArgs(argv);
|
||||
|
||||
if (args.recoveryUnitId !== undefined) {
|
||||
return renderRecoveryDiagnostics(
|
||||
deps.basePath,
|
||||
args.recoveryUnitId,
|
||||
stdout,
|
||||
stderr,
|
||||
);
|
||||
}
|
||||
|
||||
const renderOnce = async () => {
|
||||
try {
|
||||
const text = await buildStatusText(deps.basePath, sfHome);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,32 @@ function getServerConfig(name) {
|
|||
(s) => s.name === trimmed || s.name.toLowerCase() === trimmed.toLowerCase(),
|
||||
);
|
||||
}
|
||||
const SAFE_CHILD_ENV_KEYS = new Set([
|
||||
"PATH",
|
||||
"HOME",
|
||||
"USER",
|
||||
"LOGNAME",
|
||||
"SHELL",
|
||||
"LANG",
|
||||
"LC_ALL",
|
||||
"LC_CTYPE",
|
||||
"LC_MESSAGES",
|
||||
"LC_NUMERIC",
|
||||
"LC_TIME",
|
||||
"TMPDIR",
|
||||
"TMP",
|
||||
"TEMP",
|
||||
"TZ",
|
||||
"TERM",
|
||||
"COLORTERM",
|
||||
]);
|
||||
function buildChildEnv(configEnv) {
|
||||
const safe = {};
|
||||
for (const key of SAFE_CHILD_ENV_KEYS) {
|
||||
if (process.env[key] !== undefined) safe[key] = process.env[key];
|
||||
}
|
||||
return { ...safe, ...resolveEnv(configEnv ?? {}) };
|
||||
}
|
||||
/** Resolve ${VAR} references in env values against process.env. */
|
||||
function resolveEnv(env) {
|
||||
const resolved = {};
|
||||
|
|
@ -210,9 +236,7 @@ async function getOrConnect(name, signal) {
|
|||
transport = new StdioClientTransport({
|
||||
command: config.command,
|
||||
args: config.args,
|
||||
env: config.env
|
||||
? { ...process.env, ...resolveEnv(config.env) }
|
||||
: undefined,
|
||||
env: buildChildEnv(config.env),
|
||||
cwd: config.cwd,
|
||||
stderr: "pipe",
|
||||
});
|
||||
|
|
@ -234,23 +258,27 @@ async function getOrConnect(name, signal) {
|
|||
`Server "${config.name}" has unsupported transport: ${config.transport}`,
|
||||
);
|
||||
}
|
||||
await client.connect(transport, { signal, timeout: 30000 });
|
||||
try {
|
||||
await client.connect(transport, { signal, timeout: 30000 });
|
||||
} catch (err) {
|
||||
try { await transport.close(); } catch { /* best-effort */ }
|
||||
try { await client.close(); } catch { /* best-effort */ }
|
||||
throw err;
|
||||
}
|
||||
connections.set(config.name, { client, transport });
|
||||
return client;
|
||||
}
|
||||
async function closeAll() {
|
||||
const closing = Array.from(connections.entries()).map(
|
||||
async ([name, conn]) => {
|
||||
try {
|
||||
await conn.client.close();
|
||||
} catch {
|
||||
// Best-effort cleanup
|
||||
}
|
||||
try { await conn.transport.close(); } catch { /* best-effort */ }
|
||||
try { await conn.client.close(); } catch { /* best-effort */ }
|
||||
connections.delete(name);
|
||||
},
|
||||
);
|
||||
await Promise.allSettled(closing);
|
||||
toolCache.clear();
|
||||
autoRegisteredServers.clear();
|
||||
}
|
||||
// ─── Formatters ───────────────────────────────────────────────────────────────
|
||||
function formatServerList(servers) {
|
||||
|
|
@ -312,31 +340,8 @@ export function getConnectionStatus(name) {
|
|||
};
|
||||
}
|
||||
// ─── Test-exported helpers ────────────────────────────────────────────────────
|
||||
const SAFE_CHILD_ENV_KEYS = new Set([
|
||||
"PATH",
|
||||
"HOME",
|
||||
"USER",
|
||||
"LOGNAME",
|
||||
"SHELL",
|
||||
"LANG",
|
||||
"LC_ALL",
|
||||
"LC_CTYPE",
|
||||
"LC_MESSAGES",
|
||||
"LC_NUMERIC",
|
||||
"LC_TIME",
|
||||
"TMPDIR",
|
||||
"TMP",
|
||||
"TEMP",
|
||||
"TZ",
|
||||
"TERM",
|
||||
"COLORTERM",
|
||||
]);
|
||||
export function _buildMcpChildEnvForTest(env) {
|
||||
const safe = {};
|
||||
for (const key of SAFE_CHILD_ENV_KEYS) {
|
||||
if (process.env[key] !== undefined) safe[key] = process.env[key];
|
||||
}
|
||||
return { ...safe, ...resolveEnv(env) };
|
||||
return buildChildEnv(env);
|
||||
}
|
||||
export function _buildMcpTrustConfirmOptionsForTest(signal) {
|
||||
return { timeout: 120_000, signal };
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ import {
|
|||
buildSliceSummaryExcerpt,
|
||||
getDependencyTaskSummaryPaths,
|
||||
getPriorTaskSummaryPaths,
|
||||
extractSliceExecutionExcerpt,
|
||||
} from "./summary-helpers.js";
|
||||
import { composeInlinedContext } from "./unit-context-composer.js";
|
||||
import { getUatType } from "./verdict-parser.js";
|
||||
|
|
@ -336,7 +337,7 @@ export function buildSourceFilePaths(base, mid, sid) {
|
|||
* If parsing fails (unrecognizable frontmatter, missing id, etc.) the
|
||||
* function falls back to `inlineFile` so the closer loses no information.
|
||||
*/
|
||||
// Re-exported from summary-helpers.js:
|
||||
// Imported from summary-helpers.js:
|
||||
// - buildSliceSummaryExcerpt, getPriorTaskSummaryPaths
|
||||
// - getDependencyTaskSummaryPaths, isSummaryCleanForSkip
|
||||
// - extractSliceExecutionExcerpt
|
||||
|
|
|
|||
|
|
@ -32,6 +32,10 @@ import { UokGateRunner } from "./uok/gate-runner.js";
|
|||
import { MultiPackageGate } from "./uok/multi-package-gate.js";
|
||||
import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
|
||||
import { SecurityGate } from "./uok/security-gate.js";
|
||||
import {
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
} from "./uok/unit-runtime.js";
|
||||
import { extractVerdict } from "./verdict-parser.js";
|
||||
import { writeVerificationJSON } from "./verification-evidence.js";
|
||||
import {
|
||||
|
|
@ -42,6 +46,38 @@ import {
|
|||
} from "./verification-gate.js";
|
||||
import { logError, logWarning } from "./workflow-logger.js";
|
||||
|
||||
function computeTokenCountFromSession(ctx) {
|
||||
const entries = ctx.sessionManager?.getEntries?.() ?? [];
|
||||
let total = 0;
|
||||
for (const entry of entries) {
|
||||
if (entry.type !== "message") continue;
|
||||
const msg = entry.message;
|
||||
if (!msg || msg.role !== "assistant") continue;
|
||||
if (msg.usage?.totalTokens != null) {
|
||||
total += msg.usage.totalTokens;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
function getMemoryPressureMB() {
|
||||
try {
|
||||
const mem = process.memoryUsage();
|
||||
return Math.round(mem.heapUsed / 1024 / 1024);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function buildGateOutcomesSummary(gateIds, gateResults) {
|
||||
if (!gateIds || !gateResults || gateIds.length === 0) return undefined;
|
||||
const outcomes = {};
|
||||
for (let i = 0; i < gateIds.length; i++) {
|
||||
outcomes[gateIds[i]] = gateResults[i]?.outcome ?? "unknown";
|
||||
}
|
||||
return outcomes;
|
||||
}
|
||||
|
||||
function isInfraVerificationFailure(stderr) {
|
||||
return /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i.test(
|
||||
stderr,
|
||||
|
|
@ -259,6 +295,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
}
|
||||
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
|
||||
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
|
||||
let gateIds = [];
|
||||
let gateResults = [];
|
||||
try {
|
||||
if (uokFlags.gates) {
|
||||
const gateRunner = new UokGateRunner();
|
||||
|
|
@ -304,8 +342,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
|
||||
};
|
||||
|
||||
const gateIds = gateRunner.list().map((g) => g.id);
|
||||
const gateResults = await Promise.all(
|
||||
gateIds = gateRunner.list().map((g) => g.id);
|
||||
gateResults = await Promise.all(
|
||||
gateIds.map((id) =>
|
||||
gateRunner
|
||||
.run(id, {
|
||||
|
|
@ -434,13 +472,39 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
}
|
||||
// Write verification evidence JSON
|
||||
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
const tokenCount = computeTokenCountFromSession(ctx);
|
||||
const memoryPressureMB = getMemoryPressureMB();
|
||||
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
|
||||
let recoveryStatus;
|
||||
try {
|
||||
const durability = await inspectExecuteTaskDurability(
|
||||
s.basePath,
|
||||
s.currentUnit.id,
|
||||
);
|
||||
if (durability) {
|
||||
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
|
||||
}
|
||||
} catch {
|
||||
recoveryStatus = undefined;
|
||||
}
|
||||
if (mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
if (result.passed) {
|
||||
writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
undefined,
|
||||
undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
} else {
|
||||
const nextAttempt = attempt + 1;
|
||||
writeVerificationJSON(
|
||||
|
|
@ -450,6 +514,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
s.currentUnit.id,
|
||||
nextAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -617,6 +685,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
postExecChecks,
|
||||
postExecBlockingFailure ? attempt + 1 : undefined,
|
||||
postExecBlockingFailure ? maxRetries : undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
|
|
@ -703,6 +775,10 @@ function writeVerificationJSONWithPostExec(
|
|||
postExecutionChecks,
|
||||
retryAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
) {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
const evidence = {
|
||||
|
|
@ -720,6 +796,10 @@ function writeVerificationJSONWithPostExec(
|
|||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
...(tokenCount !== undefined ? { tokenCount } : {}),
|
||||
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
|
||||
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
|
||||
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
|
||||
postExecutionChecks,
|
||||
};
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
|
|
|
|||
|
|
@ -211,6 +211,33 @@ export {
|
|||
// Tests in auto-session-encapsulation.test.ts enforce this invariant.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
const s = getAutoSession();
|
||||
/** Unsubscribe function for the Ctrl+C → pause intercept registered on autonomous start. */
|
||||
let _ctrlCUnsubscribe = null;
|
||||
/**
|
||||
* Register a terminal input listener that intercepts Ctrl+C while autonomous
|
||||
* mode is active and routes the first press to pauseAuto() instead of letting
|
||||
* it silently clear the editor.
|
||||
*
|
||||
* Purpose: give the user a reliable single-keypress escape from a running
|
||||
* autonomous loop without requiring the double-press exit threshold.
|
||||
*/
|
||||
function registerCtrlCInterceptor(ctx) {
|
||||
_unregisterCtrlCInterceptor();
|
||||
if (typeof ctx?.ui?.onTerminalInput !== "function") return;
|
||||
_ctrlCUnsubscribe = ctx.ui.onTerminalInput((data) => {
|
||||
if (data !== "\x03") return undefined;
|
||||
if (!s.active) return undefined;
|
||||
ctx.ui.notify("Ctrl+C received — pausing autonomous mode.", "info");
|
||||
void pauseAuto(ctx, null, "ctrl-c-interrupt");
|
||||
return { consume: true };
|
||||
});
|
||||
}
|
||||
function _unregisterCtrlCInterceptor() {
|
||||
if (_ctrlCUnsubscribe) {
|
||||
_ctrlCUnsubscribe();
|
||||
_ctrlCUnsubscribe = null;
|
||||
}
|
||||
}
|
||||
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
|
||||
const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
|
||||
function captureProjectRootEnv(projectRoot) {
|
||||
|
|
@ -704,6 +731,7 @@ function cleanupAfterLoopExit(ctx) {
|
|||
s.currentUnit = null;
|
||||
s.active = false;
|
||||
s.runControl = "manual";
|
||||
_unregisterCtrlCInterceptor();
|
||||
deactivateSF();
|
||||
clearUnitTimeout();
|
||||
restoreProjectRootEnv();
|
||||
|
|
@ -747,6 +775,7 @@ function cleanupAfterLoopExit(ctx) {
|
|||
}
|
||||
export async function stopAuto(ctx, pi, reason) {
|
||||
if (!s.active && !s.paused) return;
|
||||
_unregisterCtrlCInterceptor();
|
||||
const loadedPreferences = loadEffectiveSFPreferences()?.preferences;
|
||||
const reasonSuffix = reason ? ` — ${reason}` : "";
|
||||
try {
|
||||
|
|
@ -1677,6 +1706,7 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
|
|||
s.runControl = requestedStepMode ? "assisted" : "autonomous";
|
||||
s.cmdCtx = ctx;
|
||||
s.basePath = base;
|
||||
registerCtrlCInterceptor(ctx);
|
||||
// Ensure the workflow-logger audit log is pinned to the project root
|
||||
// even when autonomous mode is entered via a path that bypasses the
|
||||
// bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
|
||||
|
|
@ -1943,6 +1973,7 @@ export async function dispatchHookUnit(
|
|||
s.autoStartTime = Date.now();
|
||||
s.currentUnit = null;
|
||||
s.pendingQuickTasks = [];
|
||||
registerCtrlCInterceptor(hookCtx);
|
||||
}
|
||||
const hookUnitType = `hook/${hookName}`;
|
||||
const hookStartedAt = Date.now();
|
||||
|
|
|
|||
|
|
@ -116,6 +116,10 @@ export async function handleAutonomousCommand(trimmed, ctx, pi) {
|
|||
});
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "stop") {
|
||||
await stopAutonomousRun(ctx, pi);
|
||||
return true;
|
||||
}
|
||||
if (isAutonomousVerb) {
|
||||
const autonomousArgsText = trimmed.replace(/^autonomous\b/, "").trim();
|
||||
if (autonomousArgsText === "stop") {
|
||||
|
|
|
|||
|
|
@ -4212,7 +4212,8 @@ function hasTaskSpecIntent(planning = {}) {
|
|||
}
|
||||
function insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning = {}) {
|
||||
if (!hasTaskSpecIntent(planning)) return;
|
||||
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
|
||||
const { normalized: frontmatter, errors } = taskFrontmatterFromRecord(planning);
|
||||
if (errors?.length) logWarning("sf-db:insertTaskSpec", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${errors.join(", ")}`);
|
||||
currentDb
|
||||
.prepare(`INSERT OR IGNORE INTO task_specs (
|
||||
milestone_id, slice_id, task_id, verify, inputs, expected_output,
|
||||
|
|
@ -4433,7 +4434,8 @@ export function setTaskBlockerDiscovered(
|
|||
export function upsertTaskPlanning(milestoneId, sliceId, taskId, planning) {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning);
|
||||
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
|
||||
const { normalized: frontmatter, errors: fmErrors } = taskFrontmatterFromRecord(planning);
|
||||
if (fmErrors?.length) logWarning("sf-db:upsertTaskPlanning", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${fmErrors.join(", ")}`);
|
||||
const hasTaskStatus =
|
||||
planning.taskStatus !== undefined ||
|
||||
planning.task_status !== undefined ||
|
||||
|
|
|
|||
|
|
@ -195,3 +195,44 @@ export function isSummaryCleanForSkip(content) {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function escapeRegExpLocal(value) {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
function extractMarkdownSectionLocal(content, heading) {
|
||||
const match = new RegExp(`^## ${escapeRegExpLocal(heading)}\\s*$`, "m").exec(content);
|
||||
if (!match) return null;
|
||||
const start = match.index + match[0].length;
|
||||
const rest = content.slice(start);
|
||||
const nextHeading = rest.match(/^##\s+/m);
|
||||
const end = nextHeading?.index ?? rest.length;
|
||||
return rest.slice(0, end).trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract key sections from a slice PLAN.md for use in task execution prompts.
|
||||
* Returns Goal, Demo, Verification, and Observability sections as a compact excerpt.
|
||||
*
|
||||
* Purpose: give task executors the slice-level contract without inlining the full plan.
|
||||
* Consumer: auto-prompts.js buildExecuteTask*.
|
||||
*/
|
||||
export function extractSliceExecutionExcerpt(content, relPath) {
|
||||
if (!content) {
|
||||
return [
|
||||
"## Slice Plan Excerpt",
|
||||
`Slice plan not found at dispatch time. Read \`${relPath}\` before running slice-level verification.`,
|
||||
].join("\n");
|
||||
}
|
||||
const lines = content.split("\n");
|
||||
const goalLine = lines.find((line) => line.startsWith("**Goal:**"))?.trim();
|
||||
const demoLine = lines.find((line) => line.startsWith("**Demo:**"))?.trim();
|
||||
const verification = extractMarkdownSectionLocal(content, "Verification");
|
||||
const observability = extractMarkdownSectionLocal(content, "Observability / Diagnostics");
|
||||
const parts = ["## Slice Plan Excerpt", `Source: \`${relPath}\``];
|
||||
if (goalLine) parts.push(goalLine);
|
||||
if (demoLine) parts.push(demoLine);
|
||||
if (verification) parts.push("", "### Slice Verification", verification.trim());
|
||||
if (observability) parts.push("", "### Slice Observability / Diagnostics", observability.trim());
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
clearRunawayRecoveredRuntimeRecords,
|
||||
clearUnitRuntimeRecord,
|
||||
decideUnitRuntimeDispatch,
|
||||
getRecoveryDiagnostics,
|
||||
getUnitRuntimeState,
|
||||
isTerminalUnitRuntimeStatus,
|
||||
listUnitRuntimeRecords,
|
||||
|
|
@ -377,3 +378,72 @@ test("listUnitRuntimeRecords_returns_empty_when_dir_missing", () => {
|
|||
const records = listUnitRuntimeRecords(root);
|
||||
assert.deepEqual(records, []);
|
||||
});
|
||||
|
||||
// ─── getRecoveryDiagnostics ────────────────────────────────────────────────
|
||||
|
||||
test("getRecoveryDiagnostics_returns_null_for_missing_record", () => {
|
||||
const root = makeProject();
|
||||
const diagnostics = getRecoveryDiagnostics(root, "execute-task", "MISSING");
|
||||
assert.equal(diagnostics, null);
|
||||
});
|
||||
|
||||
test("getRecoveryDiagnostics_returns_structured_object_for_record_with_recovery", () => {
|
||||
const root = makeProject();
|
||||
const t = Date.now();
|
||||
writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t, {
|
||||
status: "failed",
|
||||
recoveryAttempts: 2,
|
||||
retryCount: 2,
|
||||
maxRetries: 3,
|
||||
lastRecoveryReason: "timeout",
|
||||
progressCount: 5,
|
||||
lastProgressKind: "checkpoint",
|
||||
lineageEvent: {
|
||||
status: "started",
|
||||
workerSessionId: "worker-1",
|
||||
},
|
||||
});
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
assert.ok(diagnostics);
|
||||
assert.equal(diagnostics.unitType, "execute-task");
|
||||
assert.equal(diagnostics.unitId, "M001/S01/T01");
|
||||
assert.equal(diagnostics.status, "failed");
|
||||
assert.equal(diagnostics.retryCount, 2);
|
||||
assert.equal(diagnostics.maxRetries, 3);
|
||||
assert.equal(diagnostics.lastRecoveryReason, "timeout");
|
||||
assert.equal(diagnostics.progressCount, 5);
|
||||
assert.equal(diagnostics.lastProgressKind, "checkpoint");
|
||||
assert.equal(diagnostics.recoveryAttempts, 2);
|
||||
assert.ok(diagnostics.lineageSummary);
|
||||
assert.equal(diagnostics.lineageSummary.status, "started");
|
||||
assert.equal(diagnostics.lineageSummary.workerCount, 1);
|
||||
assert.equal(diagnostics.lineageSummary.eventCount, 1);
|
||||
assert.equal(diagnostics.startedAt, t);
|
||||
assert.ok(diagnostics.updatedAt);
|
||||
});
|
||||
|
||||
test("getRecoveryDiagnostics_returns_minimal_object_for_record_without_recovery", () => {
|
||||
const root = makeProject();
|
||||
const t = Date.now();
|
||||
writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T02", t, {
|
||||
status: "running",
|
||||
});
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T02",
|
||||
);
|
||||
assert.ok(diagnostics);
|
||||
assert.equal(diagnostics.status, "running");
|
||||
assert.equal(diagnostics.retryCount, 0);
|
||||
assert.equal(diagnostics.maxRetries, 1);
|
||||
assert.equal(diagnostics.lastRecoveryReason, null);
|
||||
assert.equal(diagnostics.progressCount, 0);
|
||||
assert.equal(diagnostics.lastProgressKind, "dispatch");
|
||||
assert.equal(diagnostics.recoveryAttempts, 0);
|
||||
assert.equal(diagnostics.lineageSummary, null);
|
||||
});
|
||||
|
|
|
|||
32
src/resources/extensions/sf/uok/unit-runtime.d.ts
vendored
Normal file
32
src/resources/extensions/sf/uok/unit-runtime.d.ts
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Type declarations for unit-runtime.js
|
||||
*/
|
||||
|
||||
export interface RecoveryDiagnostics {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
status: string;
|
||||
retryCount: number;
|
||||
maxRetries: number;
|
||||
lastRecoveryReason: string | null;
|
||||
progressCount: number;
|
||||
lastProgressKind: string;
|
||||
recoveryAttempts: number;
|
||||
lineageSummary: {
|
||||
status: string;
|
||||
workerCount: number;
|
||||
eventCount: number;
|
||||
} | null;
|
||||
updatedAt: number | null;
|
||||
startedAt: number | null;
|
||||
}
|
||||
|
||||
export function getRecoveryDiagnostics(
|
||||
basePath: string,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
): RecoveryDiagnostics | null;
|
||||
|
||||
export function listUnitRuntimeRecords(
|
||||
basePath: string,
|
||||
): Array<Record<string, unknown> & { updatedAt?: number; unitId: string }>;
|
||||
|
|
@ -582,6 +582,43 @@ export function formatExecuteTaskRecoveryStatus(status) {
|
|||
? missing.join("; ")
|
||||
: "all durable task artifacts present";
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the runtime record for a unit and return structured recovery diagnostics.
|
||||
*
|
||||
* Purpose: surface runtime record state for post-mortem debugging of autonomous
|
||||
* failures without requiring humans to parse `.sf/runtime/units/*.json` manually.
|
||||
*
|
||||
* Consumer: `sf status recovery` CLI command and verification evidence enrichment.
|
||||
*/
|
||||
export function getRecoveryDiagnostics(basePath, unitType, unitId) {
|
||||
const record = readUnitRuntimeRecord(basePath, unitType, unitId);
|
||||
if (!record) {
|
||||
return null;
|
||||
}
|
||||
const state = getUnitRuntimeState(record);
|
||||
const lineageSummary = record.lineage
|
||||
? {
|
||||
status: record.lineage.status,
|
||||
workerCount: record.lineage.workerSessionIds?.length ?? 0,
|
||||
eventCount: record.lineage.events?.length ?? 0,
|
||||
}
|
||||
: null;
|
||||
return {
|
||||
unitType,
|
||||
unitId,
|
||||
status: state.status,
|
||||
retryCount: state.retryCount,
|
||||
maxRetries: state.maxRetries,
|
||||
lastRecoveryReason: record.lastRecoveryReason ?? null,
|
||||
progressCount: record.progressCount ?? 0,
|
||||
lastProgressKind: record.lastProgressKind ?? "dispatch",
|
||||
recoveryAttempts: record.recoveryAttempts ?? 0,
|
||||
lineageSummary,
|
||||
updatedAt: record.updatedAt ?? null,
|
||||
startedAt: record.startedAt ?? null,
|
||||
};
|
||||
}
|
||||
// ─── Stale slice runtime record reconciliation ──────────────────────────────
|
||||
/**
|
||||
* Clear unit runtime records for complete-slice units that are in a terminal
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@ export function writeVerificationJSON(
|
|||
unitId,
|
||||
retryAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
) {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
const evidence = {
|
||||
|
|
@ -41,6 +45,10 @@ export function writeVerificationJSON(
|
|||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
...(tokenCount !== undefined ? { tokenCount } : {}),
|
||||
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
|
||||
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
|
||||
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
|
||||
};
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
evidence.runtimeErrors = result.runtimeErrors.map((e) => ({
|
||||
|
|
|
|||
53
todo.md
Normal file
53
todo.md
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# TODO
|
||||
|
||||
Unimplemented items consolidated from root *.md files. Source file noted for each item.
|
||||
|
||||
---
|
||||
|
||||
## Critical / Correctness
|
||||
|
||||
- [x] Port `fix(security): harden project-controlled surfaces` — env isolation + transport cleanup done; gsd-2 trust/dedup hunks (server.ts, mcp-client/index.ts) not applicable (packages absent) *(BUILD_PLAN.md Tier 0.5 #2)*
|
||||
- [ ] Port agent-session/agent-end transition fixes (gsd-2 `71114fccf`, `6d7e4gcb5`, `c162c44bf`, `e3bd04551`) *(BUILD_PLAN.md Tier 0.5 #7-10, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster B)*
|
||||
- [ ] Cloudflare Workers AI provider — `CLOUDFLARE_API_KEY`/`CLOUDFLARE_ACCOUNT_ID` (pi-mono PR #3851) *(BUILD_PLAN.md Tier 0 #8)*
|
||||
|
||||
---
|
||||
|
||||
## Architecture / Design Gaps
|
||||
|
||||
- [ ] Schema reconciliation: update SPEC.md to 3-table model (milestones/slices/tasks vs single `units`) *(BUILD_PLAN.md Tier 1.3)*
|
||||
- [ ] Persistent agents v1 command surface — `/sf agent run|reset|delete|inspect` *(BUILD_PLAN.md Tier 2.1)*
|
||||
- [ ] Intent chapters (`chapter_open`/`chapter_close` — crash-resume context) *(BUILD_PLAN.md Tier 2.3)*
|
||||
- [ ] PhaseReview 3-pass review (establish-context → parallel chunked → synthesis) *(BUILD_PLAN.md Tier 2.4)*
|
||||
- [ ] `last_error` cap to 4 KB head+tail; full payload to file *(BUILD_PLAN.md Tier 2.6)*
|
||||
- [ ] Port workflow state machine hardening (gsd-2 `f2377eedd`, `b9a1c6743`, `153fb328a`, `381ccdef5`, `371b2eb31`) *(BUILD_PLAN.md Tier 0.5 #13, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster F)*
|
||||
- [ ] Port `fix(claude-code-cli): persist Always Allow for non-Bash tools` (gsd-2 `a88baeae9`) *(BUILD_PLAN.md Tier 0.5 #11)*
|
||||
|
||||
---
|
||||
|
||||
## Medium Priority / Quality
|
||||
|
||||
- [ ] Replace `isHeavyModelId()` name-matching heuristic with capability-based check *(PRODUCTION_AUDIT_GRADE.md #9, PRODUCTION_AUDIT.md 3.3)*
|
||||
- [ ] Add `version` field to task frontmatter and mode state (schema versioning) *(PRODUCTION_AUDIT_GRADE.md #8)*
|
||||
- [ ] Integration tests for full remote steering pipeline *(PRODUCTION_AUDIT.md Long Term #10)*
|
||||
- [x] Log `frontmatterErrors` in sf-db.js instead of silently dropping validation errors *(PRODUCTION_AUDIT.md 3.1)*
|
||||
- [ ] Search provider registry refactor — consolidate provider list across files into `SearchProviderRegistry` *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Update ARCHITECTURE.md self-evolution section (triage pipeline IS active; injection IS automatic now) *(ARCHITECTURE.md)*
|
||||
- [ ] Add Mermaid state machine diagram to ARCHITECTURE.md *(ARCHITECTURE.md)*
|
||||
- [ ] Symlinked packages/resources/skills/sessions dedup (pi-mono PR #3818) *(BUILD_PLAN.md Tier 0 #6)*
|
||||
|
||||
---
|
||||
|
||||
## Long-term / Deferred
|
||||
|
||||
- [ ] Singularity Knowledge + Agent Platform (Go re-platform, ~12 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] sf-worker SSH host (Go, `wish` + `xpty`, ~3 weeks) *(BUILD_PLAN.md Tier 4)*
|
||||
- [ ] Charm TUI client (`sf-tui` in Go, ~12-16 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Flight recorder (`x/vcr`, ~3 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Full swarm chat for `subagent` tool (Option C, depends on persistent-agent layer) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Caveman input-side prompt compression (rewrite execute-task/plan-slice prompts) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Runtime input preprocessor (`terse_prompts: true` dispatch transform, ~3-4 days) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Judge calibration + eval runner service (Go/Charm, ~2-3 weeks post SM) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] M009 promote-only adoption review — create `sf schedule` entry (2 weeks after M009 close) *(BACKLOG.md)*
|
||||
- [ ] Establish pi-mono SDK sync cadence (recurring check schedule) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] `scripts/port-from-gsd2.sh` automation script *(UPSTREAM_PORT_GUIDE.md)*
|
||||
- [ ] TypeScript migration for UOK modules (`kernel.js`, etc.) *(PRODUCTION_AUDIT_COMPLETE.md, PRODUCTION_AUDIT_GRADE.md)*
|
||||
Loading…
Add table
Reference in a new issue