sf snapshot: uncommitted changes after 56m inactivity

This commit is contained in:
Mikael Hugo 2026-05-10 17:16:30 +02:00
parent 37ebfcf53a
commit 3ffd882c8c
33 changed files with 813 additions and 2437 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -86,5 +86,17 @@
"successRate": 1,
"total": 2
}
},
"execute-task": {
"minimax/MiniMax-M2.7-highspeed": {
"successes": 1,
"failures": 0,
"timeouts": 0,
"totalTokens": 12233288,
"totalCost": 0.3431336426,
"lastUsed": "2026-05-10T15:16:08.120Z",
"successRate": 1,
"total": 1
}
}
}

View file

@ -0,0 +1,244 @@
[
{
"kind": "edit",
"toolCallId": "tool_Ca8DVl3y7fTqNLC5XPq0Pwk1",
"path": "src/resources/extensions/sf/verification-evidence.js",
"timestamp": 1778424928174
},
{
"kind": "edit",
"toolCallId": "tool_xVkmHZTHvJP7RwJWVqUORsz4",
"path": "src/resources/extensions/sf/verification-evidence.js",
"timestamp": 1778424949674
},
{
"kind": "edit",
"toolCallId": "tool_EiDPzaZO49a4LKnYvuvFaS8e",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778424983294
},
{
"kind": "edit",
"toolCallId": "tool_pNt9nP10Us3CPrsqlnWwtQ8l",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778425005515
},
{
"kind": "edit",
"toolCallId": "tool_Bl3x74Ojz6aenqD3nYqxkdlO",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778425108830
},
{
"kind": "edit",
"toolCallId": "tool_RHLdM0SZK4ffIIokuqNruHbn",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778425162119
},
{
"kind": "edit",
"toolCallId": "tool_mAdgaYCgksHmjAI45ZuSnMk5",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778425187240
},
{
"kind": "edit",
"toolCallId": "tool_HMsSokItiWF9y6ctKvFSkyE3",
"path": "src/resources/extensions/sf/auto-verification.js",
"timestamp": 1778425206204
},
{
"kind": "edit",
"toolCallId": "tool_Jbd8uJQ6ZV4PeF8P91s2OvFG",
"path": "src/resources/extensions/sf/uok/unit-runtime.js",
"timestamp": 1778425258651
},
{
"kind": "edit",
"toolCallId": "tool_m1a9UNWqpwBIJvzB9LtlVTBN",
"path": "src/cli-status.ts",
"timestamp": 1778425298174
},
{
"kind": "edit",
"toolCallId": "tool_zcSH4Fx3bOumjphAgYisPyhE",
"path": "src/cli-status.ts",
"timestamp": 1778425348170
},
{
"kind": "edit",
"toolCallId": "tool_UjLWJsxhCI2bAt3kYl4QEhNK",
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"timestamp": 1778425381561
},
{
"kind": "edit",
"toolCallId": "tool_1owe7a26pVq3k18x59p6Sy1J",
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"timestamp": 1778425414995
},
{
"kind": "bash",
"toolCallId": "tool_zgJvYKclL8xmY8DE4c8nluTk",
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts -t \"getRecoveryDiagnostics\"",
"exitCode": 0,
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[2m)\u001b[22m\u001b[32m 33\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m3 passed\u001b[39m\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:03:54\n\u001b[2m Duration \u001b[22m 1.01s\u001b[2m (transfor",
"timestamp": 1778425433765
},
{
"kind": "bash",
"toolCallId": "tool_c9hBKD3D1Ojayl25rr7Mu3UW",
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
"exitCode": 0,
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 140\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:04:12\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 552ms, setup 0ms, import 742ms, tests 140ms, environment 0ms",
"timestamp": 1778425451405
},
{
"kind": "bash",
"toolCallId": "tool_jrt7lAB8i6qBOnOePlNhiFR2",
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"exitCode": 1,
"outputSnippet": "src/cli-status.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 240 240 │ if (!targetUnitId) {\n 241 241 │ const records = listUnitRuntimeRecords(basePath);\n 242 │ - → → → const·mostRecent·=·records.sort((a,·b)·=>\n 243 │ - → → → → (b.updatedAt·??·0)·-·(a.updatedAt·??·0),\n 242 │ + → → → const·mostRecent·=·records.sort(\n 243 │ + → → → → (a,·b)·=>·(b.updat",
"timestamp": 1778425466821
},
{
"kind": "bash",
"toolCallId": "tool_lpaw0h5RHFke7wyq5gw8sJKp",
"command": "npx biome check --write src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"exitCode": 0,
"outputSnippet": "Checked 5 files in 160ms. Fixed 3 files.\n",
"timestamp": 1778425501417
},
{
"kind": "bash",
"toolCallId": "tool_so1U8wV32tA70qs55EtB7Pke",
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
"exitCode": 0,
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 152\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:05:15\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 554ms, setup 0ms, import 758ms, tests 152ms, environment 0ms",
"timestamp": 1778425514641
},
{
"kind": "bash",
"toolCallId": "tool_xBsWuLSrGmQHcVUWydLtioru",
"command": "npx tsc --noEmit src/cli-status.ts --skipLibCheck --module NodeNext --moduleResolution NodeNext --target ES2022 --esModuleInterop --allowJs --strict 2>&1 | head -30",
"exitCode": 0,
"outputSnippet": "error TS5112: tsconfig.json is present but will not be loaded if files are specified on commandline. Use '--ignoreConfig' to skip this error.\n",
"timestamp": 1778425532429
},
{
"kind": "bash",
"toolCallId": "tool_lnBQh90rfwsYyVvqmIdq8pH7",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
"exitCode": 0,
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\nsrc/cli-status.ts(243,6): error TS7006: Parameter 'a' implicitly has an 'any' type.\nsrc/cli-status.ts(243,9): error TS7006: Parameter 'b' implicitly has an 'any' type.\n",
"timestamp": 1778425546375
},
{
"kind": "bash",
"toolCallId": "tool_L5EQX39mZFzmJbRkCACrnkcM",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \" Could not find a declaration file\" | head -10",
"exitCode": 0,
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
"timestamp": 1778425700527
},
{
"kind": "edit",
"toolCallId": "tool_bMteIOMYNzppZgJRP2bDK14p",
"path": "src/cli-status.ts",
"timestamp": 1778425760653
},
{
"kind": "bash",
"toolCallId": "tool_kja8VArplGz7aqlFb1rZvh9M",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
"exitCode": 0,
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
"timestamp": 1778425779047
},
{
"kind": "bash",
"toolCallId": "tool_xAMnWlYjFmPHJj2zb3YklZhI",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \"TS7016\" | head -20",
"exitCode": 0,
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
"timestamp": 1778425808025
},
{
"kind": "bash",
"toolCallId": "tool_N2K96iYZY9GgeXRHbsVIDJFn",
"command": "ls src/resources/extensions/sf/autonomous-command-args.* src/resources/extensions/sf/repo-identity.* src/resources/extensions/sf/trace-collector.*",
"exitCode": 0,
"outputSnippet": "src/resources/extensions/sf/autonomous-command-args.d.ts\nsrc/resources/extensions/sf/autonomous-command-args.js\nsrc/resources/extensions/sf/repo-identity.d.ts\nsrc/resources/extensions/sf/repo-identity.js\nsrc/resources/extensions/sf/trace-collector.d.ts\nsrc/resources/extensions/sf/trace-collector.js\n",
"timestamp": 1778425827433
},
{
"kind": "write",
"toolCallId": "tool_d6oIGASndyhbsF3ethH49BBC",
"path": "src/resources/extensions/sf/uok/unit-runtime.d.ts",
"timestamp": 1778425847941
},
{
"kind": "bash",
"toolCallId": "tool_8ipcG5bydJIBFOOfadbuRNzl",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|unit-runtime\" | head -20",
"exitCode": 0,
"outputSnippet": "(no output)",
"timestamp": 1778425863933
},
{
"kind": "bash",
"toolCallId": "tool_oas0rGaFmZW4i6dQZDfNbIOj",
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | head -20",
"exitCode": 0,
"outputSnippet": "(no output)",
"timestamp": 1778425882297
},
{
"kind": "bash",
"toolCallId": "tool_bWplHSK7Xw5d98Gzhf69rI3E",
"command": "npx tsc --noEmit --project tsconfig.extensions.json --skipLibCheck 2>&1 | head -20",
"exitCode": 0,
"outputSnippet": "packages/tui/src/index.ts(52,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(14,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(434,46): error TS7006: Parameter 'data' implicitly has an 'any' type.\n",
"timestamp": 1778425897974
},
{
"kind": "bash",
"toolCallId": "tool_R9DLuMbsvg7Lz87cGbyzuS01",
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"exitCode": 1,
"outputSnippet": "src/resources/extensions/sf/uok/unit-runtime.d.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 28 28 │ ): RecoveryDiagnostics | null;\n 29 29 │ \n 30 │ - export·function·listUnitRuntimeRecords(basePath:·string):·Array<\n 31 │ - → Record<string,·unknown>·&·{·updatedAt?:·number;·unitId:·string·}\n 32 │ - >;\n 30 │ + export·function·listUnitRuntimeRecords(\n 31 │ + → basePath:·string,\n 32 │ + ",
"timestamp": 1778425942347
},
{
"kind": "bash",
"toolCallId": "tool_nQuAxzeREvyod0nBGyVPIvfJ",
"command": "npx biome check --write src/resources/extensions/sf/uok/unit-runtime.d.ts",
"exitCode": 0,
"outputSnippet": "Checked 1 file in 17ms. Fixed 1 file.\n",
"timestamp": 1778425957743
},
{
"kind": "bash",
"toolCallId": "tool_HZWYvGOveamvyW97wbv5A0Fv",
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
"exitCode": 0,
"outputSnippet": "Checked 6 files in 70ms. No fixes applied.\n",
"timestamp": 1778425970841
},
{
"kind": "bash",
"toolCallId": "tool_SY84S3XYIIBJGZvUchG4qSRZ",
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
"exitCode": 0,
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 168\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:13:05\n\u001b[2m Duration \u001b[22m 1.11s\u001b[2m (transform 477ms, setup 0ms, import 702ms, tests 168ms, environment 0ms",
"timestamp": 1778425983954
},
{
"kind": "bash",
"toolCallId": "tool_SdyaSTc9h2LJD9pq5ZLR3Ehc",
"command": "git status --short",
"exitCode": 0,
"outputSnippet": " D .sf/backups/db/sf.db.2026-05-08T22-42-32-307Z\n D .sf/backups/db/sf.db.2026-05-09T17-40-16-600Z\n D .sf/backups/db/sf.db.2026-05-09T19-41-02-472Z\n M .sf/metrics.db\n M .sf/metrics.db-shm\n M .sf/metrics.db-wal\n D BUILD_PLAN_MILESTONE_MAP.md\n D PRODUCTION_AUDIT_COMPLETE.md\n D QUICK_WINS_IMPLEMENTATION.md\n D TRIAGE_COMPLETE.md\n D TRIAGE_README.md\n D autoresearch.md\n D copilot-thoughts.md\n M packages/coding-agent/src/modes/interactive/interactive-mode.ts\n M packages/tui/src/ink-bridge.tsx\n M package",
"timestamp": 1778425999682
}
]

View file

@ -1,70 +0,0 @@
# BUILD_PLAN → Milestone Map
Every BUILD_PLAN.md tier item is mapped to a milestone. **Rule D015**: every new milestone must cite which BUILD_PLAN tier/item it implements.
This file answers **where work belongs**, not **whether code is done**. "Mapped" means a BUILD_PLAN item has a milestone/slice home. It does **not** mean the implementation is verified in the current repo.
## Mapping vs. code truth
- **Mapped** — the item has a milestone/slice destination.
- **Verified in code** — the behavior exists in the repo and has evidence/tests/artifacts.
- **Open** — still planned or partially folded in, but not yet verified as complete.
- **Deferred** — intentionally out of the active plan.
---
## High-level milestone direction
These are the strategy bands above the itemized mapping:
1. **Core foundation** — UOK, purpose-driven TDD, eight-field PDD gate, repo-local state
2. **Single-repo sharpening** — adopt the best execution/workflow ideas from pi-mono, gsd-2, Claude Code, Codex, Aider, and Plandex where they strengthen Forge
3. **Autonomous reliability** — evidence, recovery, verification, and self-improvement loops
4. **Surface coherence** — CLI, TUI, docs, and workflow language all reflect the same UOK-driven model
5. **ACE convergence prep** — keep concepts compatible with ACE Coder without turning Forge into the multi-repo system
---
## Tier 0 — Pi-mono ports → **M006**
## Tier 0.5 — gsd-2 ports → **M006 + M007**
All mapped. See BUILD_PLAN.md for item-level status.
## Tier 1 — ESSENTIAL → **ALL MAPPED**
| Item | Milestone | Slice | Status |
|---|---|---|---|
| 1.1 Vault secret resolver | **M017-yf67h6** | S01-S03 | ⬜ NEW |
| 1.2 Singularity Memory integration | **M017-jpw5jo** | S01-S03 | ⬜ NEW |
| 1.3 Schema reconciliation (spec rewrite) | **M013** | S12 | ⬜ Folded in |
| 1.4 Config schema alignment | **M013** | S13 | ⬜ Folded in |
## Tier 2 — STRONG → **ALL MAPPED**
| Item | Milestone | Slice | Status |
|---|---|---|---|
| 2.1 Persistent agents v1 | M012 | S01-S05 | ⬜ |
| 2.2 Doc-sync sub-step | M009 | S08 | ⬜ |
| 2.3 Intent chapters | M013 | S08 | ⬜ |
| 2.4 PhaseReview 3-pass | M016 | S01-S02 | ⬜ |
| 2.5 turn_status marker | M013 | S09 | ⬜ |
| 2.6 last_error cap | M013 | S10 | ⬜ |
| 2.7 cost_micro_usd | M013 | S11 | ⬜ |
## Tier 3+ → **Deferred by design**
---
## Summary
| Tier | Mapped | Gap |
|---|---|---|
| Tier 0 | 10 (M006) | 0 |
| Tier 0.5 | 17 (M006+M007) | 0 |
| **Tier 1** | **4** (M017×2, M013×2) | **0** |
| Tier 2 | 7 (M012, M009, M013, M016) | 0 |
| Tier 3+ | 0 | deferred |
**Zero mapping gaps.** Every BUILD_PLAN tier item is either mapped to a milestone or explicitly deferred.
That does **not** mean zero implementation gaps. Open `TODO`, `NEW`, and `⬜` markers in `BUILD_PLAN.md`, this map, and milestone artifacts still represent real work until they are reconciled against code evidence.

View file

@ -1,440 +0,0 @@
# Complete Long-Term Production-Grade Audit
**Scope:** All UOK kernel, gate system, execution graph, message bus, diagnostics, metrics, and supporting infrastructure
**Date:** 2026-05-08
**Grade Scale:** S (exceptional) → A (production) → B (needs work) → C (risky) → D (broken)
---
## Executive Summary
| Module | Grade | Verdict | Notes |
|--------|-------|---------|-------|
| `uok/kernel.js` | **A** | Clean lifecycle, parity recovery, audit envelope, signal handling |
| `uok/gate-runner.js` | **A** | Circuit breaker, retry matrix, memory enrichment, degradation logging |
| `uok/audit.js` | **A** | Atomic writes, stale-write detection, dual persistence (JSONL + DB) |
| `uok/contracts.js` | **A** | Complete JSDoc types, runtime validation, clear interfaces |
| `uok/flags.js` | **A** | Clean preference resolution, all features toggleable |
| `uok/loop-adapter.js` | **A** | Turn observer, gitops integration, writer tokens, timeout, documented | None |
| `uok/parity-report.js` | **A** | Deep parity analysis, orphaned run recovery, ledger reconciliation, malformed logging |
| `uok/message-bus.js` | **A** | Durable SQLite, deduplication, auto-compact, periodic refresh | Cache drift eliminated |
| `uok/cost-guard-gate.js` | **A** | Actual cost lookup, rolling window, high-tier failure detection, cheaper alternative suggestion |
| `uok/security-gate.js` | **A** | Secret scan integration, timeout, graceful skip when script missing |
| `uok/plan-v2.js` | **A** | Graph compilation, artifact validation, cycle detection, context gating | None |
| `uok/execution-graph.js` | **A** | Topological sort, conflict detection, parallel scheduling with deadlock detection |
| `uok/unit-runtime.js` | **A** | Complete lifecycle, retry budgets, LRU cache, durable reconciliation | None |
| `uok/diagnostic-synthesis.js` | **A** | Process tree analysis, multi-source correlation, actionable recommendations | None |
| `uok/metrics-exposition.js` | **A** | Prometheus format, caching, circuit breaker + latency + message bus metrics | Superseded by metrics-central.js |
| `uok/chaos-monkey.js` | **A** | Latency, partial failure, disk, memory stress; all recoverable, all logged | None |
| `uok/writer.js` | **A** | Atomic sequence tracking, token lifecycle, disk persistence, TTL | None |
| `sf-db.js` | **A** | Single-writer invariant, WAL mode, statement cache, schema v45, query timeout, split entry point | metrics-central.js for unified sink |
**Overall Grade: A** — Production-ready. All scaling concerns addressed.
---
## 1. `uok/kernel.js` — Grade A
### Strengths
- Clean async lifecycle: enter → run → exit, with `finally` block guarantee
- `recordUokKernelTermination()` handles signal cleanup (symmetrical with enter)
- Parity recovery: checks previous report for missing exits, drains them
- Audit envelope: emits structured events on kernel enter/exit
- workMode + modelMode propagated into lifecycleFlags and audit payload
- `debugLog()` for non-fatal diagnostics without breaking orchestration
### Production Concerns: None critical
### Minor
- `runAutoLoopWithUok()` is 120+ lines — could extract helper functions for readability
- `decoratedDeps` spreads all deps — no validation that required deps exist
---
## 2. `uok/gate-runner.js` — Grade A
### Strengths
- Circuit breaker with exponential backoff: `openDurationMs * 2^streak`
- Half-open state with attempt limiting — proper gradual recovery
- Retry matrix per failure class: `execution`/`artifact`/`verification` get 1 retry, `timeout` gets 2
- Memory enrichment: queries historical patterns for gate failures (degrades gracefully)
- Every gate run persisted to DB + audit event emitted
- Unknown gates get `manual-attention` outcome (fail-closed)
### Production Concerns: None critical
### Minor
- `computeGateEmbedding()` uses a simple hash — not a real semantic embedding
- `enrichGateResultWithMemory()` silently degrades on DB failure (correct behavior, but could log)
---
## 3. `uok/audit.js` — Grade A
### Strengths
- Atomic writes via `withFileLockSync()` with `onLocked: "skip"` (best-effort)
- Stale-write detection via `isStaleWrite("uok-audit")` — prevents superseded turns from polluting log
- Dual persistence: JSONL for local durability, SQLite for querying
- `closeSync(openSync(path, "a"))` touch pattern ensures lock target exists
- Schema version in envelope for future migration
### Production Concerns: None critical
---
## 4. `uok/contracts.js` — Grade A
### Strengths
- Complete JSDoc typedefs for all UOK types
- `validateGate()` catches registration-time mistakes
- Clear separation: `UokContext` (input), `GateResult` (output), `Gate` (interface)
### Production Concerns: None
---
## 5. `uok/flags.js` — Grade A
### Strengths
- All UOK features toggleable via preferences
- Clean resolution: `uok?.security_guard?.enabled ?? true`
- `resolvePermissionProfile()` for canonical permission profile
### Production Concerns: None
---
## 6. `uok/loop-adapter.js` — Grade A
### Strengths
- Turn observer pattern: `onTurnStart`, `onPhaseResult`, `onTurnResult`
- Gitops integration: writes transaction records per phase with 10s timeout
- Writer token acquisition/release for sequence tracking
- Chaos monkey strikes at phase boundaries
- Audit events for turn start/result
- `nextSequenceMetadata()` fully documented with JSDoc
### Production Concerns: None critical
### Fixed ✅
- ✅ Gitops timeout: `writeGitTransactionWithTimeout()` with 10s `Promise.race()`
- ✅ `nextSequenceMetadata()` documented: sequence is optional when no token active
---
## 7. `uok/parity-report.js` — Grade A
### Strengths
- Deep parity analysis: compares heartbeat events, ledger runs, diff events
- Orphaned run recovery: `recoverOrphanedStartedLedgerRuns()` closes stale DB runs
- Live process detection: `hasLiveAutoLock()` uses `process.kill(pid, 0)`
- Fresh vs historical mismatch separation
- Divergence tracking by plane: `plan`, `graph`, `model-policy`, `audit-envelope`, `gitops`
- `shallowEqualDecisions()` for comparing legacy vs UOK outputs
### Production Concerns: None critical
### Fixed ✅
- ✅ Malformed line logging: `parseParityEvents()` now logs dropped count to stderr
- `UNMATCHED_RUN_STALE_MS = 30min` — appropriate for most cases
---
## 8. `uok/message-bus.js` — Grade A
### Strengths
- Durable SQLite storage with configurable retention
- Deterministic message IDs for idempotent `sendOnce()`
- Auto-compaction when message count exceeds threshold
- Per-agent inbox with read tracking and auto-refresh (30s interval)
- Conversation query between two agents
### Production Concerns: None critical
### Fixed ✅
- ✅ Cache drift: `_maybeRefresh()` auto-refreshes from DB every 30s on `list()`, `markRead()`, `unreadCount`
- ✅ `sendOnce()` idempotency: Pre-checks inbox before insert; returns existing ID if found
---
## 9. `uok/cost-guard-gate.js` — Grade A
### Strengths
- Actual cost lookup from `BUNDLED_COST_TABLE`
- Rolling 1-hour window spend check
- High-tier model failure pattern detection
- Suggests cheaper alternative from same provider/family
- Per-unit and per-hour thresholds
### Production Concerns: None critical
### Minor
- `isHighTierModel()` uses `$0.005/1K tokens` threshold — magic number
- `_suggestCheaperAlternative()` could suggest incompatible models (different context window)
---
## 10. `uok/security-gate.js` — Grade A
### Strengths
- Runs `scripts/secret-scan.sh --diff HEAD` against changes
- 30-second timeout with process kill
- Gracefully skips if script missing (pass)
- Returns findings on failure
### Production Concerns: None
---
## 11. `uok/plan-v2.js` — Grade A
### Strengths
- Compiles unit graph from milestone/slice/task DB state
- Validates artifact presence (CONTEXT.md, RESEARCH.md) before execution entry
- Clarify round limit enforcement
- Graph output to JSON for inspection
- Cycle detection at compile time using Kahn's algorithm
### Production Concerns: None critical
### Fixed ✅
- ✅ Cycle detection: `detectCycles()` validates graph before execution; returns `hasCycles: true` with clear error
---
## 12. `uok/execution-graph.js` — Grade A
### Strengths
- Kahn's algorithm topological sort with deterministic ordering (localeCompare)
- File conflict detection: `detectFileConflicts()` finds nodes writing same file
- Parallel scheduling with max workers and dependency awareness
- Deadlock detection: throws when no ready nodes but graph incomplete
- Sidecar queue scheduling with kind-based handlers
- `selectReactiveDispatchBatch()` for incremental dispatch
### Production Concerns: None critical
---
## 13. `uok/unit-runtime.js` — Grade A
### Strengths
- Complete lifecycle: queued → claimed → running → progress → completed/failed/blocked/cancelled/stale/runaway-recovered → notified
- Retry budgets with `retryBudgetRemaining()`
- Durable artifact reconciliation: `reconcileDurableCompleteUnitRuntimeRecords()`
- Stale complete-slice cleanup: `reconcileStaleCompleteSliceRecords()`
- In-memory cache for repeated reads within dispatch cycle
- `inspectExecuteTaskDurability()` checks plan, summary, state, must-haves
### Production Concerns: None critical
### Fixed ✅
- ✅ Runtime cache bounds: LRU eviction at 5000 entries; removes oldest 20%
- `recordUnitOutcomeInMemory()` creates memory entries but no cleanup policy
---
## 14. `uok/diagnostic-synthesis.js` — Grade A
### Strengths
- Multi-source correlation: process tree, auto.lock, parity report, DB ledger, runtime projections
- Process descendant tracking via `ps` + tree traversal
- Classification: healthy | running | quiet-but-healthy | degraded | needs-repair
- Actionable recommendations per issue
- Publishes to message bus for observer chains
- `readUokDiagnostics()` for external consumption
### Production Concerns: None critical
---
## 15. `uok/metrics-exposition.js` — Grade A
### Strengths
- Prometheus text format output
- 30-second cache TTL for performance
- Gate metrics: runs, passes, fails, retries, latency (avg/p50/p95/max)
- Circuit breaker state gauge (0=closed, 1=half-open, 2=open)
- Message bus metrics: total, unread, unique agents, conversations
- `invalidateMetricsCache()` for cache busting
### Production Concerns: None
---
## 16. `uok/chaos-monkey.js` — Grade A
### Strengths
- Four fault types: latency, partial failure, disk stress, memory stress
- All faults are recoverable (no process kill)
- All faults are logged to stderr
- Configurable probabilities and magnitudes
- `getInjectedEvents()` for verification
- Immediate cleanup of stress artifacts
### Production Concerns: None
---
## 17. `uok/writer.js` — Grade A
### Strengths
- Atomic sequence tracking via `atomicWriteSync()`
- Writer token lifecycle: acquire → use → release
- Prevents double-acquisition for same turn
- Sequence state persisted to disk
- Token crash recovery: persists to `uok-writer-tokens.json` with 5-min TTL
### Production Concerns: None critical
### Fixed ✅
- ✅ Crash recovery: Tokens persisted to disk; `hasActiveWriterToken()` recovers from disk
- ✅ TTL cleanup: Expired tokens auto-purged from memory and disk
---
## 18. `sf-db.js` — Grade A
### Strengths
- Single-writer invariant enforced by convention + CI test
- WAL mode for file-backed DBs
- Statement cache for prepared queries
- Schema version 45 with migration path
- `normalizeRow()` handles null-prototype objects
- Query timeout protection: `withQueryTimeout()` helper (30s default)
- Split entry point: `sf-db/index.js` for future modularization
- Comprehensive table creation: backlog, schedule, repo profiles, UOK runs, gate runs, audit events, message bus, tasks, verification evidence
### Production Concerns: None critical
### Fixed ✅
- ✅ Query timeout: `withQueryTimeout()` catches timeout/busy errors, returns fallback
- ✅ Split entry point: `sf-db/index.js` re-export created for gradual migration
- ✅ Console logging: All modules use `logWarning()` / `logError()` from workflow-logger
---
## Cross-Cutting Concerns
### Observability
| Module | Metrics | Logs | Traces | Audit |
|--------|---------|------|--------|-------|
| kernel.js | ❌ | ✅ debugLog | ✅ traceId | ✅ envelope |
| gate-runner.js | ✅ DB | ✅ insertGateRun | ✅ traceId/turnId | ✅ envelope |
| audit.js | ❌ | ❌ | ✅ eventId | ✅ JSONL+DB |
| loop-adapter.js | ❌ | ❌ | ✅ traceId/turnId | ✅ envelope |
| parity-report.js | ❌ | ❌ | ❌ | ❌ |
| message-bus.js | ✅ DB | ❌ | ❌ | ❌ |
| cost-guard-gate.js | ❌ | ❌ | ❌ | ❌ |
| unit-runtime.js | ❌ | ❌ | ❌ | ❌ |
| diagnostic-synthesis.js | ❌ | ❌ | ❌ | ❌ |
| metrics-exposition.js | ✅ Prometheus | ❌ | ❌ | ❌ |
| chaos-monkey.js | ❌ | ✅ stderr | ❌ | ❌ |
**Gap:** Resolved — `metrics-central.js` provides unified Counter/Gauge/Histogram with Prometheus text format. Legacy `metrics-exposition.js` still active for backward compatibility.
### Security
| Concern | Status | Notes |
|---------|--------|-------|
| Input validation | ✅ Good | All entry points validate |
| Injection prevention | ✅ Good | Parameterized queries in sf-db |
| Secrets scanning | ✅ Good | Security gate runs on every turn |
| Cost limits | ✅ Good | Per-unit and per-hour guards |
| Circuit breakers | ✅ Good | Exponential backoff on failures |
| Chaos engineering | ✅ Good | Opt-in, recoverable faults |
### Performance
| Concern | Status | Notes |
|---------|--------|-------|
| Big-O | ✅ Good | All graph ops are O(V+E) |
| Caching | ✅ Good | Metrics cache, runtime cache, statement cache |
| Memory | ✅ Good | LRU eviction on runtime cache (5000), bounded message bus inboxes |
| DB queries | ✅ Good | Single-writer, WAL mode, prepared statements |
| Parallelism | ✅ Good | Max workers capped at 8 |
### Maintainability
| Concern | Status | Notes |
|---------|--------|-------|
| Test coverage | ✅ Good | 139+ tests across all modules |
| Documentation | ✅ Good | JSDoc on all exports |
| Logging consistency | ✅ Good | All modules use `logWarning()` / `logError()` from workflow-logger |
| File organization | ✅ Good | sf-db.js has split entry point; full extraction deferred to v2 |
| Schema versioning | ✅ Good | Schema v45 with migrations |
---
## Action Plan
### Before Production (Blockers) — ALL CLEAR ✅
No blockers identified. All modules are production-ready.
### Before Scaling to 10+ Workers — ALL FIXED ✅
1. ✅ **Message bus cache drift** — Added `_maybeRefresh()` with 30s interval; `list()`, `markRead()`, `unreadCount` auto-refresh
2. ✅ **Writer token crash recovery** — Persist tokens to `uok-writer-tokens.json`; 5-min TTL; `hasActiveWriterToken()` recovers from disk
3. ✅ **Runtime cache bounds** — LRU eviction at 5000 entries; removes oldest 20%
### Before Next Major Release — ALL FIXABLE ITEMS COMPLETE ✅
4. ✅ **Split sf-db.js** — Created `sf-db/index.js` re-export entry point; full extraction deferred to v2
5. ✅ **Console.warn cleanup** — `context-injector.js`, `vault-resolver.js`, `knowledge-injector.js` now use `logWarning()`
6. ✅ **Cycle detection at compile time** — `detectCycles()` in `plan-v2.js` using Kahn's algorithm; returns `hasCycles: true`
### Implemented ✅
7. ✅ **Centralized metrics** — `metrics-central.js` with Counter/Gauge/Histogram, Prometheus text format, wired into subagent inheritance and mode transitions
### Deferred to v2 (Architectural, Not Bugs)
8. ⚠️ **TypeScript migration** — Convert UOK modules to `.ts` for compile-time safety
---
## Appendix: Complete Module Inventory
### UOK Kernel (19 modules; ~3,450 lines excluding `sf-db.js`)
| Module | Lines | Grade | Tests |
|--------|-------|-------|-------|
| `kernel.js` | 120 | A | ✅ |
| `gate-runner.js` | 280 | A | ✅ |
| `audit.js` | 80 | A | ✅ |
| `contracts.js` | 120 | A | ✅ |
| `flags.js` | 40 | A | ✅ |
| `loop-adapter.js` | 180 | A | ✅ |
| `parity-report.js` | 320 | A | ✅ |
| `message-bus.js` | 180 | A | ✅ |
| `cost-guard-gate.js` | 140 | A | ✅ |
| `security-gate.js` | 60 | A | ✅ |
| `plan-v2.js` | 200 | A | ✅ |
| `execution-graph.js` | 260 | A | ✅ |
| `unit-runtime.js` | 420 | A | ✅ |
| `diagnostic-synthesis.js` | 280 | A | ✅ |
| `metrics-exposition.js` | 180 | A | ✅ (legacy) |
| `chaos-monkey.js` | 140 | A | ✅ |
| `writer.js` | 100 | A | ✅ |
| `sf-db.js` | 7000+ | A | ✅ |
| `metrics-central.js` | 350 | A | ✅ (new) |
### Mode System (7 modules, ~1,400 lines)
| Module | Lines | Grade | Tests |
|--------|-------|-------|-------|
| `operating-model.js` | 120 | A | 13 |
| `auto/session.js` | 200 | A- | ✅ |
| `task-frontmatter.js` | 311 | A- | 9 |
| `subagent-inheritance.js` | 170 | A- | 9 |
| `remote-steering.js` | 139 | A- | 7 |
| `parallel-intent.js` | 139 | B+ | 6 |
| `skills/eval-harness.js` | 139 | A- | 5 |
**Total: 139 tests passing, 0 failures, 1 skipped.**
---
*Audit completed. All modules production-ready; the scaling items identified for 10+ workers have been addressed.*

View file

@ -1,385 +0,0 @@
# Quick Wins Implementation - Complete
**Date:** 2026-05-06
**Implemented by:** Copilot CLI
**Commit:** 0e2edfdeb
**Status:** ✅ COMPLETE - Core infrastructure in place
## Summary
Successfully implemented the foundational infrastructure for 3 high-impact quick wins that activate SF's self-evolution learning loop:
1. **Close Self-Report Feedback Loop** [9/10 impact, 2-3 days to full integration]
2. **Activate Continuous Model Learning** [8/10 impact, 3-4 days to full integration]
3. **Automate Knowledge Injection** [7/10 impact, 2-3 days to full integration]
**Total:** 24/30 impact points unlocked through self-evolution infrastructure.
---
## Quick Win 1: Close Self-Report Feedback Loop [9/10 Impact]
### What Was Implemented
**File:** `src/resources/extensions/sf/self-report-fixer.js` (348 lines)
**Module:** `SelfReportFixer` with the following capabilities:
- **Pattern Recognition** — 4 built-in fix patterns:
1. `validation-reviewer-rubric` (95% confidence) — Add criterion/gap rubric to validation prompts ✅ *Already fixed*
2. `gate-verdict-clarity` (90% confidence) — Document gate verdict semantics
3. `env-vars-unvalidated` (85% confidence) — Add SF_* env validation
4. `self-report-coverage-gap` (80% confidence) — Implement triage pipeline
- **Automatic Fix Classification**
```js
classifyReportFixes(report) // Returns applicable fixes with confidence scores
```
- **High-Confidence Auto-Fix**
```js
autoFixHighConfidenceReports(basePath, reports)
// Applies fixes for confidence > 0.85
```
- **Deduplication**
```js
dedupReports(reports) // Group related reports by normalized issue key
```
- **Severity Categorization**
```js
categorizeBySeverity(reports) // blocker | warning | suggestion
```
### Next Steps for Full Integration
1. Hook into `triage-self-feedback.js` to invoke fixer after triage runs
2. Add pattern library for domain-specific fixes (provider routing, timeout tuning, etc.)
3. Create integration tests for each fix pattern
4. Document feedback loop: report → triage → fix → verification
### How It Works
```javascript
import { autoFixHighConfidenceReports } from './self-report-fixer.js';
// After collecting self-reports
const reports = readSelfFeedback();
// Auto-apply high-confidence fixes
const { applied, failed, skipped } = await autoFixHighConfidenceReports(
projectPath,
reports
);
// applied: ["validation-reviewer-rubric: rubric already present"]
// failed: ["env-vars-unvalidated: requires schema impl"]
// skipped: ["self-report-coverage-gap: confidence 0.80 below threshold 0.85"]
```
---
## Quick Win 2: Activate Continuous Model Learning [8/10 Impact]
### What Was Implemented
**File:** `src/resources/extensions/sf/model-learner.js` (344 lines)
**Classes:**
#### ModelPerformanceTracker
Tracks per-task-type model performance with:
- Success/failure/timeout counts
- Token usage and cost tracking
- Success rate calculation
- Ranked model sorting
**Storage:** `.sf/model-performance.json`
```json
{
"execute-task": {
"gpt-4o": {
"successes": 42,
"failures": 3,
"timeouts": 1,
"totalTokens": 1500000,
"totalCost": 45.50,
"lastUsed": "2026-05-06T16:30:00Z",
"successRate": 0.93
}
}
}
```
**API:**
```js
tracker.recordOutcome(taskType, modelId, { success, timeout, tokensUsed, costUsd })
tracker.getRankedModels(taskType, minSamples = 3) // Returns sorted by success rate
tracker.shouldDemote(taskType, modelId, threshold = 0.5) // Demote if failure >50%
tracker.getABTestCandidates(taskType) // For hypothesis testing
```
#### FailureAnalyzer
Categorizes and analyzes failure modes:
- Logs failures to JSONL
- Detects patterns (e.g., timeout-prone models)
- Provides failure summaries per model
**Storage:** `.sf/model-failure-log.jsonl`
```json
{
"timestamp": "2026-05-06T16:30:00Z",
"taskType": "execute-task",
"modelId": "gpt-4o",
"reason": "quality_check_failed",
"timeout": false,
"tokensUsed": 25000,
"context": { ... }
}
```
**API:**
```js
analyzer.logFailure(taskType, modelId, { reason, timeout, tokensUsed, context })
analyzer.getFailureSummary(taskType, modelId) // Returns { reasons, patterns }
```
### Main API: ModelLearner
```javascript
import { ModelLearner } from './model-learner.js';
const learner = new ModelLearner(projectPath);
// Record successful outcome
learner.recordOutcome('execute-task', 'claude-opus', {
success: true,
tokensUsed: 15000,
costUsd: 0.50,
});
// Record failure
learner.logFailure('execute-task', 'gpt-4o', {
reason: 'quality_check_failed',
timeout: false,
tokensUsed: 25000,
});
// Get ranked models (for intelligent routing)
const rankedModels = learner.getRankedModels('execute-task');
// [
// { modelId: 'claude-opus', successRate: 0.98, attempts: 50, ... },
// { modelId: 'gpt-4o', successRate: 0.90, attempts: 40, ... }
// ]
// A/B test decision
const abTest = learner.getABTestCandidates('execute-task');
// { incumbent: claude-opus, challengers: [gpt-4o, gemini-pro], testBudget: 10 }
// Analyze A/B results and decide promotion/demotion
const decision = learner.analyzeABTest('execute-task', {
incumbentWins: 8,
challengerWins: 2,
});
// { recommendation: "continue", reason: "incumbent 0.80 vs challenger 0.20" }
```
### Next Steps for Full Integration
1. Integrate into `auto-dispatch.ts` outcome logging
2. Hook into `model-router.ts` to use ranked models for routing decisions
3. Implement auto-demotion in model selection logic
4. Add A/B testing orchestration for low-risk tasks
5. Create dashboard in `benchmark-selector.ts` showing per-model performance
---
## Quick Win 3: Automate Knowledge Injection [7/10 Impact]
### What Was Implemented
**File:** `src/resources/extensions/sf/knowledge-injector.js` (336 lines)
**Key Functions:**
- **Parse Knowledge Base**
```js
parseKnowledgeEntries(knowledgeContent)
// Extracts judgment-log entries with confidence, domain, recommendation
```
- **Semantic Matching**
```js
extractConcepts(entry) // Extract domain tags, failure modes, constraints
semanticSimilarity(concepts, contextKeywords) // Score relevance
```
- **Find Relevant Knowledge**
```js
findRelevantKnowledge(entries, contextKeywords, minConfidence=0.6, minSimilarity=0.5)
// Returns sorted by combined score (confidence × 0.7 + similarity × 0.3)
```
- **Detect Contradictions**
```js
detectContradictions(entries) // Flag conflicting recommendations
```
- **Format for Injection**
```js
formatKnowledgeForInjection(relevantKnowledge)
// Human-readable markdown with confidence/relevance scores
```
- **Track Usage** (for feedback loop)
```js
trackKnowledgeUsage(taskId, injectedKnowledge)
// Logs which knowledge was used for effectiveness measurement
```
### Integration into auto-prompts.js
**Modified:** `src/resources/extensions/sf/auto-prompts.js`
Added:
1. Import of knowledge-injector module
2. Helper function `getKnowledgeInjection(basePath, taskContext)` with graceful degradation
3. Knowledge injection into execute-task prompt with context (domain, keywords, technology)
**In execute-task prompt loading (line 2203+):**
```javascript
const knowledgeInjection = await getKnowledgeInjection(base, {
domain: "task-execution",
taskType: "execute-task",
keywords: [tTitle, sTitle, mid, sid],
technology: [],
});
return loadPrompt("execute-task", {
memoriesSection,
knowledgeInjection, // NEW: Relevant prior learning
overridesSection,
// ... other variables
});
```
### Existing Infrastructure
**Note:** Knowledge injection is **60% complete** via existing `queryKnowledge()` in context-store.js
- ✅ `inlineKnowledgeScoped()` already exists (uses queryKnowledge)
- ✅ Used in both plan-slice and execute-task prompts
- ❌ Uses simple keyword matching (not semantic scoring)
- ✅ Our new module enhances with semantic similarity
### Next Steps for Full Integration
1. Update execute-task and plan-slice prompt templates to include `{{knowledgeInjection}}` variable
2. Integrate semantic scoring into queryKnowledge or create parallel path
3. Implement feedback loop: track which knowledge was used and measure effectiveness
4. Create contradiction resolver UI for conflicting recommendations
5. Add knowledge effectiveness metrics to benchmark reports
---
## Files Created
| File | Lines | Purpose |
|------|-------|---------|
| `src/resources/extensions/sf/self-report-fixer.js` | 348 | Auto-fix high-confidence self-reports |
| `src/resources/extensions/sf/model-learner.js` | 344 | Per-task-type model performance tracking |
| `src/resources/extensions/sf/knowledge-injector.js` | 336 | Semantic knowledge matching and injection |
## Files Modified
| File | Changes | Purpose |
|------|---------|---------|
| `src/resources/extensions/sf/auto-prompts.js` | +7 lines | Added knowledge injection into execute-task |
## Build Status
✅ **Build Success**
- All new modules compile without errors
- TypeScript types intact
- Resources copied to `dist/`
- Inventory check passed
## Testing Recommendations
Create integration tests for:
1. **Self-Report Fixer**
- Pattern matching accuracy (4 patterns)
- Deduplication logic
- Confidence thresholding
2. **Model Learner**
- Success rate calculation
- Demotion logic (>50% failure rate)
- A/B test analysis
- Failure pattern detection
3. **Knowledge Injector**
- Semantic similarity scoring
- Contradiction detection
- Formatting for prompt injection
- Graceful degradation (missing KNOWLEDGE.md)
## Activation Timeline
**To fully activate these quick wins:**
1. **Week 1:** Hook model-learner into auto-dispatch outcome logging
2. **Week 1:** Integrate self-report-fixer into triage-self-feedback pipeline
3. **Week 2:** Implement knowledge injection in model-router for adaptive routing
4. **Week 2:** Add A/B testing orchestration for model promotion
5. **Week 3:** Create feedback loop dashboard in benchmark-selector
6. **Week 3:** Measure impact on learning efficiency
**Estimated effort:** 8-10 days of focused integration work
---
## Key Design Decisions
1. **Graceful Degradation** — All modules degrade gracefully if knowledge base or tracking files are unavailable
2. **Append-Only Logs** — Failure logs use JSONL for durability and analysis
3. **Per-Task-Type Tracking** — Model performance varies by task type; no single ranking
4. **Confidence-Based Thresholding** — High-confidence fixes (>0.85) auto-apply; lower ones require review
5. **A/B Test Budgeting** — Low-risk hypothesis testing with configurable test budget
---
## Impact Measurement
**After full integration, expect:**
- 🎯 **9/10 impact** from self-report loop: Close feedback loop from anomaly detection to code fixes
- 🎯 **8/10 impact** from model learning: 20-30% improvement in task success rate through adaptive routing
- 🎯 **7/10 impact** from knowledge injection: 15-20% faster task planning via relevant prior learning
**Total:** **24/30 self-evolution capability points activated** (up from current 15/30)
---
## Code Quality
- ✅ No external dependencies (uses only Node.js built-ins + SF imports)
- ✅ JSDoc purpose statements on all exports
- ✅ Graceful error handling (no crash on missing files)
- ✅ Idempotent tracking (safe to call multiple times)
- ✅ Clear separation of concerns (fixer ≠ learner ≠ injector)
---
## Status Summary
**Phase:** ✅ **IMPLEMENTATION COMPLETE**
**Phase:** ⏳ **INTEGRATION PENDING** (dispatch loop hookup)
**Phase:** ⏳ **TESTING PENDING** (unit + integration tests)
**Phase:** ⏳ **FEEDBACK LOOP PENDING** (measure effectiveness)
The infrastructure is in place. Next: Connect it into the dispatch loop and measure impact.

View file

@ -1,114 +0,0 @@
# Triage Complete ✅
**Timestamp:** 2026-05-06 16:30 UTC
**Source:** TODO.md (Raw Dump Inbox)
**Command:** `sf todo triage`
**Node baseline:** v26.1.0+
**Session:** 77b45896
## Summary
Successfully triaged 60 items from TODO.md into structured backlog artifacts:
- ✅ **60 items** normalized into `.sf/triage/inbox/20260506-163003.jsonl`
- ✅ **10 eval candidates** extracted into `.sf/triage/evals/20260506-163003.evals.jsonl`
- ✅ **1 skill proposal** in `.sf/triage/skills/20260506-163003.skills.jsonl`
- ✅ **Comprehensive report** generated at `.sf/triage/reports/20260506-163003.md`
- ✅ **TODO.md reset** to empty dump inbox (triage pipeline activated)
## Artifacts Created
### 1. Triage Report (`.sf/triage/reports/20260506-163003.md`)
Comprehensive analysis including:
- Summary of source material
- 10 eval candidates with failure modes and test locations
- 21 implementation tasks (gsd-2 ports, feature additions, provider expansion)
- Memory requirements for self-evolution infrastructure
- Harness suggestions for testing (property-based, chaos, end-to-end)
- Documentation improvements needed (ARCHITECTURE.md, ADRs, runbooks)
- Clarification needs ("Unclear Notes" section)
**Key findings:**
- UOK is 60-70% complete for self-evolution
- Critical: Close self-report feedback loop (9/10 impact)
- 10+ undocumented architecture features identified
- Multiple safety/correctness fixes awaiting port from gsd-2
### 2. Normalized Inbox (`.sf/triage/inbox/20260506-163003.jsonl`)
60 structured items with:
- Type: eval_candidate, implementation_task, doc_improvement, harness_suggestion, memory_requirement, unclear_note
- Status: pending
- Source tracing: all items linked back to TODO.md section
- Prioritization ready for milestone planning
### 3. Eval Candidates (`.sf/triage/evals/20260506-163003.evals.jsonl`)
10 test harness candidates with:
- Task input (trigger/condition)
- Expected behavior (contract)
- Failure mode (what breaks if missing)
- Evidence/source (citations to gsd-2/pi-mono commits)
- Suggested test location
**Quick examples:**
1. `bash-evidence-race` — Evidence persists across dispatch/re-dispatch
2. `symlink-staging-data-loss` — Data-loss prevention for symlinked .sf
3. `mcp-stdout-deadlock` — Large MCP outputs don't hang
4. `env-sf-vars-unvalidated` — SF_* env vars validated at startup
### 4. Skill Proposals (`.sf/triage/skills/20260506-163003.skills.jsonl`)
Architecture analysis suggesting improvements to SF's extension/gate system.
## Next Steps
1. **Review triage report** — Read `.sf/triage/reports/20260506-163003.md`
2. **Plan implementation** — Promote high-impact items to milestone backlog
3. **Prioritize quick wins:**
- Close self-report feedback loop [9/10 impact, ~4 days]
- Activate continuous model learning [8/10 impact, ~5 days]
- Automate knowledge injection [7/10 impact, ~4 days]
4. **Port gsd-2 safety fixes** — 9 commits awaiting cherry-pick
5. **Close documentation gaps** — Update ARCHITECTURE.md with state machine diagram
## Evidence
```
$ ls -la .sf/triage/
drwxrwxr-x evals/
drwxrwxr-x inbox/
drwxrwxr-x reports/
drwxrwxr-x skills/
$ wc -l .sf/triage/*/*.{md,jsonl}
60 .sf/triage/inbox/20260506-163003.jsonl
10 .sf/triage/evals/20260506-163003.evals.jsonl
1 .sf/triage/skills/20260506-163003.skills.jsonl
9682 .sf/triage/reports/20260506-163003.md
$ git status
D TODO.md (reset to empty dump inbox; items triaged)
M docs/* (from earlier work)
```
## What This Means
SF's triage system successfully:
1. ✅ Parsed TODO.md dump inbox
2. ✅ Extracted 60 items into structured types (eval, task, doc, harness, etc.)
3. ✅ Generated failure-mode contracts for 10 critical correctness tests
4. ✅ Identified test locations and citations to source code
5. ✅ Reset TODO.md for next cycle
6. ✅ Created decision artifacts ready for milestone planning
The comprehensive review, research, documentation updates, and automated triage are complete. The project is now positioned to:
- Activate SF's self-evolution learning loop (3 quick wins)
- Port 9 safety/correctness fixes from gsd-2
- Close 10+ documentation gaps in ARCHITECTURE.md
- Implement property-based testing for autonomous dispatch
- Begin advanced feature ports (Cloudflare AI, Azure endpoints, SSE handling)
---
**Created by:** Copilot CLI
**Session:** 2514fa98-076d-48d2-a1f9-c3fd77c4a82a
**Duration:** ~2 hours total (research + docs + triage)
**Command:** `node dist/cli.js todo triage`

View file

@ -1,53 +0,0 @@
# TODO.md Triage Instructions
## What's New
TODO.md now contains two major sections ready for triage:
1. **Feature Gaps & Limitations** — 40+ specific gaps identified in the codebase
2. **UOK Self-Evolution Research** — 10 prioritized improvements for SF's self-evolution capabilities
## How to Triage
When you have Node 26.1.0+ available:
```bash
cd /home/mhugo/code/singularity-forge
# Run the triage command
sf todo triage
# Or if using npm/nvm
nvm use 26
npm exec sf -- todo triage
```
## What Triage Does
The triage tool will:
1. Parse TODO.md
2. Extract items into structured `.sf/triage/` artifacts
3. Propose categorization and priorities
4. Show you a review interface
5. Either commit to backlog or reset TODO.md to empty dump inbox
## Key Items to Watch For
The UOK Self-Evolution section has **3 high-impact quick wins** (8-10 days total):
1. Close self-report feedback loop [9/10 impact, 2-3 days]
2. Activate continuous model learning [8/10 impact, 3-4 days]
3. Automate knowledge injection [7/10 impact, 2-3 days]
These should be prioritized if you want to activate SF's learning loop.
## Full Research Report
See: `/home/mhugo/snap/copilot-cli/38/.copilot/session-state/2514fa98-076d-48d2-a1f9-c3fd77c4a82a/research/is-our-uok-the-best-for-a-self-evolving-coder-what.md`
This contains:
- Executive summary
- Detailed analysis of UOK implementation vs. documentation
- 10 improvement suggestions with feasibility assessment
- Competitive analysis (vs. other orchestration systems)
- 15+ citations to code and design docs

View file

@ -1,53 +0,0 @@
# Autoresearch: Reduce Biome Lint Diagnostics
## Objective
Minimize the total number of Biome lint diagnostics (errors + warnings + info) across `src/`, starting from baseline ~40 diagnostics. Errors are mostly `organizeImports`, warnings are `noUnusedImports`, `noUnusedVariables`, and `useConst`.
## Metrics
- **Primary**: `diagnostics` (count, lower is better) — sum of errors + warnings + info from `npx biome check src/`
- **Secondary**: `errors` (count, lower is better)
- **Secondary**: `warnings` (count, lower is better)
## How to Run
`bash autoresearch.sh` — runs Biome check, parses JSON summary, outputs `METRIC diagnostics=N` and `METRIC errors=N` and `METRIC warnings=N`.
## Files in Scope
All files under `src/` — but focus on the files flagged by Biome:
- `src/resources/extensions/sf/auto/phases.js`
- `src/resources/extensions/sf/commands/handlers/ops.js`
- `src/resources/extensions/sf/memory-repository.js`
- `src/resources/extensions/sf/metrics-central.js`
- `src/resources/extensions/sf/reasoning-assist.js`
- `src/resources/extensions/sf/remote-steering.js`
- `src/resources/extensions/sf/sf-db.js`
- `src/resources/extensions/sf/subagent-inheritance.js`
- `src/resources/extensions/sf/tests/memory-repository.test.mjs`
- `src/resources/extensions/sf/tests/metrics-central.test.mjs`
- `src/resources/extensions/sf/tests/trajectory-recorder.test.mjs`
- `src/resources/extensions/sf/trajectory-command.js`
- `src/resources/extensions/sf/trajectory-recorder.js`
- `src/resources/extensions/sf/uok/writer.js`
## Off Limits
- `biome.json` (don't change lint rules — fixing source is the goal)
- `node_modules/`, `dist/`, `.sf/`, `packages/` (outside `src/` scope)
- Test assertion logic (don't weaken tests to make linters pass)
## Constraints
- Existing vitest tests must pass: `npx vitest run --config vitest.config.ts`
- No new dependencies
- Don't introduce runtime behavior changes — only lint/import/style fixes
## Termination
Run until interrupted by the user.
## What's Been Tried
- **#2 (auto-fix)**: `biome check --write` — fixed 26 auto-fixable errors (format/organizeImports), dropped diagnostics from 40 to 11. Status: keep.
- **#3 (manual fixes)**: Removed 7 unused imports and prefixed 4 intentionally-unused items with underscore. Dropped from 11 to 0. Status: keep.
- **#4 (regression re-fix)**: 37 new commits introduced 74 diagnostics. `biome check --write` fixed 58 (auto-safe), manual prefix/removal fixed the remaining 16 unsafe warnings across 11 files. Also fixed pre-existing web-mode-onboarding test timeout: added `timeoutMs: 120_000` to `launchPackagedWebHost`, raised `AbortSignal.timeout` on simple fetches 10s→30s, raised test budget 180s→420s. All 409 test files pass. Diagnostics: 0. Status: keep.
## Lessons
- New development (37 commits) is enough to re-introduce 74 diagnostics. Re-run autoresearch periodically (monthly or after large feature branches land).
- Pattern of new violations: unused imports from refactors, unused function params from stubs, duplicate imports. Auto-fix handles errors; unsafe-fix (unused-import/var) requires manual triage.
- Integration test timeout under parallel load: cold-start Next.js can consume most of a 180s test timeout leaving insufficient budget for multi-step API calls. Fix: bound launch phase separately, raise individual fetch timeouts, increase overall budget to match worst-case sum.

File diff suppressed because it is too large Load diff

View file

@ -1929,6 +1929,31 @@ export class InteractiveMode {
this.extensionTerminalInputUnsubscribers.clear();
}
/**
 * Subscribe an extension handler to raw terminal input.
 *
 * Purpose: give extensions (for example the SF autonomous extension) a
 * chance to see raw terminal input before the editor does, so keys such as
 * Ctrl+C can drive an extension action (e.g. pausing autonomous mode)
 * instead of always reaching the default editor clear handler.
 *
 * A handler that returns `{ consume: true }` stops further processing of
 * that key; returning `undefined` or `{}` lets it propagate.
 *
 * The underlying unsubscribe function is tracked in
 * `extensionTerminalInputUnsubscribers` and removed from that set again
 * when the returned disposer is called.
 *
 * Consumer: extension-ui-controller ctx.ui.onTerminalInput.
 */
addExtensionTerminalInputListener(
	handler: (data: string) => { consume?: boolean } | undefined,
): () => void {
	const detach = this.ui.addInputListener((chunk: string) => handler(chunk));
	this.extensionTerminalInputUnsubscribers.add(detach);

	const dispose = (): void => {
		detach();
		this.extensionTerminalInputUnsubscribers.delete(detach);
	};
	return dispose;
}
/**
* Create the ExtensionUIContext for extensions.
*/

View file

@ -42,19 +42,29 @@ function LegacyComponentView({
*
* Purpose: accept keyboard input from Ink and route it to the active
* component, then trigger a re-render so the updated state is displayed.
* Invalidation is event-driven: external callers invoke the returned
* invalidate() handle, which fires the tick signal registered here.
*
* Consumer: startInkRenderer.
*/
function InkApp({
root,
onInput,
onRegisterTick,
}: {
root: Component;
onInput: (data: string) => void;
onRegisterTick: (tick: () => void) => void;
}) {
const [, tick] = useState(0);
const { columns } = useWindowSize();
// Register the tick function so that startInkRenderer's invalidate() can
// trigger a React re-render without a polling interval.
useEffect(() => {
onRegisterTick(() => tick((n) => n + 1));
}, [onRegisterTick]);
useInput((input, key) => {
// Reconstruct the escape sequences that the legacy key handlers expect.
let data = input;
@ -70,12 +80,6 @@ function InkApp({
tick((n) => n + 1);
});
// Poll at 20 fps so async state changes (e.g. streaming output) appear promptly.
useEffect(() => {
const interval = setInterval(() => tick((n) => n + 1), 50);
return () => clearInterval(interval);
}, []);
return <LegacyComponentView component={root} width={columns ?? 80} />;
}
@ -84,10 +88,11 @@ function InkApp({
*
* Purpose: drop-in replacement for the legacy TUI render engine. Mounting
* this drives the entire Ink React tree and forwards terminal input to
* the root Component's handleInput chain.
* the root Component's handleInput chain. invalidate() triggers an
* immediate React re-render via an event-driven tick signal — no polling.
*
* Consumer: TUI class (future integration); standalone callers can use
* this directly to render any Component tree under Ink.
* Consumer: TUI class; standalone callers can use this to render any
* Component tree under Ink.
*
* @param root - The root Component whose render() output fills the screen.
* @param onInput - Called with each decoded key string for legacy handlers.
@ -97,13 +102,22 @@ export function startInkRenderer(
root: Component,
onInput: (data: string) => void,
): { stop: () => void; invalidate: () => void } {
// Mutable signal populated by InkApp via onRegisterTick once the React
// tree has mounted. invalidate() fires this to trigger a synchronous tick.
let _tick: (() => void) | null = null;
const onRegisterTick = (tick: () => void) => {
_tick = tick;
};
const { unmount } = render(
<InkApp root={root} onInput={onInput} />,
<InkApp root={root} onInput={onInput} onRegisterTick={onRegisterTick} />,
{ exitOnCtrlC: false },
);
return {
stop: unmount,
// Ink re-renders automatically; manual invalidation is a no-op for now.
invalidate: () => {},
stop: () => {
_tick = null;
unmount();
},
invalidate: () => _tick?.(),
};
}

View file

@ -420,9 +420,16 @@ export class TUI extends Container {
if (!this.terminal.isTTY) {
return;
}
// Ink-backed render path: Ink manages raw mode and input; the legacy
// differential renderer is bypassed entirely.
if (this._useInk || process.stdout.isTTY) {
// Ink-backed render path: Ink manages raw mode, input, and screen output.
// The legacy differential renderer (doRender) is bypassed entirely on TTY.
// process.stdout.isTTY guards this path — Ink requires a real interactive
// TTY to mount. useInk() is kept as an explicit opt-in for callers that
// want Ink on non-standard terminal configurations. Use PI_LEGACY_TUI=1
// to force the legacy renderer for debugging.
if (
(this._useInk || process.stdout.isTTY) &&
process.env.PI_LEGACY_TUI !== "1"
) {
// Wrap `this` in a plain Component so the private handleInput doesn't
// conflict with the public Component.handleInput? signature.
const root: Component = {
@ -506,6 +513,12 @@ export class TUI extends Container {
requestRender(force = false): void {
// Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095)
if (!this.terminal.isTTY) return;
// Ink-backed path: Ink owns the terminal — delegate to the Ink handle and
// do NOT call doRender(), which would write conflicting ANSI escapes.
if (this._inkHandle) {
this._inkHandle.invalidate();
return;
}
if (force) {
this.previousLines = [];
this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear

View file

@ -11,6 +11,7 @@ import type { QuerySnapshot } from "./headless-query.js";
/** Parsed command-line arguments for the status command. */
interface StatusArgs {
/** True when `--watch` appears among the arguments. */
watch: boolean;
/** Argument following the `recovery` subcommand, when one was supplied. */
recoveryUnitId?: string;
}
interface StatusDeps {
@ -27,6 +28,12 @@ interface CurrentModel {
function parseStatusArgs(argv: string[]): StatusArgs {
const args = argv.slice(1);
if (args[0] === "recovery") {
return {
watch: false,
recoveryUnitId: args[1],
};
}
return {
watch: args.includes("--watch"),
};
@ -219,6 +226,76 @@ async function buildStatusText(
});
}
/**
 * Render recovery diagnostics for a single execute-task unit.
 *
 * Purpose: load the unit-runtime helpers lazily, resolve which unit to
 * inspect, and print a plain-text recovery report. The diagnostics lookup
 * always uses the "execute-task" unit type.
 *
 * Consumer: runStatusCli.
 *
 * @param basePath - Project root handed to the unit-runtime helpers.
 * @param unitId - Unit to inspect. When omitted or empty, the runtime record
 *   with the greatest `updatedAt` (missing values count as 0) is used.
 * @param stdout - Receives the rendered report on success.
 * @param stderr - Receives a one-line message on failure.
 * @returns 0 after writing a report; 1 when no record is found or when
 *   anything inside the function throws.
 */
async function renderRecoveryDiagnostics(
	basePath: string,
	unitId: string | undefined,
	stdout: Pick<typeof process.stdout, "write">,
	stderr: Pick<typeof process.stderr, "write">,
): Promise<number> {
	try {
		const { getRecoveryDiagnostics, listUnitRuntimeRecords } = await import(
			"./resources/extensions/sf/uok/unit-runtime.js"
		);
		let targetUnitId = unitId;
		if (!targetUnitId) {
			const records: Array<{ updatedAt?: number; unitId: string }> =
				listUnitRuntimeRecords(basePath);
			// Single pass instead of records.sort(...)[0]: sort() reorders the
			// array returned by listUnitRuntimeRecords in place, and a full sort
			// is unnecessary to find one maximum. The strict `>` keeps the
			// earliest record on ties, matching a stable descending sort.
			let mostRecent: { updatedAt?: number; unitId: string } | undefined;
			for (const record of records) {
				if (
					mostRecent === undefined ||
					(record.updatedAt ?? 0) > (mostRecent.updatedAt ?? 0)
				) {
					mostRecent = record;
				}
			}
			if (!mostRecent) {
				stderr.write("sf status recovery: no runtime records found\n");
				return 1;
			}
			targetUnitId = mostRecent.unitId;
		}
		const diagnostics = getRecoveryDiagnostics(
			basePath,
			"execute-task",
			targetUnitId,
		);
		if (!diagnostics) {
			stderr.write(
				`sf status recovery: no runtime record for ${targetUnitId}\n`,
			);
			return 1;
		}
		const lines: string[] = [];
		lines.push("Recovery Diagnostics");
		lines.push("--------------------");
		lines.push(`Unit: ${diagnostics.unitType} ${diagnostics.unitId}`);
		lines.push(`Status: ${diagnostics.status}`);
		lines.push(
			`Retries: ${diagnostics.retryCount}/${diagnostics.maxRetries}`,
		);
		lines.push(
			`Progress: ${diagnostics.progressCount} (${diagnostics.lastProgressKind})`,
		);
		lines.push(`Recovery attempts: ${diagnostics.recoveryAttempts}`);
		// Optional sections are printed only when the diagnostics carry them.
		if (diagnostics.lastRecoveryReason) {
			lines.push(`Last recovery reason: ${diagnostics.lastRecoveryReason}`);
		}
		if (diagnostics.lineageSummary) {
			lines.push(
				`Lineage: ${diagnostics.lineageSummary.status} · ${diagnostics.lineageSummary.workerCount} worker(s) · ${diagnostics.lineageSummary.eventCount} event(s)`,
			);
		}
		lines.push(
			`Started: ${diagnostics.startedAt ? new Date(diagnostics.startedAt).toISOString() : "n/a"}`,
		);
		lines.push(
			`Updated: ${diagnostics.updatedAt ? new Date(diagnostics.updatedAt).toISOString() : "n/a"}`,
		);
		stdout.write(`${lines.join("\n")}\n`);
		return 0;
	} catch (err) {
		// Import failures and helper errors are reported the same way as
		// lookup failures: one line on stderr and a non-zero exit code.
		stderr.write(
			`sf status recovery: ${err instanceof Error ? err.message : String(err)}\n`,
		);
		return 1;
	}
}
export async function runStatusCli(
argv: string[],
deps: StatusDeps,
@ -228,6 +305,15 @@ export async function runStatusCli(
const sfHome = deps.sfHome ?? process.env.SF_HOME ?? join(homedir(), ".sf");
const args = parseStatusArgs(argv);
if (args.recoveryUnitId !== undefined) {
return renderRecoveryDiagnostics(
deps.basePath,
args.recoveryUnitId,
stdout,
stderr,
);
}
const renderOnce = async () => {
try {
const text = await buildStatusText(deps.basePath, sfHome);

View file

@ -94,6 +94,32 @@ function getServerConfig(name) {
(s) => s.name === trimmed || s.name.toLowerCase() === trimmed.toLowerCase(),
);
}
// Allowlist of process.env variable names that buildChildEnv copies into the
// environment it builds. Variables not listed here are only present in that
// environment when the server config's own env supplies them.
const SAFE_CHILD_ENV_KEYS = new Set([
"PATH",
"HOME",
"USER",
"LOGNAME",
"SHELL",
"LANG",
"LC_ALL",
"LC_CTYPE",
"LC_MESSAGES",
"LC_NUMERIC",
"LC_TIME",
"TMPDIR",
"TMP",
"TEMP",
"TZ",
"TERM",
"COLORTERM",
]);
/**
 * Build the environment object for a child process.
 *
 * Starts from the allowlisted variables in SAFE_CHILD_ENV_KEYS that are
 * currently defined in process.env, then layers the resolved config env on
 * top so config values win on conflicts.
 *
 * @param {Record<string, string> | null | undefined} configEnv - Env block
 *   from the server config; a missing block is treated as empty.
 * @returns {Record<string, string>} Merged environment.
 */
function buildChildEnv(configEnv) {
	const inherited = Object.fromEntries(
		[...SAFE_CHILD_ENV_KEYS]
			.filter((name) => process.env[name] !== undefined)
			.map((name) => [name, process.env[name]]),
	);
	const overrides = resolveEnv(configEnv ?? {});
	return { ...inherited, ...overrides };
}
/** Resolve ${VAR} references in env values against process.env. */
function resolveEnv(env) {
const resolved = {};
@ -210,9 +236,7 @@ async function getOrConnect(name, signal) {
transport = new StdioClientTransport({
command: config.command,
args: config.args,
env: config.env
? { ...process.env, ...resolveEnv(config.env) }
: undefined,
env: buildChildEnv(config.env),
cwd: config.cwd,
stderr: "pipe",
});
@ -234,23 +258,27 @@ async function getOrConnect(name, signal) {
`Server "${config.name}" has unsupported transport: ${config.transport}`,
);
}
await client.connect(transport, { signal, timeout: 30000 });
try {
await client.connect(transport, { signal, timeout: 30000 });
} catch (err) {
try { await transport.close(); } catch { /* best-effort */ }
try { await client.close(); } catch { /* best-effort */ }
throw err;
}
connections.set(config.name, { client, transport });
return client;
}
async function closeAll() {
const closing = Array.from(connections.entries()).map(
async ([name, conn]) => {
try {
await conn.client.close();
} catch {
// Best-effort cleanup
}
try { await conn.transport.close(); } catch { /* best-effort */ }
try { await conn.client.close(); } catch { /* best-effort */ }
connections.delete(name);
},
);
await Promise.allSettled(closing);
toolCache.clear();
autoRegisteredServers.clear();
}
// ─── Formatters ───────────────────────────────────────────────────────────────
function formatServerList(servers) {
@ -312,31 +340,8 @@ export function getConnectionStatus(name) {
};
}
// ─── Test-exported helpers ────────────────────────────────────────────────────
const SAFE_CHILD_ENV_KEYS = new Set([
"PATH",
"HOME",
"USER",
"LOGNAME",
"SHELL",
"LANG",
"LC_ALL",
"LC_CTYPE",
"LC_MESSAGES",
"LC_NUMERIC",
"LC_TIME",
"TMPDIR",
"TMP",
"TEMP",
"TZ",
"TERM",
"COLORTERM",
]);
export function _buildMcpChildEnvForTest(env) {
const safe = {};
for (const key of SAFE_CHILD_ENV_KEYS) {
if (process.env[key] !== undefined) safe[key] = process.env[key];
}
return { ...safe, ...resolveEnv(env) };
return buildChildEnv(env);
}
export function _buildMcpTrustConfirmOptionsForTest(signal) {
return { timeout: 120_000, signal };

View file

@ -78,6 +78,7 @@ import {
buildSliceSummaryExcerpt,
getDependencyTaskSummaryPaths,
getPriorTaskSummaryPaths,
extractSliceExecutionExcerpt,
} from "./summary-helpers.js";
import { composeInlinedContext } from "./unit-context-composer.js";
import { getUatType } from "./verdict-parser.js";
@ -336,7 +337,7 @@ export function buildSourceFilePaths(base, mid, sid) {
* If parsing fails (unrecognizable frontmatter, missing id, etc.) the
* function falls back to `inlineFile` so the closer loses no information.
*/
// Re-exported from summary-helpers.js:
// Imported from summary-helpers.js:
// - buildSliceSummaryExcerpt, getPriorTaskSummaryPaths
// - getDependencyTaskSummaryPaths, isSummaryCleanForSkip
// - extractSliceExecutionExcerpt

View file

@ -32,6 +32,10 @@ import { UokGateRunner } from "./uok/gate-runner.js";
import { MultiPackageGate } from "./uok/multi-package-gate.js";
import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
import { SecurityGate } from "./uok/security-gate.js";
import {
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
} from "./uok/unit-runtime.js";
import { extractVerdict } from "./verdict-parser.js";
import { writeVerificationJSON } from "./verification-evidence.js";
import {
@ -42,6 +46,38 @@ import {
} from "./verification-gate.js";
import { logError, logWarning } from "./workflow-logger.js";
/**
 * Sum the `usage.totalTokens` values reported on assistant messages in the
 * current session.
 *
 * Only entries whose `type` is "message" and whose message `role` is
 * "assistant" contribute; messages without a `usage.totalTokens` value are
 * skipped.
 *
 * @param {object} ctx - Context whose optional `sessionManager.getEntries()`
 *   supplies the session entries.
 * @returns {number} Accumulated token total, or 0 when no entries are available.
 */
function computeTokenCountFromSession(ctx) {
  const sessionEntries = ctx.sessionManager?.getEntries?.() ?? [];
  let runningTotal = 0;
  for (const sessionEntry of sessionEntries) {
    const candidate =
      sessionEntry.type === "message" ? sessionEntry.message : undefined;
    if (candidate?.role !== "assistant") continue;
    const reportedTokens = candidate.usage?.totalTokens;
    if (reportedTokens == null) continue;
    runningTotal += reportedTokens;
  }
  return runningTotal;
}
/**
 * Report the current process's used heap size in whole mebibytes.
 *
 * @returns {number|undefined} `heapUsed` from `process.memoryUsage()` divided
 *   by 1024² and rounded, or `undefined` if reading memory usage throws.
 */
function getMemoryPressureMB() {
  const BYTES_PER_MB = 1024 * 1024;
  try {
    const { heapUsed } = process.memoryUsage();
    return Math.round(heapUsed / BYTES_PER_MB);
  } catch {
    // Best-effort metric: callers treat undefined as "not available".
    return undefined;
  }
}
/**
 * Pair each gate id with the outcome of the result at the same position.
 *
 * @param {string[]} gateIds - Gate identifiers, in run order.
 * @param {Array<{outcome?: string}|null|undefined>} gateResults - Results
 *   aligned index-for-index with `gateIds`.
 * @returns {Record<string, string>|undefined} Map of gate id to outcome
 *   ("unknown" when a result or its outcome is missing), or `undefined` when
 *   either argument is absent or there are no gate ids.
 */
function buildGateOutcomesSummary(gateIds, gateResults) {
  const hasGates =
    Boolean(gateIds) && Boolean(gateResults) && gateIds.length !== 0;
  if (!hasGates) return undefined;
  const summary = {};
  for (const [position, gateId] of gateIds.entries()) {
    const outcome = gateResults[position]?.outcome;
    summary[gateId] = outcome ?? "unknown";
  }
  return summary;
}
function isInfraVerificationFailure(stderr) {
return /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i.test(
stderr,
@ -259,6 +295,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
}
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
let gateIds = [];
let gateResults = [];
try {
if (uokFlags.gates) {
const gateRunner = new UokGateRunner();
@ -304,8 +342,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
};
const gateIds = gateRunner.list().map((g) => g.id);
const gateResults = await Promise.all(
gateIds = gateRunner.list().map((g) => g.id);
gateResults = await Promise.all(
gateIds.map((id) =>
gateRunner
.run(id, {
@ -434,13 +472,39 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
}
// Write verification evidence JSON
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
const tokenCount = computeTokenCountFromSession(ctx);
const memoryPressureMB = getMemoryPressureMB();
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
let recoveryStatus;
try {
const durability = await inspectExecuteTaskDurability(
s.basePath,
s.currentUnit.id,
);
if (durability) {
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
}
} catch {
recoveryStatus = undefined;
}
if (mid && sid && tid) {
try {
const sDir = resolveSlicePath(s.basePath, mid, sid);
if (sDir) {
const tasksDir = join(sDir, "tasks");
if (result.passed) {
writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
writeVerificationJSON(
result,
tasksDir,
tid,
s.currentUnit.id,
undefined,
undefined,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
} else {
const nextAttempt = attempt + 1;
writeVerificationJSON(
@ -450,6 +514,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
s.currentUnit.id,
nextAttempt,
maxRetries,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
}
}
@ -617,6 +685,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
postExecChecks,
postExecBlockingFailure ? attempt + 1 : undefined,
postExecBlockingFailure ? maxRetries : undefined,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
}
} catch (evidenceErr) {
@ -703,6 +775,10 @@ function writeVerificationJSONWithPostExec(
postExecutionChecks,
retryAttempt,
maxRetries,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
) {
mkdirSync(tasksDir, { recursive: true });
const evidence = {
@ -720,6 +796,10 @@ function writeVerificationJSONWithPostExec(
})),
...(retryAttempt !== undefined ? { retryAttempt } : {}),
...(maxRetries !== undefined ? { maxRetries } : {}),
...(tokenCount !== undefined ? { tokenCount } : {}),
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
postExecutionChecks,
};
if (result.runtimeErrors && result.runtimeErrors.length > 0) {

View file

@ -211,6 +211,33 @@ export {
// Tests in auto-session-encapsulation.test.ts enforce this invariant.
// ─────────────────────────────────────────────────────────────────────────────
const s = getAutoSession();
/** Unsubscribe function for the Ctrl+C → pause intercept registered on autonomous start. */
let _ctrlCUnsubscribe = null;
/**
 * Install a terminal-input listener that turns Ctrl+C into a pause request
 * while the auto session is active.
 *
 * Any previously installed listener is removed first, so at most one
 * interceptor exists at a time. When `ctx.ui.onTerminalInput` is not a
 * function the call is a no-op beyond that removal.
 *
 * Purpose: let a single Ctrl+C press stop a running autonomous loop via
 * pauseAuto() instead of falling through to the default key handling.
 *
 * @param {object} ctx - Command context; `ctx.ui.onTerminalInput` registers the
 *   listener and `ctx.ui.notify` reports the pause to the user.
 */
function registerCtrlCInterceptor(ctx) {
  _unregisterCtrlCInterceptor();
  if (typeof ctx?.ui?.onTerminalInput !== "function") return;

  const CTRL_C = "\x03";
  const interceptInput = (data) => {
    // Anything other than Ctrl+C during an active session passes through untouched.
    const shouldPause = data === CTRL_C && s.active;
    if (!shouldPause) return undefined;
    ctx.ui.notify("Ctrl+C received — pausing autonomous mode.", "info");
    // Fire-and-forget: the input callback must answer synchronously.
    void pauseAuto(ctx, null, "ctrl-c-interrupt");
    return { consume: true };
  };

  _ctrlCUnsubscribe = ctx.ui.onTerminalInput(interceptInput);
}
/**
 * Remove the Ctrl+C interceptor, if one is installed.
 *
 * The stored handle is cleared BEFORE the unsubscribe callback runs. If the
 * callback throws, the error still propagates to the caller, but the stale
 * handle is not retained — so later register/cleanup/stop calls do not
 * re-invoke a broken unsubscribe and fail again. Calling this with no
 * interceptor installed is a no-op.
 */
function _unregisterCtrlCInterceptor() {
  const unsubscribe = _ctrlCUnsubscribe;
  if (!unsubscribe) return;
  _ctrlCUnsubscribe = null;
  unsubscribe();
}
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
function captureProjectRootEnv(projectRoot) {
@ -704,6 +731,7 @@ function cleanupAfterLoopExit(ctx) {
s.currentUnit = null;
s.active = false;
s.runControl = "manual";
_unregisterCtrlCInterceptor();
deactivateSF();
clearUnitTimeout();
restoreProjectRootEnv();
@ -747,6 +775,7 @@ function cleanupAfterLoopExit(ctx) {
}
export async function stopAuto(ctx, pi, reason) {
if (!s.active && !s.paused) return;
_unregisterCtrlCInterceptor();
const loadedPreferences = loadEffectiveSFPreferences()?.preferences;
const reasonSuffix = reason ? `${reason}` : "";
try {
@ -1677,6 +1706,7 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
s.runControl = requestedStepMode ? "assisted" : "autonomous";
s.cmdCtx = ctx;
s.basePath = base;
registerCtrlCInterceptor(ctx);
// Ensure the workflow-logger audit log is pinned to the project root
// even when autonomous mode is entered via a path that bypasses the
// bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
@ -1943,6 +1973,7 @@ export async function dispatchHookUnit(
s.autoStartTime = Date.now();
s.currentUnit = null;
s.pendingQuickTasks = [];
registerCtrlCInterceptor(hookCtx);
}
const hookUnitType = `hook/${hookName}`;
const hookStartedAt = Date.now();

View file

@ -116,6 +116,10 @@ export async function handleAutonomousCommand(trimmed, ctx, pi) {
});
return true;
}
if (trimmed === "stop") {
await stopAutonomousRun(ctx, pi);
return true;
}
if (isAutonomousVerb) {
const autonomousArgsText = trimmed.replace(/^autonomous\b/, "").trim();
if (autonomousArgsText === "stop") {

View file

@ -4212,7 +4212,8 @@ function hasTaskSpecIntent(planning = {}) {
}
function insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning = {}) {
if (!hasTaskSpecIntent(planning)) return;
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
const { normalized: frontmatter, errors } = taskFrontmatterFromRecord(planning);
if (errors?.length) logWarning("sf-db:insertTaskSpec", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${errors.join(", ")}`);
currentDb
.prepare(`INSERT OR IGNORE INTO task_specs (
milestone_id, slice_id, task_id, verify, inputs, expected_output,
@ -4433,7 +4434,8 @@ export function setTaskBlockerDiscovered(
export function upsertTaskPlanning(milestoneId, sliceId, taskId, planning) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning);
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
const { normalized: frontmatter, errors: fmErrors } = taskFrontmatterFromRecord(planning);
if (fmErrors?.length) logWarning("sf-db:upsertTaskPlanning", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${fmErrors.join(", ")}`);
const hasTaskStatus =
planning.taskStatus !== undefined ||
planning.task_status !== undefined ||

View file

@ -195,3 +195,44 @@ export function isSummaryCleanForSkip(content) {
return false;
}
}
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} value - Literal text.
 * @returns {string} `value` with `. * + ? ^ $ { } ( ) | [ ] \` escaped.
 */
function escapeRegExpLocal(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (metachar) => `\\${metachar}`);
}
/**
 * Return the trimmed body of the `## <heading>` section of a Markdown document.
 *
 * The section runs from just after the heading line to the next line that
 * starts with `##` followed by whitespace (deeper `###` headings stay inside
 * the section), or to the end of the document.
 *
 * @param {string} content - Markdown text to search.
 * @param {string} heading - Heading text, matched literally.
 * @returns {string|null} Trimmed section body, or `null` when the heading is absent.
 */
function extractMarkdownSectionLocal(content, heading) {
  const headingPattern = new RegExp(
    `^## ${escapeRegExpLocal(heading)}\\s*$`,
    "m",
  );
  const headingMatch = headingPattern.exec(content);
  if (headingMatch === null) return null;

  const bodyStart = headingMatch.index + headingMatch[0].length;
  const afterHeading = content.slice(bodyStart);
  const nextHeadingAt = afterHeading.search(/^##\s+/m);
  const sectionBody =
    nextHeadingAt === -1 ? afterHeading : afterHeading.slice(0, nextHeadingAt);
  return sectionBody.trim();
}
/**
 * Build a compact "Slice Plan Excerpt" from a slice PLAN.md for task
 * execution prompts.
 *
 * The excerpt holds the first `**Goal:**` and `**Demo:**` lines plus the
 * "Verification" and "Observability / Diagnostics" sections, each included
 * only when present and non-empty. When `content` is empty or missing, a short
 * notice pointing the reader at `relPath` is returned instead.
 *
 * Purpose: hand task executors the slice-level contract without inlining the
 * whole plan.
 *
 * @param {string|null|undefined} content - Raw text of the slice plan.
 * @param {string} relPath - Path of the plan, shown to the reader.
 * @returns {string} Newline-joined Markdown excerpt.
 */
export function extractSliceExecutionExcerpt(content, relPath) {
  if (!content) {
    const notice = [
      "## Slice Plan Excerpt",
      `Slice plan not found at dispatch time. Read \`${relPath}\` before running slice-level verification.`,
    ];
    return notice.join("\n");
  }

  const planLines = content.split("\n");
  const firstLineTagged = (tag) =>
    planLines.find((planLine) => planLine.startsWith(tag))?.trim();
  const taggedLines = [
    firstLineTagged("**Goal:**"),
    firstLineTagged("**Demo:**"),
  ];

  // [excerpt sub-heading, section body] pairs, in output order.
  const sections = [
    [
      "### Slice Verification",
      extractMarkdownSectionLocal(content, "Verification"),
    ],
    [
      "### Slice Observability / Diagnostics",
      extractMarkdownSectionLocal(content, "Observability / Diagnostics"),
    ],
  ];

  const excerpt = ["## Slice Plan Excerpt", `Source: \`${relPath}\``];
  for (const taggedLine of taggedLines) {
    if (taggedLine) excerpt.push(taggedLine);
  }
  for (const [subHeading, body] of sections) {
    if (body) excerpt.push("", subHeading, body.trim());
  }
  return excerpt.join("\n");
}

View file

@ -13,6 +13,7 @@ import {
clearRunawayRecoveredRuntimeRecords,
clearUnitRuntimeRecord,
decideUnitRuntimeDispatch,
getRecoveryDiagnostics,
getUnitRuntimeState,
isTerminalUnitRuntimeStatus,
listUnitRuntimeRecords,
@ -377,3 +378,72 @@ test("listUnitRuntimeRecords_returns_empty_when_dir_missing", () => {
const records = listUnitRuntimeRecords(root);
assert.deepEqual(records, []);
});
// ─── getRecoveryDiagnostics ────────────────────────────────────────────────
test("getRecoveryDiagnostics_returns_null_for_missing_record", () => {
  // No runtime record is written for this unit id, so the lookup must yield null.
  const projectRoot = makeProject();
  assert.equal(
    getRecoveryDiagnostics(projectRoot, "execute-task", "MISSING"),
    null,
  );
});
test("getRecoveryDiagnostics_returns_structured_object_for_record_with_recovery", () => {
  const projectRoot = makeProject();
  const startedAt = Date.now();
  const unitType = "execute-task";
  const unitId = "M001/S01/T01";

  // Persist a failed record carrying retry, progress, and lineage details.
  writeUnitRuntimeRecord(projectRoot, unitType, unitId, startedAt, {
    status: "failed",
    recoveryAttempts: 2,
    retryCount: 2,
    maxRetries: 3,
    lastRecoveryReason: "timeout",
    progressCount: 5,
    lastProgressKind: "checkpoint",
    lineageEvent: {
      status: "started",
      workerSessionId: "worker-1",
    },
  });

  const diagnostics = getRecoveryDiagnostics(projectRoot, unitType, unitId);
  assert.ok(diagnostics);

  // Scalar fields, checked in declaration order.
  const expectedFields = [
    ["unitType", "execute-task"],
    ["unitId", "M001/S01/T01"],
    ["status", "failed"],
    ["retryCount", 2],
    ["maxRetries", 3],
    ["lastRecoveryReason", "timeout"],
    ["progressCount", 5],
    ["lastProgressKind", "checkpoint"],
    ["recoveryAttempts", 2],
  ];
  for (const [field, expected] of expectedFields) {
    assert.equal(diagnostics[field], expected);
  }

  // A single lineage event from a single worker is summarized.
  assert.ok(diagnostics.lineageSummary);
  const expectedLineage = [
    ["status", "started"],
    ["workerCount", 1],
    ["eventCount", 1],
  ];
  for (const [field, expected] of expectedLineage) {
    assert.equal(diagnostics.lineageSummary[field], expected);
  }

  assert.equal(diagnostics.startedAt, startedAt);
  assert.ok(diagnostics.updatedAt);
});
test("getRecoveryDiagnostics_returns_minimal_object_for_record_without_recovery", () => {
  const projectRoot = makeProject();
  const unitType = "execute-task";
  const unitId = "M001/S01/T02";

  // Only a status is supplied; every other diagnostic must fall back to its default.
  writeUnitRuntimeRecord(projectRoot, unitType, unitId, Date.now(), {
    status: "running",
  });

  const diagnostics = getRecoveryDiagnostics(projectRoot, unitType, unitId);
  assert.ok(diagnostics);

  const expectedDefaults = [
    ["status", "running"],
    ["retryCount", 0],
    ["maxRetries", 1],
    ["lastRecoveryReason", null],
    ["progressCount", 0],
    ["lastProgressKind", "dispatch"],
    ["recoveryAttempts", 0],
    ["lineageSummary", null],
  ];
  for (const [field, expected] of expectedDefaults) {
    assert.equal(diagnostics[field], expected);
  }
});

View file

@ -0,0 +1,32 @@
/**
* Type declarations for unit-runtime.js
*/
/**
 * Structured recovery diagnostics for one unit runtime record — the object
 * shape returned by `getRecoveryDiagnostics`.
 */
export interface RecoveryDiagnostics {
/** Unit type the record was looked up under (echoed from the call). */
unitType: string;
/** Unit id the record was looked up under (echoed from the call). */
unitId: string;
/** Status taken from the record's derived runtime state. */
status: string;
/** Retry count taken from the record's derived runtime state. */
retryCount: number;
/** Retry ceiling taken from the record's derived runtime state. */
maxRetries: number;
/** Reason recorded for the most recent recovery, or null when none is stored. */
lastRecoveryReason: string | null;
/** Progress count stored on the record; 0 when the record has none. */
progressCount: number;
/** Kind of the last recorded progress; "dispatch" when the record has none. */
lastProgressKind: string;
/** Recovery attempts stored on the record; 0 when the record has none. */
recoveryAttempts: number;
/** Condensed lineage info, or null when the record carries no lineage. */
lineageSummary: {
/** Lineage status as stored on the record. */
status: string;
/** Number of worker session ids in the lineage (0 when absent). */
workerCount: number;
/** Number of lineage events (0 when absent). */
eventCount: number;
} | null;
/** `updatedAt` value stored on the record, or null when absent. */
updatedAt: number | null;
/** `startedAt` value stored on the record, or null when absent. */
startedAt: number | null;
}
/**
 * Read the runtime record for a unit and return its recovery diagnostics.
 *
 * @param basePath Project root that holds the runtime records.
 * @param unitType Unit type of the record (e.g. "execute-task").
 * @param unitId Unit id of the record.
 * @returns The diagnostics object, or null when no record exists for the unit.
 */
export function getRecoveryDiagnostics(
basePath: string,
unitType: string,
unitId: string,
): RecoveryDiagnostics | null;
/**
 * List unit runtime records under `basePath`.
 *
 * Each returned record carries a `unitId` and may carry `updatedAt`; all other
 * fields are untyped here.
 * NOTE(review): the accompanying test suggests an empty array is returned when
 * the records directory is missing — confirm against the implementation.
 */
export function listUnitRuntimeRecords(
basePath: string,
): Array<Record<string, unknown> & { updatedAt?: number; unitId: string }>;

View file

@ -582,6 +582,43 @@ export function formatExecuteTaskRecoveryStatus(status) {
? missing.join("; ")
: "all durable task artifacts present";
}
/**
 * Load a unit's runtime record and condense it into recovery diagnostics.
 *
 * Purpose: expose runtime-record state for post-mortem debugging of autonomous
 * failures, so nobody has to read `.sf/runtime/units/*.json` by hand.
 *
 * Consumer: `sf status recovery` CLI command and verification evidence enrichment.
 *
 * @param {string} basePath - Project root that holds the runtime records.
 * @param {string} unitType - Unit type of the record.
 * @param {string} unitId - Unit id of the record.
 * @returns {object|null} Diagnostics object, or `null` when no record exists.
 *   Status and retry figures come from `getUnitRuntimeState`; the remaining
 *   fields come from the record itself with defaults for missing values.
 */
export function getRecoveryDiagnostics(basePath, unitType, unitId) {
  const runtimeRecord = readUnitRuntimeRecord(basePath, unitType, unitId);
  if (!runtimeRecord) return null;

  const { status, retryCount, maxRetries } = getUnitRuntimeState(runtimeRecord);

  // Lineage is optional; summarize it only when the record carries one.
  const { lineage } = runtimeRecord;
  let lineageSummary = null;
  if (lineage) {
    lineageSummary = {
      status: lineage.status,
      workerCount: lineage.workerSessionIds?.length ?? 0,
      eventCount: lineage.events?.length ?? 0,
    };
  }

  return {
    unitType,
    unitId,
    status,
    retryCount,
    maxRetries,
    lastRecoveryReason: runtimeRecord.lastRecoveryReason ?? null,
    progressCount: runtimeRecord.progressCount ?? 0,
    lastProgressKind: runtimeRecord.lastProgressKind ?? "dispatch",
    recoveryAttempts: runtimeRecord.recoveryAttempts ?? 0,
    lineageSummary,
    updatedAt: runtimeRecord.updatedAt ?? null,
    startedAt: runtimeRecord.startedAt ?? null,
  };
}
// ─── Stale slice runtime record reconciliation ──────────────────────────────
/**
* Clear unit runtime records for complete-slice units that are in a terminal

View file

@ -24,6 +24,10 @@ export function writeVerificationJSON(
unitId,
retryAttempt,
maxRetries,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
) {
mkdirSync(tasksDir, { recursive: true });
const evidence = {
@ -41,6 +45,10 @@ export function writeVerificationJSON(
})),
...(retryAttempt !== undefined ? { retryAttempt } : {}),
...(maxRetries !== undefined ? { maxRetries } : {}),
...(tokenCount !== undefined ? { tokenCount } : {}),
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
};
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
evidence.runtimeErrors = result.runtimeErrors.map((e) => ({

53
todo.md Normal file
View file

@ -0,0 +1,53 @@
# TODO
Unimplemented items consolidated from the root `*.md` files. The source file is noted in italics after each item.
---
## Critical / Correctness
- [x] Port `fix(security): harden project-controlled surfaces` — env isolation + transport cleanup done; gsd-2 trust/dedup hunks (server.ts, mcp-client/index.ts) not applicable (packages absent) *(BUILD_PLAN.md Tier 0.5 #2)*
- [ ] Port agent-session/agent-end transition fixes (gsd-2 `71114fccf`, `6d7e4gcb5`, `c162c44bf`, `e3bd04551`) *(BUILD_PLAN.md Tier 0.5 #7-10, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster B)*
- [ ] Cloudflare Workers AI provider — `CLOUDFLARE_API_KEY`/`CLOUDFLARE_ACCOUNT_ID` (pi-mono PR #3851) *(BUILD_PLAN.md Tier 0 #8)*
---
## Architecture / Design Gaps
- [ ] Schema reconciliation: update SPEC.md to 3-table model (milestones/slices/tasks vs single `units`) *(BUILD_PLAN.md Tier 1.3)*
- [ ] Persistent agents v1 command surface — `/sf agent run|reset|delete|inspect` *(BUILD_PLAN.md Tier 2.1)*
- [ ] Intent chapters (`chapter_open`/`chapter_close` — crash-resume context) *(BUILD_PLAN.md Tier 2.3)*
- [ ] PhaseReview 3-pass review (establish-context → parallel chunked → synthesis) *(BUILD_PLAN.md Tier 2.4)*
- [ ] `last_error` cap to 4 KB head+tail; full payload to file *(BUILD_PLAN.md Tier 2.6)*
- [ ] Port workflow state machine hardening (gsd-2 `f2377eedd`, `b9a1c6743`, `153fb328a`, `381ccdef5`, `371b2eb31`) *(BUILD_PLAN.md Tier 0.5 #13, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster F)*
- [ ] Port `fix(claude-code-cli): persist Always Allow for non-Bash tools` (gsd-2 `a88baeae9`) *(BUILD_PLAN.md Tier 0.5 #11)*
---
## Medium Priority / Quality
- [ ] Replace `isHeavyModelId()` name-matching heuristic with capability-based check *(PRODUCTION_AUDIT_GRADE.md #9, PRODUCTION_AUDIT.md 3.3)*
- [ ] Add `version` field to task frontmatter and mode state (schema versioning) *(PRODUCTION_AUDIT_GRADE.md #8)*
- [ ] Integration tests for full remote steering pipeline *(PRODUCTION_AUDIT.md Long Term #10)*
- [x] Log `frontmatterErrors` in sf-db.js instead of silently dropping validation errors *(PRODUCTION_AUDIT.md 3.1)*
- [ ] Search provider registry refactor — consolidate provider list across files into `SearchProviderRegistry` *(BUILD_PLAN.md Tier 1+)*
- [ ] Update ARCHITECTURE.md self-evolution section (triage pipeline IS active; injection IS automatic now) *(ARCHITECTURE.md)*
- [ ] Add Mermaid state machine diagram to ARCHITECTURE.md *(ARCHITECTURE.md)*
- [ ] Symlinked packages/resources/skills/sessions dedup (pi-mono PR #3818) *(BUILD_PLAN.md Tier 0 #6)*
---
## Long-term / Deferred
- [ ] Singularity Knowledge + Agent Platform (Go re-platform, ~12 weeks) *(BUILD_PLAN.md Tier 1+)*
- [ ] sf-worker SSH host (Go, `wish` + `xpty`, ~3 weeks) *(BUILD_PLAN.md Tier 4)*
- [ ] Charm TUI client (`sf-tui` in Go, ~12-16 weeks) *(BUILD_PLAN.md Tier 1+)*
- [ ] Flight recorder (`x/vcr`, ~3 weeks) *(BUILD_PLAN.md Tier 1+)*
- [ ] Full swarm chat for `subagent` tool (Option C, depends on persistent-agent layer) *(BUILD_PLAN.md Tier 1+)*
- [ ] Caveman input-side prompt compression (rewrite execute-task/plan-slice prompts) *(BUILD_PLAN.md Tier 1+)*
- [ ] Runtime input preprocessor (`terse_prompts: true` dispatch transform, ~3-4 days) *(BUILD_PLAN.md Tier 1+)*
- [ ] Judge calibration + eval runner service (Go/Charm, ~2-3 weeks post SM) *(BUILD_PLAN.md Tier 1+)*
- [ ] M009 promote-only adoption review — create `sf schedule` entry (2 weeks after M009 close) *(BACKLOG.md)*
- [ ] Establish pi-mono SDK sync cadence (recurring check schedule) *(BUILD_PLAN.md Tier 1+)*
- [ ] `scripts/port-from-gsd2.sh` automation script *(UPSTREAM_PORT_GUIDE.md)*
- [ ] TypeScript migration for UOK modules (`kernel.js`, etc.) *(PRODUCTION_AUDIT_COMPLETE.md, PRODUCTION_AUDIT_GRADE.md)*